"""DataTools โ€” Data Cleaning Mastery Suite. Launch: streamlit run src/gui/app.py """ from __future__ import annotations import sys from pathlib import Path import streamlit as st # Ensure project root is on sys.path so `src.core` imports work _project_root = Path(__file__).resolve().parent.parent.parent if str(_project_root) not in sys.path: sys.path.insert(0, str(_project_root)) # --------------------------------------------------------------------------- # Page config # --------------------------------------------------------------------------- from src.gui.components import hide_streamlit_chrome st.set_page_config( page_title="DataTools โ€” Data Cleaning Mastery", page_icon="๐Ÿงน", layout="wide", ) hide_streamlit_chrome() # --------------------------------------------------------------------------- # Home page # --------------------------------------------------------------------------- st.title("๐Ÿงน DataTools โ€” Data Cleaning Mastery") st.caption("A 9-tool suite for cleaning, standardizing, and validating tabular data. Runs 100% locally.") st.divider() # --------------------------------------------------------------------------- # Tool cards # --------------------------------------------------------------------------- TOOLS = [ { "icon": "๐Ÿ”", "name": "Deduplicator", "description": "Fuzzy matching, normalization, survivor selection, and interactive review.", "status": "Ready", "page": "1_Deduplicator", }, { "icon": "โœ‚๏ธ", "name": "Text Cleaner", "description": "Whitespace trim, multi-space collapse, Unicode normalization, BOM and line-ending handling.", "status": "Coming Soon", "page": "2_Text_Cleaner", }, { "icon": "๐Ÿ“", "name": "Format Standardizer", "description": "Standardize dates, currencies, names, phone numbers, and addresses.", "status": "Coming Soon", "page": "3_Format_Standardizer", }, { "icon": "๐Ÿ•ณ๏ธ", "name": "Missing Value Handler", "description": "Detect disguised nulls, missingness analysis, and imputation strategies.", "status": "Coming Soon", "page": "4_Missing_Values", }, { "icon": "๐Ÿ—‚๏ธ", "name": "Column Mapper", "description": "Rename columns, enforce a target schema, and coerce types.", "status": "Coming Soon", "page": "5_Column_Mapper", }, { "icon": "๐Ÿ“Š", "name": "Outlier Detector", "description": "Z-score, IQR, and MAD detection with domain-rule violations and winsorization.", "status": "Coming Soon", "page": "6_Outlier_Detector", }, { "icon": "๐Ÿ“Ž", "name": "Multi-File Merger", "description": "Combine multiple CSV/Excel files with schema alignment.", "status": "Coming Soon", "page": "7_Multi_File_Merger", }, { "icon": "โœ…", "name": "Validator & Reporter", "description": "Validate against rules and generate PDF/Excel quality reports.", "status": "Coming Soon", "page": "8_Validator_Reporter", }, { "icon": "โš™๏ธ", "name": "Pipeline Runner", "description": "Chain tools in recommended order and pass output between steps.", "status": "Coming Soon", "page": "9_Pipeline_Runner", }, ] # Render tool cards in a 3-column grid for row_start in range(0, len(TOOLS), 3): cols = st.columns(3) for i, col in enumerate(cols): idx = row_start + i if idx >= len(TOOLS): break tool = TOOLS[idx] with col: status_color = "green" if tool["status"] == "Ready" else "orange" st.markdown( f"### {tool['icon']} {tool['name']}\n\n" f"{tool['description']}\n\n" f":{status_color}[**{tool['status']}**]" ) # --------------------------------------------------------------------------- # Footer # --------------------------------------------------------------------------- st.divider() st.caption( "Runs locally. Your data never leaves this computer. " "| DataTools v3.0" )