feat: refactor GUI to multi-page Streamlit app with 9 tool pages

Convert single-page deduplicator into a multi-page suite. Home page shows tool card grid. Deduplicator extracted to its own page (fully working). 8 stub pages added for Text Cleaner, Format Standardizer, Missing Values, Column Mapper, Outlier Detector, Multi-File Merger, Validator & Reporter, and Pipeline Runner — each with functional file upload and coming-soon UI. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-29 01:16:12 +00:00
parent 9ec371a85f
commit f2fdc10af7
10 changed files with 1175 additions and 330 deletions
--- a/src/gui/pages/4_Missing_Values.py
+++ b/src/gui/pages/4_Missing_Values.py
@@ -0,0 +1,102 @@
+"""DataTools Missing Value Handler — stub page."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import streamlit as st
+
+# Make the repository root importable. This file lives at
+# src/gui/pages/, so the root is four directory levels above it.
+_project_root = Path(__file__).resolve().parents[3]
+_root_entry = str(_project_root)
+if _root_entry not in sys.path:
+    sys.path.insert(0, _root_entry)
+
+# ---------------------------------------------------------------------------
+# Header
+# ---------------------------------------------------------------------------
+
+st.title("🕳️ Missing Value Handler")
+st.caption("Detect, analyze, and handle missing values in your data.")
+
+# Stub notice: the controls further down the page are rendered disabled.
+st.info("This tool is under development.")
+
+# ---------------------------------------------------------------------------
+# Planned feature list (descriptive text only)
+# ---------------------------------------------------------------------------
+
+# Bound to a name via assignment so the text is rendered exactly once, by the
+# explicit st.markdown call below.
+_planned_features = """
+**Features:**
+- Detect disguised nulls (empty strings, "N/A", "n/a", "-", "NULL", "None", etc.)
+- Missingness analysis: per-column counts, percentages, and patterns
+- Visualize missing data heatmap
+- Imputation strategies: drop rows/columns, fill with mean/median/mode, forward-fill, backward-fill
+- Custom sentinel value replacement
+- Before/after comparison
+"""
+st.markdown(_planned_features)
+
+st.divider()
+
+# ---------------------------------------------------------------------------
+# File upload (functional)
+#
+# Preview only: the uploaded file is parsed and its first rows are shown; no
+# cleaning is applied. The reader is chosen from the file extension, compared
+# case-insensitively so a name such as "DATA.XLSX" reaches the Excel reader.
+# ---------------------------------------------------------------------------
+
+uploaded = st.file_uploader(
+    "Upload CSV or Excel file",
+    type=["csv", "tsv", "xlsx", "xls"],
+    help="Upload a file to preview. Processing is not yet available.",
+    key="missing_file_upload",
+)
+
+if uploaded is not None:
+    import pandas as pd  # Imported lazily: only needed once a file is provided.
+    try:
+        filename = uploaded.name.lower()
+        if filename.endswith((".xlsx", ".xls")):
+            df = pd.read_excel(uploaded)
+        elif filename.endswith(".tsv"):
+            # The uploader accepts .tsv, which is tab-delimited; the default
+            # comma separator would collapse every row into a single column.
+            df = pd.read_csv(uploaded, sep="\t")
+        else:
+            df = pd.read_csv(uploaded)
+        st.subheader(f"Preview: {uploaded.name}")
+        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
+        st.dataframe(df.head(10), use_container_width=True)
+    except Exception as e:
+        # Page-level boundary: surface any read/parse failure to the user
+        # rather than letting the exception abort the page render.
+        st.error(f"Failed to read file: {e}")
+
+# ---------------------------------------------------------------------------
+# Placeholder options
+# ---------------------------------------------------------------------------
+
+# Every control in this section is rendered with disabled=True; they only
+# illustrate the planned configuration surface.
+st.subheader("Detection Settings")
+
+st.text_input(
+    label="Null patterns (comma-separated)",
+    value="N/A, n/a, NA, -, NULL, None, empty, .",
+    help="Values to treat as missing.",
+    disabled=True,
+)
+
+st.subheader("Handling Strategy")
+
+# Choices offered by the (disabled) strategy selector, in display order.
+_strategy_choices = [
+    "Drop rows with any missing",
+    "Drop rows above threshold",
+    "Fill with mean (numeric)",
+    "Fill with median (numeric)",
+    "Fill with mode (categorical)",
+    "Forward-fill",
+    "Backward-fill",
+    "Custom value",
+]
+st.selectbox("Strategy", _strategy_choices, disabled=True)
+
+st.slider(
+    "Drop threshold (%)",
+    min_value=0,
+    max_value=100,
+    value=50,
+    disabled=True,
+    help="Drop rows missing more than this % of columns.",
+)
+
+st.divider()
+st.button(
+    "Handle Missing Values",
+    type="primary",
+    use_container_width=True,
+    disabled=True,
+)
+
+# ---------------------------------------------------------------------------
+# Footer
+# ---------------------------------------------------------------------------
+
+st.divider()
+# Single-line privacy and version notice shown at the bottom of the page.
+_footer_text = "Runs locally. Your data never leaves this computer. | DataTools v3.0"
+st.caption(_footer_text)