feat: refactor GUI to multi-page Streamlit app with 9 tool pages

Convert single-page deduplicator into a multi-page suite. Home page shows tool card grid. Deduplicator extracted to its own page (fully working). 8 stub pages added for Text Cleaner, Format Standardizer, Missing Values, Column Mapper, Outlier Detector, Multi-File Merger, Validator & Reporter, and Pipeline Runner — each with functional file upload and coming-soon UI. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-29 01:16:12 +00:00
parent 9ec371a85f
commit f2fdc10af7
10 changed files with 1175 additions and 330 deletions
--- a/src/gui/pages/9_Pipeline_Runner.py
+++ b/src/gui/pages/9_Pipeline_Runner.py
@@ -0,0 +1,95 @@
+"""DataTools Pipeline Runner — stub page."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import streamlit as st
+
+_project_root = Path(__file__).resolve().parent.parent.parent.parent  # four directories above this file
+if str(_project_root) not in sys.path:  # skip if already present, so no duplicate entry is added
+    sys.path.insert(0, str(_project_root))  # inserted at the front, so it is searched first
+
+# ---------------------------------------------------------------------------
+# Header
+# ---------------------------------------------------------------------------
+
+st.title("⚙️ Pipeline Runner")
+st.caption("Chain tools in sequence and pass output between steps automatically.")
+
+st.info("This tool is under development.")
+
+# ---------------------------------------------------------------------------
+# What this tool will do
+# ---------------------------------------------------------------------------
+
+st.markdown("""
+**Features:**
+- Select tools to run in sequence
+- Recommended order: Text Cleaner → Format Standardizer → Missing Values → Deduplicator → Validator
+- Each step's output feeds into the next step's input
+- Per-step configuration overrides
+- Progress tracking across all steps
+- Final combined report
+""")
+
+st.divider()
+
+# ---------------------------------------------------------------------------
+# File upload (functional)
+# ---------------------------------------------------------------------------
+
+uploaded = st.file_uploader(
+    "Upload CSV or Excel file",
+    type=["csv", "tsv", "xlsx", "xls"],
+    help="Upload a file to preview. Processing is not yet available.",
+    key="pipeline_file_upload",
+)
+# Preview only: Excel goes through read_excel, delimited text through read_csv (tab-separated for .tsv).
+if uploaded is not None:
+    import pandas as pd  # imported lazily: only needed once a file has been uploaded
+    try:
+        if uploaded.name.lower().endswith((".xlsx", ".xls")):  # case-insensitive, so "DATA.XLSX" matches
+            df = pd.read_excel(uploaded)
+        else:  # .tsv must be split on tabs; read_csv's default comma would yield a single column
+            df = pd.read_csv(uploaded, sep="\t" if uploaded.name.lower().endswith(".tsv") else ",")
+        st.subheader(f"Preview: {uploaded.name}")
+        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
+        st.dataframe(df.head(10), use_container_width=True)
+    except Exception as e:  # page-level boundary: show any read/parse failure in the UI instead of crashing
+        st.error(f"Failed to read file: {e}")
+
+# ---------------------------------------------------------------------------
+# Pipeline steps (checklist)
+# ---------------------------------------------------------------------------
+
+st.subheader("Pipeline Steps")
+st.caption("Select tools to include in the pipeline (recommended order):")
+
+st.checkbox("1. Text Cleaner", value=True, disabled=True)  # every control in this section is disabled=True
+st.checkbox("2. Format Standardizer", value=True, disabled=True)
+st.checkbox("3. Missing Value Handler", value=True, disabled=True)
+st.checkbox("4. Column Mapper", value=False, disabled=True)
+st.checkbox("5. Outlier Detector", value=False, disabled=True)
+st.checkbox("6. Deduplicator", value=True, disabled=True)
+st.checkbox("7. Multi-File Merger", value=False, disabled=True)
+st.checkbox("8. Validator & Reporter", value=True, disabled=True)  # widget return values are not captured
+
+st.subheader("Pipeline Configuration")
+
+st.selectbox("On error", ["Stop pipeline", "Skip step and continue", "Prompt for decision"], disabled=True)
+st.checkbox("Generate combined report at end", value=True, disabled=True)
+
+st.divider()
+st.button("Run Pipeline", type="primary", use_container_width=True, disabled=True)  # result ignored: no action is wired up
+
+# ---------------------------------------------------------------------------
+# Footer
+# ---------------------------------------------------------------------------
+
+st.divider()
+st.caption(
+    "Runs locally. Your data never leaves this computer. "
+    "| DataTools v3.0"
+)