feat: refactor GUI to multi-page Streamlit app with 9 tool pages

Convert the single-page deduplicator into a multi-page suite. The home page shows a grid of tool cards. The Deduplicator is extracted to its own page (fully working). Eight stub pages are added for Text Cleaner, Format Standardizer, Missing Values, Column Mapper, Outlier Detector, Multi-File Merger, Validator & Reporter, and Pipeline Runner — each with a functional file upload and a coming-soon UI.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-29 01:16:12 +00:00
parent 9ec371a85f
commit f2fdc10af7
10 changed files with 1175 additions and 330 deletions
--- a/src/gui/app.py
+++ b/src/gui/app.py
@@ -1,4 +1,4 @@
-"""DataTools Deduplicator — Streamlit GUI.
+"""DataTools — Data Cleaning Mastery Suite.
 Launch:
    streamlit run src/gui/app.py
@@ -6,11 +6,9 @@ Launch:
 from __future__ import annotations
 import io
 import sys
 from pathlib import Path
 import pandas as pd
 import streamlit as st
 # Ensure project root is on sys.path so `src.core` imports work
@@ -18,24 +16,14 @@ _project_root = Path(__file__).resolve().parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 from src.core.dedup import deduplicate, build_default_strategies, DeduplicationResult
 from src.core.io import read_file, list_sheets, detect_encoding, detect_delimiter
 from src.core.config import DeduplicationConfig
 from src.gui.components import (
    apply_review_decisions,
    config_panel,
    match_group_card,
    results_summary,
 )
 # ---------------------------------------------------------------------------
 # Page config
 # ---------------------------------------------------------------------------
 st.set_page_config(
-    page_title="DataTools Deduplicator",
+    page_title="DataTools — Data Cleaning Mastery",
-    page_icon="🔍",
+    page_icon="🧹",
    layout="wide",
 )
@@ -45,331 +33,101 @@ st.markdown(
    unsafe_allow_html=True,
 )
 # ---------------------------------------------------------------------------
 # Session state defaults
 # ---------------------------------------------------------------------------
 _DEFAULTS = {
    "df": None,
    "result": None,
    "review_decisions": {},
    "config": None,
    "file_name": "",
    "sheet_names": [],
    "detected_delimiter": ",",
 }
 for key, default in _DEFAULTS.items():
    if key not in st.session_state:
        st.session_state[key] = default
 # ---------------------------------------------------------------------------
-# Header
+# Home page
 # ---------------------------------------------------------------------------
-st.title("DataTools Deduplicator")
+st.title("🧹 DataTools — Data Cleaning Mastery")
-st.caption("Find and remove duplicate rows in CSV, delimited text, and Excel files.")
+st.caption("A 9-tool suite for cleaning, standardizing, and validating tabular data. Runs 100% locally.")
 st.divider()
 # ---------------------------------------------------------------------------
-# File upload
+# Tool cards
 # ---------------------------------------------------------------------------
-uploaded = st.file_uploader(
+TOOLS = [
-    "Upload CSV or Excel file",
+    {
-    type=["csv", "tsv", "xlsx", "xls"],
+        "icon": "🔍",
-    help="Supports CSV, TSV, and Excel files. Encoding and delimiters are auto-detected.",
+        "name": "Deduplicator",
-)
+        "description": "Fuzzy matching, normalization, survivor selection, and interactive review.",
        "status": "Ready",
        "page": "1_Deduplicator",
    },
    {
        "icon": "✂️",
        "name": "Text Cleaner",
        "description": "Whitespace trim, multi-space collapse, Unicode normalization, BOM and line-ending handling.",
        "status": "Coming Soon",
        "page": "2_Text_Cleaner",
    },
    {
        "icon": "📐",
        "name": "Format Standardizer",
        "description": "Standardize dates, currencies, names, phone numbers, and addresses.",
        "status": "Coming Soon",
        "page": "3_Format_Standardizer",
    },
    {
        "icon": "🕳️",
        "name": "Missing Value Handler",
        "description": "Detect disguised nulls, missingness analysis, and imputation strategies.",
        "status": "Coming Soon",
        "page": "4_Missing_Values",
    },
    {
        "icon": "🗂️",
        "name": "Column Mapper",
        "description": "Rename columns, enforce a target schema, and coerce types.",
        "status": "Coming Soon",
        "page": "5_Column_Mapper",
    },
    {
        "icon": "📊",
        "name": "Outlier Detector",
        "description": "Z-score, IQR, and MAD detection with domain-rule violations and winsorization.",
        "status": "Coming Soon",
        "page": "6_Outlier_Detector",
    },
    {
        "icon": "📎",
        "name": "Multi-File Merger",
        "description": "Combine multiple CSV/Excel files with schema alignment.",
        "status": "Coming Soon",
        "page": "7_Multi_File_Merger",
    },
    {
        "icon": "✅",
        "name": "Validator & Reporter",
        "description": "Validate against rules and generate PDF/Excel quality reports.",
        "status": "Coming Soon",
        "page": "8_Validator_Reporter",
    },
    {
        "icon": "⚙️",
        "name": "Pipeline Runner",
        "description": "Chain tools in recommended order and pass output between steps.",
        "status": "Coming Soon",
        "page": "9_Pipeline_Runner",
    },
 ]
-if uploaded is not None:
+# Render tool cards in a 3-column grid
-    # Detect if file changed
+for row_start in range(0, len(TOOLS), 3):
-    if uploaded.name != st.session_state["file_name"]:
+    cols = st.columns(3)
-        st.session_state["file_name"] = uploaded.name
+    for i, col in enumerate(cols):
-        st.session_state["result"] = None
+        idx = row_start + i
-        st.session_state["review_decisions"] = {}
+        if idx >= len(TOOLS):
-
+            break
-        # Read the file
+        tool = TOOLS[idx]
-        try:
+        with col:
-            # Write to a temp file for read_file() which needs a path
+            status_color = "green" if tool["status"] == "Ready" else "orange"
-            import tempfile
+            st.markdown(
-            suffix = Path(uploaded.name).suffix
+                f"### {tool['icon']} {tool['name']}\n\n"
-            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                f"{tool['description']}\n\n"
-                tmp.write(uploaded.getvalue())
+                f":{status_color}[**{tool['status']}**]"
                tmp_path = Path(tmp.name)
            # Check for Excel sheets / detect delimiter
            if suffix.lower() in (".xlsx", ".xls"):
                st.session_state["sheet_names"] = list_sheets(tmp_path)
                st.session_state["detected_delimiter"] = ","
            else:
                st.session_state["sheet_names"] = []
                enc = detect_encoding(tmp_path)
                st.session_state["detected_delimiter"] = detect_delimiter(tmp_path, enc)
            df = read_file(tmp_path)
            if not isinstance(df, pd.DataFrame):
                df = pd.concat(list(df), ignore_index=True)
            st.session_state["df"] = df
            # Clean up temp file
            tmp_path.unlink(missing_ok=True)
        except Exception as e:
            st.error(f"Failed to read file: {e}")
            st.session_state["df"] = None
    df = st.session_state["df"]
    if df is not None:
        # Sheet selector for Excel files
        if st.session_state["sheet_names"] and len(st.session_state["sheet_names"]) > 1:
            sheet = st.selectbox(
                "Select sheet",
                st.session_state["sheet_names"],
            )
            if sheet != st.session_state.get("_current_sheet"):
                st.session_state["_current_sheet"] = sheet
                suffix = Path(uploaded.name).suffix
                import tempfile
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                    tmp.write(uploaded.getvalue())
                    tmp_path = Path(tmp.name)
                df = read_file(tmp_path, sheet_name=sheet)
                if not isinstance(df, pd.DataFrame):
                    df = pd.concat(list(df), ignore_index=True)
                st.session_state["df"] = df
                st.session_state["result"] = None
                st.session_state["review_decisions"] = {}
                tmp_path.unlink(missing_ok=True)
        # Delimiter selector for CSV/TSV files
        is_csv = Path(uploaded.name).suffix.lower() not in (".xlsx", ".xls")
        if is_csv:
            _DELIMITERS = {
                "Comma (,)": ",",
                "Tab (\\t)": "\t",
                "Semicolon (;)": ";",
                "Pipe (|)": "|",
                "Other": None,
            }
            _DELIM_LABELS = list(_DELIMITERS.keys())
            _DELIM_VALUES = list(_DELIMITERS.values())
            detected = st.session_state.get("detected_delimiter", ",")
            default_idx = _DELIM_VALUES.index(detected) if detected in _DELIM_VALUES else 0
            chosen_label = st.selectbox(
                "Delimiter",
                _DELIM_LABELS,
                index=default_idx,
                help="Auto-detected on upload. Change if the preview looks wrong.",
            )
            if chosen_label == "Other":
                custom_delim = st.text_input(
                    "Enter delimiter character",
                    max_chars=5,
                    help="Enter the character(s) used to separate fields.",
                )
                chosen_delim = custom_delim if custom_delim else ","
            else:
                chosen_delim = _DELIMITERS[chosen_label]
            if chosen_delim != st.session_state.get("_current_delimiter"):
                st.session_state["_current_delimiter"] = chosen_delim
                import tempfile
                suffix = Path(uploaded.name).suffix
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                    tmp.write(uploaded.getvalue())
                    tmp_path = Path(tmp.name)
                df = read_file(tmp_path, delimiter=chosen_delim)
                if not isinstance(df, pd.DataFrame):
                    df = pd.concat(list(df), ignore_index=True)
                st.session_state["df"] = df
                st.session_state["result"] = None
                st.session_state["review_decisions"] = {}
                tmp_path.unlink(missing_ok=True)
        # Preview
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
        # Advanced options
        settings = config_panel(df)
        # Apply loaded config if present
        loaded_cfg = st.session_state.get("loaded_config")
        if loaded_cfg is not None:
            settings["strategies"] = loaded_cfg.to_strategies()
            settings["survivor_rule"] = loaded_cfg.to_survivor_rule()
            settings["date_column"] = loaded_cfg.date_column
            settings["merge"] = loaded_cfg.merge
            # Clear so it doesn't override on every rerun
            del st.session_state["loaded_config"]
        # ---------------------------------------------------------------------------
        # Find Duplicates button
        # ---------------------------------------------------------------------------
        st.divider()
        if st.button("Find Duplicates", type="primary", use_container_width=True):
            progress_bar = st.progress(0, text="Comparing rows...")
            def _gui_progress(current: int, total: int) -> None:
                if total > 0:
                    pct = min(current / total, 1.0)
                    progress_bar.progress(pct, text=f"Comparing rows... {current:,}/{total:,}")
            with st.spinner("Running deduplication..."):
                result = deduplicate(
                    df,
                    strategies=settings["strategies"],
                    survivor_rule=settings["survivor_rule"],
                    date_column=settings["date_column"],
                    merge=settings["merge"],
                    preview=False,
                    progress_callback=_gui_progress,
                )
            progress_bar.empty()
            st.session_state["result"] = result
            st.session_state["review_decisions"] = {}
        # ---------------------------------------------------------------------------
        # Results
        # ---------------------------------------------------------------------------
        result: DeduplicationResult | None = st.session_state["result"]
        if result is not None:
            st.divider()
            st.subheader("Results")
            # Summary + download buttons
            results_summary(result, df)
            # Match group review
            if result.match_groups:
                st.divider()
                st.subheader("Match Groups")
                # Batch actions
                def _accept_all():
                    for g in result.match_groups:
                        st.session_state["review_decisions"][g.group_id] = {
                            "keep_indices": [g.survivor_index],
                            "overrides": {},
                        }
                def _reject_all():
                    for g in result.match_groups:
                        st.session_state["review_decisions"][g.group_id] = {
                            "keep_indices": list(g.row_indices),
                            "overrides": {},
                        }
                def _clear_all():
                    st.session_state["review_decisions"] = {}
                    for k in list(st.session_state):
                        if k.startswith("editor_"):
                            del st.session_state[k]
                action_left, action_mid, action_right = st.columns(3)
                with action_left:
                    st.button("Accept All", on_click=_accept_all)
                with action_mid:
                    st.button("Reject All", on_click=_reject_all)
                with action_right:
                    st.button("Clear Decisions", on_click=_clear_all)
                # Individual group cards
                decisions = st.session_state["review_decisions"]
                for i, group in enumerate(result.match_groups):
                    match_group_card(group, df, group_num=i + 1)
                # Show decision summary
                if decisions:
                    st.divider()
                    merged = 0
                    customized = 0
                    split = 0
                    kept_all = 0
                    for v in decisions.values():
                        if not isinstance(v, dict):
                            continue
                        ki = v.get("keep_indices", [])
                        # Find the matching group size
                        gid_for_v = next(
                            (gid for gid, d in decisions.items() if d is v),
                            None,
                        )
                        group_size = next(
                            (len(g.row_indices) for g in result.match_groups
                             if g.group_id == gid_for_v),
                            0,
                        )
                        if len(ki) == group_size:
                            kept_all += 1
                        elif len(ki) == 1:
                            if v.get("overrides"):
                                customized += 1
                            else:
                                merged += 1
                        else:
                            split += 1
                    pending = len(result.match_groups) - len(decisions)
                    parts = []
                    if merged:
                        parts.append(f"{merged} merged")
                    if customized:
                        parts.append(f"{customized} customized")
                    if split:
                        parts.append(f"{split} split")
                    if kept_all:
                        parts.append(f"{kept_all} kept all")
                    parts.append(f"{pending} pending")
                    st.caption("Decisions: " + ", ".join(parts))
                    # Apply decisions and offer download
                    if st.button(
                        "Apply Review Decisions & Download",
                        type="primary",
                        use_container_width=True,
                    ):
                        reviewed_df, reviewed_removed = apply_review_decisions(
                            df, result.match_groups, decisions,
                        )
                        csv_bytes = reviewed_df.to_csv(
                            index=False
                        ).encode("utf-8-sig")
                        st.download_button(
                            "Download Reviewed & Deduplicated CSV",
                            data=csv_bytes,
                            file_name="deduplicated_reviewed.csv",
                            mime="text/csv",
                            key="reviewed_download",
                        )
                        if not reviewed_removed.empty:
                            removed_bytes = reviewed_removed.to_csv(
                                index=False
                            ).encode("utf-8-sig")
                            st.download_button(
                                "Download Reviewed Removed Rows",
                                data=removed_bytes,
                                file_name="removed_reviewed.csv",
                                mime="text/csv",
                                key="reviewed_removed_download",
                            )
            # Log entries
            if result.log_entries:
                with st.expander("Processing Log"):
                    st.code("\n".join(result.log_entries))
 else:
    # No file uploaded — show placeholder
    st.info("Upload a file to get started.")
 # ---------------------------------------------------------------------------
@@ -379,5 +137,5 @@ else:
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
-    "| DataTools Deduplicator v3.0"
+    "| DataTools v3.0"
 )
--- a/src/gui/pages/1_Deduplicator.py
+++ b/src/gui/pages/1_Deduplicator.py
@@ -0,0 +1,355 @@
 """DataTools Deduplicator — full working tool page."""
 from __future__ import annotations
 import sys
 import tempfile
 from pathlib import Path
 import pandas as pd
 import streamlit as st
 # Ensure project root is on sys.path so `src.core` imports work
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 from src.core.dedup import deduplicate, DeduplicationResult
 from src.core.io import read_file, list_sheets, detect_encoding, detect_delimiter
 from src.gui.components import (
    apply_review_decisions,
    config_panel,
    match_group_card,
    results_summary,
 )
 # ---------------------------------------------------------------------------
 # Session state defaults
 # ---------------------------------------------------------------------------
 _DEFAULTS = {
    "df": None,
    "result": None,
    "review_decisions": {},
    "config": None,
    "file_name": "",
    "sheet_names": [],
    "detected_delimiter": ",",
 }
 for key, default in _DEFAULTS.items():
    if key not in st.session_state:
        st.session_state[key] = default
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("🔍 Deduplicator")
 st.caption("Find and remove duplicate rows in CSV, delimited text, and Excel files.")
 # ---------------------------------------------------------------------------
 # File upload
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Supports CSV, TSV, and Excel files. Encoding and delimiters are auto-detected.",
    key="dedup_file_upload",
 )
 if uploaded is not None:
    # Detect if file changed
    if uploaded.name != st.session_state["file_name"]:
        st.session_state["file_name"] = uploaded.name
        st.session_state["result"] = None
        st.session_state["review_decisions"] = {}
        # Read the file
        try:
            suffix = Path(uploaded.name).suffix
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(uploaded.getvalue())
                tmp_path = Path(tmp.name)
            # Check for Excel sheets / detect delimiter
            if suffix.lower() in (".xlsx", ".xls"):
                st.session_state["sheet_names"] = list_sheets(tmp_path)
                st.session_state["detected_delimiter"] = ","
            else:
                st.session_state["sheet_names"] = []
                enc = detect_encoding(tmp_path)
                st.session_state["detected_delimiter"] = detect_delimiter(tmp_path, enc)
            df = read_file(tmp_path)
            if not isinstance(df, pd.DataFrame):
                df = pd.concat(list(df), ignore_index=True)
            st.session_state["df"] = df
            tmp_path.unlink(missing_ok=True)
        except Exception as e:
            st.error(f"Failed to read file: {e}")
            st.session_state["df"] = None
    df = st.session_state["df"]
    if df is not None:
        # Sheet selector for Excel files
        if st.session_state["sheet_names"] and len(st.session_state["sheet_names"]) > 1:
            sheet = st.selectbox(
                "Select sheet",
                st.session_state["sheet_names"],
            )
            if sheet != st.session_state.get("_current_sheet"):
                st.session_state["_current_sheet"] = sheet
                suffix = Path(uploaded.name).suffix
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                    tmp.write(uploaded.getvalue())
                    tmp_path = Path(tmp.name)
                df = read_file(tmp_path, sheet_name=sheet)
                if not isinstance(df, pd.DataFrame):
                    df = pd.concat(list(df), ignore_index=True)
                st.session_state["df"] = df
                st.session_state["result"] = None
                st.session_state["review_decisions"] = {}
                tmp_path.unlink(missing_ok=True)
        # Delimiter selector for CSV/TSV files
        is_csv = Path(uploaded.name).suffix.lower() not in (".xlsx", ".xls")
        if is_csv:
            _DELIMITERS = {
                "Comma (,)": ",",
                "Tab (\\t)": "\t",
                "Semicolon (;)": ";",
                "Pipe (|)": "|",
                "Other": None,
            }
            _DELIM_LABELS = list(_DELIMITERS.keys())
            _DELIM_VALUES = list(_DELIMITERS.values())
            detected = st.session_state.get("detected_delimiter", ",")
            default_idx = _DELIM_VALUES.index(detected) if detected in _DELIM_VALUES else 0
            chosen_label = st.selectbox(
                "Delimiter",
                _DELIM_LABELS,
                index=default_idx,
                help="Auto-detected on upload. Change if the preview looks wrong.",
            )
            if chosen_label == "Other":
                custom_delim = st.text_input(
                    "Enter delimiter character",
                    max_chars=5,
                    help="Enter the character(s) used to separate fields.",
                )
                chosen_delim = custom_delim if custom_delim else ","
            else:
                chosen_delim = _DELIMITERS[chosen_label]
            if chosen_delim != st.session_state.get("_current_delimiter"):
                st.session_state["_current_delimiter"] = chosen_delim
                suffix = Path(uploaded.name).suffix
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                    tmp.write(uploaded.getvalue())
                    tmp_path = Path(tmp.name)
                df = read_file(tmp_path, delimiter=chosen_delim)
                if not isinstance(df, pd.DataFrame):
                    df = pd.concat(list(df), ignore_index=True)
                st.session_state["df"] = df
                st.session_state["result"] = None
                st.session_state["review_decisions"] = {}
                tmp_path.unlink(missing_ok=True)
        # Preview
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
        # Advanced options
        settings = config_panel(df)
        # Apply loaded config if present
        loaded_cfg = st.session_state.get("loaded_config")
        if loaded_cfg is not None:
            settings["strategies"] = loaded_cfg.to_strategies()
            settings["survivor_rule"] = loaded_cfg.to_survivor_rule()
            settings["date_column"] = loaded_cfg.date_column
            settings["merge"] = loaded_cfg.merge
            del st.session_state["loaded_config"]
        # -------------------------------------------------------------------
        # Find Duplicates button
        # -------------------------------------------------------------------
        st.divider()
        if st.button("Find Duplicates", type="primary", use_container_width=True):
            progress_bar = st.progress(0, text="Comparing rows...")
            def _gui_progress(current: int, total: int) -> None:
                if total > 0:
                    pct = min(current / total, 1.0)
                    progress_bar.progress(pct, text=f"Comparing rows... {current:,}/{total:,}")
            with st.spinner("Running deduplication..."):
                result = deduplicate(
                    df,
                    strategies=settings["strategies"],
                    survivor_rule=settings["survivor_rule"],
                    date_column=settings["date_column"],
                    merge=settings["merge"],
                    preview=False,
                    progress_callback=_gui_progress,
                )
            progress_bar.empty()
            st.session_state["result"] = result
            st.session_state["review_decisions"] = {}
        # -------------------------------------------------------------------
        # Results
        # -------------------------------------------------------------------
        result: DeduplicationResult | None = st.session_state["result"]
        if result is not None:
            st.divider()
            st.subheader("Results")
            # Summary + download buttons
            results_summary(result, df)
            # Match group review
            if result.match_groups:
                st.divider()
                st.subheader("Match Groups")
                # Batch actions
                def _accept_all():
                    for g in result.match_groups:
                        st.session_state["review_decisions"][g.group_id] = {
                            "keep_indices": [g.survivor_index],
                            "overrides": {},
                        }
                def _reject_all():
                    for g in result.match_groups:
                        st.session_state["review_decisions"][g.group_id] = {
                            "keep_indices": list(g.row_indices),
                            "overrides": {},
                        }
                def _clear_all():
                    st.session_state["review_decisions"] = {}
                    for k in list(st.session_state):
                        if k.startswith("editor_"):
                            del st.session_state[k]
                action_left, action_mid, action_right = st.columns(3)
                with action_left:
                    st.button("Accept All", on_click=_accept_all)
                with action_mid:
                    st.button("Reject All", on_click=_reject_all)
                with action_right:
                    st.button("Clear Decisions", on_click=_clear_all)
                # Individual group cards
                decisions = st.session_state["review_decisions"]
                for i, group in enumerate(result.match_groups):
                    match_group_card(group, df, group_num=i + 1)
                # Show decision summary
                if decisions:
                    st.divider()
                    merged = 0
                    customized = 0
                    split = 0
                    kept_all = 0
                    for v in decisions.values():
                        if not isinstance(v, dict):
                            continue
                        ki = v.get("keep_indices", [])
                        gid_for_v = next(
                            (gid for gid, d in decisions.items() if d is v),
                            None,
                        )
                        group_size = next(
                            (len(g.row_indices) for g in result.match_groups
                             if g.group_id == gid_for_v),
                            0,
                        )
                        if len(ki) == group_size:
                            kept_all += 1
                        elif len(ki) == 1:
                            if v.get("overrides"):
                                customized += 1
                            else:
                                merged += 1
                        else:
                            split += 1
                    pending = len(result.match_groups) - len(decisions)
                    parts = []
                    if merged:
                        parts.append(f"{merged} merged")
                    if customized:
                        parts.append(f"{customized} customized")
                    if split:
                        parts.append(f"{split} split")
                    if kept_all:
                        parts.append(f"{kept_all} kept all")
                    parts.append(f"{pending} pending")
                    st.caption("Decisions: " + ", ".join(parts))
                    # Apply decisions and offer download
                    if st.button(
                        "Apply Review Decisions & Download",
                        type="primary",
                        use_container_width=True,
                    ):
                        reviewed_df, reviewed_removed = apply_review_decisions(
                            df, result.match_groups, decisions,
                        )
                        csv_bytes = reviewed_df.to_csv(
                            index=False
                        ).encode("utf-8-sig")
                        st.download_button(
                            "Download Reviewed & Deduplicated CSV",
                            data=csv_bytes,
                            file_name="deduplicated_reviewed.csv",
                            mime="text/csv",
                            key="reviewed_download",
                        )
                        if not reviewed_removed.empty:
                            removed_bytes = reviewed_removed.to_csv(
                                index=False
                            ).encode("utf-8-sig")
                            st.download_button(
                                "Download Reviewed Removed Rows",
                                data=removed_bytes,
                                file_name="removed_reviewed.csv",
                                mime="text/csv",
                                key="reviewed_removed_download",
                            )
            # Log entries
            if result.log_entries:
                with st.expander("Processing Log"):
                    st.code("\n".join(result.log_entries))
 else:
    # No file uploaded — show placeholder
    st.info("Upload a file to get started.")
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools Deduplicator v3.0"
 )
--- a/src/gui/pages/2_Text_Cleaner.py
+++ b/src/gui/pages/2_Text_Cleaner.py
@@ -0,0 +1,89 @@
 """DataTools Text Cleaner — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("✂️ Text Cleaner")
 st.caption("Clean and normalize text content across your data.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Trim leading/trailing whitespace
 - Collapse multiple spaces into one
 - Unicode normalization (NFC/NFKC)
 - Strip non-printable / control characters
 - Remove BOM (byte order mark)
 - Normalize line endings (CRLF → LF)
 - Case conversion (upper, lower, title, sentence)
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # File upload (functional)
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="textclean_file_upload",
 )
 if uploaded is not None:
    # Imported here so pandas is only needed once a file has been uploaded.
    import pandas as pd
    try:
        # Compare the extension case-insensitively so a name such as
        # "REPORT.XLSX" is not handed to the delimited-text reader.
        suffix = Path(uploaded.name).suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(uploaded)
        elif suffix == ".tsv":
            # The uploader accepts .tsv; with the default comma separator a
            # tab-delimited file would collapse into a single column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # UI boundary: report any read/parse failure on the page instead of
        # letting the script abort.
        st.error(f"Failed to read file: {e}")
 # ---------------------------------------------------------------------------
 # Placeholder options (all widgets disabled until the tool is implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Operations")
 st.checkbox("Trim whitespace", value=True, disabled=True)
 st.checkbox("Collapse multiple spaces", value=True, disabled=True)
 st.checkbox("Unicode normalization (NFC)", value=False, disabled=True)
 st.checkbox("Strip non-printable characters", value=False, disabled=True)
 st.checkbox("Remove BOM", value=False, disabled=True)
 st.checkbox("Normalize line endings", value=False, disabled=True)
 st.selectbox("Case conversion", ["None", "UPPER", "lower", "Title Case", "Sentence case"], disabled=True)
 st.divider()
 st.button("Clean Text", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )
--- a/src/gui/pages/3_Format_Standardizer.py
+++ b/src/gui/pages/3_Format_Standardizer.py
@@ -0,0 +1,86 @@
 """DataTools Format Standardizer — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("📐 Format Standardizer")
 st.caption("Standardize formats across columns for consistency.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Date format standardization (e.g., MM/DD/YYYY → YYYY-MM-DD)
 - Phone number formatting (E.164, national, international)
 - Currency normalization ($1,000.00 → 1000.00)
 - Name casing (JOHN DOE → John Doe)
 - Address abbreviation expansion (St. → Street, Ave. → Avenue)
 - Boolean standardization (Yes/No/Y/N/1/0 → True/False)
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # File upload (functional)
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="fmtstd_file_upload",
 )
 if uploaded is not None:
    # Imported here so pandas is only needed once a file has been uploaded.
    import pandas as pd
    try:
        # Compare the extension case-insensitively so a name such as
        # "REPORT.XLSX" is not handed to the delimited-text reader.
        suffix = Path(uploaded.name).suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(uploaded)
        elif suffix == ".tsv":
            # The uploader accepts .tsv; with the default comma separator a
            # tab-delimited file would collapse into a single column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # UI boundary: report any read/parse failure on the page instead of
        # letting the script abort.
        st.error(f"Failed to read file: {e}")
 # ---------------------------------------------------------------------------
 # Placeholder options (all widgets disabled until the tool is implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Format Rules")
 st.selectbox("Date format", ["YYYY-MM-DD", "MM/DD/YYYY", "DD/MM/YYYY", "DD-Mon-YYYY"], disabled=True)
 st.selectbox("Phone format", ["E.164 (+15551234567)", "National ((555) 123-4567)", "Digits only"], disabled=True)
 st.selectbox("Currency handling", ["Strip symbols, keep number", "Normalize to 2 decimals", "Keep as-is"], disabled=True)
 st.selectbox("Name casing", ["Title Case", "UPPER", "lower", "As-is"], disabled=True)
 st.checkbox("Expand address abbreviations", value=False, disabled=True)
 st.divider()
 st.button("Standardize Formats", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )
--- a/src/gui/pages/4_Missing_Values.py
+++ b/src/gui/pages/4_Missing_Values.py
@@ -0,0 +1,102 @@
 """DataTools Missing Value Handler — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("🕳️ Missing Value Handler")
 st.caption("Detect, analyze, and handle missing values in your data.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Detect disguised nulls (empty strings, "N/A", "n/a", "-", "NULL", "None", etc.)
 - Missingness analysis: per-column counts, percentages, and patterns
 - Visualize missing data heatmap
 - Imputation strategies: drop rows/columns, fill with mean/median/mode, forward-fill, backward-fill
 - Custom sentinel value replacement
 - Before/after comparison
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # File upload (functional)
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="missing_file_upload",
 )
 if uploaded is not None:
    # Imported here so pandas is only needed once a file has been uploaded.
    import pandas as pd
    try:
        # Compare the extension case-insensitively so a name such as
        # "REPORT.XLSX" is not handed to the delimited-text reader.
        suffix = Path(uploaded.name).suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(uploaded)
        elif suffix == ".tsv":
            # The uploader accepts .tsv; with the default comma separator a
            # tab-delimited file would collapse into a single column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # UI boundary: report any read/parse failure on the page instead of
        # letting the script abort.
        st.error(f"Failed to read file: {e}")
 # ---------------------------------------------------------------------------
 # Placeholder options (all widgets disabled until the tool is implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Detection Settings")
 st.text_input(
    "Null patterns (comma-separated)",
    value="N/A, n/a, NA, -, NULL, None, empty, .",
    disabled=True,
    help="Values to treat as missing.",
 )
 st.subheader("Handling Strategy")
 st.selectbox("Strategy", [
    "Drop rows with any missing",
    "Drop rows above threshold",
    "Fill with mean (numeric)",
    "Fill with median (numeric)",
    "Fill with mode (categorical)",
    "Forward-fill",
    "Backward-fill",
    "Custom value",
 ], disabled=True)
 st.slider("Drop threshold (%)", 0, 100, 50, disabled=True, help="Drop rows missing more than this % of columns.")
 st.divider()
 st.button("Handle Missing Values", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )
--- a/src/gui/pages/5_Column_Mapper.py
+++ b/src/gui/pages/5_Column_Mapper.py
@@ -0,0 +1,93 @@
 """DataTools Column Mapper — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("🗂️ Column Mapper")
 st.caption("Rename columns, enforce a target schema, and coerce types.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Rename columns via interactive mapping table
 - Load a target schema (JSON/CSV) to auto-map columns
 - Fuzzy column name matching for automatic suggestions
 - Type coercion (string → int, string → date, etc.)
 - Drop unmapped columns or keep as-is
 - Reorder columns to match target schema
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # File upload (functional)
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="colmap_file_upload",
 )
 if uploaded is not None:
    # Imported here so pandas is only needed once a file has been uploaded.
    import pandas as pd
    try:
        # Compare the extension case-insensitively so a name such as
        # "REPORT.XLSX" is not handed to the delimited-text reader.
        suffix = Path(uploaded.name).suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(uploaded)
        elif suffix == ".tsv":
            # The uploader accepts .tsv; with the default comma separator a
            # tab-delimited file would collapse into a single column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
        st.subheader("Column Mapping")
        st.caption("Map source columns to target names. (Interactive mapping coming soon.)")
        # Read-only identity mapping: every source column maps to itself
        # with type "auto" until interactive editing exists.
        mapping_data = pd.DataFrame({
            "Source Column": df.columns.tolist(),
            "Target Column": df.columns.tolist(),
            "Type": ["auto"] * len(df.columns),
        })
        st.dataframe(mapping_data, use_container_width=True, hide_index=True)
    except Exception as e:
        # UI boundary: report any read/parse failure on the page instead of
        # letting the script abort.
        st.error(f"Failed to read file: {e}")
 # ---------------------------------------------------------------------------
 # Placeholder options (all widgets disabled until the tool is implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Schema Options")
 st.file_uploader("Load target schema (JSON)", type=["json"], disabled=True, key="colmap_schema")
 st.checkbox("Drop unmapped columns", value=False, disabled=True)
 st.checkbox("Reorder to match schema", value=True, disabled=True)
 st.divider()
 st.button("Apply Column Mapping", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )
--- a/src/gui/pages/6_Outlier_Detector.py
+++ b/src/gui/pages/6_Outlier_Detector.py
@@ -0,0 +1,88 @@
 """DataTools Outlier Detector — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("📊 Outlier Detector")
 st.caption("Detect and handle outliers in numeric columns.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Z-score detection (configurable threshold)
 - IQR (interquartile range) detection
 - MAD (median absolute deviation) detection
 - Domain-rule violations (e.g., age < 0, price > $1M)
 - Visual outlier highlighting in data preview
 - Handling: flag only, remove, cap/winsorize to bounds
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # File upload (functional)
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="outlier_file_upload",
 )
 if uploaded is not None:
    # Imported here so pandas is only needed once a file has been uploaded.
    import pandas as pd
    try:
        # Compare the extension case-insensitively so a name such as
        # "REPORT.XLSX" is not handed to the delimited-text reader.
        suffix = Path(uploaded.name).suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(uploaded)
        elif suffix == ".tsv":
            # The uploader accepts .tsv; with the default comma separator a
            # tab-delimited file would collapse into a single column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # UI boundary: report any read/parse failure on the page instead of
        # letting the script abort.
        st.error(f"Failed to read file: {e}")
 # ---------------------------------------------------------------------------
 # Placeholder options (all widgets disabled until the tool is implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Detection Method")
 st.selectbox("Method", ["Z-Score", "IQR (Interquartile Range)", "MAD (Median Absolute Deviation)"], disabled=True)
 st.slider("Z-Score threshold", 1.0, 5.0, 3.0, 0.1, disabled=True)
 st.slider("IQR multiplier", 1.0, 3.0, 1.5, 0.1, disabled=True)
 st.subheader("Handling")
 st.selectbox("Action", ["Flag only (add column)", "Remove outlier rows", "Cap / Winsorize to bounds"], disabled=True)
 st.divider()
 st.button("Detect Outliers", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )
--- a/src/gui/pages/7_Multi_File_Merger.py
+++ b/src/gui/pages/7_Multi_File_Merger.py
@@ -0,0 +1,86 @@
 """DataTools Multi-File Merger — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("📎 Multi-File Merger")
 st.caption("Combine multiple CSV and Excel files into one dataset.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Upload multiple CSV/Excel files at once
 - Automatic schema alignment (matching columns by name)
 - Append mode: stack files vertically (union)
 - Join mode: merge files on shared key columns
 - Handle mismatched columns (fill missing with nulls or drop)
 - Source file tracking column
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # Multi-file upload (functional)
 # ---------------------------------------------------------------------------
 uploaded_files = st.file_uploader(
    "Upload CSV or Excel files",
    type=["csv", "tsv", "xlsx", "xls"],
    accept_multiple_files=True,
    help="Upload multiple files to preview. Processing is not yet available.",
    key="merger_file_upload",
 )
 if uploaded_files:
    # Imported here so pandas is only needed once files have been uploaded.
    import pandas as pd
    for f in uploaded_files:
        # Each file gets its own try block so one unreadable file does not
        # hide the previews of the others.
        try:
            # Compare the extension case-insensitively so a name such as
            # "REPORT.XLSX" is not handed to the delimited-text reader.
            suffix = Path(f.name).suffix.lower()
            if suffix in (".xlsx", ".xls"):
                df = pd.read_excel(f)
            elif suffix == ".tsv":
                # The uploader accepts .tsv; with the default comma separator
                # a tab-delimited file would collapse into a single column.
                df = pd.read_csv(f, sep="\t")
            else:
                df = pd.read_csv(f)
            st.subheader(f"Preview: {f.name}")
            # Column labels are not always strings (e.g. numeric Excel
            # headers); stringify them so the join cannot raise TypeError and
            # be misreported below as a read failure.
            shown_columns = ", ".join(str(label) for label in df.columns[:10])
            overflow_marker = "..." if len(df.columns) > 10 else ""
            st.caption(f"{len(df)} rows, {len(df.columns)} columns — Columns: {shown_columns}{overflow_marker}")
            st.dataframe(df.head(5), use_container_width=True)
        except Exception as e:
            # UI boundary: report the failure for this file and continue
            # with the remaining uploads.
            st.error(f"Failed to read {f.name}: {e}")
 # ---------------------------------------------------------------------------
 # Placeholder options (all widgets disabled until the tool is implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Merge Strategy")
 st.selectbox("Mode", ["Append (stack vertically)", "Join on key columns", "Schema alignment (smart merge)"], disabled=True)
 st.selectbox("Mismatched columns", ["Fill with null", "Drop non-shared columns", "Error"], disabled=True)
 st.checkbox("Add source filename column", value=True, disabled=True)
 st.divider()
 st.button("Merge Files", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )
--- a/src/gui/pages/8_Validator_Reporter.py
+++ b/src/gui/pages/8_Validator_Reporter.py
@@ -0,0 +1,93 @@
 """DataTools Validator & Reporter — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("✅ Validator & Reporter")
 st.caption("Validate data against rules and generate quality reports.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Column-level validation rules (not null, unique, regex pattern, range, enum)
 - Cross-column validation (e.g., start_date < end_date)
 - Data quality score per column and overall
 - Generate PDF quality report
 - Generate Excel report with flagged rows highlighted
 - Summary dashboard: pass/fail counts, severity breakdown
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # File upload (functional)
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="validator_file_upload",
 )
 if uploaded is not None:
    # Imported here so pandas is only needed once a file has been uploaded.
    import pandas as pd
    try:
        # Compare the extension case-insensitively so a name such as
        # "REPORT.XLSX" is not handed to the delimited-text reader.
        suffix = Path(uploaded.name).suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(uploaded)
        elif suffix == ".tsv":
            # The uploader accepts .tsv; with the default comma separator a
            # tab-delimited file would collapse into a single column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # UI boundary: report any read/parse failure on the page instead of
        # letting the script abort.
        st.error(f"Failed to read file: {e}")
 # ---------------------------------------------------------------------------
 # Placeholder options (all widgets disabled until the tool is implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Validation Rules")
 st.file_uploader("Load rules file (JSON)", type=["json"], disabled=True, key="validator_rules")
 st.multiselect("Quick checks", [
    "No null values",
    "No duplicate rows",
    "All emails valid",
    "All dates parseable",
    "Numeric columns in range",
 ], disabled=True)
 st.subheader("Report Format")
 st.selectbox("Output format", ["Excel (flagged rows)", "PDF summary", "Both"], disabled=True)
 st.divider()
 st.button("Validate & Generate Report", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )
--- a/src/gui/pages/9_Pipeline_Runner.py
+++ b/src/gui/pages/9_Pipeline_Runner.py
@@ -0,0 +1,95 @@
 """DataTools Pipeline Runner — stub page."""
 from __future__ import annotations
 import sys
 from pathlib import Path
 import streamlit as st
 # Put the repository root (four directory levels above this page) on
 # sys.path. Nothing on this stub page imports from the project yet.
 _project_root = Path(__file__).resolve().parent.parent.parent.parent
 if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
 # ---------------------------------------------------------------------------
 # Header
 # ---------------------------------------------------------------------------
 st.title("⚙️ Pipeline Runner")
 st.caption("Chain tools in sequence and pass output between steps automatically.")
 st.info("This tool is under development.")
 # ---------------------------------------------------------------------------
 # What this tool will do
 # ---------------------------------------------------------------------------
 st.markdown("""
 **Features:**
 - Select tools to run in sequence
 - Recommended order: Text Cleaner → Format Standardizer → Missing Values → Deduplicator → Validator
 - Each step's output feeds into the next step's input
 - Per-step configuration overrides
 - Progress tracking across all steps
 - Final combined report
 """)
 st.divider()
 # ---------------------------------------------------------------------------
 # File upload (functional)
 # ---------------------------------------------------------------------------
 uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="pipeline_file_upload",
 )
 if uploaded is not None:
    # Imported here so pandas is only needed once a file has been uploaded.
    import pandas as pd
    try:
        # Compare the extension case-insensitively so a name such as
        # "REPORT.XLSX" is not handed to the delimited-text reader.
        suffix = Path(uploaded.name).suffix.lower()
        if suffix in (".xlsx", ".xls"):
            df = pd.read_excel(uploaded)
        elif suffix == ".tsv":
            # The uploader accepts .tsv; with the default comma separator a
            # tab-delimited file would collapse into a single column.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)
        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # UI boundary: report any read/parse failure on the page instead of
        # letting the script abort.
        st.error(f"Failed to read file: {e}")
 # ---------------------------------------------------------------------------
 # Pipeline steps (checklist; all widgets disabled until implemented)
 # ---------------------------------------------------------------------------
 st.subheader("Pipeline Steps")
 st.caption("Select tools to include in the pipeline (recommended order):")
 st.checkbox("1. Text Cleaner", value=True, disabled=True)
 st.checkbox("2. Format Standardizer", value=True, disabled=True)
 st.checkbox("3. Missing Value Handler", value=True, disabled=True)
 st.checkbox("4. Column Mapper", value=False, disabled=True)
 st.checkbox("5. Outlier Detector", value=False, disabled=True)
 st.checkbox("6. Deduplicator", value=True, disabled=True)
 st.checkbox("7. Multi-File Merger", value=False, disabled=True)
 st.checkbox("8. Validator & Reporter", value=True, disabled=True)
 st.subheader("Pipeline Configuration")
 st.selectbox("On error", ["Stop pipeline", "Skip step and continue", "Prompt for decision"], disabled=True)
 st.checkbox("Generate combined report at end", value=True, disabled=True)
 st.divider()
 st.button("Run Pipeline", type="primary", use_container_width=True, disabled=True)
 # ---------------------------------------------------------------------------
 # Footer
 # ---------------------------------------------------------------------------
 st.divider()
 st.caption(
    "Runs locally. Your data never leaves this computer. "
    "| DataTools v3.0"
 )