"""DataTools Pipeline Runner — stub page.""" from __future__ import annotations import sys from pathlib import Path import streamlit as st _project_root = Path(__file__).resolve().parent.parent.parent.parent if str(_project_root) not in sys.path: sys.path.insert(0, str(_project_root)) from src.gui.components import hide_streamlit_chrome, require_normalization_gate hide_streamlit_chrome() require_normalization_gate() # --------------------------------------------------------------------------- # Header # --------------------------------------------------------------------------- st.title("⚙️ Pipeline Runner") st.caption("Chain tools in sequence and pass output between steps automatically.") st.info("This tool is under development.") # --------------------------------------------------------------------------- # What this tool will do # --------------------------------------------------------------------------- st.markdown(""" **Features:** - Select tools to run in sequence - Recommended order: Text Cleaner → Format Standardizer → Missing Values → Deduplicator → Validator - Each step's output feeds into the next step's input - Per-step configuration overrides - Progress tracking across all steps - Final combined report """) st.divider() # --------------------------------------------------------------------------- # File upload (functional) # --------------------------------------------------------------------------- uploaded = st.file_uploader( "Upload CSV or Excel file", type=["csv", "tsv", "xlsx", "xls"], help="Upload a file to preview. Processing is not yet available.", key="pipeline_file_upload", ) if uploaded is not None: import pandas as pd try: if uploaded.name.endswith((".xlsx", ".xls")): df = pd.read_excel(uploaded) else: df = pd.read_csv(uploaded) st.subheader(f"Preview: {uploaded.name}") st.caption(f"{len(df)} rows, {len(df.columns)} columns") st.dataframe(df.head(10), use_container_width=True) except Exception as e: st.error(f"Failed to read file: {e}") # --------------------------------------------------------------------------- # Pipeline steps (checklist) # --------------------------------------------------------------------------- st.subheader("Pipeline Steps") st.caption("Select tools to include in the pipeline (recommended order):") st.checkbox("1. Text Cleaner", value=True, disabled=True) st.checkbox("2. Format Standardizer", value=True, disabled=True) st.checkbox("3. Missing Value Handler", value=True, disabled=True) st.checkbox("4. Column Mapper", value=False, disabled=True) st.checkbox("5. Outlier Detector", value=False, disabled=True) st.checkbox("6. Deduplicator", value=True, disabled=True) st.checkbox("7. Multi-File Merger", value=False, disabled=True) st.checkbox("8. Validator & Reporter", value=True, disabled=True) st.subheader("Pipeline Configuration") st.selectbox("On error", ["Stop pipeline", "Skip step and continue", "Prompt for decision"], disabled=True) st.checkbox("Generate combined report at end", value=True, disabled=True) st.divider() st.button("Run Pipeline", type="primary", use_container_width=True, disabled=True) # --------------------------------------------------------------------------- # Footer # --------------------------------------------------------------------------- st.divider() st.caption( "Runs locally. Your data never leaves this computer. " "| DataTools v3.0" )