feat: refactor GUI to multi-page Streamlit app with 9 tool pages

Convert single-page deduplicator into a multi-page suite. Home page shows
tool card grid. Deduplicator extracted to its own page (fully working).
8 stub pages added for Text Cleaner, Format Standardizer, Missing Values,
Column Mapper, Outlier Detector, Multi-File Merger, Validator & Reporter,
and Pipeline Runner — each with functional file upload and coming-soon UI.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-29 01:16:12 +00:00
parent 9ec371a85f
commit f2fdc10af7
10 changed files with 1175 additions and 330 deletions

View File

@@ -0,0 +1,86 @@
"""DataTools Format Standardizer — stub page."""
from __future__ import annotations
import sys
from pathlib import Path
import streamlit as st
# Put the directory four levels above this file (named the project root here)
# at the front of sys.path, unless it is already listed.
_project_root = (
    Path(__file__)
    .resolve()
    .parent.parent
    .parent.parent
)
_root_entry = str(_project_root)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
# ---------------------------------------------------------------------------
# Header
# ---------------------------------------------------------------------------
st.title("📐 Format Standardizer")
st.caption("Standardize formats across columns for consistency.")
st.info("This tool is under development.")

# ---------------------------------------------------------------------------
# What this tool will do
# ---------------------------------------------------------------------------
# One entry per bullet shown under the "Features" heading.
_planned_features = (
    "Date format standardization (e.g., MM/DD/YYYY → YYYY-MM-DD)",
    "Phone number formatting (E.164, national, international)",
    "Currency normalization ($1,000.00 → 1000.00)",
    "Name casing (JOHN DOE → John Doe)",
    "Address abbreviation expansion (St. → Street, Ave. → Avenue)",
    "Boolean standardization (Yes/No/Y/N/1/0 → True/False)",
)
# The empty first and last items produce the leading and trailing newline
# around the markdown body.
_features_markdown = "\n".join(
    ["", "**Features:**", *(f"- {item}" for item in _planned_features), ""]
)
st.markdown(_features_markdown)

st.divider()
# ---------------------------------------------------------------------------
# File upload (functional)
# ---------------------------------------------------------------------------
# Lets the user upload a CSV/TSV/Excel file and shows a 10-row preview.
# No processing is applied to the data on this stub page.
uploaded = st.file_uploader(
    "Upload CSV or Excel file",
    type=["csv", "tsv", "xlsx", "xls"],
    help="Upload a file to preview. Processing is not yet available.",
    key="fmtstd_file_upload",
)

if uploaded is not None:
    # Imported here so pandas is only loaded once a file has been provided.
    import pandas as pd

    # Compare the extension case-insensitively: a name such as "REPORT.XLSX"
    # must still be routed to the Excel reader.
    _name_lower = uploaded.name.lower()

    try:
        if _name_lower.endswith((".xlsx", ".xls")):
            df = pd.read_excel(uploaded)
        elif _name_lower.endswith(".tsv"):
            # The uploader accepts .tsv, so parse it with a tab delimiter
            # instead of read_csv's default comma.
            df = pd.read_csv(uploaded, sep="\t")
        else:
            df = pd.read_csv(uploaded)

        st.subheader(f"Preview: {uploaded.name}")
        st.caption(f"{len(df)} rows, {len(df.columns)} columns")
        st.dataframe(df.head(10), use_container_width=True)
    except Exception as e:
        # Page-level boundary: report any read/preview failure in the UI
        # rather than letting the page crash.
        st.error(f"Failed to read file: {e}")
# ---------------------------------------------------------------------------
# Placeholder options
# ---------------------------------------------------------------------------
# Every control in this section is rendered with disabled=True.
st.subheader("Format Rules")

# (label, options) for each select box, in display order.
_rule_choices = (
    ("Date format", ["YYYY-MM-DD", "MM/DD/YYYY", "DD/MM/YYYY", "DD-Mon-YYYY"]),
    ("Phone format", ["E.164 (+15551234567)", "National ((555) 123-4567)", "Digits only"]),
    ("Currency handling", ["Strip symbols, keep number", "Normalize to 2 decimals", "Keep as-is"]),
    ("Name casing", ["Title Case", "UPPER", "lower", "As-is"]),
)
for _label, _choices in _rule_choices:
    st.selectbox(_label, _choices, disabled=True)

st.checkbox("Expand address abbreviations", value=False, disabled=True)

st.divider()

st.button(
    "Standardize Formats",
    type="primary",
    use_container_width=True,
    disabled=True,
)
# ---------------------------------------------------------------------------
# Footer
# ---------------------------------------------------------------------------
# Caption text shown beneath the final divider.
_footer_note = "Runs locally. Your data never leaves this computer. | DataTools v3.0"

st.divider()
st.caption(_footer_note)