Files
datatools-dev/src/gui/app.py
Michael ff2eaeb6c4 feat(home): multi-file upload + per-file analysis, drop tool grid
Home is now upload + analysis only. The page accepts multiple files in
one go, analyzes each independently, and renders findings grouped by
filename in bordered containers. The 3-section tool-card grid is gone —
discovery happens via the sidebar now.

Mechanics:
- file_uploader uses accept_multiple_files=True. Each file's findings
  cache in session_state["home_findings_by_file"] keyed by filename so
  removing a file via Streamlit's "x" button drops its findings too,
  and re-clicking Run only re-analyzes pending files.
- The first uploaded file is mirrored into the singular
  home_uploaded_{name,bytes,size} keys so tool pages continue to pick
  up an "active" upload through pickup_or_upload — no tool-page changes.
- New i18n keys: upload.intro_multi, upload.uploader_label_multi,
  upload.clear_results, upload.empty_state. upload.heading text is
  updated to "Upload one or more files to start" (EN + ES).

Dropped tests pinning the tool grid:
- TestHomeToolGridLocalization (test_chrome.py)
- test_home_tool_card_uses_es_name (test_smoke.py)
- TestLiteHomeGridBadges (test_lite_tier.py — locked-card lock-badge
  assertions; locking is still enforced per-tool-page via
  require_feature_or_render_upgrade)

2009 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 20:12:48 +00:00

212 lines
6.8 KiB
Python

"""DataTools — Data Cleaning Mastery Suite.
Launch:
streamlit run src/gui/app.py
This module is the navigation manager for the full GUI: it registers
every tool page with ``st.navigation`` so the sidebar can render
section headers ("Data Review", "Data Cleaners", "Transformations",
"Automations") instead of the flat numbered list Streamlit's
auto-page-discovery would produce. The Home page itself is registered
as a callable defined below so the entry script remains the single
file users invoke.
"""
from __future__ import annotations
import sys
from pathlib import Path
import streamlit as st
# Streamlit executes this file as a script, so the project root is not
# automatically importable. Walk up three ``.parent`` hops from this file
# and put that directory at the front of ``sys.path`` (once) so that
# ``src.*`` imports resolve.
_project_root = Path(__file__).resolve().parent.parent.parent
if sys.path.count(str(_project_root)) == 0:
    sys.path.insert(0, str(_project_root))
# ---------------------------------------------------------------------------
# Home page (rendered when the user selects the default nav entry)
# ---------------------------------------------------------------------------
def _home_page() -> None:
    """Render the home page — multi-file upload + per-file analysis.

    Flow on every rerun:

    1. Configure the page and draw the title, caption and multi-file
       uploader.
    2. If the uploader is empty, discard any cached findings, show the
       empty-state hint and stop.
    3. Mirror the first uploaded file into the singular
       ``home_uploaded_{name,size,bytes}`` session keys.
    4. Drop cached findings for files that are no longer in the uploader,
       then offer a "run" button (analyzes only files without cached
       findings) and a "clear" button (empties the cache).
    5. Render each analyzed file's findings in a bordered container, in
       uploader order, followed by the footer.

    Session-state keys written:
        ``home_uploaded_name`` / ``home_uploaded_size`` /
        ``home_uploaded_bytes``: the first uploaded file.
        ``home_findings_by_file``: dict mapping filename to the value
        returned by ``_run_analysis_on_upload`` for that file.
    """
    from src.gui.components import hide_streamlit_chrome, render_findings_panel
    from src.gui.components._legacy import _run_analysis_on_upload
    from src.i18n import t

    st.set_page_config(
        page_title=t("home.page_title"),
        page_icon="🧹",
        layout="wide",
    )
    hide_streamlit_chrome()
    st.title(t("home.title"))
    st.caption(t("home.caption"))
    st.divider()
    st.markdown(f"### {t('upload.heading')}")
    st.caption(t("upload.intro_multi"))
    uploaded_files = st.file_uploader(
        t("upload.uploader_label_multi"),
        type=["csv", "tsv", "xlsx", "xls"],
        accept_multiple_files=True,
        key="home_upload",
        help=t("upload.uploader_help"),
    )
    if not uploaded_files:
        # Removing the *last* file must drop its findings as well. Without
        # this, the cache survives the early return below and a later
        # upload that reuses the filename would show stale results with
        # the Run button disabled (nothing would count as pending).
        # NOTE(review): the ``home_uploaded_*`` keys are intentionally left
        # untouched here, matching the previous behavior — confirm whether
        # tool pages should keep seeing the last active upload.
        if st.session_state.get("home_findings_by_file"):
            st.session_state["home_findings_by_file"] = {}
        st.info(t("upload.empty_state"))
        return

    # Keep tool pages working: they consume a single ``home_uploaded_*``
    # set via ``pickup_or_upload``. Expose the first uploaded file as
    # the "active" upload for that contract; the rest live alongside
    # for per-file analysis on this page.
    first = uploaded_files[0]
    if (
        st.session_state.get("home_uploaded_name") != first.name
        or st.session_state.get("home_uploaded_size") != first.size
    ):
        # Only re-read the bytes when name or size changed, so an
        # unchanged upload is not copied again on every rerun.
        st.session_state["home_uploaded_name"] = first.name
        st.session_state["home_uploaded_size"] = first.size
        st.session_state["home_uploaded_bytes"] = first.getvalue()

    # Per-file findings live in a dict so removing a file from the
    # uploader (Streamlit's "x" button) drops its results too. We only
    # re-analyze files we haven't already analyzed in this session.
    current_names = {f.name for f in uploaded_files}
    cached_findings: dict = st.session_state.get("home_findings_by_file", {})
    findings_by_file = {
        name: result
        for name, result in cached_findings.items()
        if name in current_names
    }
    st.session_state["home_findings_by_file"] = findings_by_file
    pending = [f for f in uploaded_files if f.name not in findings_by_file]

    col_run, col_clear, _ = st.columns([1, 1, 4])
    with col_run:
        run_clicked = st.button(
            t("upload.run_button"),
            type="primary",
            key="home_run_analysis",
            disabled=not pending,
            use_container_width=True,
        )
    with col_clear:
        clear_clicked = st.button(
            t("upload.clear_results"),
            key="home_clear_results",
            disabled=not findings_by_file,
            use_container_width=True,
        )

    if clear_clicked:
        st.session_state["home_findings_by_file"] = {}
        st.rerun()

    if run_clicked:
        progress = st.progress(0.0, text=t("upload.scanning"))
        for i, f in enumerate(pending, start=1):
            findings_by_file[f.name] = _run_analysis_on_upload(f)
            progress.progress(i / len(pending), text=f.name)
        st.session_state["home_findings_by_file"] = findings_by_file
        progress.empty()
        # Rerun so the buttons' disabled states reflect the new cache.
        st.rerun()

    if findings_by_file:
        st.divider()
        # Preserve uploader order so the user sees results in the same
        # order they appear in the file list above.
        for f in uploaded_files:
            if f.name not in findings_by_file:
                continue
            findings = findings_by_file[f.name]
            with st.container(border=True):
                if not findings:
                    # An analyzed file with nothing to report still gets
                    # its own header plus an explicit "no findings" note.
                    st.markdown(f"### 📄 {f.name}")
                    st.success(t("findings.none"))
                else:
                    render_findings_panel(findings, header=f"📄 {f.name}")

    st.divider()
    st.caption(t("chrome.footer"))
# ---------------------------------------------------------------------------
# Navigation registration
# ---------------------------------------------------------------------------
#
# ``st.navigation`` overrides Streamlit's auto-discovery of the
# ``pages/`` directory, so every page we want in the sidebar must be
# listed here. The dict key becomes the section header rendered above
# the entries; an empty-string key suppresses the header so the Home
# entry sits at the top without a label above it.
from src.gui.tools_registry import TOOLS, section_label # noqa: E402
from src.i18n import t as _t # noqa: E402
def _page_for(tool_id: str, *, page_slug: str, icon: str, title: str) -> "st.Page":
    """Build the ``st.Page`` entry for a single tool.

    Args:
        tool_id: Passed to ``st.Page`` as the page's ``url_path``.
        page_slug: File stem of the page script; the script path is
            ``pages/<page_slug>.py``.
        icon: Icon passed through to ``st.Page``.
        title: Title passed through to ``st.Page``.

    Returns:
        The ``st.Page`` object for the tool's page script.
    """
    script_path = f"pages/{page_slug}.py"
    page_options = {"title": title, "icon": icon, "url_path": tool_id}
    return st.Page(script_path, **page_options)
def _build_navigation() -> dict[str, list]:
    """Assemble the section → pages mapping handed to ``st.navigation``.

    Every entry in ``TOOLS`` is placed in the bucket named by its
    ``section`` attribute (one of ``"cleaners"``, ``"transformations"``,
    ``"automations"``; any other value raises ``KeyError``). The result
    lists, in order: the Home page under an empty-string header, the three
    tool sections under their ``section_label`` headers, and the Activate
    and Close pages under the account header.

    Returns:
        Dict mapping a sidebar section header to the list of pages shown
        beneath it.
    """
    section_ids = ("cleaners", "transformations", "automations")
    pages_by_section: dict[str, list] = {
        section_id: [] for section_id in section_ids
    }
    for tool in TOOLS:
        # Resolve the bucket first so an unknown section fails before a
        # page object is created for it.
        bucket = pages_by_section[tool.section]
        bucket.append(
            _page_for(
                tool.tool_id,
                page_slug=tool.page_slug,
                icon=tool.icon,
                title=tool.name,
            )
        )

    home_title = _t("nav.home_page_title") or "Home"
    home = st.Page(
        _home_page,
        title=home_title,
        icon="🧹",
        default=True,
        url_path="home",
    )
    activate_title = _t("nav.activate_title") or "Activate"
    activate = st.Page(
        "pages/_Activate.py",
        title=activate_title,
        icon="🔑",
        url_path="activate",
    )
    close_title = _t("nav.close_title") or "Close"
    close = st.Page(
        "pages/99_Close.py",
        title=close_title,
        icon="🛑",
        url_path="close",
    )
    account_header = _t("nav.section_account") or "Account"

    # Insertion order is the sidebar order: Home (no header), the tool
    # sections, then the account section.
    navigation: dict[str, list] = {"": [home]}
    for section_id in section_ids:
        navigation[section_label(section_id)] = pages_by_section[section_id]
    navigation[account_header] = [activate, close]
    return navigation
# Entry point: hand the section → pages mapping to ``st.navigation`` and
# run the page object it returns.
pg = st.navigation(_build_navigation())
pg.run()