feat(gui): add the one-click "Clean these files for me" front door
Issue #1 (the make-or-break UX fix): after the analyzer runs, Home now leads with a primary "Clean these files for me" CTA that runs the recommended pipeline (Clean Text -> Standardize -> Fix Missing -> Find Duplicates, in order) on every imported file and hands back a cleaned CSV per file — collapsing "which tool, what order" to one click. The existing per-finding cards remain, reframed as "Or fix issues one at a time" for users who want manual control.

- Reuses the core API verbatim (recommended_pipeline + run_pipeline); the reader mirrors 9_Pipeline_Runner._read_uploaded so files load the same way the standalone orchestrator loads them.
- Per-file errors are captured so one bad file doesn't kill the batch; cleaned CSVs are cached in session_state so downloads survive reruns and are pruned when a file is removed or re-analyzed.

Verified: the read -> run_pipeline -> CSV data path executes correctly (compile + a non-Streamlit functional smoke test). The Streamlit UI scaffolding (button / download_button / progress / session_state) mirrors the proven runner page but still needs a `streamlit run` check.

Front-door copy is English literals for now; i18n keys are a follow-up.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
122
src/gui/_home.py
122
src/gui/_home.py
@@ -146,6 +146,101 @@ def _sync_uploader_to_home_uploads() -> None:
|
|||||||
st.session_state["home_findings_by_file"] = findings
|
st.session_state["home_findings_by_file"] = findings
|
||||||
|
|
||||||
|
|
||||||
|
def _read_upload_df(name: str, data: bytes):
|
||||||
|
"""Bytes -> DataFrame. Mirrors the Automated Workflows page reader:
|
||||||
|
Excel by extension, else CSV with encoding fallbacks. Kept in step
|
||||||
|
with ``9_Pipeline_Runner._read_uploaded`` so the one-click clean
|
||||||
|
reads files exactly as the standalone orchestrator would."""
|
||||||
|
import io as _io
|
||||||
|
from pathlib import Path as _Path
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
suffix = _Path(name).suffix.lower()
|
||||||
|
bio = _io.BytesIO(data)
|
||||||
|
if suffix in (".xlsx", ".xls"):
|
||||||
|
return pd.read_excel(bio)
|
||||||
|
for enc in ("utf-8", "utf-8-sig", "latin-1"):
|
||||||
|
try:
|
||||||
|
bio.seek(0)
|
||||||
|
sep = "\t" if suffix == ".tsv" else ","
|
||||||
|
return pd.read_csv(bio, encoding=enc, sep=sep, on_bad_lines="warn")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
continue
|
||||||
|
bio.seek(0)
|
||||||
|
return pd.read_csv(bio, encoding="latin-1")
|
||||||
|
|
||||||
|
|
||||||
|
def _run_recommended_clean(home_uploads: dict) -> None:
    """Run the recommended pipeline over every uploaded file (front door).

    Builds the pipeline once via ``recommended_pipeline()`` (the original
    author describes it as Clean Text -> Standardize -> Fix Missing ->
    Find Duplicates, in that order), applies it to each entry of
    ``home_uploads``, and stores one result record per file under
    ``st.session_state["home_clean_results"]`` before triggering a rerun.

    Args:
        home_uploads: Mapping of file name -> upload record; each record
            must expose the raw file contents under the ``"bytes"`` key.

    Each successful record holds the cleaned CSV bytes plus the initial
    and final row counts; a failed record holds ``"csv": None`` and a
    user-facing error string, so one bad file does not abort the batch.
    """
    from src.core.pipeline import recommended_pipeline, run_pipeline
    from src.core.errors import format_for_user
    from src.audit import log_event

    steps = recommended_pipeline()
    file_names = list(home_uploads)
    # Guard the denominator so an empty upload set cannot divide by zero.
    total = max(len(file_names), 1)
    outcome: dict = {}

    bar = st.progress(0.0, text="Cleaning…")
    for done, file_name in enumerate(file_names):
        # Show the fraction of files finished *before* this one starts.
        bar.progress(done / total, text=file_name)
        try:
            frame = _read_upload_df(file_name, home_uploads[file_name]["bytes"])
            run = run_pipeline(frame, steps, stop_on_error=False)
            cleaned_csv = run.final_df.to_csv(index=False).encode("utf-8")
            entry = {
                "csv": cleaned_csv,
                "initial_rows": run.initial_rows,
                "final_rows": run.final_rows,
                "error": None,
            }
        except Exception as exc:  # noqa: BLE001 — surface per file, keep the batch alive
            entry = {"csv": None, "error": format_for_user(exc)}
        outcome[file_name] = entry
    bar.empty()

    log_event("tool_run", "Home one-click recommended clean", files=file_names)
    st.session_state["home_clean_results"] = outcome
    st.rerun()
|
||||||
|
|
||||||
|
|
||||||
|
def _render_clean_results() -> None:
    """Show a download button and a row-count summary per cleaned file.

    Reads the records stored under ``st.session_state["home_clean_results"]``
    (written by :func:`_run_recommended_clean`). Records whose file name
    is no longer a key of ``st.session_state["home_uploads"]`` are skipped
    at render time, so a removed file's stale result is not shown. Failed
    records render as an error box instead of a download button.
    """
    import hashlib as _hashlib

    stash: dict = st.session_state.get("home_clean_results", {})
    if not stash:
        return

    uploads = st.session_state.get("home_uploads", {})
    for file_name, outcome in stash.items():
        if file_name not in uploads:
            continue

        # Widget keys are derived from a short hash of the file name so
        # each file gets its own stable key across reruns.
        hasher = _hashlib.sha1(file_name.encode("utf-8"), usedforsecurity=False)
        widget_key = f"home_clean_dl_{hasher.hexdigest()[:10]}"

        failure = outcome.get("error")
        if failure:
            st.error(f"**Could not clean `{file_name}`**\n\n```\n{failure}\n```")
            continue

        # Strip only the last extension for the download name.
        base = file_name.rsplit(".", 1)[0]
        st.download_button(
            f"⬇ Download cleaned {file_name}",
            data=outcome["csv"],
            file_name=f"{base}_cleaned.csv",
            mime="text/csv",
            key=widget_key,
            width="stretch",
        )

        dropped = outcome["initial_rows"] - outcome["final_rows"]
        kept_note = f"{outcome['final_rows']:,} rows kept"
        if dropped:
            change_note = f" · {dropped:,} removed"
        else:
            change_note = " · nothing to remove"
        st.caption(kept_note + change_note)
|
||||||
|
|
||||||
|
|
||||||
def _home_page() -> None:
|
def _home_page() -> None:
|
||||||
"""Render the home page — multi-file upload + per-file analysis.
|
"""Render the home page — multi-file upload + per-file analysis.
|
||||||
|
|
||||||
@@ -443,6 +538,7 @@ def _home_page() -> None:
|
|||||||
|
|
||||||
if clear_clicked:
|
if clear_clicked:
|
||||||
st.session_state["home_findings_by_file"] = {}
|
st.session_state["home_findings_by_file"] = {}
|
||||||
|
st.session_state["home_clean_results"] = {}
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
if run_clicked:
|
if run_clicked:
|
||||||
@@ -458,6 +554,8 @@ def _home_page() -> None:
|
|||||||
findings_by_file[name] = _run_analysis_on_upload(stashed)
|
findings_by_file[name] = _run_analysis_on_upload(stashed)
|
||||||
progress.progress(i / len(pending), text=name)
|
progress.progress(i / len(pending), text=name)
|
||||||
st.session_state["home_findings_by_file"] = findings_by_file
|
st.session_state["home_findings_by_file"] = findings_by_file
|
||||||
|
# A fresh analysis invalidates any prior one-click clean outputs.
|
||||||
|
st.session_state["home_clean_results"] = {}
|
||||||
progress.empty()
|
progress.empty()
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
@@ -468,6 +566,30 @@ def _home_page() -> None:
|
|||||||
# 4-card summary above the findings panels so the user can
|
# 4-card summary above the findings panels so the user can
|
||||||
# eyeball the run before expanding any one file.
|
# eyeball the run before expanding any one file.
|
||||||
_render_stats_overview(findings_by_file)
|
_render_stats_overview(findings_by_file)
|
||||||
|
|
||||||
|
# ---- Front door: one-click recommended clean (primary path) ----
|
||||||
|
# The analyzer has the findings; the majority case is "just fix
|
||||||
|
# it." This primary button runs the recommended pipeline in the
|
||||||
|
# correct order and hands back a cleaned file per upload, so the
|
||||||
|
# user never has to decide which tool or what order. The per-file
|
||||||
|
# findings below remain the "fix one thing at a time" path.
|
||||||
|
if st.button(
|
||||||
|
"✨ Clean these files for me",
|
||||||
|
type="primary",
|
||||||
|
key="home_clean_all",
|
||||||
|
width="stretch",
|
||||||
|
):
|
||||||
|
_run_recommended_clean(home_uploads)
|
||||||
|
st.caption(
|
||||||
|
"Recommended: cleans text, standardizes formats, fills blanks, "
|
||||||
|
"and removes duplicates — in the right order — then gives you the "
|
||||||
|
"cleaned file."
|
||||||
|
)
|
||||||
|
_render_clean_results()
|
||||||
|
|
||||||
|
# ---- Manual path: per-file findings, fix one thing at a time ----
|
||||||
|
st.markdown("###### Or fix issues one at a time")
|
||||||
|
st.caption("Open any finding below to jump straight to the right tool.")
|
||||||
# Preserve the upload-stash order so the user sees results in
|
# Preserve the upload-stash order so the user sees results in
|
||||||
# the same order they appear in the file list above.
|
# the same order they appear in the file list above.
|
||||||
for name in home_uploads:
|
for name in home_uploads:
|
||||||
|
|||||||
Reference in New Issue
Block a user