Issue #1 (the make-or-break UX fix): after the analyzer runs, Home now leads with a primary "Clean these files for me" CTA that runs the recommended pipeline (Clean Text -> Standardize -> Fix Missing -> Find Duplicates, in order) on every imported file and hands back a cleaned CSV per file — collapsing "which tool, what order" to one click. The existing per-finding cards remain, reframed as "Or fix issues one at a time" for users who want manual control. - Reuses the core API verbatim (recommended_pipeline + run_pipeline); reader mirrors 9_Pipeline_Runner._read_uploaded so files load the same way the standalone orchestrator loads them. - Per-file errors are captured so one bad file doesn't kill the batch; cleaned CSVs are cached in session_state so downloads survive reruns and are pruned when a file is removed or re-analyzed. Verified: the read -> run_pipeline -> CSV data path executes correctly (compile + a non-Streamlit functional smoke test). The Streamlit UI scaffolding (button / download_button / progress / session_state) mirrors the proven runner page but still needs a `streamlit run` check. Front-door copy is English literals for now; i18n keys are a follow-up. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
617 lines
24 KiB
Python
617 lines
24 KiB
Python
"""Home-page renderer extracted into its own module.
|
||
|
||
This used to live inside ``src/gui/app.py`` as a local function. Pulling
|
||
it out into a side-effect-free module lets the ``back_to_home_link``
|
||
helper (in ``components/_legacy.py``) import the home callable to pass
|
||
into ``st.switch_page`` — without re-running ``app.py``'s navigation
|
||
setup, which would itself blow up because tool pages have a different
|
||
"main script" context that breaks the registry's relative ``pages/…``
|
||
paths.
|
||
|
||
Keep this module imports-light: nothing that runs Streamlit commands
|
||
at module top level, nothing that triggers config loads. Just the
|
||
``_home_page`` callable.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import streamlit as st
|
||
|
||
|
||
class _StashedUpload:
|
||
"""Duck-types Streamlit's ``UploadedFile`` so ``_run_analysis_on_upload``
|
||
accepts entries restored from session-state without changes. Exposes
|
||
``.name``, ``.size``, and ``.getvalue()`` — the contract used by the
|
||
analyzer's read path.
|
||
"""
|
||
|
||
__slots__ = ("name", "size", "_data")
|
||
|
||
def __init__(self, name: str, data: bytes) -> None:
|
||
self.name = name
|
||
self.size = len(data)
|
||
self._data = data
|
||
|
||
def getvalue(self) -> bytes:
|
||
return self._data
|
||
|
||
|
||
def _format_size(n: int) -> str:
|
||
"""Human-readable byte count for file sizes shown in the GUI.
|
||
|
||
Bytes are never displayed — the smallest unit is KB, even for sub-
|
||
kilobyte files (e.g. ``0.5 KB`` for 512 bytes). Steps up to MB
|
||
once the count reaches 1 MiB, then to GB at 1 GiB. Always one
|
||
decimal place.
|
||
"""
|
||
KB = 1024
|
||
MB = 1024 * 1024
|
||
GB = 1024 * 1024 * 1024
|
||
if n < MB:
|
||
return f"{n / KB:.1f} KB"
|
||
if n < GB:
|
||
return f"{n / MB:.1f} MB"
|
||
return f"{n / GB:.1f} GB"
|
||
|
||
|
||
def _render_stats_overview(findings_by_file: dict) -> None:
    """Render the four-card summary row shown above the per-file findings.

    Cards, in order: files analyzed, total findings, warnings (findings
    whose ``severity`` is ``"warn"`` or ``"error"``) and info (findings
    whose ``severity`` is ``"info"``). The warnings and info cards get the
    ``is-warn`` / ``is-info`` CSS modifier and a unit label only when
    their count is non-zero.

    Args:
        findings_by_file: mapping of filename to an iterable of finding
            objects; each finding is read only through ``.severity``.
    """
    import html as _html

    flat_findings = []
    for per_file in findings_by_file.values():
        flat_findings.extend(per_file)

    # Errors are grouped with warnings on the "to review" card — both
    # need the user to act; ``info`` is the lower-priority pile.
    warn_count = sum(
        1 for item in flat_findings if item.severity in ("warn", "error")
    )
    info_count = sum(1 for item in flat_findings if item.severity == "info")

    def _tile(label: str, value: int, unit: str = "", kind: str = "") -> str:
        # One stat card; ``kind`` is an optional extra CSS class.
        css_class = f"dt-stat {kind}" if kind else "dt-stat"
        unit_markup = ""
        if unit:
            unit_markup = (
                f'<span class="dt-stat-unit">{_html.escape(unit)}</span>'
            )
        pieces = (
            f'<div class="{css_class}">',
            f'<div class="dt-stat-label">{_html.escape(label)}</div>',
            f'<div class="dt-stat-value">{value}{unit_markup}</div>',
            "</div>",
        )
        return "".join(pieces)

    tiles = [
        _tile("Files analyzed", len(findings_by_file)),
        _tile("Total findings", len(flat_findings)),
        _tile(
            "Warnings",
            warn_count,
            unit="to review" if warn_count else "",
            kind="is-warn" if warn_count else "",
        ),
        _tile(
            "Info",
            info_count,
            unit="suggestions" if info_count else "",
            kind="is-info" if info_count else "",
        ),
    ]
    grid_body = "".join(tiles)

    st.markdown(
        f'<div class="dt-stats">{grid_body}</div>',
        unsafe_allow_html=True,
    )
|
||
|
||
|
||
def _sync_uploader_to_home_uploads() -> None:
    """``on_change`` callback for the home-page file_uploader.

    Reconciles ``home_uploads`` (our persistent stash) with the widget's
    current value: adds newly-uploaded files, and drops files the user
    explicitly removed via the widget's built-in "✕" button. Per
    Streamlit semantics ``on_change`` only runs for user-initiated
    value changes, so the navigation-induced ``[]`` reset never reaches
    here — the stash survives intact across page switches.

    When a file is dropped, everything derived from it is dropped too:
    its findings, its one-click cleaned output in ``home_clean_results``
    (otherwise a different file later imported under the same name would
    be offered the stale cleaned CSV), and the singular
    ``home_uploaded_*`` keys when they point at the removed file.
    """
    from src.audit import log_event

    widget_files = st.session_state.get("home_upload") or []
    home_uploads: dict = st.session_state.setdefault("home_uploads", {})
    findings: dict = st.session_state.setdefault("home_findings_by_file", {})
    # May be absent (no one-click clean has run yet) — do not create it.
    clean_results = st.session_state.get("home_clean_results")

    widget_names = {f.name for f in widget_files}

    # Adds: capture bytes for every file we have not stashed yet.
    for f in widget_files:
        if f.name not in home_uploads:
            home_uploads[f.name] = {"bytes": f.getvalue(), "size": f.size}
            log_event("upload", f"Uploaded {f.name}", filename=f.name, bytes=f.size)

    # Removes: iterate over a snapshot because we delete while walking.
    for name in list(home_uploads):
        if name in widget_names:
            continue
        del home_uploads[name]
        findings.pop(name, None)
        if clean_results:
            clean_results.pop(name, None)
        log_event("upload", f"Removed {name}", filename=name)
        if st.session_state.get("home_uploaded_name") == name:
            st.session_state.pop("home_uploaded_name", None)
            st.session_state.pop("home_uploaded_size", None)
            st.session_state.pop("home_uploaded_bytes", None)

    st.session_state["home_uploads"] = home_uploads
    st.session_state["home_findings_by_file"] = findings
    if clean_results is not None:
        st.session_state["home_clean_results"] = clean_results
|
||
|
||
|
||
def _read_upload_df(name: str, data: bytes):
|
||
"""Bytes -> DataFrame. Mirrors the Automated Workflows page reader:
|
||
Excel by extension, else CSV with encoding fallbacks. Kept in step
|
||
with ``9_Pipeline_Runner._read_uploaded`` so the one-click clean
|
||
reads files exactly as the standalone orchestrator would."""
|
||
import io as _io
|
||
from pathlib import Path as _Path
|
||
import pandas as pd
|
||
|
||
suffix = _Path(name).suffix.lower()
|
||
bio = _io.BytesIO(data)
|
||
if suffix in (".xlsx", ".xls"):
|
||
return pd.read_excel(bio)
|
||
for enc in ("utf-8", "utf-8-sig", "latin-1"):
|
||
try:
|
||
bio.seek(0)
|
||
sep = "\t" if suffix == ".tsv" else ","
|
||
return pd.read_csv(bio, encoding=enc, sep=sep, on_bad_lines="warn")
|
||
except UnicodeDecodeError:
|
||
continue
|
||
bio.seek(0)
|
||
return pd.read_csv(bio, encoding="latin-1")
|
||
|
||
|
||
def _run_recommended_clean(home_uploads: dict) -> None:
    """Run the recommended pipeline on every imported file (one-click clean).

    Builds the pipeline with ``recommended_pipeline()`` (Clean Text ->
    Standardize -> Fix Missing -> Find Duplicates, in that order — the
    same pipeline the Automated Workflows page builds), runs it on each
    entry of ``home_uploads`` and stores one result per filename under
    ``st.session_state["home_clean_results"]``, then triggers a rerun.

    A successful result holds the cleaned CSV bytes plus the initial and
    final row counts; a failed one holds ``csv=None`` and a user-facing
    error message, so one bad file does not abort the batch.

    Args:
        home_uploads: mapping of filename to a dict with a ``"bytes"`` key.
    """
    from src.core.pipeline import recommended_pipeline, run_pipeline
    from src.core.errors import format_for_user
    from src.audit import log_event

    steps = recommended_pipeline()
    file_names = list(home_uploads)
    # Guard the divisor so an empty stash cannot divide by zero.
    denominator = max(len(file_names), 1)
    outcomes: dict = {}

    bar = st.progress(0.0, text="Cleaning…")
    for completed, file_name in enumerate(file_names):
        # Progress reflects files finished *before* this one starts.
        bar.progress(completed / denominator, text=file_name)
        try:
            frame = _read_upload_df(file_name, home_uploads[file_name]["bytes"])
            run = run_pipeline(frame, steps, stop_on_error=False)
            outcomes[file_name] = {
                "csv": run.final_df.to_csv(index=False).encode("utf-8"),
                "initial_rows": run.initial_rows,
                "final_rows": run.final_rows,
                "error": None,
            }
        except Exception as exc:  # noqa: BLE001 — surface per file, keep the batch alive
            outcomes[file_name] = {"csv": None, "error": format_for_user(exc)}
    bar.empty()

    log_event("tool_run", "Home one-click recommended clean", files=file_names)
    st.session_state["home_clean_results"] = outcomes
    st.rerun()
|
||
|
||
|
||
def _render_clean_results() -> None:
    """Show the outcome of the one-click clean for each imported file.

    Reads the stash written by :func:`_run_recommended_clean` from
    ``st.session_state["home_clean_results"]``. For each entry whose
    file is still present in ``home_uploads`` it renders either an error
    box (when the entry carries an error) or a download button for the
    cleaned CSV followed by a short row-count caption. Entries for files
    no longer in ``home_uploads`` are skipped, and nothing is rendered
    when the stash is missing or empty.
    """
    import hashlib as _hashlib

    stash: dict = st.session_state.get("home_clean_results", {})
    if not stash:
        return

    still_uploaded = st.session_state.get("home_uploads", {})
    for name in stash:
        if name not in still_uploaded:
            continue
        outcome = stash[name]
        # Short, stable widget-key suffix derived from the filename.
        key_suffix = _hashlib.sha1(
            name.encode("utf-8"), usedforsecurity=False,
        ).hexdigest()[:10]

        if outcome.get("error"):
            st.error(f"**Could not clean `{name}`**\n\n```\n{outcome['error']}\n```")
            continue

        # Everything before the last dot; the whole name if there is none.
        stem, *_ = name.rsplit(".", 1)
        st.download_button(
            f"⬇ Download cleaned {name}",
            data=outcome["csv"],
            file_name=f"{stem}_cleaned.csv",
            mime="text/csv",
            key=f"home_clean_dl_{key_suffix}",
            width="stretch",
        )

        removed = outcome["initial_rows"] - outcome["final_rows"]
        if removed:
            tail = f" · {removed:,} removed"
        else:
            tail = " · nothing to remove"
        st.caption(f"{outcome['final_rows']:,} rows kept" + tail)
|
||
|
||
|
||
def _home_page() -> None:
    """Render the home page — multi-file upload + per-file analysis.

    Uploaded files live in ``st.session_state["home_uploads"]`` (a
    dict keyed by filename), NOT in the widget's transient state.
    Streamlit's ``st.file_uploader`` widget gets unmounted when the
    user navigates away to a tool page, and its ``UploadedFile``
    objects don't always re-attach on remount — so we capture the
    bytes into our own session-state stash on first sight and treat
    that stash as the source of truth for everything downstream
    (active-file pickup, analysis, findings rendering).

    Removing a file: per-row "✕" buttons next to each uploaded
    filename. Removal also drops the file's findings and its one-click
    cleaned output, so a different file later imported under the same
    name is never offered a stale cleaned CSV. Clearing findings: the
    "Clear results" button only wipes the analysis cache and the
    cleaned outputs, not the upload stash — the files persist until the
    user explicitly removes them.

    Once at least one file has findings, the page leads with the
    one-click "Clean these files for me" action and then lists the
    per-file findings as the manual, one-issue-at-a-time path.
    """
    from src.gui.components import (
        hide_streamlit_chrome,
        render_findings_panel,
        render_sticky_footer,
    )
    from src.gui.components._legacy import _run_analysis_on_upload
    from src.i18n import t

    from pathlib import Path as _Path
    _ICON_PATH = str(_Path(__file__).parent / "assets" / "datatools_icon_256.png")
    st.set_page_config(
        page_title=t("home.page_title"),
        page_icon=_ICON_PATH,
        layout="wide",
    )
    hide_streamlit_chrome()
    render_sticky_footer()

    import html as _html
    # Page header — brand block (D icon + "UNALOGIX" eyebrow over
    # "DataTools" wordmark + tagline) on the left, privacy pill on
    # the right. Matches the sidebar brand chip scaled up for the
    # hero. Bottom border replaces the explicit ``st.divider`` that
    # used to sit below the caption.
    privacy_label = _html.escape(t("home.privacy_pill"))
    st.markdown(
        '<header class="dt-page-header">'
        '<div class="dt-page-brand">'
        '<div class="dt-page-brand-row">'
        '<div class="dt-page-brand-mark">D</div>'
        '<div class="dt-page-brand-words">'
        '<span class="dt-page-eyebrow">UNALOGIX</span>'
        '<h1 class="dt-page-wordmark">DataTools</h1>'
        '</div>'
        '</div>'
        f'<p class="dt-page-subtitle">{_html.escape(t("home.caption"))}</p>'
        '</div>'
        '<span class="dt-privacy-pill">'
        '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor">'
        '<rect x="4" y="11" width="16" height="10" rx="2"/>'
        '<path d="M8 11V7a4 4 0 018 0v4"/>'
        '</svg>'
        f'{privacy_label}'
        '</span>'
        '</header>',
        unsafe_allow_html=True,
    )

    # Source of truth for uploaded files. dict[name -> {"bytes", "size"}].
    home_uploads: dict = st.session_state.setdefault("home_uploads", {})

    # Streamlit's file_uploader is the only path that actually receives
    # bytes from the browser, but we don't want its dropzone UI to
    # compete with the in-card "Add more files" button below. Park the
    # whole widget off-screen via the ``dt-fileuploader-offscreen``
    # CSS rule (declared in ``_DESIGN_TOKENS_CSS``) while keeping the
    # underlying ``<input type="file">`` reachable to JS — the Add
    # button programmatically clicks it to open the OS file picker.
    #
    # ``on_change`` fires ONLY on user-initiated value changes (uploads
    # and the widget's built-in "✕" remove). It does NOT fire on the
    # remount-induced reset. That lets us treat the callback as ground
    # truth for both adds AND removes.
    st.markdown(
        '<style>[data-testid="stFileUploader"] {'
        'position:absolute!important;left:-10000px!important;'
        'width:1px!important;height:1px!important;overflow:hidden!important;'
        'pointer-events:none!important;}</style>',
        unsafe_allow_html=True,
    )
    st.file_uploader(
        t("upload.uploader_label_multi"),
        type=["csv", "tsv", "xlsx", "xls"],
        accept_multiple_files=True,
        key="home_upload",
        help=t("upload.uploader_help"),
        on_change=_sync_uploader_to_home_uploads,
        label_visibility="collapsed",
    )

    # ``Files`` section header — count + total size on the right, or
    # "No files imported yet" when empty (mockup §section-head).
    import hashlib
    n_files = len(home_uploads)
    if n_files:
        total_bytes = sum(meta["size"] for meta in home_uploads.values())
        files_word = "file" if n_files == 1 else "files"
        meta_html = (
            f'{n_files} {files_word} · '
            f'{_html.escape(_format_size(total_bytes))} total'
        )
    else:
        meta_html = "No files imported yet"
    st.markdown(
        '<div class="dt-files-section-head">'
        '<h2>Files</h2>'
        f'<span class="dt-section-meta">{meta_html}</span>'
        '</div>',
        unsafe_allow_html=True,
    )

    # Files card — always rendered. Body is file rows (if any) + the
    # in-card "Add more files" button that triggers the off-screen
    # file_uploader. Two-phase click capture for the X buttons: walk
    # all rows once, accumulate ``to_remove`` if any was clicked,
    # then mutate state + rerun ONCE after the loop.
    to_remove: str | None = None
    _DOC_SVG = (
        '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor">'
        '<path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/>'
        '<path d="M14 2v6h6"/>'
        '</svg>'
    )
    _PLUS_SVG = (
        '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor">'
        '<path d="M12 5v14M5 12h14"/>'
        '</svg>'
    )
    with st.container(border=True):
        for name in list(home_uploads.keys()):
            # Short, stable widget-key suffix derived from the filename.
            digest = hashlib.sha1(
                name.encode("utf-8"), usedforsecurity=False,
            ).hexdigest()[:10]
            # X button on the LEFT of the row per UX feedback —
            # ``✕ | filename + chip | size``.
            col_x, col_name, col_size = st.columns([0.55, 8, 1.6])
            if col_x.button(
                "✕",
                key=f"_home_remove_{digest}",
                help=f"Remove {name}",
                type="tertiary",
            ):
                to_remove = name
            col_name.markdown(
                '<div class="dt-file-row">'
                f'<span class="dt-file-icon-chip">{_DOC_SVG}</span>'
                f'<span class="dt-file-name">{_html.escape(name)}</span>'
                '</div>',
                unsafe_allow_html=True,
            )
            col_size.markdown(
                '<div style="text-align:right;">'
                '<span class="dt-file-size">'
                f'{_html.escape(_format_size(home_uploads[name]["size"]))}'
                '</span></div>',
                unsafe_allow_html=True,
            )
        # In-card "Add more files" — clicks the (off-screen)
        # ``stFileUploaderDropzoneInput`` so the OS file picker opens.
        # Inline ``onclick`` would be cleanest but Streamlit's HTML
        # sanitizer strips event-handler attributes from
        # ``unsafe_allow_html`` content; the wiring is done from
        # ``_ADD_FILES_BUTTON_JS`` further down via ``st.iframe``.
        st.markdown(
            '<button class="dt-file-add" type="button">'
            f'{_PLUS_SVG} Add more files'
            '</button>',
            unsafe_allow_html=True,
        )

    # Wire the in-card "Add more files" button to the off-screen
    # ``stFileUploaderDropzoneInput`` (Streamlit strips inline
    # ``onclick`` attributes; we have to do the binding from a real
    # script element, which Streamlit only ships through component
    # iframes — same pattern as the sticky footer + Upload→Import
    # rewriter). A ``MutationObserver`` re-wires after reruns when
    # Streamlit remounts the button.
    st.iframe(
        """
        <script>
        (function () {
          function wire(doc) {
            var btn = doc.querySelector('button.dt-file-add');
            var input = doc.querySelector('[data-testid="stFileUploaderDropzoneInput"]');
            if (!btn || !input) return;
            if (btn.dataset.dtWired === '1') return;
            btn.dataset.dtWired = '1';
            btn.addEventListener('click', function (e) {
              e.preventDefault();
              input.click();
            });
          }
          var doc;
          try { doc = window.parent.document; }
          catch (e) { doc = document; }
          wire(doc);
          var win = doc.defaultView || window.parent || window;
          if ('MutationObserver' in win) {
            var raf = 0;
            try {
              new win.MutationObserver(function () {
                if (raf) return;
                raf = win.requestAnimationFrame(function () { raf = 0; wire(doc); });
              }).observe(doc.body, { childList: true, subtree: true });
            } catch (e) {}
          }
        })();
        </script>
        """,
        height=1,
    )

    if to_remove is not None:
        from src.audit import log_event
        log_event(
            "upload",
            f"Removed {to_remove}",
            filename=to_remove,
        )
        del home_uploads[to_remove]
        # Drop any findings/results tied to the removed file.
        findings_by_file_drop = st.session_state.get(
            "home_findings_by_file", {}
        )
        findings_by_file_drop.pop(to_remove, None)
        st.session_state["home_uploads"] = home_uploads
        st.session_state["home_findings_by_file"] = findings_by_file_drop
        # Also drop the one-click cleaned output for this file. Without
        # this, re-importing a different file under the same name would
        # surface the previous file's cleaned CSV as a download.
        clean_results_drop = st.session_state.get("home_clean_results")
        if clean_results_drop:
            clean_results_drop.pop(to_remove, None)
            st.session_state["home_clean_results"] = clean_results_drop
        # If we just removed the active upload, also clear the
        # singular ``home_uploaded_*`` keys so tool pages don't
        # pick up stale bytes; the next render will repopulate
        # them from whatever file is now first.
        if st.session_state.get("home_uploaded_name") == to_remove:
            st.session_state.pop("home_uploaded_name", None)
            st.session_state.pop("home_uploaded_size", None)
            st.session_state.pop("home_uploaded_bytes", None)
        st.rerun()

    if not home_uploads:
        # Empty state — page ends cleanly after the Files card. The
        # in-card "Add more files" button is the only affordance the
        # user needs; the old ``upload.empty_state`` info alert was
        # redundant and out of step with the mockup.
        return

    # Expose the first uploaded file via the singular ``home_uploaded_*``
    # session keys so tool pages reached via "Open <Tool>" still find an
    # active upload through ``pickup_or_upload``.
    first_name = next(iter(home_uploads))
    first_meta = home_uploads[first_name]
    if (
        st.session_state.get("home_uploaded_name") != first_name
        or st.session_state.get("home_uploaded_size") != first_meta["size"]
    ):
        st.session_state["home_uploaded_name"] = first_name
        st.session_state["home_uploaded_size"] = first_meta["size"]
        st.session_state["home_uploaded_bytes"] = first_meta["bytes"]

    # Findings cache — drop entries whose underlying file is no longer
    # in the stash (e.g. user just clicked "✕").
    findings_by_file: dict = st.session_state.setdefault(
        "home_findings_by_file", {}
    )
    findings_by_file = {
        name: result for name, result in findings_by_file.items()
        if name in home_uploads
    }
    st.session_state["home_findings_by_file"] = findings_by_file

    # Files that have been imported but not analyzed yet.
    pending = [name for name in home_uploads if name not in findings_by_file]

    # Action bar — Run analysis / Clear results.
    col_run, col_clear, _ = st.columns([1, 1, 4])
    with col_run:
        run_clicked = st.button(
            t("upload.run_button"),
            type="primary",
            key="home_run_analysis",
            disabled=not pending,
            width="stretch",
        )
    with col_clear:
        clear_clicked = st.button(
            t("upload.clear_results"),
            key="home_clear_results",
            disabled=not findings_by_file,
            width="stretch",
        )

    if clear_clicked:
        st.session_state["home_findings_by_file"] = {}
        st.session_state["home_clean_results"] = {}
        st.rerun()

    if run_clicked:
        from src.audit import log_event
        log_event(
            "analyze",
            f"Run analysis clicked on {len(pending)} file(s)",
            files=list(pending),
        )
        progress = st.progress(0.0, text=t("upload.scanning"))
        for i, name in enumerate(pending, start=1):
            stashed = _StashedUpload(name, home_uploads[name]["bytes"])
            findings_by_file[name] = _run_analysis_on_upload(stashed)
            progress.progress(i / len(pending), text=name)
        st.session_state["home_findings_by_file"] = findings_by_file
        # A fresh analysis invalidates any prior one-click clean outputs.
        st.session_state["home_clean_results"] = {}
        progress.empty()
        st.rerun()

    if findings_by_file:
        st.divider()
        # Overview row before drilling into per-file detail. Mockup
        # layout (datatools_layout_redesign2.html §stats) puts a
        # 4-card summary above the findings panels so the user can
        # eyeball the run before expanding any one file.
        _render_stats_overview(findings_by_file)

        # ---- Front door: one-click recommended clean (primary path) ----
        # The analyzer has the findings; the majority case is "just fix
        # it." This primary button runs the recommended pipeline in the
        # correct order and hands back a cleaned file per upload, so the
        # user never has to decide which tool or what order. The per-file
        # findings below remain the "fix one thing at a time" path.
        if st.button(
            "✨ Clean these files for me",
            type="primary",
            key="home_clean_all",
            width="stretch",
        ):
            _run_recommended_clean(home_uploads)
        st.caption(
            "Recommended: cleans text, standardizes formats, fills blanks, "
            "and removes duplicates — in the right order — then gives you the "
            "cleaned file."
        )
        _render_clean_results()

        # ---- Manual path: per-file findings, fix one thing at a time ----
        st.markdown("###### Or fix issues one at a time")
        st.caption("Open any finding below to jump straight to the right tool.")
        # Preserve the upload-stash order so the user sees results in
        # the same order they appear in the file list above.
        for name in home_uploads:
            if name not in findings_by_file:
                continue
            findings = findings_by_file[name]
            with st.container(border=True):
                if not findings:
                    st.markdown(
                        '<div class="dt-finding-group-head">'
                        '<span class="dt-severity-dot success"></span>'
                        f'<span class="dt-group-filename">{_html.escape(name)}</span>'
                        '<div class="dt-group-counts">'
                        '<span class="dt-count-pill success">no issues</span>'
                        '</div>'
                        '</div>',
                        unsafe_allow_html=True,
                    )
                else:
                    render_findings_panel(
                        findings,
                        header=name,
                        key_namespace=name,
                    )
|