"""Home-page renderer extracted into its own module. This used to live inside ``src/gui/app.py`` as a local function. Pulling it out into a side-effect-free module lets the ``back_to_home_link`` helper (in ``components/_legacy.py``) import the home callable to pass into ``st.switch_page`` — without re-running ``app.py``'s navigation setup, which would itself blow up because tool pages have a different "main script" context that breaks the registry's relative ``pages/…`` paths. Keep this module imports-light: nothing that runs Streamlit commands at module top level, nothing that triggers config loads. Just the ``_home_page`` callable. """ from __future__ import annotations import streamlit as st class _StashedUpload: """Duck-types Streamlit's ``UploadedFile`` so ``_run_analysis_on_upload`` accepts entries restored from session-state without changes. Exposes ``.name``, ``.size``, and ``.getvalue()`` — the contract used by the analyzer's read path. """ __slots__ = ("name", "size", "_data") def __init__(self, name: str, data: bytes) -> None: self.name = name self.size = len(data) self._data = data def getvalue(self) -> bytes: return self._data def _sync_uploader_to_home_uploads() -> None: """``on_change`` callback for the home-page file_uploader. Reconciles ``home_uploads`` (our persistent stash) with the widget's current value: adds newly-uploaded files, and drops files the user explicitly removed via the widget's built-in "✕" button. Per Streamlit semantics ``on_change`` only runs for user-initiated value changes, so the navigation-induced ``[]`` reset never reaches here — the stash survives intact across page switches. """ from src.audit import log_event widget_files = st.session_state.get("home_upload") or [] home_uploads: dict = st.session_state.setdefault("home_uploads", {}) findings: dict = st.session_state.setdefault("home_findings_by_file", {}) widget_names = {f.name for f in widget_files} for f in widget_files: if f.name not in home_uploads: home_uploads[f.name] = {"bytes": f.getvalue(), "size": f.size} log_event("upload", f"Uploaded {f.name}", filename=f.name, bytes=f.size) for name in list(home_uploads.keys()): if name not in widget_names: del home_uploads[name] findings.pop(name, None) log_event("upload", f"Removed {name}", filename=name) if st.session_state.get("home_uploaded_name") == name: st.session_state.pop("home_uploaded_name", None) st.session_state.pop("home_uploaded_size", None) st.session_state.pop("home_uploaded_bytes", None) st.session_state["home_uploads"] = home_uploads st.session_state["home_findings_by_file"] = findings def _home_page() -> None: """Render the home page — multi-file upload + per-file analysis. Uploaded files live in ``st.session_state["home_uploads"]`` (a dict keyed by filename), NOT in the widget's transient state. Streamlit's ``st.file_uploader`` widget gets unmounted when the user navigates away to a tool page, and its ``UploadedFile`` objects don't always re-attach on remount — so we capture the bytes into our own session-state stash on first sight and treat that stash as the source of truth for everything downstream (active-file pickup, analysis, findings rendering). Removing a file: per-row "✕" buttons next to each uploaded filename. Clearing findings: the "Clear results" button only wipes the analysis cache, not the upload stash — the files persist until the user explicitly removes them. """ from src.gui.components import ( hide_streamlit_chrome, render_findings_panel, render_sticky_footer, ) from src.gui.components._legacy import _run_analysis_on_upload from src.i18n import t st.set_page_config( page_title=t("home.page_title"), page_icon="🧹", layout="wide", ) hide_streamlit_chrome() render_sticky_footer() st.title(t("home.title")) st.caption(t("home.caption")) st.divider() st.markdown(f"### {t('upload.heading')}") st.caption(t("upload.intro_multi")) # Source of truth for uploaded files. dict[name -> {"bytes", "size"}]. home_uploads: dict = st.session_state.setdefault("home_uploads", {}) # File uploader — syncs into home_uploads via on_change. We deliberately # do NOT merge widget state into home_uploads at render time: navigation # can remount the widget with value ``[]``, and a render-time merge # would mistakenly leave home_uploads untouched while the user thinks # they're looking at empty state. # # ``on_change`` fires ONLY on user-initiated value changes (uploads # and the widget's built-in "✕" remove). It does NOT fire on the # remount-induced reset. That lets us treat the callback as ground # truth for both adds AND removes — fixing the previous bug where # the widget's "✕" appeared to do nothing because the file persisted # in home_uploads and immediately re-rendered in the list below. st.file_uploader( t("upload.uploader_label_multi"), type=["csv", "tsv", "xlsx", "xls"], accept_multiple_files=True, key="home_upload", help=t("upload.uploader_help"), on_change=_sync_uploader_to_home_uploads, ) # Persistent file list with per-file remove buttons. We render this # ourselves rather than trusting Streamlit's widget chrome because # the widget's "✕" only mutates widget-state, leaving home_uploads # out of sync. # # Two-phase click capture pattern (avoids the "hit-or-miss" click # losses we had previously): # # 1. ``st.button(key=stable_hash)`` returns True on the rerun where # it was clicked. We use a sha1 hash of the filename as the key # so it's identifier-safe regardless of spaces / dots / unicode # in the file name — Streamlit's widget-identity hashing on raw # filenames was the root cause of inconsistent removals. # 2. Inside a single pass we collect WHICH file to remove (if any), # then mutate state ONCE after the loop and rerun. Mutating mid # -loop while continuing to render other buttons risked # interleaving widget-key updates with state changes. if home_uploads: import hashlib st.markdown("**Uploaded files**") to_remove: str | None = None for name in list(home_uploads.keys()): digest = hashlib.sha1( name.encode("utf-8"), usedforsecurity=False, ).hexdigest()[:10] col_file, col_remove = st.columns([8, 1]) col_file.markdown( f"📄 `{name}`   " f"" f"({home_uploads[name]['size']:,} bytes)", unsafe_allow_html=True, ) if col_remove.button( "Remove", key=f"_home_remove_{digest}", help=f"Remove {name}", type="secondary", use_container_width=True, ): to_remove = name if to_remove is not None: from src.audit import log_event log_event( "upload", f"Removed {to_remove}", filename=to_remove, ) del home_uploads[to_remove] # Drop any findings/results tied to the removed file. findings_by_file_drop = st.session_state.get( "home_findings_by_file", {} ) findings_by_file_drop.pop(to_remove, None) st.session_state["home_uploads"] = home_uploads st.session_state["home_findings_by_file"] = findings_by_file_drop # If we just removed the active upload, also clear the # singular ``home_uploaded_*`` keys so tool pages don't # pick up stale bytes; the next render will repopulate # them from whatever file is now first. if st.session_state.get("home_uploaded_name") == to_remove: st.session_state.pop("home_uploaded_name", None) st.session_state.pop("home_uploaded_size", None) st.session_state.pop("home_uploaded_bytes", None) st.rerun() if not home_uploads: st.info(t("upload.empty_state")) return # Expose the first uploaded file via the singular ``home_uploaded_*`` # session keys so tool pages reached via "Open " still find an # active upload through ``pickup_or_upload``. first_name = next(iter(home_uploads)) first_meta = home_uploads[first_name] if ( st.session_state.get("home_uploaded_name") != first_name or st.session_state.get("home_uploaded_size") != first_meta["size"] ): st.session_state["home_uploaded_name"] = first_name st.session_state["home_uploaded_size"] = first_meta["size"] st.session_state["home_uploaded_bytes"] = first_meta["bytes"] # Findings cache — drop entries whose underlying file is no longer # in the stash (e.g. user just clicked "✕"). findings_by_file: dict = st.session_state.setdefault( "home_findings_by_file", {} ) findings_by_file = { name: result for name, result in findings_by_file.items() if name in home_uploads } st.session_state["home_findings_by_file"] = findings_by_file pending = [name for name in home_uploads if name not in findings_by_file] col_run, col_clear, _ = st.columns([1, 1, 4]) with col_run: run_clicked = st.button( t("upload.run_button"), type="primary", key="home_run_analysis", disabled=not pending, use_container_width=True, ) with col_clear: clear_clicked = st.button( t("upload.clear_results"), key="home_clear_results", disabled=not findings_by_file, use_container_width=True, ) if clear_clicked: st.session_state["home_findings_by_file"] = {} st.rerun() if run_clicked: from src.audit import log_event log_event( "analyze", f"Run analysis clicked on {len(pending)} file(s)", files=list(pending), ) progress = st.progress(0.0, text=t("upload.scanning")) for i, name in enumerate(pending, start=1): stashed = _StashedUpload(name, home_uploads[name]["bytes"]) findings_by_file[name] = _run_analysis_on_upload(stashed) progress.progress(i / len(pending), text=name) st.session_state["home_findings_by_file"] = findings_by_file progress.empty() st.rerun() if findings_by_file: st.divider() # Preserve the upload-stash order so the user sees results in # the same order they appear in the file list above. for name in home_uploads: if name not in findings_by_file: continue findings = findings_by_file[name] with st.container(border=True): if not findings: st.markdown(f"### 📄 {name}") st.success(t("findings.none")) else: render_findings_panel( findings, header=f"📄 {name}", key_namespace=name, ) # TEMP: end-of-content marker — confirmed at the true bottom of # the home page's main content. The user reports content scrolls # *behind* the offending white bar, so the bar is fixed-positioned. # Only ``#datatools-sticky-footer`` is fixed at the bottom per # our CSS, but the user already confirmed the sticky footer is # NOT the offending bar. So there's a fixed element we haven't # accounted for. The JS below outlines EVERY fixed/sticky element # in the parent document with a labelled colored border so we can # see exactly what's overlaying scrolled content. st.markdown( '
' '◀ CLAUDE TEST #3 — END OF MAIN CONTENT ▶' '
', unsafe_allow_html=True, ) st.iframe( """ """, height=1, )