fix(home): persist upload list across page navigation

Reported: clicking "Back to Home" from a tool page returned the user to an empty home page — their previously-uploaded files were gone. Root cause: Streamlit's ``st.file_uploader`` widget state does not reliably survive ``st.switch_page``. The widget gets unmounted on navigation, and its ``UploadedFile`` objects don't always re-attach on remount. The home page was treating the widget's return value as the source of truth, so after navigation the list was empty. Fix: introduce a session-state stash keyed by filename (``home_uploads: dict[str, {"bytes": bytes, "size": int}]``) and treat it as the source of truth for everything downstream — the active-file pickup keys for tool pages, the per-file findings cache, and the rendered file list. The widget is reduced to its narrow role of capturing NEW uploads, which we merge into the stash without ever removing existing entries. Per-file remove: a "✕" button next to each filename drops just that file (and its findings). The widget's own "✕" is bypassed by our rendering, since trusting it would let the widget's state diverge from the stash. Clear-results button is unchanged: it wipes only the analysis cache, leaving uploaded files intact (per the user's "persistent until cleared" requirement — removal is per-file via "✕"). Tool-page compatibility: the singular ``home_uploaded_{name,size,bytes}`` keys still get populated from the first entry in the stash on every render, so ``pickup_or_upload`` on a tool page keeps finding the active upload. When the user removes the active file, those keys are cleared so the next render repopulates from whatever file is now first. ``_StashedUpload`` is a small duck type (``.name``, ``.size``, ``.getvalue()``) so ``_run_analysis_on_upload`` accepts entries restored from the stash without changes. 2220 tests pass. Smoke-verified via AppTest: pre-stashed ``home_uploads`` renders the file list with per-file remove buttons, and the persistent state survives a simulated navigation round-trip.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-17 00:04:12 +00:00
parent 21fd8a4cd7
commit ecfc52499f
1 changed files with 116 additions and 33 deletions
--- a/src/gui/_home.py
+++ b/src/gui/_home.py
@@ -18,8 +18,41 @@ from __future__ import annotations
 import streamlit as st
 class _StashedUpload:
    """Duck-types Streamlit's ``UploadedFile`` so ``_run_analysis_on_upload``
    accepts entries restored from session-state without changes. Exposes
    ``.name``, ``.size``, and ``.getvalue()`` — the contract used by the
    analyzer's read path.
    """
    __slots__ = ("name", "size", "_data")
    def __init__(self, name: str, data: bytes) -> None:
        self.name = name
        self.size = len(data)
        self._data = data
    def getvalue(self) -> bytes:
        return self._data
 def _home_page() -> None:
-    """Render the home page — multi-file upload + per-file analysis."""
+    """Render the home page — multi-file upload + per-file analysis.
    Uploaded files live in ``st.session_state["home_uploads"]`` (a
    dict keyed by filename), NOT in the widget's transient state.
    Streamlit's ``st.file_uploader`` widget gets unmounted when the
    user navigates away to a tool page, and its ``UploadedFile``
    objects don't always re-attach on remount — so we capture the
    bytes into our own session-state stash on first sight and treat
    that stash as the source of truth for everything downstream
    (active-file pickup, analysis, findings rendering).
    Removing a file: per-row "✕" buttons next to each uploaded
    filename. Clearing findings: the "Clear results" button only
    wipes the analysis cache, not the upload stash — the files
    persist until the user explicitly removes them.
    """
    from src.gui.components import hide_streamlit_chrome, render_findings_panel
    from src.gui.components._legacy import _run_analysis_on_upload
    from src.i18n import t
@@ -38,45 +71,98 @@ def _home_page() -> None:
    st.markdown(f"### {t('upload.heading')}")
    st.caption(t("upload.intro_multi"))
-    uploaded_files = st.file_uploader(
+    # Source of truth for uploaded files. dict[name -> {"bytes", "size"}].
    home_uploads: dict = st.session_state.setdefault("home_uploads", {})
    # File uploader — for ADDING new files only. On every render we
    # merge widget-returned files INTO home_uploads but never remove
    # via the widget. (Widget state can return ``[]`` after navigation,
    # which we deliberately don't treat as "user cleared their files".)
    new_files = st.file_uploader(
        t("upload.uploader_label_multi"),
        type=["csv", "tsv", "xlsx", "xls"],
        accept_multiple_files=True,
        key="home_upload",
        help=t("upload.uploader_help"),
    )
    if new_files:
        changed = False
        for f in new_files:
            if f.name not in home_uploads:
                home_uploads[f.name] = {
                    "bytes": f.getvalue(),
                    "size": f.size,
                }
                changed = True
        if changed:
            st.session_state["home_uploads"] = home_uploads
-    if not uploaded_files:
+    # Persistent file list with per-file remove buttons. We render this
    # ourselves rather than trusting Streamlit's widget chrome because
    # the widget's "✕" only mutates widget-state, leaving home_uploads
    # out of sync.
    if home_uploads:
        st.markdown("**Uploaded files**")
        for name in list(home_uploads.keys()):
            col_file, col_remove = st.columns([12, 1])
            col_file.markdown(
                f"📄 `{name}` &nbsp; "
                f"<span style='opacity:0.6'>"
                f"({home_uploads[name]['size']:,} bytes)</span>",
                unsafe_allow_html=True,
            )
            if col_remove.button(
                "✕",
                key=f"_home_remove_{name}",
                help=f"Remove {name}",
            ):
                del home_uploads[name]
                # Drop any findings/results tied to the removed file.
                findings_by_file_drop = st.session_state.get(
                    "home_findings_by_file", {}
                )
                findings_by_file_drop.pop(name, None)
                st.session_state["home_uploads"] = home_uploads
                st.session_state["home_findings_by_file"] = findings_by_file_drop
                # If we just removed the active upload, also clear the
                # singular ``home_uploaded_*`` keys so tool pages don't
                # pick up stale bytes; the next render will repopulate
                # them from whatever file is now first.
                if st.session_state.get("home_uploaded_name") == name:
                    st.session_state.pop("home_uploaded_name", None)
                    st.session_state.pop("home_uploaded_size", None)
                    st.session_state.pop("home_uploaded_bytes", None)
                st.rerun()
    if not home_uploads:
        st.info(t("upload.empty_state"))
        return
-    # Keep tool pages working: they consume a single ``home_uploaded_*``
+    # Expose the first uploaded file via the singular ``home_uploaded_*``
-    # set via ``pickup_or_upload``. Expose the first uploaded file as
+    # session keys so tool pages reached via "Open <Tool>" still find an
-    # the "active" upload for that contract; the rest live alongside
+    # active upload through ``pickup_or_upload``.
-    # for per-file analysis on this page.
+    first_name = next(iter(home_uploads))
-    first = uploaded_files[0]
+    first_meta = home_uploads[first_name]
    if (
-        st.session_state.get("home_uploaded_name") != first.name
+        st.session_state.get("home_uploaded_name") != first_name
-        or st.session_state.get("home_uploaded_size") != first.size
+        or st.session_state.get("home_uploaded_size") != first_meta["size"]
    ):
-        st.session_state["home_uploaded_name"] = first.name
+        st.session_state["home_uploaded_name"] = first_name
-        st.session_state["home_uploaded_size"] = first.size
+        st.session_state["home_uploaded_size"] = first_meta["size"]
-        st.session_state["home_uploaded_bytes"] = first.getvalue()
+        st.session_state["home_uploaded_bytes"] = first_meta["bytes"]
-    # Per-file findings live in a dict so removing a file from the
+    # Findings cache — drop entries whose underlying file is no longer
-    # uploader (Streamlit's "x" button) drops its results too. We only
+    # in the stash (e.g. user just clicked "✕"). We only
    # re-analyze files we haven't already analyzed in this session.
    findings_by_file: dict = st.session_state.setdefault(
        "home_findings_by_file", {}
    )
    current_names = {f.name for f in uploaded_files}
    findings_by_file = {
        name: result for name, result in findings_by_file.items()
-        if name in current_names
+        if name in home_uploads
    }
    st.session_state["home_findings_by_file"] = findings_by_file
-    pending = [f for f in uploaded_files if f.name not in findings_by_file]
+    pending = [name for name in home_uploads if name not in findings_by_file]
    col_run, col_clear, _ = st.columns([1, 1, 4])
    with col_run:
@@ -101,31 +187,28 @@ def _home_page() -> None:
    if run_clicked:
        progress = st.progress(0.0, text=t("upload.scanning"))
-        for i, f in enumerate(pending, start=1):
+        for i, name in enumerate(pending, start=1):
-            findings_by_file[f.name] = _run_analysis_on_upload(f)
+            stashed = _StashedUpload(name, home_uploads[name]["bytes"])
-            progress.progress(i / len(pending), text=f"{f.name}")
+            findings_by_file[name] = _run_analysis_on_upload(stashed)
            progress.progress(i / len(pending), text=name)
        st.session_state["home_findings_by_file"] = findings_by_file
        progress.empty()
        st.rerun()
    if findings_by_file:
        st.divider()
-        # Preserve uploader order so the user sees results in the same
+        # Preserve the upload-stash order so the user sees results in
-        # order they appear in the file list above. Each file's findings
+        # the same order they appear in the file list above.
-        # render via ``render_findings_panel`` so the per-tool grouping
+        for name in home_uploads:
-        # (and the "Open <Tool>" jump link under each group) is kept —
+            if name not in findings_by_file:
        # that's how the user reaches the cleaner that fixes a specific
        # finding without hunting through the sidebar.
        for f in uploaded_files:
            if f.name not in findings_by_file:
                continue
-            findings = findings_by_file[f.name]
+            findings = findings_by_file[name]
            with st.container(border=True):
                if not findings:
-                    st.markdown(f"### 📄 {f.name}")
+                    st.markdown(f"### 📄 {name}")
                    st.success(t("findings.none"))
                else:
-                    render_findings_panel(findings, header=f"📄 {f.name}")
+                    render_findings_panel(findings, header=f"📄 {name}")
    st.divider()
    st.caption(t("chrome.footer"))