"""Home-page renderer extracted into its own module.
This used to live inside ``src/gui/app.py`` as a local function. Pulling
it out into a side-effect-free module lets the ``back_to_home_link``
helper (in ``components/_legacy.py``) import the home callable to pass
into ``st.switch_page`` — without re-running ``app.py``'s navigation
setup, which would itself blow up because tool pages have a different
"main script" context that breaks the registry's relative ``pages/…``
paths.
Keep this module imports-light: nothing that runs Streamlit commands
at module top level, nothing that triggers config loads. Just the
``_home_page`` callable.
"""
from __future__ import annotations
import streamlit as st
class _StashedUpload:
    """In-memory stand-in for Streamlit's ``UploadedFile``.

    Wraps a filename plus the raw bytes of an upload that was restored
    from session state, so ``_run_analysis_on_upload`` can consume it
    without modification. Only the three members that the analyzer's
    read path is documented to rely on are provided: ``.name``,
    ``.size`` and ``.getvalue()``.
    """

    # Fixed attribute set: no per-instance ``__dict__`` is created.
    __slots__ = ("name", "size", "_data")

    def __init__(self, name: str, data: bytes) -> None:
        """Store *name* and *data*; ``size`` is derived from ``len(data)``."""
        byte_count = len(data)
        self.name = name
        self._data = data
        self.size = byte_count

    def getvalue(self) -> bytes:
        """Return the stashed payload exactly as it was given."""
        return self._data
def _sync_uploader_to_home_uploads() -> None:
    """``on_change`` callback for the home-page file_uploader.

    Reconciles ``st.session_state["home_uploads"]`` (the persistent
    stash, ``dict[name -> {"bytes", "size"}]``) with the current value
    of the widget stored under the ``home_upload`` key:

    * a widget file whose name is not stashed yet is added;
    * a widget file whose name IS stashed but whose content differs
      replaces the stashed entry, and the cached findings for that
      name are dropped so the old analysis is not shown for new bytes;
    * a stashed name that is absent from the widget is removed together
      with its cached findings.

    Whenever the file mirrored in the singular ``home_uploaded_*`` keys
    is replaced or removed, those keys are cleared as well.

    The module relies on ``on_change`` running only for user-initiated
    value changes, so that a navigation-induced ``[]`` reset of the
    widget never reaches this function.
    """
    from src.audit import log_event

    widget_files = st.session_state.get("home_upload") or []
    home_uploads: dict = st.session_state.setdefault("home_uploads", {})
    findings: dict = st.session_state.setdefault("home_findings_by_file", {})

    # One entry per filename. If the widget holds several files with the
    # same name, the last one listed wins; collapsing them up front keeps
    # the stash from flip-flopping between variants on every callback.
    current = {f.name: f for f in widget_files}

    # Names whose stashed bytes are no longer valid (replaced or removed).
    invalidated: set[str] = set()

    for name, uploaded in current.items():
        stashed = home_uploads.get(name)
        if stashed is None:
            home_uploads[name] = {
                "bytes": uploaded.getvalue(),
                "size": uploaded.size,
            }
            log_event(
                "upload", f"Uploaded {name}", filename=name, bytes=uploaded.size,
            )
            continue

        # Same name already stashed: compare the cheap size first and fall
        # back to a byte comparison only when the sizes match.
        data = uploaded.getvalue()
        if stashed["size"] != uploaded.size or stashed["bytes"] != data:
            # Re-assigning an existing key keeps its position in the dict,
            # so the on-screen file order does not change.
            home_uploads[name] = {"bytes": data, "size": uploaded.size}
            findings.pop(name, None)
            invalidated.add(name)
            log_event(
                "upload",
                f"Re-uploaded {name}",
                filename=name,
                bytes=uploaded.size,
            )

    # NOTE(review): every stashed name missing from the widget is treated
    # as a user removal. If the widget was remounted empty and the user
    # then uploads a new file, previously stashed files are also missing
    # from the widget at this point — confirm that dropping them here is
    # the intended behavior.
    for name in [n for n in home_uploads if n not in current]:
        del home_uploads[name]
        findings.pop(name, None)
        invalidated.add(name)
        log_event("upload", f"Removed {name}", filename=name)

    # Clear the single-file mirror when it points at bytes that are gone
    # or outdated; the home page repopulates it on its next render.
    if st.session_state.get("home_uploaded_name") in invalidated:
        for key in (
            "home_uploaded_name",
            "home_uploaded_size",
            "home_uploaded_bytes",
        ):
            st.session_state.pop(key, None)

    st.session_state["home_uploads"] = home_uploads
    st.session_state["home_findings_by_file"] = findings
def _home_page() -> None:
    """Render the home page — multi-file upload + per-file analysis.

    Uploaded files live in ``st.session_state["home_uploads"]`` (a
    dict keyed by filename, each value ``{"bytes", "size"}``), NOT in
    the widget's transient state. Streamlit's ``st.file_uploader``
    widget gets unmounted when the user navigates away to a tool page,
    and its ``UploadedFile`` objects don't always re-attach on remount —
    so the bytes are captured into our own session-state stash and that
    stash is the source of truth for everything downstream (active-file
    pickup, analysis, findings rendering).

    Session-state keys read or written here:

    * ``home_uploads`` — the upload stash described above.
    * ``home_findings_by_file`` — analysis results keyed by filename.
    * ``home_uploaded_name`` / ``home_uploaded_size`` /
      ``home_uploaded_bytes`` — mirror of the first stashed file.

    Removing a file: per-row "Remove" buttons next to each stashed
    filename. Clearing findings: the "Clear results" button only wipes
    the analysis cache, not the upload stash — the files persist until
    the user explicitly removes them.
    """
    from src.gui.components import (
        hide_streamlit_chrome,
        render_findings_panel,
        render_sticky_footer,
    )
    from src.gui.components._legacy import _run_analysis_on_upload
    from src.i18n import t

    st.set_page_config(
        page_title=t("home.page_title"),
        page_icon="🧹",
        layout="wide",
    )
    hide_streamlit_chrome()
    render_sticky_footer()

    st.title(t("home.title"))
    st.caption(t("home.caption"))
    st.divider()

    st.markdown(f"### {t('upload.heading')}")
    st.caption(t("upload.intro_multi"))

    # Source of truth for uploaded files. dict[name -> {"bytes", "size"}].
    home_uploads: dict = st.session_state.setdefault("home_uploads", {})

    # File uploader — syncs into home_uploads via on_change. We deliberately
    # do NOT reconcile widget state into home_uploads at render time:
    # navigation can remount the widget with value ``[]``, and at render
    # time that reset cannot be told apart from the user removing files.
    #
    # ``on_change`` is expected to fire ONLY on user-initiated value
    # changes (uploads and the widget's built-in "✕" remove), not on the
    # remount-induced reset. That lets the callback act as ground truth
    # for both adds AND removes — fixing the previous bug where the
    # widget's "✕" appeared to do nothing because the file persisted in
    # home_uploads and immediately re-rendered in the list below.
    st.file_uploader(
        t("upload.uploader_label_multi"),
        type=["csv", "tsv", "xlsx", "xls"],
        accept_multiple_files=True,
        key="home_upload",
        help=t("upload.uploader_help"),
        on_change=_sync_uploader_to_home_uploads,
    )

    # Persistent file list with per-file remove buttons. The list is
    # rendered from home_uploads rather than from the widget, so stashed
    # files stay visible and removable even when the widget itself has
    # been remounted empty.
    #
    # Two-phase click capture pattern (avoids the "hit-or-miss" click
    # losses we had previously):
    #
    #   1. ``st.button(key=stable_hash)`` returns True on the rerun where
    #      it was clicked. The key is built from a sha1 digest of the
    #      filename so it is identifier-safe regardless of spaces / dots /
    #      unicode in the file name.
    #   2. Inside a single pass we collect WHICH file to remove (if any),
    #      then mutate state ONCE after the loop and rerun, so state never
    #      changes while the remaining buttons are still being rendered.
    if home_uploads:
        import hashlib

        st.markdown("**Imported files**")
        to_remove: str | None = None
        for name in list(home_uploads.keys()):
            # sha1 is used purely as a stable key derivation, not for
            # security; 10 hex chars keep the widget key short.
            digest = hashlib.sha1(
                name.encode("utf-8"), usedforsecurity=False,
            ).hexdigest()[:10]
            col_file, col_remove = st.columns([8, 1])
            # The filename is user-controlled, so it is rendered as plain
            # markdown: raw HTML rendering is NOT enabled for this string.
            col_file.markdown(
                f"📄 `{name}` ({home_uploads[name]['size']:,} bytes)",
            )
            if col_remove.button(
                "Remove",
                key=f"_home_remove_{digest}",
                help=f"Remove {name}",
                type="secondary",
                width="stretch",
            ):
                to_remove = name

        if to_remove is not None:
            from src.audit import log_event

            log_event(
                "upload",
                f"Removed {to_remove}",
                filename=to_remove,
            )
            del home_uploads[to_remove]
            # Drop any findings/results tied to the removed file.
            findings_by_file_drop = st.session_state.get(
                "home_findings_by_file", {}
            )
            findings_by_file_drop.pop(to_remove, None)
            st.session_state["home_uploads"] = home_uploads
            st.session_state["home_findings_by_file"] = findings_by_file_drop
            # If we just removed the active upload, also clear the
            # singular ``home_uploaded_*`` keys so tool pages don't
            # pick up stale bytes; the next render will repopulate
            # them from whatever file is now first.
            if st.session_state.get("home_uploaded_name") == to_remove:
                for key in (
                    "home_uploaded_name",
                    "home_uploaded_size",
                    "home_uploaded_bytes",
                ):
                    st.session_state.pop(key, None)
            st.rerun()

    if not home_uploads:
        st.info(t("upload.empty_state"))
        return

    # Expose the first stashed file via the singular ``home_uploaded_*``
    # session keys so tool pages still find an active upload (through
    # ``pickup_or_upload``, per the original note here).
    first_name = next(iter(home_uploads))
    first_meta = home_uploads[first_name]
    # Name and size alone cannot detect a same-name, same-size replacement,
    # so the bytes object is compared by identity as well (a cheap check).
    if (
        st.session_state.get("home_uploaded_name") != first_name
        or st.session_state.get("home_uploaded_size") != first_meta["size"]
        or st.session_state.get("home_uploaded_bytes")
        is not first_meta["bytes"]
    ):
        st.session_state["home_uploaded_name"] = first_name
        st.session_state["home_uploaded_size"] = first_meta["size"]
        st.session_state["home_uploaded_bytes"] = first_meta["bytes"]

    # Findings cache — drop entries whose underlying file is no longer
    # in the stash (e.g. the user just removed it).
    findings_by_file: dict = st.session_state.setdefault(
        "home_findings_by_file", {}
    )
    findings_by_file = {
        name: result
        for name, result in findings_by_file.items()
        if name in home_uploads
    }
    st.session_state["home_findings_by_file"] = findings_by_file

    # Files that are stashed but have no cached result yet.
    pending = [name for name in home_uploads if name not in findings_by_file]

    col_run, col_clear, _ = st.columns([1, 1, 4])
    with col_run:
        run_clicked = st.button(
            t("upload.run_button"),
            type="primary",
            key="home_run_analysis",
            disabled=not pending,
            width="stretch",
        )
    with col_clear:
        clear_clicked = st.button(
            t("upload.clear_results"),
            key="home_clear_results",
            disabled=not findings_by_file,
            width="stretch",
        )

    if clear_clicked:
        # Only the analysis cache is wiped; the upload stash is kept.
        st.session_state["home_findings_by_file"] = {}
        st.rerun()

    if run_clicked:
        from src.audit import log_event

        log_event(
            "analyze",
            f"Run analysis clicked on {len(pending)} file(s)",
            files=list(pending),
        )
        progress = st.progress(0.0, text=t("upload.scanning"))
        for i, name in enumerate(pending, start=1):
            # Wrap the stashed bytes in the UploadedFile look-alike the
            # analyzer expects.
            stashed = _StashedUpload(name, home_uploads[name]["bytes"])
            findings_by_file[name] = _run_analysis_on_upload(stashed)
            progress.progress(i / len(pending), text=name)
        st.session_state["home_findings_by_file"] = findings_by_file
        progress.empty()
        st.rerun()

    if findings_by_file:
        st.divider()
        # Preserve the upload-stash order so the user sees results in
        # the same order they appear in the file list above.
        for name in home_uploads:
            if name not in findings_by_file:
                continue
            findings = findings_by_file[name]
            with st.container(border=True):
                if not findings:
                    # Analysed, but nothing was reported for this file.
                    st.markdown(f"### 📄 {name}")
                    st.success(t("findings.none"))
                else:
                    render_findings_panel(
                        findings,
                        header=f"📄 {name}",
                        key_namespace=name,
                    )