# datatools-dev/src/gui/_home.py

"""Home-page renderer extracted into its own module.

This used to live inside ``src/gui/app.py`` as a local function. Pulling
it out into a side-effect-free module lets the ``back_to_home_link``
helper (in ``components/_legacy.py``) import the home callable to pass
into ``st.switch_page`` — without re-running ``app.py``'s navigation
setup, which would itself blow up because tool pages have a different
"main script" context that breaks the registry's relative ``pages/…``
paths.

Keep this module imports-light: nothing that runs Streamlit commands
at module top level, nothing that triggers config loads. Just the
``_home_page`` callable.
"""

from __future__ import annotations

import streamlit as st


class _StashedUpload:
    """Duck-types Streamlit's ``UploadedFile`` so ``_run_analysis_on_upload``
    accepts entries restored from session-state without changes. Exposes
    ``.name``, ``.size``, and ``.getvalue()`` — the contract used by the
    analyzer's read path.
    """

    __slots__ = ("name", "size", "_data")

    def __init__(self, name: str, data: bytes) -> None:
        self.name = name
        self.size = len(data)
        self._data = data

    def getvalue(self) -> bytes:
        return self._data


# Session-state key holding the tokens (see ``_upload_token``) of files
# that are still held by the uploader widget but that the user removed
# through the per-row "Remove" button.
_DISMISSED_UPLOADS_KEY = "home_upload_dismissed"


def _upload_token(upload) -> str:
    """Return a string identifying one specific file held by the uploader.

    Prefers the widget-assigned ``file_id`` attribute when the object
    has one, so re-uploading an identical file yields a *new* token.
    Falls back to name + size when ``file_id`` is missing or empty.
    """
    file_id = getattr(upload, "file_id", None)
    if file_id:
        return f"id:{file_id}"
    return f"name:{upload.name}:{upload.size}"


def _home_page() -> None:
    """Render the home page — multi-file upload + per-file analysis.

    Uploaded files live in ``st.session_state["home_uploads"]`` (a
    dict keyed by filename), NOT in the widget's transient state.
    Streamlit's ``st.file_uploader`` widget gets unmounted when the
    user navigates away to a tool page, and its ``UploadedFile``
    objects don't always re-attach on remount — so we capture the
    bytes into our own session-state stash on first sight and treat
    that stash as the source of truth for everything downstream
    (active-file pickup, analysis, findings rendering).

    Removing a file: per-row "Remove" buttons next to each uploaded
    filename. Because the uploader widget keeps returning a file until
    the user clears it from the widget itself, removed files are
    remembered in ``st.session_state["home_upload_dismissed"]`` so the
    merge step does not immediately re-stash them on the next rerun.

    Clearing findings: the "Clear results" button only wipes the
    analysis cache, not the upload stash — the files persist until the
    user explicitly removes them.
    """
    from src.gui.components import hide_streamlit_chrome, render_findings_panel
    from src.gui.components._legacy import _run_analysis_on_upload
    from src.i18n import t

    st.set_page_config(
        page_title=t("home.page_title"),
        page_icon="🧹",
        layout="wide",
    )
    hide_streamlit_chrome()

    st.title(t("home.title"))
    st.caption(t("home.caption"))
    st.divider()

    st.markdown(f"### {t('upload.heading')}")
    st.caption(t("upload.intro_multi"))

    # Source of truth for uploaded files. dict[name -> {"bytes", "size"}].
    home_uploads: dict = st.session_state.setdefault("home_uploads", {})
    # Tokens of widget-held files the user has removed via our own button.
    dismissed: set = st.session_state.setdefault(_DISMISSED_UPLOADS_KEY, set())

    # File uploader — for ADDING new files only. On every render we
    # merge widget-returned files INTO home_uploads but never remove
    # via the widget. (Widget state can return ``[]`` after navigation,
    # which we deliberately don't treat as "user cleared their files".)
    new_files = st.file_uploader(
        t("upload.uploader_label_multi"),
        type=["csv", "tsv", "xlsx", "xls"],
        accept_multiple_files=True,
        key="home_upload",
        help=t("upload.uploader_help"),
    )
    # Pair every widget file with its identity token once; both the
    # merge below and the removal handler further down need it.
    widget_files = [(_upload_token(f), f) for f in (new_files or [])]
    if widget_files:
        from src.audit import log_event
        # A dismissal only matters while the widget still holds that
        # file. Once it is gone from the widget, forget it so a later
        # re-upload is accepted again. We skip this when the widget
        # returns nothing, since that can just mean "remounted empty".
        dismissed.intersection_update(token for token, _ in widget_files)
        changed = False
        for token, f in widget_files:
            # Skip files the user already removed (the widget keeps
            # returning them) and names that are already stashed.
            if token in dismissed or f.name in home_uploads:
                continue
            home_uploads[f.name] = {
                "bytes": f.getvalue(),
                "size": f.size,
            }
            changed = True
            log_event(
                "upload",
                f"Uploaded {f.name}",
                filename=f.name,
                bytes=f.size,
            )
        if changed:
            st.session_state["home_uploads"] = home_uploads

    # Persistent file list with per-file remove buttons. We render this
    # ourselves rather than trusting Streamlit's widget chrome because
    # the widget's "✕" only mutates widget-state, leaving home_uploads
    # out of sync.
    #
    # Two-phase click capture pattern:
    #
    # 1. ``st.button(key=stable_hash)`` returns True on the rerun where
    #    it was clicked. We use a sha1 hash of the filename as the key
    #    so it's identifier-safe regardless of spaces / dots / unicode
    #    in the file name.
    # 2. Inside a single pass we collect WHICH file to remove (if any),
    #    then mutate state ONCE after the loop and rerun, so state
    #    changes never interleave with rendering the remaining buttons.
    if home_uploads:
        import hashlib
        st.markdown("**Uploaded files**")
        to_remove: str | None = None
        for name in list(home_uploads.keys()):
            digest = hashlib.sha1(
                name.encode("utf-8"), usedforsecurity=False,
            ).hexdigest()[:10]
            col_file, col_remove = st.columns([8, 1])
            col_file.markdown(
                f"📄 `{name}` &nbsp; "
                f"<span style='opacity:0.6'>"
                f"({home_uploads[name]['size']:,} bytes)</span>",
                unsafe_allow_html=True,
            )
            if col_remove.button(
                "Remove",
                key=f"_home_remove_{digest}",
                help=f"Remove {name}",
                type="secondary",
                use_container_width=True,
            ):
                to_remove = name

        if to_remove is not None:
            from src.audit import log_event
            log_event(
                "upload",
                f"Removed {to_remove}",
                filename=to_remove,
            )
            del home_uploads[to_remove]
            # The uploader widget will keep returning this file on the
            # next rerun; mark every widget entry with this name as
            # dismissed so the merge step does not re-stash it.
            dismissed.update(
                token for token, f in widget_files if f.name == to_remove
            )
            st.session_state[_DISMISSED_UPLOADS_KEY] = dismissed
            # Drop any findings/results tied to the removed file.
            findings_by_file_drop = st.session_state.get(
                "home_findings_by_file", {}
            )
            findings_by_file_drop.pop(to_remove, None)
            st.session_state["home_uploads"] = home_uploads
            st.session_state["home_findings_by_file"] = findings_by_file_drop
            # If we just removed the active upload, also clear the
            # singular ``home_uploaded_*`` keys so tool pages don't
            # pick up stale bytes; the next render will repopulate
            # them from whatever file is now first.
            if st.session_state.get("home_uploaded_name") == to_remove:
                st.session_state.pop("home_uploaded_name", None)
                st.session_state.pop("home_uploaded_size", None)
                st.session_state.pop("home_uploaded_bytes", None)
            st.rerun()

    if not home_uploads:
        st.info(t("upload.empty_state"))
        return

    # Expose the first uploaded file via the singular ``home_uploaded_*``
    # session keys so tool pages reached via "Open <Tool>" still find an
    # active upload through ``pickup_or_upload``.
    first_name = next(iter(home_uploads))
    first_meta = home_uploads[first_name]
    if (
        st.session_state.get("home_uploaded_name") != first_name
        or st.session_state.get("home_uploaded_size") != first_meta["size"]
    ):
        st.session_state["home_uploaded_name"] = first_name
        st.session_state["home_uploaded_size"] = first_meta["size"]
        st.session_state["home_uploaded_bytes"] = first_meta["bytes"]

    # Findings cache — drop entries whose underlying file is no longer
    # in the stash (e.g. user just clicked "Remove").
    findings_by_file: dict = st.session_state.setdefault(
        "home_findings_by_file", {}
    )
    findings_by_file = {
        name: result for name, result in findings_by_file.items()
        if name in home_uploads
    }
    st.session_state["home_findings_by_file"] = findings_by_file

    # Files that have been stashed but not analysed yet.
    pending = [name for name in home_uploads if name not in findings_by_file]

    col_run, col_clear, _ = st.columns([1, 1, 4])
    with col_run:
        run_clicked = st.button(
            t("upload.run_button"),
            type="primary",
            key="home_run_analysis",
            disabled=not pending,
            use_container_width=True,
        )
    with col_clear:
        clear_clicked = st.button(
            t("upload.clear_results"),
            key="home_clear_results",
            disabled=not findings_by_file,
            use_container_width=True,
        )

    if clear_clicked:
        st.session_state["home_findings_by_file"] = {}
        st.rerun()

    if run_clicked:
        from src.audit import log_event
        log_event(
            "analyze",
            f"Run analysis clicked on {len(pending)} file(s)",
            files=list(pending),
        )
        progress = st.progress(0.0, text=t("upload.scanning"))
        for i, name in enumerate(pending, start=1):
            # Wrap the stashed bytes so the analyzer sees the same
            # ``.name`` / ``.size`` / ``.getvalue()`` surface as a
            # freshly uploaded file.
            stashed = _StashedUpload(name, home_uploads[name]["bytes"])
            findings_by_file[name] = _run_analysis_on_upload(stashed)
            progress.progress(i / len(pending), text=name)
        st.session_state["home_findings_by_file"] = findings_by_file
        progress.empty()
        st.rerun()

    if findings_by_file:
        st.divider()
        # Preserve the upload-stash order so the user sees results in
        # the same order they appear in the file list above.
        for name in home_uploads:
            if name not in findings_by_file:
                continue
            findings = findings_by_file[name]
            with st.container(border=True):
                if not findings:
                    st.markdown(f"### 📄 {name}")
                    st.success(t("findings.none"))
                else:
                    render_findings_panel(findings, header=f"📄 {name}")

    st.divider()
    st.caption(t("chrome.footer"))