The prior attempt used data-testid=stSidebarNavSectionHeader, which is not what Streamlit 1.57 emits — the correct testid is stNavSectionHeader (verified against the bundled JS in streamlit/static/static/js/). The section header is also a <div> with onClick, not a <button>, and the React component keeps the expanded state in a prop without surfacing aria-expanded on the DOM. With the wrong testid and no state attribute on the DOM, pure CSS could neither locate the header nor switch the glyph by state, which is why the chevron was unchanged in the rendered UI.

Switch strategies:

- CSS now targets the correct stNavSectionHeader / stIconMaterial selectors, drops the Material Symbols font from the icon span, and restyles it so a plain-text character reads as proper typography (size, weight, color, hover).
- Add _SWAP_NAV_SECTION_INDICATOR_JS — a small inline script that rewrites the icon's text node from "expand_more"/"expand_less" to "+"/"−" (U+2212), throttled via requestAnimationFrame and re-applied on every DOM mutation by a MutationObserver. Bundled into the same iframe injection as the existing brand/upload/findings scripts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
3680 lines
136 KiB
Python
3680 lines
136 KiB
Python
"""Reusable Streamlit widgets for the DataTools GUI."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import io
|
||
import os
|
||
import sys
|
||
import threading
|
||
import time
|
||
from typing import Optional
|
||
|
||
import pandas as pd
|
||
import streamlit as st
|
||
|
||
from src.i18n import t as _t
|
||
from src.core.dedup import (
|
||
Algorithm,
|
||
ColumnMatchStrategy,
|
||
DeduplicationResult,
|
||
MatchResult,
|
||
MatchStrategy,
|
||
SurvivorRule,
|
||
)
|
||
from src.core.config import (
|
||
ColumnStrategyConfig,
|
||
DeduplicationConfig,
|
||
StrategyConfig,
|
||
)
|
||
from src.core.normalizers import NormalizerType
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# App chrome — hide Streamlit default UI for app-like feel
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Page-chrome stylesheet, kept as one self-contained ``<style>`` element.
# It (1) neutralises Streamlit's default header / toolbar icons / footer
# while keeping the sidebar expand control reachable, (2) hides selected
# sidebar nav links by URL suffix, (3) reclaims the block-container padding
# and scales the app with ``zoom``, and (4) applies a compact-spacing layer
# to common widgets.
#
# The string must contain nothing but valid CSS between the ``<style>``
# tags: any stray token in front of a selector or declaration makes the
# browser drop that rule, which silently disables the chrome hiding.
_HIDE_CHROME_CSS = """
<style>
/* Make the Streamlit header transparent and out of the way, but DO NOT
   `display: none` it — the sidebar's collapsed-state expand button is
   anchored in the header region, and removing the header makes a
   collapsed sidebar impossible to reopen. */
header[data-testid="stHeader"] {
    background: transparent !important;
    height: 0 !important;
}
/* Hide the noisy Streamlit-shipped icon buttons in the header band
   (hamburger menu, deploy button, status / running indicator). We
   deliberately do NOT hide ``stToolbar`` or ``stToolbarActions`` as
   containers — those wrap ``stExpandSidebarButton`` which is the
   ONLY path back to an expanded sidebar after the user collapses it.
   ``toolbarMode = "viewer"`` already suppresses most of these icons;
   the CSS belt-and-suspenders the visibility for newer Streamlit
   releases that keep emitting them with inline styles. */
#MainMenu,
[data-testid="stMainMenu"],
[data-testid="stAppDeployButton"],
[data-testid="stStatusWidget"],
[data-testid="stDecoration"] {
    display: none !important;
}
/* Keep the sidebar expand control visible and clickable above page content. */
[data-testid="stSidebarCollapsedControl"] {
    display: flex !important;
    visibility: visible !important;
    z-index: 999 !important;
}
/* Hide footer */
footer {
    display: none !important;
}
/* Hide the Activate, Logs and Close entries from the sidebar nav.
   Hiding a link with CSS does not unregister the page, so the
   routes (/activate, /logs, /close) stay URL-routable. Activate and
   Close are reached from the sticky-footer Help popover instead of
   the sidebar; they are grouped under the unlabeled section
   alongside Home in ``app.py`` so hiding them here leaves no orphan
   section header behind. The /logs selectors follow the same
   pattern. We target the LinkContainer (Streamlit's per-entry
   wrapper) so the list item collapses, not just the inner anchor —
   otherwise the container's spacing would still occupy a row. */
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"]:has(a[href$="/activate"]),
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"]:has(a[href$="/activate/"]),
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"]:has(a[href$="/logs"]),
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"]:has(a[href$="/logs/"]),
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"]:has(a[href$="/close"]),
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"]:has(a[href$="/close/"]) {
    display: none !important;
}
/* Defensive fallback for browsers without :has() support — at
   least hide the anchor itself so the entry isn't clickable. */
[data-testid="stSidebarNav"] a[href$="/activate"],
[data-testid="stSidebarNav"] a[href$="/activate/"],
[data-testid="stSidebarNav"] a[href$="/logs"],
[data-testid="stSidebarNav"] a[href$="/logs/"],
[data-testid="stSidebarNav"] a[href$="/close"],
[data-testid="stSidebarNav"] a[href$="/close/"] {
    display: none !important;
}
/* Reclaim top padding lost from hidden header. Streamlit's default
   block-container padding-top is ~6rem (room for the header it ships).
   We hide the header so reclaim that space — the page title should sit
   close to the top edge. Slim the bottom too — Streamlit's default
   leaves several rems below the last widget.
   The testid was renamed from ``stAppViewBlockContainer`` (legacy) to
   ``stMainBlockContainer`` in the current Streamlit release; match
   both so the rule keeps working across versions. */
.stAppViewBlockContainer,
[data-testid="stAppViewBlockContainer"],
.stMainBlockContainer,
[data-testid="stMainBlockContainer"] {
    padding-top: 0.5rem !important;
    /* The fixed Help/Close footer sits over the bottom of the
       viewport (~36px tall) — the block container needs enough
       bottom padding that the LAST line of content clears it cleanly
       when scrolled all the way down. 4rem turned out to be just
       enough to graze the footer's top; 7rem buys clear breathing
       room on every page including the long tool pages. */
    padding-bottom: 7rem !important;
}
/* Scale content to fit app window */
.stApp {
    zoom: 0.85;
}

/* ---------- Compact-spacing layer ---------- */
/* Streamlit ships generous vertical rhythm (~1rem gap between every
   block, 1.5rem+ above each heading, 1rem on dividers). For a desktop
   data app that's a lot of empty space. Tighten the gaps without
   making the layout look cramped. */

[data-testid="stVerticalBlock"] { gap: 0.5rem !important; }
[data-testid="stHorizontalBlock"] { gap: 0.5rem !important; }

/* Headings — tighter top space + a hair less below. */
.stApp h1 { margin-top: 0.25rem !important; margin-bottom: 0.5rem !important; }
.stApp h2 { margin-top: 0.5rem !important; margin-bottom: 0.4rem !important; }
.stApp h3 { margin-top: 0.4rem !important; margin-bottom: 0.3rem !important; }
.stApp h4 { margin-top: 0.3rem !important; margin-bottom: 0.25rem !important; }

/* st.divider() — Streamlit's default hr has 1rem above and below. */
[data-testid="stMarkdownContainer"] hr,
hr { margin-top: 0.4rem !important; margin-bottom: 0.4rem !important; }

/* Markdown paragraphs + captions — slim trailing space. */
[data-testid="stMarkdownContainer"] p { margin-bottom: 0.25rem; }
[data-testid="stCaption"],
[data-testid="stCaptionContainer"] { margin-bottom: 0.25rem; }

/* Expander header padding — Streamlit's default is roomy. */
[data-testid="stExpander"] details > summary {
    padding-top: 0.35rem;
    padding-bottom: 0.35rem;
}

/* Buttons / file-uploader / metric tiles — tighter spacing. */
[data-testid="stButton"],
[data-testid="stDownloadButton"] { margin-top: 0; margin-bottom: 0; }
[data-testid="stFileUploader"] { margin-bottom: 0.25rem; }
[data-testid="stMetric"] {
    padding-top: 0.25rem;
    padding-bottom: 0.25rem;
}
</style>
"""
|
||
|
||
|
||
# Component-level styling that rides on top of the canonical typography
|
||
# + color tokens declared in ``src/gui/theme.py`` (``apply_theme``).
|
||
# This block does NOT redeclare the type scale or the ``--font-sans`` /
|
||
# ``--ink`` etc. variables — that is theme.py's job per
|
||
# ``geist_spec.md`` §9 ("Out of scope: button/input/widget styling.
|
||
# Type only."). Everything below extends the spec with widget chrome
|
||
# (buttons, sidebar, file uploader, expanders, alerts) that the mockup
|
||
# wants but the spec leaves unowned.
|
||
#
|
||
# Reads from theme.py's :root: ``--font-sans``, ``--font-mono``,
|
||
# ``--ink``, ``--ink-secondary``, ``--ink-tertiary``, ``--bg``,
|
||
# ``--surface``, ``--surface-hover``, ``--border``, ``--border-strong``,
|
||
# ``--accent``, ``--accent-hover``, ``--accent-fill``, the severity
|
||
# extensions ``--warn(-fill)`` / ``--info(-fill)`` / ``--success(-fill)``
|
||
# / ``--danger(-fill)``, and the radius scale ``--r-sm/md/lg``.
|
||
_DESIGN_TOKENS_CSS = """
|
||
<style>
|
||
/* ---------- Page surface ---------- */
|
||
body, .stApp {
|
||
background: var(--bg) !important;
|
||
font-family: var(--font-sans) !important;
|
||
color: var(--ink) !important;
|
||
-webkit-font-smoothing: antialiased;
|
||
}
|
||
|
||
/* ---------- Sidebar — cream paper, soft right edge ---------- */
|
||
[data-testid="stSidebar"] {
|
||
background: #f5f4ef !important;
|
||
border-right: 1px solid var(--border) !important;
|
||
}
|
||
[data-testid="stSidebar"] > div:first-child {
|
||
background: #f5f4ef !important;
|
||
}
|
||
|
||
/* Brand block at the top of the sidebar (mockup §brand) — a 28px
|
||
ink-filled rounded square with the wordmark "D" + "DataTools"
|
||
text. Injected into ``[data-testid="stSidebarHeader"]`` by the JS
|
||
below; ``stLogoSpacer`` is hidden so the brand block takes its
|
||
place flush against the left edge of the sidebar header. */
|
||
/* The brand sits next to Streamlit's sidebar collapse button inside
|
||
``stSidebarHeader``. ``flex: 1`` would steal all the horizontal
|
||
space and squash the collapse chevron out of view — once collapsed
|
||
the user would have no way to reopen the sidebar. Keep the brand
|
||
at its natural width and let the header's flex layout leave room
|
||
for the chevron on the right. */
|
||
.dt-brand {
|
||
display: flex !important;
|
||
align-items: center;
|
||
gap: 10px;
|
||
padding: 0 0 0 4px;
|
||
margin: 0 auto 0 0;
|
||
}
|
||
/* Belt-and-suspenders: keep the in-sidebar collapse button + the
|
||
out-of-sidebar collapsed control reachable. The latter is what
|
||
appears on the page edge once the sidebar slides shut. */
|
||
[data-testid="stSidebarCollapseButton"],
|
||
[data-testid="stSidebarCollapseButton"] button {
|
||
display: inline-flex !important;
|
||
visibility: visible !important;
|
||
opacity: 1 !important;
|
||
}
|
||
[data-testid="stSidebarCollapsedControl"] {
|
||
display: flex !important;
|
||
visibility: visible !important;
|
||
z-index: 9999 !important;
|
||
}
|
||
/* "Letter D (sans)" wordmark per Business/DataTools/app_icons.html
|
||
§03: 28px ink-filled rounded square, cream "D" in Geist 700 with
|
||
-0.04em tracking. Same shape used for the favicon SVG so the chip
|
||
in the sidebar reads as a scaled-up copy of the OS app icon. */
|
||
.dt-brand-mark {
|
||
width: 28px;
|
||
height: 28px;
|
||
border-radius: 7px;
|
||
background: var(--ink);
|
||
display: inline-flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
color: var(--accent-fill);
|
||
font-family: var(--font-sans);
|
||
font-weight: 700;
|
||
font-size: 16px;
|
||
letter-spacing: -0.04em;
|
||
line-height: 1;
|
||
flex-shrink: 0;
|
||
}
|
||
.dt-brand-name {
|
||
font-family: var(--font-sans);
|
||
font-weight: 600;
|
||
font-size: 15px;
|
||
letter-spacing: -0.02em;
|
||
color: var(--ink);
|
||
line-height: 1.05;
|
||
display: flex;
|
||
flex-direction: column;
|
||
gap: 1px;
|
||
}
|
||
.dt-brand-eyebrow {
|
||
font-size: 9.5px;
|
||
font-weight: 600;
|
||
letter-spacing: 0.14em;
|
||
text-transform: uppercase;
|
||
color: var(--ink-tertiary);
|
||
line-height: 1;
|
||
}
|
||
/* The stock Streamlit logo placeholder takes 100x32 of space; hide
|
||
it so the injected brand has room to breathe. */
|
||
[data-testid="stLogoSpacer"]:not(:has(.dt-brand)) {
|
||
display: none !important;
|
||
}
|
||
|
||
/* Section labels in the page-nav: tiny uppercase tracking — the
|
||
"Eyebrow" row from spec §4. Streamlit renders these as <span> nodes
|
||
with class ``st-emotion-cache-…`` inside ``stSidebarNav`` — class
|
||
hashes are unstable across versions, so we lean on the structural
|
||
position (the bare span / h2 directly inside the nav list) rather
|
||
than emotion classes. */
|
||
[data-testid="stSidebarNav"] h2,
|
||
[data-testid="stSidebarNav"] h3,
|
||
[data-testid="stSidebarNavSeparator"] span,
|
||
[data-testid="stSidebarNavSectionHeader"] {
|
||
font-family: var(--font-sans) !important;
|
||
font-size: 11.5px !important;
|
||
text-transform: uppercase !important;
|
||
letter-spacing: 0.08em !important;
|
||
color: var(--ink-tertiary) !important;
|
||
font-weight: 500 !important;
|
||
padding-top: 14px !important;
|
||
padding-bottom: 4px !important;
|
||
margin: 0 !important;
|
||
}
|
||
|
||
/* Nav items match the sticky-footer Help/Close button style: ink-
|
||
secondary text, transparent surface, soft hover tint, no border or
|
||
active-state pill. Sizes line up with ``.datatools-footer-btn``
|
||
(13px / 500 / 1.3 line-height, 5px×10px padding, 8px icon gap) so
|
||
the sidebar and footer feel like the same family. */
|
||
[data-testid="stSidebarNav"] a[data-testid="stSidebarNavLink"],
|
||
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"] a {
|
||
color: var(--ink-secondary) !important;
|
||
font-size: 13px !important;
|
||
font-weight: 500 !important;
|
||
line-height: 1.3 !important;
|
||
padding: 5px 10px !important;
|
||
gap: 8px !important;
|
||
border: none !important;
|
||
border-radius: var(--r-sm) !important;
|
||
transition: background 0.12s ease, color 0.12s ease;
|
||
}
|
||
[data-testid="stSidebarNav"] li,
|
||
[data-testid="stSidebarNavItems"] > li {
|
||
margin-bottom: 1px !important;
|
||
}
|
||
[data-testid="stSidebarNavSectionHeader"] {
|
||
padding-top: 10px !important;
|
||
padding-bottom: 2px !important;
|
||
}
|
||
[data-testid="stSidebarNav"] a[data-testid="stSidebarNavLink"]:hover,
|
||
[data-testid="stSidebarNav"] [data-testid="stSidebarNavLinkContainer"] a:hover {
|
||
background: rgba(0,0,0,0.04) !important;
|
||
color: var(--ink) !important;
|
||
}
|
||
/* Active item — soft hover-tint background + ink text + heavier
|
||
weight. No white pill, no shadow. Mirrors the footer buttons,
|
||
which carry no special "active" treatment. */
|
||
[data-testid="stSidebarNav"] a[aria-current="page"] {
|
||
background: rgba(0,0,0,0.04) !important;
|
||
color: var(--ink) !important;
|
||
font-weight: 600 !important;
|
||
box-shadow: none !important;
|
||
}
|
||
|
||
/* ---------- Section header expand indicator ----------
|
||
Streamlit's nav section header uses a Material Symbols ligature
|
||
icon (``expand_more`` / ``expand_less``) and does NOT expose
|
||
``aria-expanded`` on the header — the React component keeps that
|
||
state internally. Pure CSS therefore can't switch the glyph based
|
||
on state, so the visible swap is performed by
|
||
``_SWAP_NAV_SECTION_INDICATOR_JS`` (rewrites the icon's text node
|
||
to ``+`` / ``−`` and re-applies on mutation). This block only
|
||
handles the static styling so the rewritten glyph reads as a
|
||
normal typographic plus/minus instead of a Material font ligature
|
||
that would still try to resolve ``+`` as an icon name. */
|
||
[data-testid="stNavSectionHeader"] {
|
||
position: relative !important;
|
||
}
|
||
[data-testid="stNavSectionHeader"] [data-testid="stIconMaterial"] {
|
||
/* Drop the Material Symbols font so the JS-swapped ``+`` / ``−``
|
||
characters render as plain typography. ``font-feature-settings``
|
||
is reset so no ligature kicks in. */
|
||
font-family: var(--font-sans) !important;
|
||
font-feature-settings: normal !important;
|
||
-webkit-font-feature-settings: normal !important;
|
||
-moz-font-feature-settings: normal !important;
|
||
font-weight: 500 !important;
|
||
font-size: 16px !important;
|
||
line-height: 1 !important;
|
||
color: var(--ink-tertiary) !important;
|
||
width: auto !important;
|
||
height: auto !important;
|
||
transition: color 0.15s ease !important;
|
||
}
|
||
[data-testid="stNavSectionHeader"]:hover [data-testid="stIconMaterial"] {
|
||
color: var(--ink) !important;
|
||
}
|
||
|
||
/* Inline + block code → mono with subtle accent chip. theme.py owns
|
||
the family + size; this layer adds the warm-fill background. */
|
||
[data-testid="stMarkdownContainer"] code {
|
||
background: var(--accent-fill) !important;
|
||
color: var(--accent-hover) !important;
|
||
padding: 1px 5px !important;
|
||
border-radius: 4px !important;
|
||
}
|
||
[data-testid="stCode"] pre {
|
||
padding: 12px 14px !important;
|
||
background: var(--surface-hover) !important;
|
||
color: var(--ink) !important;
|
||
border: 1px solid var(--border) !important;
|
||
border-radius: var(--r-md) !important;
|
||
}
|
||
|
||
/* ---------- Buttons — ink primary, outlined secondary ---------- */
|
||
[data-testid="stButton"] button,
|
||
[data-testid="stDownloadButton"] button {
|
||
border-radius: var(--r-md) !important;
|
||
font-family: var(--font-sans) !important;
|
||
font-weight: 500 !important;
|
||
font-size: 13.5px !important;
|
||
letter-spacing: -0.005em !important;
|
||
line-height: 1 !important;
|
||
padding: 9px 16px !important;
|
||
transition: background 0.12s ease, border-color 0.12s ease, color 0.12s ease;
|
||
}
|
||
/* Primary = dark ink (mockup ``.btn-primary``). Color is set on the
|
||
button AND every descendant text node — the inner
|
||
``stMarkdownContainer`` and its ``<p>`` would otherwise pick up
|
||
``color: var(--ink)`` from theme.py's base rule and turn the label
|
||
nearly invisible against the dark background. */
|
||
[data-testid="stButton"] button[kind="primary"],
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-primary"],
|
||
[data-testid="stDownloadButton"] button[kind="primary"] {
|
||
background: var(--ink) !important;
|
||
color: var(--bg) !important;
|
||
border: 1px solid var(--ink) !important;
|
||
}
|
||
[data-testid="stButton"] button[kind="primary"] *,
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-primary"] *,
|
||
[data-testid="stDownloadButton"] button[kind="primary"] * {
|
||
color: var(--bg) !important;
|
||
}
|
||
[data-testid="stButton"] button[kind="primary"]:hover,
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-primary"]:hover,
|
||
[data-testid="stDownloadButton"] button[kind="primary"]:hover {
|
||
background: #292524 !important;
|
||
border-color: #292524 !important;
|
||
color: var(--bg) !important;
|
||
}
|
||
/* Secondary = paper surface + warm border. */
|
||
[data-testid="stButton"] button[kind="secondary"],
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-secondary"],
|
||
[data-testid="stDownloadButton"] button[kind="secondary"] {
|
||
background: var(--surface) !important;
|
||
color: var(--ink) !important;
|
||
border: 1px solid var(--border-strong) !important;
|
||
}
|
||
[data-testid="stButton"] button[kind="secondary"]:hover,
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-secondary"]:hover {
|
||
background: var(--surface-hover) !important;
|
||
border-color: var(--ink-tertiary) !important;
|
||
}
|
||
/* Tertiary = icon-button style — transparent surface, tertiary ink,
|
||
danger tint on hover. Used for the X "remove file" affordance and
|
||
other quiet inline actions. */
|
||
[data-testid="stButton"] button[kind="tertiary"],
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-tertiary"] {
|
||
background: transparent !important;
|
||
color: var(--ink-tertiary) !important;
|
||
border: none !important;
|
||
padding: 4px 8px !important;
|
||
min-height: 0 !important;
|
||
}
|
||
[data-testid="stButton"] button[kind="tertiary"]:hover,
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-tertiary"]:hover {
|
||
background: var(--danger-fill) !important;
|
||
color: var(--danger) !important;
|
||
}
|
||
/* The button label is in a child p; force it to inherit the button's
|
||
color so the danger tint shows through on hover. */
|
||
[data-testid="stButton"] button[kind="tertiary"] * {
|
||
color: inherit !important;
|
||
}
|
||
|
||
/* Disabled state — same low-contrast look for primary and secondary
|
||
kinds. Selector list explicitly includes
|
||
``button[data-testid="stBaseButton-primary"]:disabled`` so this rule
|
||
beats the primary-button block's identically-shaped selector on
|
||
specificity tie-breaks — without that, the primary's dark-ink
|
||
background stays and the disabled state reads as a black button
|
||
with greyed-out text. */
|
||
[data-testid="stButton"] button:disabled,
|
||
[data-testid="stButton"] button[kind="primary"]:disabled,
|
||
[data-testid="stButton"] button[kind="secondary"]:disabled,
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-primary"]:disabled,
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-secondary"]:disabled {
|
||
background: var(--surface-hover) !important;
|
||
color: var(--ink-tertiary) !important;
|
||
border: 1px solid var(--border) !important;
|
||
cursor: not-allowed !important;
|
||
}
|
||
/* Override the "every descendant gets ``--bg``" rule the primary
|
||
block declares so the label inside a disabled primary button
|
||
inherits the tertiary ink color too, not the cream ``--bg``. */
|
||
[data-testid="stButton"] button[kind="primary"]:disabled *,
|
||
[data-testid="stButton"] button[data-testid="stBaseButton-primary"]:disabled * {
|
||
color: var(--ink-tertiary) !important;
|
||
}
|
||
|
||
/* ---------- File uploader — soft cream dropzone ---------- */
|
||
[data-testid="stFileUploader"] section,
|
||
[data-testid="stFileUploaderDropzone"] {
|
||
background: var(--surface-hover) !important;
|
||
border: 1px dashed var(--border-strong) !important;
|
||
border-radius: var(--r-md) !important;
|
||
}
|
||
[data-testid="stFileUploader"] button {
|
||
border-radius: var(--r-md) !important;
|
||
}
|
||
/* The per-file chip rows the uploader emits after a file is staged. */
|
||
[data-testid="stFileUploaderFile"] {
|
||
background: var(--surface) !important;
|
||
border: 1px solid var(--border) !important;
|
||
border-radius: var(--r-sm) !important;
|
||
}
|
||
/* Hide Streamlit's built-in compact file-chip row once files exist —
|
||
the home page renders its own canonical "Imported files" list with
|
||
a Remove button beneath the uploader, so the chip row is redundant
|
||
and visually doubles up on filenames. The dropzone's borderless
|
||
``+`` button is left in place as the "add more files" affordance.
|
||
First-child wrapper of ``stFileChips`` holds the chip flex container;
|
||
collapsing it lets the ``+`` button hug the dropzone's left edge. */
|
||
[data-testid="stFileChip"] { display: none !important; }
|
||
[data-testid="stFileChips"] > div:first-child { display: none !important; }
|
||
|
||
/* ---------- Expanders + bordered containers → editorial cards ---------- */
|
||
[data-testid="stExpander"] details,
|
||
[data-testid="stExpander"] {
|
||
background: var(--surface) !important;
|
||
border: 1px solid var(--border) !important;
|
||
border-radius: var(--r-lg) !important;
|
||
overflow: hidden !important;
|
||
box-shadow: 0 1px 2px rgba(28,25,23,0.03);
|
||
}
|
||
[data-testid="stExpander"] details > summary {
|
||
background: var(--surface-hover) !important;
|
||
border-bottom: 1px solid var(--border) !important;
|
||
padding: 12px 16px !important;
|
||
font-weight: 500 !important;
|
||
color: var(--ink) !important;
|
||
}
|
||
[data-testid="stExpander"] details[open] > summary {
|
||
border-bottom: 1px solid var(--border) !important;
|
||
}
|
||
[data-testid="stExpander"] details > div {
|
||
padding: 14px 16px !important;
|
||
}
|
||
|
||
/* ``st.container(border=True)`` — same card treatment. */
|
||
[data-testid="stVerticalBlockBorderWrapper"],
|
||
div[data-testid="stContainer"][data-border="true"] {
|
||
background: var(--surface) !important;
|
||
border: 1px solid var(--border) !important;
|
||
border-radius: var(--r-lg) !important;
|
||
box-shadow: 0 1px 2px rgba(28,25,23,0.03);
|
||
}
|
||
/* Tighten the inter-row gap inside bordered containers — applies to
|
||
the Files card rows after import and the findings-card rows alike,
|
||
so the dense card body has less wasted vertical whitespace. */
|
||
[data-testid="stVerticalBlockBorderWrapper"] [data-testid="stVerticalBlock"] {
|
||
gap: 0.25rem !important;
|
||
}
|
||
|
||
/* ---------- Alerts — soft fills, no harsh borders ---------- */
|
||
[data-testid="stAlert"] [data-testid="stAlertContainer"],
|
||
[data-testid="stAlertContainer"] {
|
||
border-radius: var(--r-md) !important;
|
||
border: 1px solid transparent !important;
|
||
padding: 10px 14px !important;
|
||
font-size: 13.5px !important;
|
||
}
|
||
/* Streamlit tags each alert kind on the wrapper; target both the
|
||
legacy class hooks and the newer per-kind ``data-baseweb-color``. */
|
||
[data-testid="stAlertContainer"][kind="info"],
|
||
.stAlert[data-baseweb="notification"][kind="info"] {
|
||
background: var(--info-fill) !important;
|
||
color: var(--info) !important;
|
||
}
|
||
[data-testid="stAlertContainer"][kind="success"],
|
||
.stAlert[data-baseweb="notification"][kind="success"] {
|
||
background: var(--success-fill) !important;
|
||
color: var(--success) !important;
|
||
}
|
||
[data-testid="stAlertContainer"][kind="warning"],
|
||
.stAlert[data-baseweb="notification"][kind="warning"] {
|
||
background: var(--warn-fill) !important;
|
||
color: var(--warn) !important;
|
||
}
|
||
[data-testid="stAlertContainer"][kind="error"],
|
||
.stAlert[data-baseweb="notification"][kind="error"] {
|
||
background: var(--danger-fill) !important;
|
||
color: var(--danger) !important;
|
||
}
|
||
|
||
/* ---------- Inputs (text, select, multiselect) — paper surface ---------- */
|
||
[data-testid="stTextInput"] input,
|
||
[data-testid="stTextArea"] textarea,
|
||
[data-testid="stNumberInput"] input,
|
||
[data-testid="stSelectbox"] div[role="combobox"],
|
||
[data-testid="stMultiSelect"] div[role="combobox"],
|
||
[data-baseweb="select"] > div {
|
||
background: var(--surface) !important;
|
||
border-radius: var(--r-sm) !important;
|
||
border-color: var(--border-strong) !important;
|
||
font-family: var(--font-sans) !important;
|
||
}
|
||
|
||
/* Sidebar widget labels — render as the "Eyebrow" row from spec §4
|
||
(tiny uppercase tracking, tertiary ink) so the ``Language`` /
|
||
``Core · 1820 days left`` blocks at the bottom of the sidebar
|
||
match the section-title rhythm of the nav above. */
|
||
[data-testid="stSidebar"] [data-testid="stWidgetLabel"] p,
|
||
[data-testid="stSidebar"] label[data-testid="stWidgetLabel"] {
|
||
font-size: 11.5px !important;
|
||
font-weight: 500 !important;
|
||
text-transform: uppercase !important;
|
||
letter-spacing: 0.08em !important;
|
||
color: var(--ink-tertiary) !important;
|
||
margin-bottom: 4px !important;
|
||
}
|
||
/* Sidebar selectbox — quiet outline, cream surface that reads as
|
||
part of the sidebar rather than a Streamlit-default white island. */
|
||
[data-testid="stSidebar"] [data-testid="stSelectbox"] div[role="combobox"],
|
||
[data-testid="stSidebar"] [data-baseweb="select"] > div {
|
||
background: var(--surface) !important;
|
||
border: 1px solid var(--border) !important;
|
||
border-radius: var(--r-sm) !important;
|
||
font-size: 13px !important;
|
||
min-height: 32px !important;
|
||
}
|
||
[data-testid="stSidebar"] [data-testid="stSelectbox"] div[role="combobox"]:hover,
|
||
[data-testid="stSidebar"] [data-baseweb="select"] > div:hover {
|
||
border-color: var(--border-strong) !important;
|
||
}
|
||
/* Streamlit pads the selectbox internals; tighten the chevron column
|
||
so the control isn't taller than the nav items above it. */
|
||
[data-testid="stSidebar"] [data-baseweb="select"] > div > div {
|
||
padding: 4px 8px !important;
|
||
}
|
||
|
||
/* Divider — softer warm gray instead of cool Streamlit default. */
|
||
[data-testid="stMarkdownContainer"] hr,
|
||
.stApp hr {
|
||
border-color: var(--border) !important;
|
||
}
|
||
|
||
/* Tabs — pill-style with active underline in accent. */
|
||
[data-testid="stTabs"] [role="tab"] {
|
||
font-family: var(--font-sans) !important;
|
||
font-size: 13.5px !important;
|
||
color: var(--ink-secondary) !important;
|
||
}
|
||
[data-testid="stTabs"] [role="tab"][aria-selected="true"] {
|
||
color: var(--ink) !important;
|
||
font-weight: 500 !important;
|
||
}
|
||
|
||
/* DataFrame surface — warm card, mono cells. */
|
||
[data-testid="stDataFrame"] {
|
||
border-radius: var(--r-md) !important;
|
||
border: 1px solid var(--border) !important;
|
||
overflow: hidden !important;
|
||
}
|
||
|
||
/* ---------- Page header (brand block + privacy pill) ---------- */
|
||
.dt-page-header {
|
||
display: flex;
|
||
align-items: center;
|
||
justify-content: space-between;
|
||
gap: 24px;
|
||
margin: 0 0 24px;
|
||
padding-bottom: 22px;
|
||
border-bottom: 1px solid var(--border);
|
||
}
|
||
/* The brand block stacks two pieces vertically: the D-chip + words
|
||
row up top, then the tagline beneath. The D mark vertically
|
||
centres with the words column (eyebrow + wordmark), exactly like
|
||
the sidebar chip. */
|
||
.dt-page-brand {
|
||
display: flex;
|
||
flex-direction: column;
|
||
gap: 8px;
|
||
}
|
||
.dt-page-brand-row {
|
||
display: flex;
|
||
align-items: center;
|
||
gap: 18px;
|
||
}
|
||
.dt-page-brand-words {
|
||
display: flex;
|
||
flex-direction: column;
|
||
gap: 2px;
|
||
line-height: 1;
|
||
}
|
||
/* Streamlit wraps the h1 in an emotion-cache div that adds ~3px top
|
||
padding + ~8px bottom margin. Flatten every descendant so the
|
||
eyebrow + wordmark stack hugs the chip height. */
|
||
.dt-page-brand-words *,
|
||
.dt-page-brand-words > div {
|
||
margin: 0 !important;
|
||
padding: 0 !important;
|
||
}
|
||
.dt-page-brand-words .dt-page-wordmark {
|
||
line-height: 1 !important;
|
||
}
|
||
/* Same "Letter D (sans)" wordmark as the sidebar chip and favicon
|
||
— scaled up to hero size. Ink ground, cream D, Geist 700, -0.04em
|
||
tracking. */
|
||
.dt-page-brand-mark {
|
||
width: 56px;
|
||
height: 56px;
|
||
border-radius: 14px;
|
||
background: var(--ink);
|
||
color: var(--accent-fill);
|
||
display: inline-flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
font-family: var(--font-sans);
|
||
font-weight: 700;
|
||
font-size: 32px;
|
||
letter-spacing: -0.04em;
|
||
line-height: 1;
|
||
flex-shrink: 0;
|
||
}
|
||
.dt-page-eyebrow {
|
||
font-family: var(--font-sans) !important;
|
||
font-size: 11.5px;
|
||
font-weight: 600;
|
||
letter-spacing: 0.14em;
|
||
text-transform: uppercase;
|
||
color: var(--ink-tertiary);
|
||
line-height: 1.2;
|
||
}
|
||
.dt-page-wordmark {
|
||
margin: 0 !important;
|
||
font-family: var(--font-sans) !important;
|
||
font-weight: 600 !important;
|
||
font-size: 32px !important;
|
||
letter-spacing: -0.035em !important;
|
||
line-height: 1.1 !important;
|
||
color: var(--ink) !important;
|
||
}
|
||
.dt-page-header .dt-page-subtitle {
|
||
margin: 4px 0 0;
|
||
color: var(--ink-secondary) !important;
|
||
font-size: 14px;
|
||
line-height: 1.5;
|
||
}
|
||
.dt-privacy-pill {
|
||
display: inline-flex;
|
||
align-items: center;
|
||
gap: 6px;
|
||
padding: 6px 11px;
|
||
background: var(--success-fill);
|
||
color: var(--success);
|
||
border-radius: 999px;
|
||
font-family: var(--font-sans);
|
||
font-size: 12px;
|
||
font-weight: 500;
|
||
white-space: nowrap;
|
||
flex-shrink: 0;
|
||
}
|
||
.dt-privacy-pill svg {
|
||
width: 13px; height: 13px;
|
||
stroke-width: 2;
|
||
}
|
||
|
||
/* ---------- Files card (mockup §files-card) ---------- */
|
||
/* Card head + row layout. The data lives in real ``st.button`` widgets
|
||
for the remove action — those are styled separately further down by
|
||
keyed selector. */
|
||
.dt-files-section-head {
|
||
display: flex;
|
||
align-items: baseline;
|
||
justify-content: space-between;
|
||
margin: 4px 0 10px;
|
||
gap: 12px;
|
||
}
|
||
.dt-files-section-head h3 { margin: 0 !important; }
|
||
.dt-files-section-head .dt-section-meta {
|
||
font-size: 12.5px;
|
||
color: var(--ink-tertiary);
|
||
font-family: var(--font-sans);
|
||
}
|
||
.dt-file-row {
|
||
display: flex;
|
||
align-items: center;
|
||
gap: 12px;
|
||
}
|
||
.dt-file-icon-chip {
|
||
width: 28px;
|
||
height: 28px;
|
||
border-radius: var(--r-sm);
|
||
background: var(--accent-fill);
|
||
color: var(--accent);
|
||
display: inline-flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
flex-shrink: 0;
|
||
}
|
||
.dt-file-icon-chip svg { width: 14px; height: 14px; stroke-width: 1.8; }
|
||
.dt-file-name {
|
||
font-family: var(--font-mono) !important;
|
||
font-size: 13px;
|
||
color: var(--ink) !important;
|
||
font-feature-settings: "ss02";
|
||
}
|
||
.dt-file-size {
|
||
font-family: var(--font-mono) !important;
|
||
font-size: 12px;
|
||
color: var(--ink-tertiary) !important;
|
||
font-feature-settings: "ss02";
|
||
}
|
||
|
||
/* "+ Add more files" — last row of the files card (mockup §file-add).
|
||
The button stays in the document; ``onclick`` triggers a programmatic
|
||
click on Streamlit's (off-screen) file_uploader input so the OS file
|
||
picker opens. Negative margins bleed the button to the card edges so
|
||
the dashed top-border and corner radii match the surrounding card
|
||
chrome. */
|
||
.dt-file-add {
|
||
display: flex !important;
|
||
align-items: center;
|
||
justify-content: center;
|
||
gap: 8px;
|
||
width: calc(100% + 2rem);
|
||
padding: 12px 16px;
|
||
background: var(--surface-hover);
|
||
border: none;
|
||
border-top: 1px dashed var(--border-strong);
|
||
border-radius: 0 0 var(--r-lg) var(--r-lg);
|
||
cursor: pointer;
|
||
font-family: var(--font-sans) !important;
|
||
font-size: 13px !important;
|
||
font-weight: 500 !important;
|
||
color: var(--ink-secondary) !important;
|
||
margin: 14px -1rem -1rem;
|
||
line-height: 1;
|
||
transition: background 0.12s ease, color 0.12s ease;
|
||
}
|
||
.dt-file-add:hover {
|
||
background: var(--accent-fill);
|
||
color: var(--accent) !important;
|
||
}
|
||
.dt-file-add svg {
|
||
width: 14px; height: 14px;
|
||
stroke-width: 2;
|
||
}
|
||
|
||
/* Empty-state placeholder centered in the empty files card. */
|
||
.dt-files-empty {
|
||
margin: 8px 0 4px !important;
|
||
text-align: center;
|
||
color: var(--ink-tertiary) !important;
|
||
font-size: 13px;
|
||
}
|
||
|
||
/* Streamlit's file_uploader is rendered off-screen so the OS file
|
||
picker stays wired up to our in-card "Add more files" button — its
|
||
input element is still reachable via JS ``.click()``. */
|
||
.dt-fileuploader-offscreen [data-testid="stFileUploader"] {
|
||
position: absolute !important;
|
||
left: -10000px !important;
|
||
width: 1px !important;
|
||
height: 1px !important;
|
||
overflow: hidden !important;
|
||
pointer-events: none !important;
|
||
}
|
||
|
||
/* ---------- Findings — per-file group cards (mockup §findings) ---------- */
|
||
.dt-finding-group-head {
|
||
display: flex;
|
||
align-items: center;
|
||
gap: 12px;
|
||
/* Generous left/right padding so the filename + counts have visible
|
||
breathing room against the card's rounded edges — the head bleeds
|
||
out to those edges via the negative margin below, so without the
|
||
extra padding the content sits flush against the border. */
|
||
padding: 16px 22px;
|
||
border-bottom: 1px solid var(--border);
|
||
background: var(--surface-hover);
|
||
/* -1rem on top/sides bleeds the head to the card edges (the parent
|
||
``st.container(border=True)`` has 1rem padding). +1.5rem on the
|
||
bottom is breathing room before the first finding row — without
|
||
it the row sits flush against the head's bottom border. */
|
||
margin: -1rem -1rem 1.5rem;
|
||
border-radius: var(--r-lg) var(--r-lg) 0 0;
|
||
cursor: pointer;
|
||
user-select: none;
|
||
transition: background 0.12s ease;
|
||
}
|
||
.dt-finding-group-head:hover {
|
||
background: var(--accent-fill);
|
||
}
|
||
/* Chevron leads the head as the first flex item; rotates 90° to
|
||
indicate expanded state. */
|
||
.dt-finding-group-chevron {
|
||
color: var(--ink-tertiary);
|
||
font-family: "Material Symbols Outlined" !important;
|
||
font-size: 20px !important;
|
||
font-feature-settings: normal !important;
|
||
line-height: 1 !important;
|
||
transition: transform 0.15s ease;
|
||
flex-shrink: 0;
|
||
margin-right: -2px;
|
||
}
|
||
.dt-finding-group-head[data-dt-collapsed="false"] .dt-finding-group-chevron {
|
||
transform: rotate(90deg);
|
||
}
|
||
/* Collapsed = body rows hidden + head tucks tight against card bottom.
|
||
The head's siblings inside the bordered container are the
|
||
``stHorizontalBlock``s emitted by each ``st.columns`` row — when the
|
||
head carries ``data-dt-collapsed="true"`` they collapse to nothing
|
||
and the head's bottom border becomes the card's bottom edge. */
|
||
.dt-finding-group-head[data-dt-collapsed="true"] {
|
||
margin: -1rem -1rem -1rem;
|
||
border-bottom: none;
|
||
border-radius: var(--r-lg);
|
||
}
|
||
/* Hide every sibling that comes AFTER the head's element-container
|
||
(the rows are emitted as ``stLayoutWrapper`` or
|
||
``stElementContainer`` siblings depending on Streamlit's internal
|
||
layout reducer; ``~ *`` matches both and survives future renames). */
|
||
[data-testid="stElementContainer"]:has(.dt-finding-group-head[data-dt-collapsed="true"])
|
||
~ * {
|
||
display: none !important;
|
||
}
|
||
.dt-severity-dot {
|
||
width: 8px; height: 8px;
|
||
border-radius: 50%;
|
||
flex-shrink: 0;
|
||
display: inline-block;
|
||
}
|
||
.dt-severity-dot.warn { background: var(--warn); }
|
||
.dt-severity-dot.info { background: var(--info); }
|
||
.dt-severity-dot.error { background: var(--danger); }
|
||
.dt-severity-dot.success { background: var(--success); }
|
||
.dt-group-filename {
|
||
font-family: var(--font-mono) !important;
|
||
font-size: 13.5px !important;
|
||
font-weight: 500 !important;
|
||
color: var(--ink) !important;
|
||
font-feature-settings: "ss02";
|
||
}
|
||
.dt-group-counts {
|
||
margin-left: auto;
|
||
display: flex;
|
||
align-items: center;
|
||
gap: 8px;
|
||
}
|
||
.dt-count-pill {
|
||
display: inline-flex;
|
||
align-items: center;
|
||
padding: 3px 9px;
|
||
border-radius: 999px;
|
||
font-family: var(--font-sans);
|
||
font-size: 11.5px;
|
||
font-weight: 500;
|
||
line-height: 1.4;
|
||
white-space: nowrap;
|
||
}
|
||
.dt-count-pill.warn { background: var(--warn-fill); color: var(--warn); }
|
||
.dt-count-pill.info { background: var(--info-fill); color: var(--info); }
|
||
.dt-count-pill.error { background: var(--danger-fill); color: var(--danger); }
|
||
.dt-count-pill.success { background: var(--success-fill); color: var(--success); }
|
||
|
||
.dt-finding-row {
|
||
display: flex;
|
||
align-items: flex-start;
|
||
gap: 12px;
|
||
padding: 12px 0;
|
||
border-top: 1px solid var(--border);
|
||
}
|
||
.dt-finding-row:first-of-type { border-top: none; }
|
||
.dt-finding-icon {
|
||
width: 24px;
|
||
height: 24px;
|
||
border-radius: var(--r-sm);
|
||
display: inline-flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
flex-shrink: 0;
|
||
}
|
||
.dt-finding-icon.warn { background: var(--warn-fill); color: var(--warn); }
|
||
.dt-finding-icon.info { background: var(--info-fill); color: var(--info); }
|
||
.dt-finding-icon.error { background: var(--danger-fill); color: var(--danger); }
|
||
.dt-finding-icon svg { width: 12px; height: 12px; stroke-width: 2.2; }
|
||
/* Material Symbols Outlined applied to the inline ligature span. The
|
||
selector is doubled (``.dt-finding-icon .dt-mui``) to give it more
|
||
specificity than theme.py's base ``font-family: var(--font-sans)
|
||
!important`` on stMarkdownContainer descendants. */
|
||
.dt-finding-icon .dt-mui,
|
||
[data-testid="stMarkdownContainer"] .dt-finding-icon .dt-mui {
|
||
font-family: "Material Symbols Outlined" !important;
|
||
font-size: 16px !important;
|
||
font-feature-settings: normal !important;
|
||
font-weight: 400 !important;
|
||
line-height: 1 !important;
|
||
letter-spacing: 0 !important;
|
||
}
|
||
.dt-finding-body { flex: 1; min-width: 0; }
|
||
.dt-finding-title {
|
||
font-size: 14px !important;
|
||
color: var(--ink) !important;
|
||
margin: 0 0 2px !important;
|
||
line-height: 1.4 !important;
|
||
letter-spacing: -0.005em;
|
||
}
|
||
.dt-finding-title strong { font-weight: 500 !important; }
|
||
.dt-finding-meta {
|
||
font-family: var(--font-mono) !important;
|
||
font-size: 12px !important;
|
||
color: var(--ink-tertiary) !important;
|
||
line-height: 1.4 !important;
|
||
margin: 0 !important;
|
||
font-feature-settings: "ss02";
|
||
}
|
||
|
||
/* ---------- Stats overview ---------- */
|
||
/* 4-card grid shown above the per-file findings on the home page,
|
||
summarizing the most recent analysis run. Numeric values use the
|
||
"Display number" row from geist_spec.md §4 — Geist 28px / 600 /
|
||
-0.03em — and the severity-tinted variants pick up ``--warn`` /
|
||
``--info`` from theme.py. */
|
||
.dt-stats {
|
||
display: grid;
|
||
grid-template-columns: repeat(4, 1fr);
|
||
gap: 12px;
|
||
margin: 8px 0 20px;
|
||
}
|
||
.dt-stat {
|
||
background: var(--surface);
|
||
border: 1px solid var(--border);
|
||
border-radius: var(--r-lg);
|
||
padding: 16px 18px;
|
||
box-shadow: 0 1px 2px rgba(28,25,23,0.03);
|
||
}
|
||
.dt-stat-label {
|
||
font-size: 11.5px;
|
||
text-transform: uppercase;
|
||
letter-spacing: 0.08em;
|
||
color: var(--ink-tertiary);
|
||
font-weight: 500;
|
||
margin-bottom: 6px;
|
||
line-height: 1.4;
|
||
}
|
||
.dt-stat-value {
|
||
font-family: var(--font-sans);
|
||
font-size: 28px;
|
||
font-weight: 600;
|
||
letter-spacing: -0.03em;
|
||
line-height: 1;
|
||
color: var(--ink);
|
||
display: flex;
|
||
align-items: baseline;
|
||
gap: 6px;
|
||
}
|
||
.dt-stat-unit {
|
||
font-family: var(--font-sans);
|
||
font-size: 12px;
|
||
font-weight: 400;
|
||
color: var(--ink-tertiary);
|
||
letter-spacing: 0;
|
||
}
|
||
.dt-stat.is-warn .dt-stat-value { color: var(--warn); }
|
||
.dt-stat.is-info .dt-stat-value { color: var(--info); }
|
||
.dt-stat.is-success .dt-stat-value { color: var(--success); }
|
||
|
||
@media (max-width: 900px) {
|
||
.dt-stats { grid-template-columns: repeat(2, 1fr); }
|
||
}
|
||
</style>
|
||
"""
|
||
|
||
|
||
# NOTE: this paragraph documents ``_RENAME_UPLOAD_BUTTON_JS`` (defined
# further down in this module), not the brand script directly below.
#
# Streamlit ships the file_uploader's dropzone button with hard-coded
# "Upload" text (it's a text node baked into the React component, not
# a Streamlit i18n string we can override from Python). Our product
# positioning is local-first, so the word "Upload" is misleading. The
# script walks the dropzone buttons after first paint and rewrites the
# label to "Import" — and re-runs on Streamlit's component-rerender
# DOM mutations so the swap survives navigation and reruns.
|
||
# Injects the sidebar brand block (mockup §brand) at the top of
# Streamlit's ``stSidebarHeader``: the 28px ink-filled rounded square
# with the "D" wordmark followed by the "DataTools" word. Streamlit's
# ``stLogoSpacer`` reserves the slot but doesn't render anything
# without a ``st.logo()`` call; we replace its content rather than
# call ``st.logo`` because the brand wants both a chip AND wordmark
# in one block, which ``st.logo`` can't do without shipping a static
# image asset. MutationObserver re-injects when Streamlit remounts
# the sidebar header.
_INJECT_BRAND_JS = """
<script>
(function () {
  // Insert the brand block once; a second call is a no-op while the
  // ``.dt-brand`` node is still present in the header.
  function inject(doc) {
    var header = doc.querySelector('[data-testid="stSidebarHeader"]');
    if (!header) return;
    if (header.querySelector('.dt-brand')) return;
    var brand = doc.createElement('div');
    brand.className = 'dt-brand';
    brand.innerHTML =
      '<div class="dt-brand-mark">D</div>' +
      '<div class="dt-brand-name">' +
        '<span class="dt-brand-eyebrow">UNALOGIX</span>' +
        'DataTools' +
      '</div>';
    header.insertBefore(brand, header.firstChild);
  }
  // Prefer the host page's document; fall back to this iframe's own
  // document when cross-frame access throws.
  var doc;
  try { doc = window.parent.document; }
  catch (e) { doc = document; }
  inject(doc);
  var win = doc.defaultView || window.parent || window;
  if ('MutationObserver' in win) {
    // requestAnimationFrame throttle: a burst of mutations triggers
    // a single re-injection attempt.
    var raf = 0;
    try {
      new win.MutationObserver(function () {
        if (raf) return;
        raf = win.requestAnimationFrame(function () { raf = 0; inject(doc); });
      }).observe(doc.body, { childList: true, subtree: true });
    } catch (e) {}
  }
})();
</script>
"""
|
||
|
||
|
||
# Toggle a ``.dt-finding-group-head``'s ``data-dt-collapsed`` attribute
# on click. CSS handles the visual collapse (hide siblings, tuck the
# head against the card bottom) — all this script does is flip the
# attribute. MutationObserver re-binds when Streamlit remounts heads.
_WIRE_COLLAPSIBLE_FINDINGS_JS = """
<script>
(function () {
  function wire(doc) {
    var heads = doc.querySelectorAll('.dt-finding-group-head');
    heads.forEach(function (h) {
      // ``data-dt-wired`` marks heads that already have a listener so
      // repeated passes never double-bind (which would toggle twice).
      if (h.dataset.dtWired === '1') return;
      h.dataset.dtWired = '1';
      h.addEventListener('click', function () {
        // A missing attribute reads as "expanded", so the first click
        // collapses the group.
        var collapsed = h.getAttribute('data-dt-collapsed') === 'true';
        h.setAttribute('data-dt-collapsed', collapsed ? 'false' : 'true');
      });
    });
  }
  var doc;
  try { doc = window.parent.document; }
  catch (e) { doc = document; }
  wire(doc);
  var win = doc.defaultView || window.parent || window;
  if ('MutationObserver' in win) {
    var raf = 0;
    try {
      new win.MutationObserver(function () {
        if (raf) return;
        raf = win.requestAnimationFrame(function () { raf = 0; wire(doc); });
      }).observe(doc.body, { childList: true, subtree: true });
    } catch (e) {}
  }
})();
</script>
"""
|
||
|
||
|
||
# Rewrites the expand/collapse icon text inside sidebar nav section
# headers from Material ligature names to plain "+" / "−" glyphs.
# Bundled into the same iframe injection as the other DOM scripts.
_SWAP_NAV_SECTION_INDICATOR_JS = """
<script>
(function () {
  // Replace Streamlit's ``expand_more`` / ``expand_less`` Material
  // ligature in sidebar nav section headers with plain ``+`` / ``−``.
  // The section header isn't a button and doesn't carry
  // ``aria-expanded``, so a pure-CSS swap can't switch the glyph
  // based on state — we rewrite the icon's text content directly.
  function swap(doc) {
    var headers = doc.querySelectorAll('[data-testid="stNavSectionHeader"]');
    headers.forEach(function (h) {
      var icon = h.querySelector('[data-testid="stIconMaterial"]');
      if (!icon) return;
      var text = (icon.textContent || '').trim();
      var glyph = null;
      if (text === 'expand_more') glyph = '+';
      else if (text === 'expand_less') glyph = '−';  // U+2212
      else return;  // already swapped, or an icon we don't manage
      icon.textContent = glyph;
    });
  }
  var doc;
  try { doc = window.parent.document; }
  catch (e) { doc = document; }
  swap(doc);
  var win = doc.defaultView || window.parent || window;
  if ('MutationObserver' in win) {
    var raf = 0;
    try {
      // ``characterData`` is observed as well as ``childList`` so an
      // in-place change of the icon's text node also re-triggers the
      // swap.
      new win.MutationObserver(function () {
        if (raf) return;
        raf = win.requestAnimationFrame(function () { raf = 0; swap(doc); });
      }).observe(doc.body, { childList: true, subtree: true, characterData: true });
    } catch (e) {}
  }
})();
</script>
"""
|
||
|
||
|
||
# Rewrites the file_uploader dropzone button's "Upload" label to
# "Import" in the host page, and re-applies the rewrite whenever the
# DOM mutates so it survives Streamlit remounting the dropzone.
_RENAME_UPLOAD_BUTTON_JS = """
<script>
(function () {
  function swap(doc) {
    var dropzones = doc.querySelectorAll('[data-testid="stFileUploaderDropzone"]');
    dropzones.forEach(function (dz) {
      var btn = dz.querySelector('button');
      if (!btn) return;
      // The label is a text node directly inside the outer label span;
      // walk all text nodes and replace any exact "Upload".
      var walker = doc.createTreeWalker(btn, NodeFilter.SHOW_TEXT, null, false);
      var node;
      while ((node = walker.nextNode())) {
        if (node.nodeValue && node.nodeValue.trim() === 'Upload') {
          node.nodeValue = node.nodeValue.replace('Upload', 'Import');
        }
      }
    });
  }
  try {
    var doc = window.parent.document;
    swap(doc);
    // Streamlit re-mounts dropzone subtrees on file changes / page
    // switches — observe the parent doc and re-apply the swap when
    // new ``stFileUploaderDropzone`` nodes appear. Throttled via
    // requestAnimationFrame so a burst of mutations is one swap.
    var raf = 0;
    var obs = new (doc.defaultView || window).MutationObserver(function () {
      if (raf) return;
      raf = (doc.defaultView || window).requestAnimationFrame(function () {
        raf = 0;
        swap(doc);
      });
    });
    obs.observe(doc.body, { childList: true, subtree: true });
  } catch (e) {
    // Parent document unreachable: best-effort swap inside this frame.
    swap(document);
  }
})();
</script>
"""
|
||
|
||
|
||
def hide_streamlit_chrome(*, gate_license: bool = True) -> None:
    """Inject CSS to hide Streamlit's default header, menu, and footer.

    Also renders the sidebar language selector + license status badge,
    since every entrypoint that hides the default chrome wants those
    visible in the same place. Pages that want a clean chrome without
    them can inject ``_HIDE_CHROME_CSS`` themselves instead of calling
    this.

    In order, the call: injects the chrome-hiding CSS, applies the
    theme, injects the component CSS, mounts the bundled DOM scripts in
    one iframe, stamps the audit-log session start (best-effort), runs
    the production-safety check, then renders the sidebar widgets.

    When *gate_license* is True (the default) the function calls
    :func:`require_license_or_render_activation` after the sidebar
    widgets render. If no valid license is present, the activation
    form replaces the page body and the page short-circuits via
    ``st.stop()``. The Activate page itself passes ``False`` so it
    can render its own form without recursion.

    Args:
        gate_license: Whether to run the license gate as the final step.
    """
    st.markdown(_HIDE_CHROME_CSS, unsafe_allow_html=True)
    # ``apply_theme`` injects the canonical typography + color tokens
    # (geist_spec.md §3). Must run BEFORE ``_DESIGN_TOKENS_CSS`` so the
    # component CSS below can read its ``--font-sans`` / ``--ink`` etc.
    from src.gui.theme import apply_theme
    apply_theme()
    st.markdown(_DESIGN_TOKENS_CSS, unsafe_allow_html=True)
    # ``st.markdown`` doesn't execute embedded scripts; ship every
    # DOM-mutating script through a single iframe component (same way
    # the sticky footer mounts on ``<body>``). Bundled together so
    # there's one component-iframe per page rather than one per script.
    st.iframe(
        _INJECT_BRAND_JS
        + _RENAME_UPLOAD_BUTTON_JS
        + _WIRE_COLLAPSIBLE_FINDINGS_JS
        + _SWAP_NAV_SECTION_INDICATOR_JS,
        height=1,
    )
    # Stamp a session-start record into the audit log the first time
    # any page renders. Idempotent — subsequent calls are no-ops.
    # Wrapped because a broken audit log MUST NOT take the GUI down.
    try:
        from src.audit import log_session_start
        log_session_start()
    except Exception:
        # Report to stderr and carry on rendering the page.
        import traceback, sys
        print("DataTools: audit log session-start failed:", file=sys.stderr)
        traceback.print_exc()
    # Production-safe check runs first so a misconfigured shipped
    # build refuses to render anything (rather than rendering a
    # broken activation form that doesn't accept real blobs).
    # No-op in source / pytest runs.
    from src.license import assert_production_safe
    assert_production_safe()
    # Imported lazily so this module stays importable in environments
    # where the i18n packs haven't been laid out (e.g. unit tests of
    # individual legacy helpers).
    from src.i18n import render_language_selector
    render_language_selector()
    # License chrome: sidebar status badge + inline gate.
    from .activation import (
        render_license_status_sidebar,
        require_license_or_render_activation,
    )
    render_license_status_sidebar()
    # Diagnostics sidebar is DISABLED — the async-writer redesign
    # didn't actually fix the blank-pages symptom on the user's
    # machine. The sidebar calls ``audit_log_path()`` which is pure
    # now, so the failure mode must be elsewhere; keep this off
    # while we diagnose so the user has a working GUI.
    # The ``if False`` guard keeps the call site in place so it can be
    # re-enabled by flipping the condition.
    if False:
        try:
            _render_diagnostics_sidebar()
        except Exception:
            import traceback, sys
            print("DataTools: diagnostics sidebar render failed:", file=sys.stderr)
            traceback.print_exc()
    # The gate runs last so the sidebar widgets above are already
    # rendered if it short-circuits the page.
    if gate_license:
        require_license_or_render_activation()
|
||
|
||
|
||
def _render_diagnostics_sidebar() -> None:
    """Draw the collapsed "Diagnostics" expander inside the sidebar.

    The expander shows the current session's audit-log path and offers
    an "Open log folder" button. If the file manager cannot be
    launched, a warning tells the user to paste the displayed path
    manually. Kept behind an expander so it costs no screen space until
    opened; the support flow is "client mails us the file, we tell them
    what went wrong."
    """
    from src.audit import audit_log_dir, audit_log_path

    session_log = audit_log_path()
    with st.sidebar, st.expander("🩺 Diagnostics", expanded=False):
        st.caption("Audit log for this session:")
        st.code(str(session_log), language=None)
        wants_folder = st.button(
            "📂 Open log folder",
            key="_diag_open_logs",
            type="secondary",
            width="stretch",
        )
        if not wants_folder:
            return
        # Highlight the log file itself where the platform supports it.
        if _open_in_file_manager(audit_log_dir(), select=session_log):
            return
        st.warning(
            "Could not open the file manager from here. "
            "Path is above — paste it into your file manager."
        )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Clean shutdown
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Farewell overlay script. ``__TITLE__`` and ``__SUBTITLE__`` are
# placeholders sitting inside JS single-quoted strings that feed
# ``innerHTML``; they must be substituted with text escaped for both
# contexts before the script is shipped to the browser.
_FAREWELL_SCRIPT_TEMPLATE = """
<script>
(function () {
  // Strategy: append a full-screen overlay directly to the parent's
  // document.body (Streamlit's component iframes carry
  // allow-same-origin, so cross-frame DOM access is permitted).
  //
  // Closing the tab via JavaScript only works in windows JS opened —
  // Chrome/Edge --app windows qualify; a regular browser tab does
  // NOT, and there's no way to override that from page JS (no flag,
  // no API, no keystroke injection — synthesized keydown events
  // never reach the browser chrome or the OS). When close fails we
  // navigate the window to ``about:blank`` so the user at least
  // sees a clean blank tab instead of the connection-error overlay
  // Streamlit shows when the websocket drops.
  //
  // Display-mode detection (``standalone`` for --app windows,
  // ``browser`` for regular tabs) only tunes how long we wait after
  // the close attempt before routing to the about:blank fallback.
  function isStandalone(win) {
    try {
      return win.matchMedia('(display-mode: standalone)').matches
        || win.matchMedia('(display-mode: minimal-ui)').matches
        || win.matchMedia('(display-mode: fullscreen)').matches;
    } catch (e) { return false; }
  }
  function buildOverlay(doc) {
    var overlay = doc.createElement('div');
    overlay.id = 'datatools-farewell-overlay';
    overlay.style.cssText =
      'position:fixed;inset:0;background:#0f1115;color:#e8eaed;' +
      'z-index:2147483647;display:flex;align-items:center;' +
      'justify-content:center;font-family:system-ui,-apple-system,sans-serif;';
    overlay.innerHTML =
      '<div style="text-align:center;padding:32px 40px;border:1px solid #252a36;' +
      'border-radius:12px;background:#161922;max-width:480px;">' +
      '<h1 style="margin:0 0 8px 0;font-weight:600;letter-spacing:-0.01em;">' +
      '__TITLE__</h1>' +
      '<p style="opacity:0.7;margin:0;">__SUBTITLE__</p>' +
      '</div>';
    return overlay;
  }
  function tryClose(win) {
    // Escalating attempts. None of these can override the browser's
    // close-restriction policy on regular tabs.
    try { win.close(); } catch (e) {}
    if (win.closed) return true;
    try {
      var w = win.open('', '_self', '');
      if (w) {
        try { w.close(); } catch (e) {}
      }
    } catch (e) {}
    if (win.closed) return true;
    try { win.top.close(); } catch (e) {}
    return win.closed;
  }
  function fallbackToBlank(win) {
    // Navigate to about:blank so the user sees a clean empty tab
    // instead of the farewell overlay frozen on a connection-error
    // page. They can still close the tab themselves (Ctrl+W /
    // ⌘W / clicking the tab's X). Done as a single fast call — no
    // history entry pollution because location.replace doesn't
    // push to history.
    try { win.location.replace('about:blank'); } catch (e) {}
  }
  function autoDismiss(doc, win) {
    // Try programmatic close first — succeeds in Chrome/Edge
    // ``--app`` windows. If it fails (regular browser tab),
    // auto-redirect to about:blank so the user lands on a clean
    // page instead of the frozen farewell overlay. The "Close this
    // window" button + the long browser-restriction hint paragraph
    // both used to drive / explain this flow; we dropped both per
    // UX request, so this auto-timer is the only path on regular
    // tabs.
    var standalone = isStandalone(win);
    if (tryClose(win)) return;
    setTimeout(function () {
      if (!win.closed) fallbackToBlank(win);
    }, standalone ? 400 : 1500);
  }
  try {
    var doc = window.top.document;
    var win = window.top;
    if (!doc.getElementById('datatools-farewell-overlay')) {
      doc.body.appendChild(buildOverlay(doc));
    }
    autoDismiss(doc, win);
  } catch (e) {
    // Cross-origin access denied (shouldn't happen given Streamlit's
    // sandbox flags, but fall back gracefully): cover this iframe.
    document.body.appendChild(buildOverlay(document));
    autoDismiss(document, window);
  }
})();
</script>
"""
|
||
|
||
|
||
def _js_html_safe(s: str) -> str:
    """Escape *s* for the farewell overlay's JS-quoted, innerHTML-bound payload.

    The result is safe to splice between single quotes in a JavaScript
    string literal whose value is later assigned to ``innerHTML`` as
    element text.

    Order matters: backslash first (so the escapes added afterwards
    don't get re-escaped), then the JS string terminator and raw line
    breaks (a literal newline inside a quoted JS string is a syntax
    error), then the HTML-special characters ``&``, ``<`` and ``>``.

    Args:
        s: Plain text to embed (for example a translated message).

    Returns:
        The escaped text.
    """
    import html

    js_escaped = (
        s.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\r", "\\r")
        .replace("\n", "\\n")
    )
    # ``quote=False``: only ``&``, ``<`` and ``>`` are converted. The
    # single quote is already handled at the JS level above, and turning
    # it into an entity would leave a stray backslash in the output.
    return html.escape(js_escaped, quote=False)
|
||
|
||
|
||
def _farewell_script() -> str:
    """Fill the farewell overlay template with the active language's text.

    Each placeholder is replaced by its translated string after the
    string has been escaped for the JS/HTML context it lands in.
    """
    substitutions = (
        ("__TITLE__", "quit.farewell_title"),
        ("__SUBTITLE__", "quit.farewell_subtitle"),
    )
    script = _FAREWELL_SCRIPT_TEMPLATE
    # Title is substituted before subtitle, one pass per placeholder.
    for placeholder, message_key in substitutions:
        script = script.replace(placeholder, _js_html_safe(_t(message_key)))
    return script
|
||
|
||
|
||
def _downloads_dir() -> "Path":
    """Locate the folder that downloads should be written to.

    Candidates are tried in this order:

    1. The ``DATATOOLS_DOWNLOADS_DIR`` environment variable, when set to
       a non-empty value (tests + power users).
    2. On Windows only, the Downloads entry of the ``User Shell
       Folders`` registry key. OneDrive can redirect Downloads to
       ``C:\\Users\\<user>\\OneDrive\\Downloads``; reading the registry
       returns that redirected location instead of the un-redirected
       ``C:\\Users\\<user>\\Downloads`` the user never opens.
    3. ``Path.home() / "Downloads"`` when nothing above produced a path.

    Returns:
        The chosen directory. It is not created here.
    """
    import os
    import sys
    from pathlib import Path

    env_choice = os.environ.get("DATATOOLS_DOWNLOADS_DIR")
    if env_choice:
        return Path(env_choice)

    if sys.platform == "win32":
        try:
            import winreg  # noqa: PLC0415

            registry_subkey = (
                r"Software\Microsoft\Windows\CurrentVersion\Explorer"
                r"\User Shell Folders"
            )
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER, registry_subkey) as handle:
                # The value name is the FOLDERID_Downloads GUID.
                raw_value = winreg.QueryValueEx(
                    handle, "{374DE290-123F-4565-9164-39C4925E467B}",
                )[0]
            candidate = Path(os.path.expandvars(raw_value))
            # Trust the registry only when the folder, or at least its
            # parent, is really there — guards against a path pointing
            # into a deleted/broken OneDrive mount.
            if candidate.exists() or candidate.parent.exists():
                return candidate
        except Exception:
            # Any registry hiccup — fall through to the home-dir default.
            pass

    return Path.home() / "Downloads"
|
||
|
||
|
||
def _open_in_file_manager(folder: "Path", *, select: "Path | None" = None) -> bool:
    """Ask the operating system to show *folder* in its file manager.

    Windows
        Runs ``explorer <folder>``; *select* is ignored. The
        ``explorer /select,<file>`` form is deliberately avoided: with a
        space in the path (e.g. ``C:\\Users\\Jane Doe\\Downloads``)
        ``subprocess.Popen`` quotes the ``/select,...`` argument as one
        unit, Explorer's parser does not handle that form, and it
        silently opens the default view instead. ``os.startfile`` is
        tried only if launching ``explorer`` raises.
    macOS
        ``open -R <select>`` reveals the file in Finder when *select*
        is given; otherwise ``open <folder>``.
    Linux / *BSD
        ``xdg-open <folder>``; there is no reliable cross-distro way to
        highlight a specific file.

    Args:
        folder: Directory to open.
        select: Optional file to highlight (honoured on macOS only).

    Returns:
        ``True`` when a launch attempt was dispatched without raising.
        That is no guarantee a window actually surfaced, so callers
        should still show the path for manual use.
    """
    import os
    import subprocess

    def launch(*argv_parts) -> bool:
        # Spawn without waiting; any failure to start counts as "no".
        try:
            subprocess.Popen([str(part) for part in argv_parts])
        except Exception:
            return False
        return True

    if sys.platform == "win32":
        if launch("explorer", folder):
            return True
        # Last resort: let the shell association open the folder.
        try:
            os.startfile(str(folder))  # type: ignore[attr-defined]
        except Exception:
            return False
        return True

    if sys.platform == "darwin":
        if select is not None:
            return launch("open", "-R", select)
        return launch("open", folder)

    # Linux / *BSD / etc.
    return launch("xdg-open", folder)
|
||
|
||
|
||
def local_download_button(
    label: str,
    data: bytes,
    *,
    file_name: str,
    mime: str = "application/octet-stream",  # noqa: ARG001 — kept for API compat
    disabled: bool = False,
    help: str | None = None,
    width: str = "stretch",
) -> None:
    """Render a button that writes *data* straight into the Downloads folder.

    DataTools runs as a local Streamlit app, so the "server" IS the
    user's machine: the file is written to disk directly instead of
    going through the browser's save dialog. When the button is clicked:

    1. *data* is written to ``_downloads_dir() / file_name``, replacing
       any existing file of that name. On failure an error is shown and
       nothing further is rendered.
    2. The saved path is remembered in ``st.session_state`` and a
       success message names the exact location.
    3. An "Open Downloads folder" button is offered that launches the
       OS file manager on the containing directory.

    Why not ``st.download_button`` or an HTML ``data:`` URL anchor?

    - ``st.download_button`` has a long-standing failure mode where
      only the first button fires when several are stacked on a page.
    - ``data:`` URLs grow the payload by 33% (base64) and leave the user
      guessing where the browser put the file.

    Args:
        label: Text shown on the save button.
        data: Bytes to write.
        file_name: Name of the file inside the Downloads folder.
        mime: Ignored; accepted only for compatibility with the previous
            helper signature.
        disabled: Passed through to the save button.
        help: Tooltip passed through to the save button.
        width: Width setting passed through to the save button.
    """
    import hashlib
    from pathlib import Path

    # Widget/state keys combine the file name with a short content
    # digest: re-rendering identical bytes keeps the "saved" banner,
    # while different bytes get fresh widgets and no stale message.
    content_tag = hashlib.sha1(data, usedforsecurity=False).hexdigest()[:8]
    key_suffix = f"{file_name}_{content_tag}"
    save_button_key = f"_dl_btn_{key_suffix}"
    saved_state_key = f"_dl_saved_{key_suffix}"
    open_button_key = f"_dl_open_{key_suffix}"

    if st.button(
        label,
        key=save_button_key,
        disabled=disabled,
        help=help,
        type="secondary",
        width=width,
    ):
        target_dir = _downloads_dir()
        try:
            target_dir.mkdir(parents=True, exist_ok=True)
            destination = target_dir / file_name
            destination.write_bytes(data)
            st.session_state[saved_state_key] = str(destination)
        except Exception as exc:
            st.error(
                f"Could not save **{file_name}** to `{target_dir}`: {exc}"
            )
            return

    recorded = st.session_state.get(saved_state_key)
    if not recorded:
        # Nothing saved yet for this content: only the save button shows.
        return

    landed = Path(recorded)
    st.success(f"✓ Saved to `{recorded}`")
    wants_folder = st.button(
        "📂 Open Downloads folder",
        key=open_button_key,
        type="secondary",
    )
    if not wants_folder:
        return

    if _open_in_file_manager(landed.parent, select=landed):
        # The launch was dispatched, but the window may have opened
        # behind the active one — confirm so the user knows we tried.
        st.toast(f"Opening {landed.parent}", icon="📂")
        return
    st.warning(
        f"Could not open the file manager from here. "
        f"The file is at:\n\n`{recorded}`"
    )
|
||
|
||
|
||
# Back-compat alias: existing call sites use the old name. New code
|
||
# should prefer ``local_download_button``.
|
||
# Plain name binding: both names refer to the same function object.
html_download_button = local_download_button
|
||
|
||
|
||
def render_sticky_footer() -> None:
    """Render the slim fixed-position footer with Help and Close controls.

    The footer is built in three steps:

    1. ``st.markdown`` injects the CSS for ``#datatools-sticky-footer``
       and ``#datatools-help-popover`` into the page, plus rules that
       move the helper page link (step 2) off-screen.
    2. A hidden ``st.page_link`` to ``pages/99_Close.py`` is rendered.
    3. ``st.iframe`` runs a script that creates the footer and the help
       popover as direct children of the parent document's ``<body>``
       (falling back to the iframe's own document if the parent is not
       reachable). This keeps the bar outside every Streamlit container —
       required because ``.stApp`` carries ``zoom: 0.85`` and Streamlit's
       content columns add padding/positioning context that would
       otherwise distort or clip the bar.

    Close is a ``<button>``. Its click handler programmatically clicks
    the hidden page link from step 2 so Streamlit performs a soft
    navigation; if that link is not in the DOM the handler falls back to
    a hard navigation to ``./close``.

    Help is pure UI: clicking toggles a small overlay panel containing
    the version, license state, audit-log path and support email — no
    navigation, so no state loss.

    Returns:
        None. Failures while reading the audit-log path, the license
        state, or while rendering the helper page link are swallowed on
        purpose so the footer always renders.
    """
    import html as _html
    import json as _json

    from src import __version__

    # Button labels are inserted with ``createTextNode`` in the script
    # below, which does NOT parse HTML entities. They must therefore stay
    # raw: escaping them here would render e.g. ``&amp;`` literally.
    close_label = _t("footer.close")
    help_label = _t("footer.help")

    # Everything below ends up in ``innerHTML`` and so IS escaped.
    help_title = _html.escape(_t("footer.help_title"))
    help_version = _html.escape(
        _t("footer.help_version").format(version=__version__)
    )
    support_email = "support@unalogix.com"
    help_support_text = _t("footer.help_support").format(email=support_email)
    # Escape the whole sentence first, then swap the (escaped) address for
    # a mailto link so the only markup present is markup we wrote.
    help_support_html = _html.escape(help_support_text).replace(
        _html.escape(support_email),
        f'<a href="mailto:{_html.escape(support_email)}">'
        f'{_html.escape(support_email)}</a>',
    )
    license_label = _html.escape(_t("footer.help_license_label"))
    logs_label = _html.escape(_t("footer.help_logs_label"))
    logs_link_text = _html.escape(_t("footer.help_logs_link"))
    help_dismiss = _html.escape(_t("footer.help_dismiss"))

    # Logs section — surface today's audit log path as copyable text
    # plus a link to the /logs page. Wrapped because a broken audit
    # log MUST NOT stop the footer from rendering; fall back to "—".
    try:
        from src.audit import audit_log_path
        log_path_str = str(audit_log_path())
    except Exception:
        log_path_str = "—"
    logs_html = (
        f'<div class="dt-help-row"><span class="dt-help-key">'
        f'{logs_label}:</span></div>'
        f'<div class="dt-help-row dt-help-sub">'
        f'<code class="dt-help-path">{_html.escape(log_path_str)}</code></div>'
        f'<div class="dt-help-row">'
        f'<a href="./logs" target="_self">{logs_link_text}</a></div>'
    )

    # License section — read state and branch on activated/valid. The
    # query is wrapped because a corrupted license file MUST NOT stop
    # the footer from rendering; in that case we fall back to the
    # "ask to activate" branch.
    try:
        from src.license import current_state as _license_state
        state = _license_state()
    except Exception:
        state = None

    if state is not None and state.activated and state.valid:
        active_line = _t("footer.help_license_active").format(
            name=state.name or state.email or "—",
        )
        expires_line = _t("footer.help_license_expires").format(
            date=(state.expires_at or "")[:10],
            days=state.days_remaining,
        )
        manage_link = _html.escape(_t("footer.help_manage_link"))
        license_html = (
            f'<div class="dt-help-row"><span class="dt-help-key">'
            f'{license_label}:</span> {_html.escape(active_line)}</div>'
            f'<div class="dt-help-row dt-help-sub">'
            f'{_html.escape(expires_line)}</div>'
            f'<div class="dt-help-row">'
            f'<a href="./activate" target="_self">{manage_link}</a></div>'
        )
    else:
        inactive_line = _html.escape(_t("footer.help_license_inactive"))
        activate_link = _html.escape(_t("footer.help_activate_link"))
        license_html = (
            f'<div class="dt-help-row"><span class="dt-help-key">'
            f'{license_label}:</span> {inactive_line}</div>'
            f'<div class="dt-help-row">'
            f'<a href="./activate" target="_self">{activate_link}</a></div>'
        )

    popover_html = (
        f'<div class="dt-help-title">{help_title}</div>'
        f'<div class="dt-help-row">{help_version}</div>'
        f'{license_html}'
        f'{logs_html}'
        f'<div class="dt-help-row">{help_support_html}</div>'
        f'<button type="button" class="dt-help-dismiss">{help_dismiss}</button>'
    )

    # Serialized once, outside the f-string. ``</`` is rewritten to
    # ``<\/`` (an equivalent escape inside a JS/JSON string) so no value
    # can ever contain a literal ``</script`` and end the inline script
    # element early.
    labels_json = _json.dumps({
        "close": close_label,
        "help": help_label,
        "popover_html": popover_html,
    }).replace("</", "<\\/")

    st.markdown(
        """
<style>
/* ``.stApp`` carries ``zoom: 0.85`` (compact-layout scaler), so any
   child sized at ``100vh`` only renders at 85vh visually — the bottom
   ~15vh of the viewport sits OUTSIDE ``.stApp`` and shows ``body``'s
   white through. Previously we stretched to ``calc(100vh / 0.85)`` to
   close the gap entirely, but that overstretched every page and pushed
   the last row past the visible area. Compromise: stretch to cover
   MOST of the gap, leaving ~4vh of white bar (≈ 1/4 of the original
   ~15vh) so short pages stay short but the bar doesn't dominate. */
.stApp {
    min-height: calc(96vh / 0.85) !important;
}
[data-testid="stSidebar"],
[data-testid="stMain"] {
    min-height: calc(96vh / 0.85) !important;
}
/* DO NOT override ``padding-bottom`` on the block container here — the
   ``hide_streamlit_chrome`` rule above sets it to 7rem precisely so
   content clears this footer cleanly. A tighter override here was
   previously winning the cascade (loaded later) and cutting off the
   last line of tool-page content. */
#datatools-sticky-footer {
    position: fixed !important;
    bottom: 0 !important;
    left: 0 !important;
    right: 0 !important;
    background: rgba(255, 255, 255, 0.97) !important;
    backdrop-filter: blur(8px);
    -webkit-backdrop-filter: blur(8px);
    border-top: 1px solid rgba(49, 51, 63, 0.2) !important;
    padding: 0.25rem 0.75rem !important;
    z-index: 2147483646 !important;
    display: flex !important;
    align-items: center !important;
    justify-content: flex-start !important;
    gap: 0.4rem !important;
    font-family: system-ui, -apple-system, sans-serif !important;
    box-sizing: border-box !important;
    min-height: 32px !important;
}
/* Footer buttons match the sidebar nav-item style: borderless,
   icon + label, ink-secondary text, soft hover. Close keeps a danger
   tint via the ``.close`` modifier so it still reads as the
   shut-down action without the outlined-pill chrome it used to wear. */
#datatools-sticky-footer .datatools-footer-btn {
    display: inline-flex !important;
    align-items: center !important;
    gap: 8px !important;
    color: var(--ink-secondary) !important;
    background: transparent !important;
    text-decoration: none !important;
    padding: 5px 10px !important;
    border-radius: var(--r-sm) !important;
    border: none !important;
    font-family: var(--font-sans) !important;
    font-size: 13px !important;
    font-weight: 500 !important;
    line-height: 1.3 !important;
    cursor: pointer !important;
    transition: background 0.12s ease, color 0.12s ease;
}
#datatools-sticky-footer .datatools-footer-btn:hover {
    background: rgba(0, 0, 0, 0.04) !important;
    color: var(--ink) !important;
}
/* The icon ligature span inside each button — Material Symbols, 16px,
   inherits the surrounding ink color so hover-tint propagates. */
#datatools-sticky-footer .datatools-footer-btn .dt-mui {
    font-family: "Material Symbols Outlined" !important;
    font-size: 16px !important;
    font-weight: 400 !important;
    font-feature-settings: normal !important;
    letter-spacing: 0 !important;
    line-height: 1 !important;
}
/* Close — danger tint stays as a hint but the chrome is otherwise
   identical to Help. */
#datatools-sticky-footer .datatools-footer-btn.close {
    color: var(--danger) !important;
}
#datatools-sticky-footer .datatools-footer-btn.close:hover {
    background: var(--danger-fill) !important;
    color: var(--danger) !important;
}
#datatools-help-popover {
    position: fixed !important;
    left: 0.75rem !important;
    bottom: 44px !important;
    background: white !important;
    border: 1px solid rgba(49, 51, 63, 0.25) !important;
    border-radius: 0.5rem !important;
    box-shadow: 0 8px 20px rgba(0,0,0,0.12) !important;
    padding: 0.75rem 0.9rem !important;
    z-index: 2147483647 !important;
    font-family: system-ui, -apple-system, sans-serif !important;
    font-size: 13px !important;
    color: rgb(38, 39, 48) !important;
    min-width: 220px !important;
    max-width: 320px !important;
}
#datatools-help-popover[hidden] { display: none !important; }
#datatools-help-popover .dt-help-title {
    font-weight: 600 !important;
    margin-bottom: 0.35rem !important;
}
#datatools-help-popover .dt-help-row {
    margin: 0.15rem 0 !important;
    line-height: 1.4 !important;
}
#datatools-help-popover .dt-help-row.dt-help-sub {
    color: rgb(90, 95, 110) !important;
    font-size: 12px !important;
    margin-left: 0.65rem !important;
}
#datatools-help-popover .dt-help-key {
    color: rgb(90, 95, 110) !important;
    font-weight: 500 !important;
}
#datatools-help-popover .dt-help-path {
    display: block !important;
    font-family: var(--font-mono, ui-monospace, Menlo, Consolas, monospace) !important;
    font-size: 11px !important;
    color: rgb(38, 39, 48) !important;
    background: rgba(0, 0, 0, 0.04) !important;
    padding: 2px 6px !important;
    border-radius: 3px !important;
    word-break: break-all !important;
    user-select: all !important;
}
#datatools-help-popover .dt-help-row a {
    color: rgb(0, 102, 204) !important;
    text-decoration: none !important;
}
#datatools-help-popover .dt-help-row a:hover {
    text-decoration: underline !important;
}
#datatools-help-popover .dt-help-dismiss {
    margin-top: 0.5rem !important;
    font-size: 11px !important;
    color: rgb(90, 95, 110) !important;
    background: none !important;
    border: none !important;
    cursor: pointer !important;
    padding: 0 !important;
}
#datatools-help-popover .dt-help-dismiss:hover {
    color: rgb(38, 39, 48) !important;
}
/* Hide the sticky-footer's helper st.page_link off-screen but
   keep it in the DOM + clickable. The footer's Close button
   dispatches a programmatic click on this link so navigation uses
   Streamlit's soft nav (preserves the websocket, no visible page
   reload) instead of the browser hard-nav an ``<a href="./close">``
   would trigger. Off-screen (rather than ``display:none``) so
   React event delegation works reliably across browsers.

   NOTE on the selector: Streamlit's page_link renders an outer
   wrapper div with ``data-testid="stPageLink"`` and an inner anchor
   with ``data-testid="stPageLink-NavLink"`` — the NavLink suffix
   is required to match the anchor (the bare testid is on the
   wrapper). ``href*="close"`` works across both root (``/close``)
   and base-path (``/myapp/close``) deployments. */
[data-testid="stElementContainer"]:has(a[data-testid="stPageLink-NavLink"][href*="close"]) {
    position: absolute !important;
    left: -9999px !important;
    top: -9999px !important;
    width: 1px !important;
    height: 1px !important;
    overflow: hidden !important;
    opacity: 0 !important;
    pointer-events: none !important;
}
/* Defensive fallback for browsers without :has() — at least
   shrink the inline page_link so it doesn't render a visible row.
   Same testid note as above. */
a[data-testid="stPageLink-NavLink"][href*="close"] {
    visibility: hidden !important;
    height: 0 !important;
    padding: 0 !important;
    margin: 0 !important;
}
</style>
""",
        unsafe_allow_html=True,
    )

    # Hidden Streamlit page_link to the close page. The footer's
    # Close button programmatically clicks the anchor this renders,
    # which triggers Streamlit's soft navigation. The link is positioned
    # off-screen via the CSS above so it doesn't take page space
    # but remains reachable to the JS click dispatch.
    #
    # Wrapped because ``st.page_link`` raises ``KeyError('url_pathname')``
    # under ``AppTest`` (the test harness does not populate the page-nav
    # session keys ``page_link`` needs to mark itself active/inactive).
    # The JS click handler has a hard-nav fallback when this helper
    # link isn't present, so a failure here only costs the soft-nav
    # optimization — Close still works.
    try:
        st.page_link(
            "pages/99_Close.py",
            label=_t("footer.close"),
        )
    except Exception:
        pass

    # NOTE(review): ``build`` attaches a document-level click listener, a
    # window resize listener and a ResizeObserver on every execution and
    # never removes them. If this script re-executes on reruns they may
    # accumulate on the parent document — confirm and add cleanup if so.
    st.iframe(
        f"""
<script>
(function () {{
  var labels = {labels_json};
  function build(doc) {{
    var prev = doc.getElementById('datatools-sticky-footer');
    if (prev) prev.remove();
    var prevPop = doc.getElementById('datatools-help-popover');
    if (prevPop) prevPop.remove();

    var div = doc.createElement('div');
    div.id = 'datatools-sticky-footer';

    // Build a button with a Material-Symbols ligature icon + label,
    // matching the sidebar nav-link layout.
    function makeFooterBtn(cls, iconName, label) {{
      var btn = doc.createElement('button');
      btn.type = 'button';
      btn.className = 'datatools-footer-btn ' + cls;
      var icon = doc.createElement('span');
      icon.className = 'dt-mui';
      icon.textContent = iconName;
      btn.appendChild(icon);
      btn.appendChild(doc.createTextNode(label));
      return btn;
    }}
    var helpBtn = makeFooterBtn('help', 'help_outline', labels.help);
    var closeBtn = makeFooterBtn('close', 'power_settings_new', labels.close);
    // Soft-nav via the hidden ``st.page_link`` that
    // ``render_sticky_footer`` injects. Streamlit owns its click
    // handler and will route through ``st.switch_page`` (same
    // code path the old sidebar Close entry used) — no full-page
    // reload, no websocket churn. Fall back to a hard nav if the
    // helper link hasn't rendered yet (first paint race) so the
    // button is never a no-op.
    //
    // The page_link's anchor uses ``data-testid="stPageLink-NavLink"``
    // (the outer wrapper div carries the bare ``stPageLink`` testid;
    // dispatching click on the wrapper doesn't fire Streamlit's
    // React onClick handler). ``href*="close"`` covers both root
    // (/close) and base-path (e.g. /myapp/close) deployments.
    closeBtn.addEventListener('click', function (e) {{
      e.preventDefault();
      var helper = doc.querySelector(
        'a[data-testid="stPageLink-NavLink"][href*="close"]'
      );
      if (helper) {{
        helper.click();
        return;
      }}
      // Hard-nav fallback. ``window`` inside this script is the
      // component iframe's window — changing ITS location only
      // navigates the iframe (which lives in srcdoc and is
      // invisible). Use the parent doc's location so the whole
      // app navigates.
      var topWin = (doc.defaultView) || window.parent || window.top || window;
      try {{ topWin.location.href = './close'; }}
      catch (err) {{ window.top.location.href = './close'; }}
    }});

    div.appendChild(helpBtn);
    div.appendChild(closeBtn);

    var pop = doc.createElement('div');
    pop.id = 'datatools-help-popover';
    pop.hidden = true;
    pop.innerHTML = labels.popover_html;

    helpBtn.addEventListener('click', function (e) {{
      e.preventDefault();
      pop.hidden = !pop.hidden;
    }});
    pop.querySelector('.dt-help-dismiss').addEventListener('click', function () {{
      pop.hidden = true;
    }});
    doc.addEventListener('click', function (e) {{
      if (pop.hidden) return;
      if (pop.contains(e.target) || helpBtn.contains(e.target)) return;
      pop.hidden = true;
    }});

    doc.body.appendChild(div);
    doc.body.appendChild(pop);

    // Push the footer's left edge to start at the sidebar's right
    // edge so its near-white background doesn't paint over the
    // sidebar's gray and read as a visible "white bar" along the
    // bottom of the sidebar. Same offset for the help popover.
    // Re-measure whenever the sidebar resizes (collapse/expand)
    // so the offset tracks the live layout.
    var sb = doc.querySelector('[data-testid="stSidebar"]');
    function applyOffset() {{
      var off = 0;
      if (sb) {{
        var r = sb.getBoundingClientRect();
        // If the sidebar is collapsed it can have width 0 OR be
        // pushed off-screen (negative right). Clamp to 0 so the
        // footer goes flush-left in that case.
        off = Math.max(0, Math.round(r.right));
      }}
      // ``!important`` because the CSS rule for ``left`` is itself
      // ``!important`` and would otherwise win over plain inline.
      div.style.setProperty('left', off + 'px', 'important');
      pop.style.setProperty('left', (off + 12) + 'px', 'important');
    }}
    applyOffset();
    if (sb && 'ResizeObserver' in window) {{
      try {{ new ResizeObserver(applyOffset).observe(sb); }} catch (e) {{}}
    }}
    // Also re-measure on window resize as a belt-and-suspenders
    // — handles viewport changes that don't trigger the sidebar's
    // own resize event.
    var win = doc.defaultView || window.parent || window;
    try {{ win.addEventListener('resize', applyOffset); }} catch (e) {{}}
  }}
  try {{
    build(window.parent.document);
  }} catch (e) {{
    build(document);
  }}
}})();
</script>
""",
        height=1,
    )
|
||
|
||
|
||
def render_tool_header(tool_id: str) -> None:
    """Render a tool page's title row with an inline Help popover.

    Replaces the bare ``st.title(...)`` + ``st.caption(...)`` block on
    each tool page. Help content is one markdown blob per tool in the
    i18n pack (``tools.<id>.help_md``) so editors can tweak copy without
    touching Python.

    Layout: ``[title | help button]`` over ``[caption]``. The two columns
    are created with a 10:1 width ratio, so the help column is narrow and
    the title gets the rest. No vertical-alignment option is passed to
    ``st.columns``; a blank ``st.write("")`` spacer is used instead to
    nudge the button down toward the title's baseline.

    Args:
        tool_id: Identifier interpolated into the i18n keys
            ``tools.<tool_id>.page_title``, ``.help_md`` and
            ``.page_caption``.
    """
    col_title, col_help = st.columns([10, 1])
    with col_title:
        st.title(_t(f"tools.{tool_id}.page_title"))
    with col_help:
        # Spacer pushes the popover button down so it sits closer to
        # the title's baseline than to its top — without the spacer the
        # button floats above the big title text.
        st.write("")
        body = _t(f"tools.{tool_id}.help_md")
        # A result that still starts with "tools." is treated as an
        # unresolved key — presumably ``_t`` echoes the key back when no
        # translation exists (confirm against ``src.i18n``). Show the
        # generic placeholder body in that case.
        if body.startswith("tools."):
            body = _t("help.missing_body")
        # ``width="stretch"`` replaces the deprecated
        # ``use_container_width=True`` and matches how the rest of this
        # module sizes its buttons.
        with st.popover(
            _t("help.button_label"),
            icon=":material/help_outline:",
            width="stretch",
        ):
            st.markdown(body)
    st.caption(_t(f"tools.{tool_id}.page_caption"))
|
||
|
||
|
||
def _render_sticky_footer_DISABLED() -> None:
    """Retired footer variant with a single "Back to Home" link.

    Kept for reference; the name marks it as disabled. It builds a slim
    fixed-position bar that is mounted as a direct child of ``<body>``
    via an iframe script so it lives OUTSIDE every Streamlit container —
    that matters because ``.stApp`` carries ``zoom: 0.85`` (our
    compact-layout scaler) and Streamlit's content columns add their own
    padding/positioning context that previously swallowed the in-place
    ``st.markdown`` footer.

    The implementation is two-pass:

    1. ``st.markdown`` injects the CSS rules into the parent document.
       They are keyed on the ``#datatools-sticky-footer`` ID, so they
       apply as soon as the footer node exists, wherever it lives.
    2. ``st.iframe`` renders an iframe whose script reaches
       ``window.parent.document`` and (re)creates the
       ``#datatools-sticky-footer`` div directly under ``<body>``,
       falling back to the iframe's own document on failure.

    The anchor's ``href`` is ``./`` (the app root) with
    ``target="_self"``, i.e. a full-page navigation.
    """
    import json as _json

    # Passed raw: the script assigns it via ``textContent``, which does
    # not interpret HTML, so no HTML escaping is needed (or wanted).
    label_raw = _t("nav.back_to_home")

    # CSS rules live in the parent document. ID selector, so a
    # re-rendered/relocated footer div picks them up automatically.
    st.markdown(
        """
<style>
[data-testid="stAppViewBlockContainer"] {
    padding-bottom: 4rem !important;
}
#datatools-sticky-footer {
    position: fixed !important;
    bottom: 0 !important;
    left: 0 !important;
    right: 0 !important;
    background: rgba(255, 255, 255, 0.97) !important;
    backdrop-filter: blur(8px);
    -webkit-backdrop-filter: blur(8px);
    border-top: 1px solid rgba(49, 51, 63, 0.25) !important;
    padding: 0.5rem 1.25rem !important;
    z-index: 2147483646 !important;
    display: flex !important;
    align-items: center !important;
    justify-content: flex-start !important;
    font-family: system-ui, -apple-system, sans-serif !important;
    box-sizing: border-box !important;
}
#datatools-sticky-footer a.datatools-sticky-footer-link {
    display: inline-block !important;
    color: rgb(38, 39, 48) !important;
    text-decoration: none !important;
    padding: 0.4rem 0.9rem !important;
    border-radius: 0.5rem !important;
    border: 1px solid rgba(49, 51, 63, 0.28) !important;
    background: rgb(240, 242, 246) !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    line-height: 1.4 !important;
    cursor: pointer !important;
    transition: background 0.12s ease, border-color 0.12s ease;
}
#datatools-sticky-footer a.datatools-sticky-footer-link:hover {
    background: rgb(225, 228, 235) !important;
    border-color: rgba(49, 51, 63, 0.4) !important;
}
#datatools-sticky-footer a.datatools-sticky-footer-link:active {
    background: rgb(210, 214, 222) !important;
}
</style>
""",
        unsafe_allow_html=True,
    )

    # Move the footer to <body> directly via component iframe. The
    # iframe carries allow-same-origin so window.parent.document is
    # reachable; if a sandbox config ever blocks that we fall back to
    # rendering inside the iframe itself (still visible, just sized
    # to the iframe rather than the viewport).
    st.iframe(
        f"""
<script>
(function () {{
  var label = {_json.dumps(label_raw)};
  function build(doc) {{
    var prev = doc.getElementById('datatools-sticky-footer');
    if (prev) prev.remove();
    var div = doc.createElement('div');
    div.id = 'datatools-sticky-footer';
    var a = doc.createElement('a');
    a.className = 'datatools-sticky-footer-link';
    // Navigate to the app root (``/``) instead of ``/home``. The
    // home page is registered with ``default=True``, which serves
    // it at the root URL. ``/home`` is NOT a recognized URL on
    // every Streamlit minor version even with ``url_path="home"``
    // — some builds reserve the alias only for non-default pages.
    // Using ``./`` is robust against both: it resolves to the
    // current document's directory, which on a single-segment
    // tool-page URL like ``/01_deduplicator`` is the server root.
    a.href = './';
    a.target = '_self';
    a.textContent = label;
    div.appendChild(a);
    return div;
  }}
  try {{
    var doc = window.parent.document;
    doc.body.appendChild(build(doc));
  }} catch (e) {{
    document.body.appendChild(build(document));
  }}
}})();
</script>
""",
        height=1,
    )
|
||
|
||
|
||
def back_to_home_link(*, key: str = "_back_to_home_link") -> None:
    """Render a "Back to Home" button that switches to the home page.

    Intended for tool pages reached from the home findings panel, so a
    user working through findings on several uploaded files can return
    home without hunting through the sidebar. A page may call this more
    than once (e.g. top and bottom) as long as each call passes a
    distinct ``key`` — for instance ``key="_back_to_home_link_bottom"``
    for the second one.

    On click, the home callable is imported from ``src.gui._home`` and
    wrapped in a freshly built ``st.Page`` which is handed to
    ``st.switch_page``. This relies on that page resolving to the one
    registered in the app's navigation — presumably with the same
    callable and ``url_path``; confirm against the nav setup.

    Args:
        key: Streamlit widget key for the button.
    """
    pressed = st.button(_t("nav.back_to_home"), key=key, type="secondary")
    if not pressed:
        return

    # Import from the renderer module (not from app.py — importing
    # app.py would re-execute its navigation setup with the wrong
    # "main script" context and blow up the pages/ path resolution).
    # Deferred to click time so merely rendering the button never
    # triggers the import.
    from src.gui._home import _home_page

    home = st.Page(_home_page, title="Home", icon="🧹", url_path="home")
    st.switch_page(home)
|
||
|
||
|
||
def shutdown_app() -> None:
    """Shut the Streamlit server down right away, without confirmation.

    Meant to be called from a page whose rendering alone signals that
    the user wants to quit. The first call in a session:

    * marks ``st.session_state["_app_shutting_down"]``,
    * logs a "Session ending" audit event and flushes the audit log
      (best effort, 0.5 s flush timeout; any failure is ignored),
    * unless ``pytest`` has been imported, starts a daemon thread that
      sleeps one second and then calls ``os._exit(0)``.

    Every call then injects the farewell script via ``st.iframe``,
    shows the "shutting down" success message and halts the page with
    ``st.stop``.

    ``os._exit`` is used because Streamlit offers no shutdown hook and,
    per the original author's notes, signalling the process does not
    reliably terminate it. The one-second delay gives the farewell
    overlay a chance to paint first. The exit thread is skipped under
    pytest so rendering this page in a test does not kill the test run.
    """
    first_call = not st.session_state.get("_app_shutting_down")
    if first_call:
        st.session_state["_app_shutting_down"] = True

        # Drain the audit log queue to disk before the process dies.
        # The flush is capped at 500 ms so a stuck disk cannot hold up
        # shutdown for long.
        try:
            from src.audit import flush_audit_log, log_event

            log_event("session", "Session ending")
            flush_audit_log(timeout_s=0.5)
        except Exception:
            pass

        running_under_pytest = "pytest" in sys.modules
        if not running_under_pytest:

            def _exit_after_grace() -> None:
                # Grace period for the overlay, then a hard process exit.
                time.sleep(1.0)
                os._exit(0)

            killer = threading.Thread(target=_exit_after_grace, daemon=True)
            killer.start()

    st.iframe(_farewell_script(), height=1)
    st.success(_t("quit.shutting_down"))
    st.stop()
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Config panel (advanced options)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def config_panel(df: pd.DataFrame) -> dict:
    """Render the "Advanced Options" expander and return the chosen settings.

    The expander holds the column pickers (match / strong-key / fuzzy),
    the fuzzy algorithm and threshold, the survivor rule, optional
    per-column normalizers, merge mode, and a load/save row for config
    profiles.

    Saving is a two-step flow: "Save current settings" reveals a
    "Download config JSON" button. Because that second control is itself
    a Streamlit button, the reveal is latched in
    ``st.session_state["_config_save_requested"]`` — a button nested
    directly under ``if st.button(...)`` would vanish on the rerun caused
    by clicking it and never fire.

    Args:
        df: Frame whose column names populate the pickers.

    Returns:
        A dict with the keys:
            strategies: list[MatchStrategy] | None
            survivor_rule: SurvivorRule
            date_column: str | None
            merge: bool
    """
    import json

    columns = list(df.columns)

    with st.expander("Advanced Options"):
        col_left, col_right = st.columns(2)

        with col_left:
            subset_cols = st.multiselect(
                "Match on columns",
                columns,
                default=[],
                help="Leave empty to auto-detect based on column names.",
            )
            key_cols = st.multiselect(
                "Strong keys",
                columns,
                default=[],
                help="Columns that uniquely identify records (e.g., EIN, SKU). Each is an independent exact-match strategy.",
            )
            fuzzy_cols = st.multiselect(
                "Fuzzy columns",
                columns,
                default=[],
                help="Columns to fuzzy-match. Others use exact matching.",
            )

        with col_right:
            algorithm = st.selectbox(
                "Fuzzy algorithm",
                ["jaro_winkler", "levenshtein", "token_set_ratio"],
                index=0,
                help="jaro_winkler: best for names. levenshtein: best for typos. token_set_ratio: best for addresses.",
            )
            threshold = st.slider(
                "Similarity threshold",
                min_value=50,
                max_value=100,
                value=85,
                help="Lower = more matches but more false positives.",
            )
            survivor = st.selectbox(
                "Survivor rule",
                ["first", "last", "most-complete", "most-recent"],
                index=0,
                help="Which row to keep when duplicates are found.",
            )

        # Second row of options
        col_a, col_b = st.columns(2)

        with col_a:
            normalizer_types = ["auto", "email", "phone", "name", "address", "string", "none"]

            # Only explicit choices are recorded; "auto" and "none" both
            # leave the column out of the map.
            normalize_map: dict[str, str] = {}
            if fuzzy_cols or subset_cols:
                target_cols = fuzzy_cols or subset_cols
                st.markdown("**Per-column normalizers**")
                for col_name in target_cols:
                    norm = st.selectbox(
                        f"Normalizer for '{col_name}'",
                        normalizer_types,
                        index=0,
                        key=f"norm_{col_name}",
                    )
                    if norm not in ("auto", "none"):
                        normalize_map[col_name] = norm

        with col_b:
            merge = st.checkbox(
                "Merge mode",
                value=False,
                help="Fill missing fields in the surviving row from removed duplicates.",
            )
            date_column: Optional[str] = None
            if survivor == "most-recent":
                date_column = st.selectbox(
                    "Date column",
                    columns,
                    help="Required for most-recent survivor rule.",
                )

        # Config save/load
        st.divider()
        cfg_left, cfg_right = st.columns(2)

        with cfg_left:
            config_file = st.file_uploader(
                "Load config profile",
                type=["json"],
                help="Load previously saved settings.",
                key="config_upload",
            )
            if config_file is not None:
                try:
                    data = json.loads(config_file.read())
                    loaded = DeduplicationConfig.from_dict(data)
                    st.session_state["loaded_config"] = loaded
                    st.success("Config loaded.")
                except Exception as e:
                    st.error(f"Failed to load config: {e}")

        with cfg_right:
            if st.button("Save current settings"):
                st.session_state["_config_save_requested"] = True

            # Rendered from the latched flag, NOT from the button's return
            # value, so the download button is still on the page during
            # the rerun that its own click triggers. The config is rebuilt
            # on every run, so the export always reflects the current
            # widget values.
            if st.session_state.get("_config_save_requested"):
                cfg = _build_config(
                    subset_cols, key_cols, fuzzy_cols,
                    algorithm, threshold, normalize_map,
                    survivor, date_column, merge,
                )
                cfg_json = cfg.to_dict()
                html_download_button(
                    "Download config JSON",
                    json.dumps(cfg_json, indent=2).encode("utf-8"),
                    file_name="dedup_config.json",
                    mime="application/json",
                )

    # Build strategies from selections
    strategies = _build_strategies(
        subset_cols, key_cols, fuzzy_cols,
        algorithm, threshold, normalize_map,
    )

    # Survivor rule mapping
    survivor_map = {
        "first": SurvivorRule.KEEP_FIRST,
        "last": SurvivorRule.KEEP_LAST,
        "most-complete": SurvivorRule.KEEP_MOST_COMPLETE,
        "most-recent": SurvivorRule.KEEP_MOST_RECENT,
    }

    return {
        "strategies": strategies,
        "survivor_rule": survivor_map[survivor],
        "date_column": date_column,
        "merge": merge,
    }
|
||
|
||
|
||
def _build_strategies(
|
||
subset_cols: list[str],
|
||
key_cols: list[str],
|
||
fuzzy_cols: list[str],
|
||
algorithm: str,
|
||
threshold: int,
|
||
normalize_map: dict[str, str],
|
||
) -> Optional[list[MatchStrategy]]:
|
||
"""Build MatchStrategy list from GUI selections. Returns None for auto-detect."""
|
||
strategies: list[MatchStrategy] = []
|
||
|
||
# If user selected columns explicitly, build from those
|
||
if subset_cols or fuzzy_cols:
|
||
target_cols = subset_cols if subset_cols else fuzzy_cols
|
||
fuzzy_set = set(fuzzy_cols)
|
||
col_strats: list[ColumnMatchStrategy] = []
|
||
for col in target_cols:
|
||
norm = None
|
||
if col in normalize_map:
|
||
norm = NormalizerType(normalize_map[col])
|
||
if col in fuzzy_set:
|
||
algo = Algorithm(algorithm)
|
||
thresh = float(threshold)
|
||
else:
|
||
algo = Algorithm.EXACT
|
||
thresh = 100.0
|
||
col_strats.append(ColumnMatchStrategy(
|
||
column=col, algorithm=algo, threshold=thresh, normalizer=norm,
|
||
))
|
||
strategies.append(MatchStrategy(column_strategies=col_strats))
|
||
|
||
# Add strong key strategies
|
||
if key_cols:
|
||
for col in key_cols:
|
||
strategies.append(MatchStrategy(column_strategies=[
|
||
ColumnMatchStrategy(column=col, algorithm=Algorithm.EXACT, threshold=100.0)
|
||
]))
|
||
|
||
return strategies if strategies else None
|
||
|
||
|
||
def _build_config(
    subset_cols, key_cols, fuzzy_cols,
    algorithm, threshold, normalize_map,
    survivor, date_column, merge,
) -> DeduplicationConfig:
    """Assemble a ``DeduplicationConfig`` from the current GUI selections.

    The scalar settings are copied onto the config (empty selections become
    ``None``; the survivor rule has its dashes turned into underscores). The
    strategies produced by ``_build_strategies`` for the same selections are
    then mirrored into ``StrategyConfig`` / ``ColumnStrategyConfig`` entries
    so they can be serialised with the profile.
    """
    config = DeduplicationConfig(
        survivor_rule=survivor.replace("-", "_"),
        date_column=date_column,
        merge=merge,
        subset_columns=subset_cols or None,
        fuzzy_columns=fuzzy_cols or None,
        default_algorithm=algorithm,
        default_threshold=float(threshold),
        normalize_map=normalize_map or None,
    )

    built = _build_strategies(
        subset_cols, key_cols, fuzzy_cols,
        algorithm, threshold, normalize_map,
    )
    if not built:
        # Nothing selected: leave config.strategies at its default.
        return config

    serialised = []
    for strategy in built:
        columns = []
        for column_strategy in strategy.column_strategies:
            normalizer = column_strategy.normalizer
            columns.append(ColumnStrategyConfig(
                column=column_strategy.column,
                algorithm=column_strategy.algorithm.value,
                threshold=column_strategy.threshold,
                normalizer=normalizer.value if normalizer else None,
            ))
        serialised.append(StrategyConfig(columns=columns))
    config.strategies = serialised
    return config
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Match group review card
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _find_differing_cols(
|
||
group: MatchResult, df: pd.DataFrame, display_cols: list[str],
|
||
) -> list[str]:
|
||
"""Return columns where values differ across rows in the group."""
|
||
differing = []
|
||
for col in display_cols:
|
||
values = set()
|
||
for idx in group.row_indices:
|
||
values.add(str(df.iloc[idx].get(col, "")).strip())
|
||
if len(values) > 1:
|
||
differing.append(col)
|
||
return differing
|
||
|
||
|
||
def match_group_card(
    group: MatchResult,
    df: pd.DataFrame,
    group_num: int,
) -> None:
    """Render an expandable review card for one duplicate match group.

    Undecided groups show an editable table: a ``Keep`` checkbox per row and,
    for every column whose values differ across the group, a dropdown of the
    distinct values found in the group. Pressing *Confirm* records the
    decision. Decided groups render collapsed as a read-only table with
    differing cells highlighted and an *Undo* button.

    Decisions are stored in ``st.session_state["review_decisions"]`` as::

        {group_id: {"keep_indices": [int, ...], "overrides": {col: val}}}

    ``overrides`` is only filled in when exactly one row is kept.

    Args:
        group: Match group to render. ``row_indices`` and ``survivor_index``
            are used as positional indices into ``df``.
        df: Frame the group's rows are read from. Columns whose name starts
            with ``_norm_`` are hidden.
        group_num: Number shown in the card label.
    """
    gid = group.group_id
    row_indices = list(group.row_indices)
    n_rows = len(row_indices)
    confidence = group.confidence
    matched_on = ", ".join(group.matched_on)

    decisions = st.session_state.get("review_decisions", {})
    has_decision = gid in decisions
    decision = decisions.get(gid, {})
    keep_indices = decision.get("keep_indices", []) if has_decision else []
    overrides = decision.get("overrides", {}) if has_decision else {}

    # Card label; a decided group also carries its outcome.
    label = (
        f"Group {group_num}: {n_rows} rows "
        f"(confidence: {confidence:.0f}%) "
        f"[{matched_on}]"
    )
    if has_decision:
        if len(keep_indices) == n_rows:
            label += " — Kept All"
        elif len(keep_indices) == 1:
            label += " — Merged (customized)" if overrides else " — Merged"
        else:
            label += f" — Split (kept {len(keep_indices)} of {n_rows})"

    display_cols = [c for c in df.columns if not str(c).startswith("_norm_")]
    differing_cols = _find_differing_cols(group, df, display_cols)

    # Decided groups collapse; undecided groups stay open.
    with st.expander(label, expanded=not has_decision):
        # Materialise each row once; both branches read from these.
        source_rows = [df.iloc[idx] for idx in row_indices]

        if has_decision:
            # --- Decided state: read-only table with diff highlighting ---
            table_rows = []
            for idx, source in zip(row_indices, source_rows):
                record = {"Row": idx + 1}
                for col in display_cols:
                    record[col] = source.get(col, "")
                table_rows.append(record)
            compare_df = pd.DataFrame(table_rows).set_index("Row")

            def _highlight_diffs(s: pd.Series) -> list[str]:
                # Compare every cell of the column with the first row's value:
                # amber = different value, red = blank where the first row has one.
                styles = []
                first_val = str(s.iloc[0]).strip() if len(s) > 0 else ""
                for val in s:
                    val_str = str(val).strip()
                    if val_str != first_val and val_str and first_val:
                        styles.append(
                            "background-color: rgba(245, 166, 35, 0.2)"
                        )
                    elif not val_str and first_val:
                        styles.append(
                            "background-color: rgba(240, 82, 82, 0.1)"
                        )
                    else:
                        styles.append("")
                return styles

            styled = compare_df.style.apply(_highlight_diffs, axis=0)
            st.dataframe(styled, width="stretch")

            if len(keep_indices) == n_rows:
                st.info("Decision: Kept All")
            elif len(keep_indices) == 1:
                msg = "Decision: Merge"
                if overrides:
                    msg += f" ({len(overrides)} column(s) customized)"
                st.success(msg)
            else:
                kept = ", ".join(str(i + 1) for i in sorted(keep_indices))
                st.success(
                    f"Decision: Keep rows {kept} "
                    f"(removing {n_rows - len(keep_indices)})"
                )

            def _undo(g=gid):
                # Tolerate the decisions dict having been cleared elsewhere.
                st.session_state.get("review_decisions", {}).pop(g, None)
                # Drop the editor widget state so the table starts fresh.
                st.session_state.pop(f"editor_{g}", None)

            st.button("Undo", key=f"undo_{gid}", on_click=_undo)

        else:
            # --- Undecided: interactive editor with inline checkboxes & dropdowns ---
            editor_rows = []
            for idx, source in zip(row_indices, source_rows):
                row_data = {"Keep": idx == group.survivor_index, "Row": idx + 1}
                for col in display_cols:
                    row_data[col] = str(source.get(col, ""))
                editor_rows.append(row_data)
            editor_df = pd.DataFrame(editor_rows)

            col_config = {
                "Keep": st.column_config.CheckboxColumn(
                    "Keep", default=True, width="small",
                ),
                "Row": st.column_config.NumberColumn("Row", width="small"),
            }
            for col in differing_cols:
                # Distinct stripped values in first-seen order, plus a blank.
                vals = list(dict.fromkeys(
                    str(source.get(col, "")).strip() for source in source_rows
                ))
                if "" not in vals:
                    vals.append("")
                col_config[col] = st.column_config.SelectboxColumn(
                    col, options=vals, required=False,
                )

            # Only the Keep checkbox and the differing columns are editable.
            disabled_cols = ["Row"] + [
                c for c in display_cols if c not in differing_cols
            ]

            edited = st.data_editor(
                editor_df,
                column_config=col_config,
                disabled=disabled_cols,
                width="stretch",
                hide_index=True,
                key=f"editor_{gid}",
            )

            # Read which rows are checked
            checked = [
                idx
                for i, idx in enumerate(row_indices)
                if edited.iloc[i]["Keep"]
            ]

            if differing_cols:
                st.caption(
                    f"Columns with differences (editable): "
                    f"{', '.join(differing_cols)}"
                )

            # Status + surviving rows preview
            if len(checked) == 0:
                st.warning("Select at least one row to keep.")
            else:
                if len(checked) == n_rows:
                    st.caption("Keeping all rows (no duplicates removed)")
                elif len(checked) == 1:
                    st.caption(
                        f"Merging into Row {checked[0] + 1}, "
                        f"removing {n_rows - 1} row(s)"
                    )
                else:
                    st.caption(
                        f"Keeping {len(checked)} rows, "
                        f"removing {n_rows - len(checked)}"
                    )

                # Build preview of surviving rows with edits applied
                checked_positions = [
                    i for i, idx in enumerate(row_indices)
                    if idx in checked
                ]
                preview = edited.iloc[checked_positions].drop(
                    columns=["Keep"],
                ).reset_index(drop=True)
                st.markdown("**Surviving rows preview:**")
                st.dataframe(preview, width="stretch", hide_index=True)

            # Confirm. Everything the callback needs is bound as a default so
            # it does not depend on loop/closure state at click time.
            def _on_confirm(
                g=gid, indices=row_indices,
                diff=differing_cols, surv=group.survivor_index,
                fallback_df=df,
            ):
                editor_state = st.session_state.get(f"editor_{g}", {})
                ed_rows = editor_state.get("edited_rows", {})

                # Determine which rows to keep; a row the user never touched
                # keeps the editor's initial value (checked for the survivor).
                keep = [
                    idx
                    for i, idx in enumerate(indices)
                    if ed_rows.get(i, {}).get("Keep", idx == surv)
                ]
                if not keep:
                    keep = list(indices)

                # Column overrides (single-survivor merge only)
                ovr: dict[str, str] = {}
                if len(keep) == 1:
                    surv_idx = keep[0]
                    surv_changes = ed_rows.get(indices.index(surv_idx), {})
                    # Prefer the session frame; fall back to the frame this
                    # card was rendered from when the key is absent.
                    the_df = st.session_state.get("df", fallback_df)
                    for c in diff:
                        if c not in surv_changes:
                            continue
                        raw = surv_changes[c]
                        new_val = str(raw) if raw is not None else ""
                        orig = str(the_df.iloc[surv_idx].get(c, "")).strip()
                        if new_val.strip() != orig:
                            ovr[c] = new_val

                # The card reads decisions with a tolerant .get(), so the
                # store may not exist yet when the first group is confirmed.
                if "review_decisions" not in st.session_state:
                    st.session_state["review_decisions"] = {}
                st.session_state["review_decisions"][g] = {
                    "keep_indices": keep,
                    "overrides": ovr,
                }

            st.button(
                "Confirm",
                key=f"confirm_{gid}",
                type="primary",
                on_click=_on_confirm,
                disabled=(len(checked) == 0),
            )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Results summary + downloads
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def results_summary(
    result: DeduplicationResult,
    original_df: pd.DataFrame,
) -> None:
    """Show the headline counts for a run and offer its CSV downloads.

    Renders four metrics (rows in, rows out, removed, groups), a divider,
    and up to three download buttons: the deduplicated rows (always), the
    removed rows (only when there are any) and the match-groups audit
    report (only when at least one group was found). All CSVs are encoded
    as ``utf-8-sig``.
    """
    deduped = result.deduplicated_df
    rows_in = result.original_row_count
    rows_out = len(deduped)

    # Summary metrics, one per column, left to right.
    metrics = (
        ("Rows In", rows_in),
        ("Rows Out", rows_out),
        ("Removed", rows_in - rows_out),
        ("Groups", len(result.match_groups)),
    )
    for slot, (title, value) in zip(st.columns(4), metrics):
        slot.metric(title, value)

    st.divider()

    # Download buttons
    left, middle, right = st.columns(3)

    with left:
        html_download_button(
            "Download Deduplicated CSV",
            deduped.to_csv(index=False).encode("utf-8-sig"),
            file_name="deduplicated.csv",
            mime="text/csv",
        )

    with middle:
        removed_df = result.removed_df
        if not removed_df.empty:
            html_download_button(
                "Download Removed Rows",
                removed_df.to_csv(index=False).encode("utf-8-sig"),
                file_name="removed_rows.csv",
                mime="text/csv",
            )

    with right:
        if result.match_groups:
            html_download_button(
                "Download Match Groups Report",
                _build_match_groups_csv(result, original_df),
                file_name="match_groups.csv",
                mime="text/csv",
            )
|
||
|
||
|
||
def apply_review_decisions(
    original_df: pd.DataFrame,
    match_groups: list[MatchResult],
    decisions: dict,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Apply the reviewer's per-group decisions and split the frame in two.

    For each group the rows to keep are chosen as follows:

    - no decision recorded: only the engine's ``survivor_index`` is kept;
    - a decision with ``keep_indices``: exactly those rows are kept (all of
      the group's rows if the list is missing or empty, so a group is never
      removed entirely).

    When a decision keeps exactly one row and carries ``overrides``, those
    column values replace the survivor's values in the output (columns not
    present in the frame are ignored).

    Args:
        original_df: Source frame; group indices are positions into it.
        match_groups: Groups found by the engine.
        decisions: ``{group_id: {"keep_indices": [...], "overrides": {...}}}``.

    Returns:
        ``(deduplicated_df, removed_df)``, both with a fresh 0..n-1 index.
        ``removed_df`` is an empty ``DataFrame`` when nothing was removed.
    """
    dropped: set[int] = set()
    overrides_by_row: dict[int, dict[str, str]] = {}

    for group in match_groups:
        members = group.row_indices
        decision = decisions.get(group.group_id)

        if decision is None:
            # Not reviewed: accept the engine's single survivor.
            survivors = {group.survivor_index}
        else:
            # Safety net: an empty selection keeps the whole group.
            survivors = set(decision.get("keep_indices", members)) or set(members)

        dropped.update(idx for idx in members if idx not in survivors)

        # Overrides only make sense for a single-survivor merge.
        column_overrides = decision.get("overrides", {}) if decision else {}
        if column_overrides and len(survivors) == 1:
            (only_survivor,) = survivors
            overrides_by_row[only_survivor] = column_overrides

    surviving_positions = [
        pos for pos in range(len(original_df)) if pos not in dropped
    ]

    if not overrides_by_row:
        deduped = original_df.iloc[surviving_positions].copy().reset_index(drop=True)
    else:
        patched_rows = []
        for pos in surviving_positions:
            record = original_df.iloc[pos].copy()
            for col, val in overrides_by_row.get(pos, {}).items():
                if col in record.index:
                    record[col] = val
            patched_rows.append(record)
        deduped = pd.DataFrame(patched_rows).reset_index(drop=True)

    if dropped:
        removed = original_df.iloc[sorted(dropped)].copy().reset_index(drop=True)
    else:
        removed = pd.DataFrame()

    return deduped, removed
|
||
|
||
|
||
def _build_match_groups_csv(
|
||
result: DeduplicationResult,
|
||
original_df: pd.DataFrame,
|
||
) -> bytes:
|
||
"""Build the match groups audit CSV as bytes."""
|
||
rows = []
|
||
for g in result.match_groups:
|
||
for idx in g.row_indices:
|
||
row_data = {
|
||
"_group_id": g.group_id + 1,
|
||
"_is_survivor": idx == g.survivor_index,
|
||
"_confidence": g.confidence,
|
||
"_matched_on": ", ".join(g.matched_on),
|
||
"_original_row": idx + 1,
|
||
}
|
||
for col in original_df.columns:
|
||
if not str(col).startswith("_norm_"):
|
||
row_data[col] = original_df.iloc[idx].get(col, "") if idx < len(original_df) else ""
|
||
rows.append(row_data)
|
||
|
||
groups_df = pd.DataFrame(rows)
|
||
return groups_df.to_csv(index=False).encode("utf-8-sig")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Analyzer integration (upload-time data quality findings)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Tool id -> friendly display name. Single source of truth for the GUI; the
|
||
# CLI keeps its own copy so each entrypoint stays self-contained.
|
||
# English fallback labels, keyed by stable tool id.
TOOL_DISPLAY_NAMES: dict[str, str] = {
    "01_deduplicator": "Find Duplicates",
    "02_text_cleaner": "Clean Text",
    "03_format_standardizer": "Standardize Formats",
    "04_missing_handler": "Fix Missing Values",
    "05_column_mapper": "Map Columns",
    "06_outlier_detector": "Find Unusual Values",
    "07_multi_file_merger": "Combine Files",
    "08_validator_reporter": "Quality Check",
    "09_pipeline_runner": "Automated Workflows",
}

# Finding severity -> emoji prefix.
_SEVERITY_ICON: dict[str, str] = {
    "info": "ℹ️",
    "warn": "⚠️",
    "error": "🛑",
}

# Finding severity -> Streamlit markdown color name.
_SEVERITY_COLOR: dict[str, str] = {
    "info": "blue",
    "warn": "orange",
    "error": "red",
}

# Tool id -> Streamlit page path under src/gui/. A tool without a page is
# simply absent; lookups then yield "" and the "Open" button is omitted.
_TOOL_PAGE_PATHS: dict[str, str] = {
    "01_deduplicator": "pages/1_Deduplicator.py",
    "02_text_cleaner": "pages/2_Text_Cleaner.py",
    "03_format_standardizer": "pages/3_Format_Standardizer.py",
    "04_missing_handler": "pages/4_Missing_Values.py",
    "05_column_mapper": "pages/5_Column_Mapper.py",
    "06_outlier_detector": "pages/6_Outlier_Detector.py",
    "07_multi_file_merger": "pages/7_Multi_File_Merger.py",
    "08_validator_reporter": "pages/8_Validator_Reporter.py",
    "09_pipeline_runner": "pages/9_Pipeline_Runner.py",
}
|
||
|
||
|
||
def tool_display_name(tool_id: str) -> str:
    """Return the GUI label for a tool id.

    Lookup order:

    1. An empty id gets the translated "untargeted" label.
    2. The translation for ``tools.<tool_id>.name``, when the translator
       returns something other than the key itself.
    3. The English entry in ``TOOL_DISPLAY_NAMES``.
    4. The raw ``tool_id``.
    """
    if not tool_id:
        return _t("findings.untargeted_label")

    i18n_key = f"tools.{tool_id}.name"
    localized = _t(i18n_key)
    if localized == i18n_key:
        # The key came back unchanged, so fall back to the static table.
        return TOOL_DISPLAY_NAMES.get(tool_id, tool_id)
    return localized
|
||
|
||
|
||
def _tool_page_slug(tool_id: str) -> str:
    """Return the page path registered for ``tool_id``, or ``""`` if none."""
    page_path = _TOOL_PAGE_PATHS.get(tool_id, "")
    return page_path
|
||
|
||
|
||
def render_findings_panel(
    findings,
    *,
    header: str | None = None,
    key_namespace: str = "",
) -> None:
    """Render the findings card for one file (layout follows
    ``datatools_layout_redesign2.html``).

    The card has two parts:

    - a head line: collapse chevron, a dot colored by the worst severity
      present, the header text, and one count pill per severity that has
      at least one finding;
    - one row per finding, ordered error > warn > info and otherwise in the
      order received, each drawn by ``_render_finding_row_v2``.

    The caller is expected to wrap the call in ``st.container(border=True)``
    so head and rows share a single card edge. With no findings a success
    message is shown instead.

    Args:
        findings: Finding objects; ``severity`` is read here, the rest by
            the row renderer.
        header: Head text, also handed to each row as its filename.
            Defaults to the translated ``findings.header``.
        key_namespace: Distinguishes widget keys between cards; hashed so
            arbitrary filenames are safe to use.
    """
    import html as _html
    import hashlib as _hashlib
    from src.core.text_clean import hidden_char_css

    if header is None:
        header = _t("findings.header")

    if not findings:
        st.success(_t("findings.none"))
        return

    # Inject the hidden-character badge styles so later sample previews can
    # show whitespace and invisibles as badges.
    st.markdown(hidden_char_css() + _SAMPLE_TABLE_CSS, unsafe_allow_html=True)

    # Tally findings per severity; the worst one present colors the head dot.
    counts: dict[str, int] = {"error": 0, "warn": 0, "info": 0}
    for finding in findings:
        counts[finding.severity] = counts.get(finding.severity, 0) + 1
    worst = next(
        (sev for sev in ("error", "warn") if counts.get(sev)),
        "info",
    )

    # (singular, plural) wording for each pill.
    pill_words = {
        "error": ("error", "errors"),
        "warn": ("warning", "warnings"),
        "info": ("info", "info"),
    }
    pill_fragments = []
    for sev in ("error", "warn", "info"):
        total = counts[sev]
        if total:
            word = pill_words[sev][0 if total == 1 else 1]
            pill_fragments.append(
                f'<span class="dt-count-pill {sev}">{total} {word}</span>'
            )
    pills_html = "".join(pill_fragments)

    # The chevron leads the head; the head starts out marked collapsed via
    # ``data-dt-collapsed``.
    head_html = "".join((
        '<div class="dt-finding-group-head" data-dt-collapsed="true">',
        '<span class="dt-finding-group-chevron">chevron_right</span>',
        f'<span class="dt-severity-dot {worst}"></span>',
        f'<span class="dt-group-filename">{_html.escape(header)}</span>',
        f'<div class="dt-group-counts">{pills_html}</div>',
        '</div>',
    ))
    st.markdown(head_html, unsafe_allow_html=True)

    # Short hash of the namespace keeps per-row widget keys unique across
    # cards and free of awkward filename characters.
    namespace_bytes = (key_namespace or "").encode("utf-8")
    ns = _hashlib.sha1(namespace_bytes, usedforsecurity=False).hexdigest()[:8]

    # sorted() is stable, so ranking by severity alone preserves the
    # incoming order inside each severity bucket.
    sev_rank = {"error": 0, "warn": 1, "info": 2}
    ordered = sorted(
        enumerate(findings),
        key=lambda pair: sev_rank.get(pair[1].severity, 99),
    )

    for position, finding in ordered:
        _render_finding_row_v2(
            finding, row_key=f"{ns}_{position}", filename=header,
        )
|
||
|
||
|
||
def _render_finding_row_v2(f, *, row_key: str, filename: str = "") -> None:
    """Render one finding row inside a per-file findings card.

    Three columns, left to right: the severity icon chip, a tertiary
    "<Tool> →" button (only when the finding's tool has a page), and the
    body (HTML title with the column name in a ``<code>`` chip, plus a meta
    line with the affected-row count and the number of captured samples).

    Clicking the button makes ``filename`` the active upload, when it is
    present in ``st.session_state["home_uploads"]``, and then switches to
    the tool's page.

    Args:
        f: Finding; reads ``severity``, ``description`` and, when present,
            ``column``, ``count``, ``samples`` and ``tool``.
        row_key: Unique suffix for the button's widget key.
        filename: Name of the file the finding belongs to.
    """
    import html as _html

    icon_name = {
        "error": "error",
        "warn": "warning",
        "info": "info",
    }.get(f.severity, "info")

    # Title: escaped description, optionally followed by the column chip.
    column_part = ""
    if getattr(f, "column", None):
        column_part = f" in <code>{_html.escape(str(f.column))}</code>"
    title_html = _html.escape(f.description) + column_part

    # Meta line: affected rows and sample count.
    meta_parts: list[str] = []
    if getattr(f, "count", 0):
        affected = int(f.count)
        noun = "row" if affected == 1 else "rows"
        meta_parts.append(f"{affected:,} {noun} affected")
    if getattr(f, "samples", None):
        n_samples = len(f.samples)
        plural = "" if n_samples == 1 else "s"
        meta_parts.append(f"{n_samples} sample{plural} captured")
    meta_html = " · ".join(meta_parts)

    # The action sits between the icon and the description; its narrow
    # ratio plus width="content" keeps the link tight against the icon.
    col_icon, col_action, col_body = st.columns([0.4, 1.4, 8])

    col_icon.markdown(
        f'<div class="dt-finding-icon {f.severity}">'
        f'<span class="dt-mui">{icon_name}</span>'
        '</div>',
        unsafe_allow_html=True,
    )

    page_slug = _tool_page_slug(f.tool) if getattr(f, "tool", "") else ""
    if page_slug:
        clicked = col_action.button(
            f"{tool_display_name(f.tool)} →",
            key=f"_finding_open_{row_key}",
            type="tertiary",
            width="content",
        )
        if clicked:
            # Make this card's file the active upload before leaving the
            # page, so the tool page opens on the file the user clicked.
            home_uploads = st.session_state.get("home_uploads", {})
            meta = home_uploads.get(filename) if filename else None
            if meta:
                st.session_state["home_uploaded_name"] = filename
                st.session_state["home_uploaded_size"] = meta["size"]
                st.session_state["home_uploaded_bytes"] = meta["bytes"]
            st.switch_page(page_slug)

    body_parts = [f'<p class="dt-finding-title">{title_html}</p>']
    if meta_html:
        body_parts.append(f'<p class="dt-finding-meta">{meta_html}</p>')
    col_body.markdown("".join(body_parts), unsafe_allow_html=True)
|
||
|
||
|
||
_PREVIEW_TABLE_CSS = """
|
||
<style>
|
||
.hidden-aware-preview {
|
||
width: 100%;
|
||
border-collapse: collapse;
|
||
font-size: 0.9em;
|
||
}
|
||
.hidden-aware-preview th,
|
||
.hidden-aware-preview td {
|
||
padding: 4px 8px;
|
||
border: 1px solid #eee;
|
||
text-align: left;
|
||
vertical-align: top;
|
||
font-family: ui-monospace, SFMono-Regular, monospace;
|
||
/* pre-wrap so internal ASCII whitespace and embedded newlines render
|
||
as the user wrote them; otherwise browsers collapse adjacent spaces. */
|
||
white-space: pre-wrap;
|
||
word-break: break-word;
|
||
max-width: 32em;
|
||
}
|
||
.hidden-aware-preview thead th {
|
||
background: #f6f8fa;
|
||
position: sticky;
|
||
top: 0;
|
||
}
|
||
.hidden-aware-preview tbody tr:nth-child(even) { background: #fafafa; }
|
||
.hidden-aware-preview .row-num {
|
||
color: #888;
|
||
font-family: inherit;
|
||
background: #f6f8fa;
|
||
text-align: right;
|
||
}
|
||
.hidden-aware-preview-wrap {
|
||
max-height: 26rem;
|
||
overflow: auto;
|
||
border: 1px solid #eee;
|
||
border-radius: 4px;
|
||
}
|
||
</style>
|
||
"""
|
||
|
||
|
||
def render_hidden_aware_preview(
    df,
    *,
    n_rows: int = 10,
    caption: str | None = None,
) -> None:
    """Render a DataFrame preview that makes hidden characters visible.

    Used where a plain ``st.dataframe`` would hide exactly what the user
    needs to see: leading/trailing whitespace and invisible characters.
    Column headers and string cells go through ``visualize_hidden_html``
    with ``mark_outer_whitespace=True``; missing values are shown as a
    grey ``NaN``; every other value is stringified and passed through
    ``visualize_hidden_html`` so it is still HTML-safe.

    Rows are read with ``itertuples`` rather than ``iterrows`` so that each
    cell keeps its own column's dtype (``iterrows`` upcasts a mixed
    int/float row, turning ``1`` into ``1.0``) and so that duplicate column
    names are handled positionally.

    Args:
        df: Frame to preview; ``None`` or an empty frame shows an info note.
        n_rows: Maximum number of leading rows to show.
        caption: Optional caption rendered above the table.
    """
    from src.core.text_clean import hidden_char_css, visualize_hidden_html

    if df is None or len(df) == 0:
        st.info("No rows to preview.")
        return

    sliced = df.head(n_rows) if len(df) > n_rows else df

    st.markdown(hidden_char_css() + _PREVIEW_TABLE_CSS, unsafe_allow_html=True)
    if caption:
        st.caption(caption)

    header_cells = "".join(
        f"<th>{visualize_hidden_html(str(c), mark_outer_whitespace=True)}</th>"
        for c in sliced.columns
    )

    body_rows: list[str] = []
    for row_number, values in enumerate(
        sliced.itertuples(index=False, name=None), start=1,
    ):
        cells = ["<td class='row-num'>" + str(row_number) + "</td>"]
        for value in values:
            if isinstance(value, str):
                rendered = visualize_hidden_html(value, mark_outer_whitespace=True)
            elif pd.api.types.is_scalar(value) and pd.isna(value):
                # is_scalar first: pd.isna() on a list-like cell returns an
                # array, whose truth value is ambiguous.
                rendered = "<span style='color:#aaa'>NaN</span>"
            else:
                # Non-string values (numerics, bools, containers) carry no
                # invisible characters but still need HTML escaping.
                rendered = visualize_hidden_html(str(value))
            cells.append(f"<td>{rendered}</td>")
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    st.markdown(
        "<div class='hidden-aware-preview-wrap'>"
        "<table class='hidden-aware-preview'>"
        f"<thead><tr><th class='row-num'>#</th>{header_cells}</tr></thead>"
        f"<tbody>{''.join(body_rows)}</tbody>"
        "</table>"
        "</div>",
        unsafe_allow_html=True,
    )
|
||
|
||
|
||
_SAMPLE_TABLE_CSS = """
|
||
<style>
|
||
.findings-sample-table {
|
||
width: 100%;
|
||
border-collapse: collapse;
|
||
font-size: 0.9em;
|
||
}
|
||
.findings-sample-table th,
|
||
.findings-sample-table td {
|
||
padding: 4px 8px;
|
||
border-bottom: 1px solid #eee;
|
||
text-align: left;
|
||
vertical-align: top;
|
||
}
|
||
.findings-sample-table td.value {
|
||
font-family: ui-monospace, SFMono-Regular, monospace;
|
||
/* pre-wrap so any ASCII whitespace inside the value is preserved
|
||
visually (browsers collapse adjacent spaces by default). */
|
||
white-space: pre-wrap;
|
||
word-break: break-word;
|
||
}
|
||
.findings-sample-table tbody tr:hover { background: #fafafa; }
|
||
</style>
|
||
"""
|
||
|
||
|
||
def _render_one_finding(f) -> None:
    """Render one finding as a markdown headline plus a table of its samples.

    The headline shows the severity icon, the finding id in the severity
    color, the column (when set) and the description. Samples, when
    present, are drawn as an HTML table so whitespace and invisible
    characters in the column name and value appear as badges; a plain
    ``st.dataframe`` would hide them.

    An unrecognised severity is drawn with the "info" icon and color, in
    line with the other finding renderers in this module. Row labels that
    are not plain ints are HTML-escaped, because the table is emitted with
    ``unsafe_allow_html=True``.

    Args:
        f: Finding; reads ``severity``, ``id``, ``description``, ``samples``
            (an iterable of ``(row, column, value)``) and, when present,
            ``column``.
    """
    import html as _html

    from src.core.text_clean import visualize_hidden_html

    color = _SEVERITY_COLOR.get(f.severity, _SEVERITY_COLOR["info"])
    icon = _SEVERITY_ICON.get(f.severity, _SEVERITY_ICON["info"])
    column_part = f" in `{f.column}`" if getattr(f, "column", None) else ""
    st.markdown(
        f"{icon} :{color}[**{f.id}**]{column_part} — {f.description}"
    )
    if not f.samples:
        return

    rows_html = []
    for row, col, value in f.samples:
        rendered_value = visualize_hidden_html(
            str(value), mark_outer_whitespace=True,
        )
        rendered_col = visualize_hidden_html(
            str(col), mark_outer_whitespace=True,
        )
        # Int positions are shown 1-based; any other label is shown as-is,
        # escaped because it goes straight into raw HTML.
        if isinstance(row, int):
            row_label = str(int(row) + 1)
        else:
            row_label = _html.escape(str(row))
        rows_html.append(
            "<tr>"
            f"<td>{row_label}</td>"
            f"<td><code>{rendered_col}</code></td>"
            f"<td class='value'>{rendered_value}</td>"
            "</tr>"
        )

    st.markdown(
        "<table class='findings-sample-table'>"
        "<thead><tr>"
        "<th>Row</th><th>Column</th><th>Value</th>"
        "</tr></thead>"
        f"<tbody>{''.join(rows_html)}</tbody>"
        "</table>",
        unsafe_allow_html=True,
    )
|
||
|
||
|
||
def upload_and_analyze_section() -> None:
    """Render the home page's upload-and-analyze panel.

    Flow:

    1. Show the heading, intro text and a file uploader; stop if no file
       has been provided.
    2. When the upload's name or size differs from what is stashed, store
       its name, size and bytes in session state and discard any findings
       and "skipped" flag from the previous file.
    3. Offer *run* and *skip* buttons. Skip records an empty findings list
       and sets the skipped flag; run analyzes the upload under a spinner
       and stores the result.
    4. Nothing more is drawn until one of the buttons has been used. After
       a skip an info notice is shown; otherwise the findings panel is
       rendered below a divider.
    """
    state = st.session_state

    st.markdown(f"### {_t('upload.heading')}")
    st.caption(_t("upload.intro"))
    st.caption(_t("upload.limits"))

    uploaded = st.file_uploader(
        _t("upload.uploader_label"),
        type=["csv", "tsv", "xlsx", "xls"],
        key="home_upload",
        help=_t("upload.uploader_help"),
    )
    if uploaded is None:
        return

    # A different name or size counts as a fresh upload: stash it for the
    # tool pages and drop whatever was computed for the previous file.
    stashed = (state.get("home_uploaded_name"), state.get("home_uploaded_size"))
    if stashed != (uploaded.name, uploaded.size):
        state["home_uploaded_name"] = uploaded.name
        state["home_uploaded_size"] = uploaded.size
        state["home_uploaded_bytes"] = uploaded.getvalue()
        for stale_key in ("home_findings", "home_skipped"):
            state.pop(stale_key, None)

    run_slot, skip_slot, _spacer = st.columns([1, 1, 4])
    with run_slot:
        run_clicked = st.button(
            _t("upload.run_button"), type="primary", key="home_run_analysis",
        )
    with skip_slot:
        skip_clicked = st.button(_t("upload.skip_button"), key="home_skip_analysis")

    if skip_clicked:
        state["home_findings"] = []
        state["home_skipped"] = True

    if run_clicked:
        with st.spinner(_t("upload.scanning")):
            analysis = _run_analysis_on_upload(uploaded)
        state["home_findings"] = analysis
        state["home_skipped"] = False

    findings = state.get("home_findings")
    if findings is None:
        # Neither button has been used for this upload yet.
        return

    if state.get("home_skipped"):
        st.info(_t("upload.skipped_notice"))
        return

    st.divider()
    render_findings_panel(findings)
|
||
|
||
|
||
def _run_analysis_on_upload(uploaded):
    """Analyze one uploaded file and return its list of findings.

    ``xlsx`` / ``xls`` uploads are parsed directly with ``pd.read_excel``.
    Every other suffix is treated as delimited text: the raw bytes are
    passed through ``repair_bytes`` first and the repair result is handed
    to ``analyze`` together with the parsed frame, so the user also sees
    the pre-parse (``csv_*``) findings.

    Delimiter choice for text files: a ``tsv`` suffix forces a tab.
    Otherwise a comma is used unless a tab, semicolon or pipe (checked in
    that order) occurs more than 1.5 times as often as the comma within
    the first 4096 bytes.

    No exception escapes this function. A 0-byte upload, a file that is
    empty after repair, a pandas ``EmptyDataError`` and any other failure
    are logged and reported as a one-element list holding a synthetic
    error ``Finding``, so one bad file yields a banner rather than a
    traceback.
    """
    import hashlib

    from src.audit import log_event, log_exception
    from src.core.analyze import Finding, analyze
    from src.core.errors import format_for_user
    from src.core.io import repair_bytes

    name = uploaded.name
    data = uploaded.getvalue()

    # Lower-cased text after the last dot; empty when the name has no dot.
    suffix = ""
    if "." in name:
        suffix = name.rsplit(".", 1)[-1].lower()

    # Short content fingerprint used only to correlate audit-log entries.
    if data:
        digest = hashlib.sha1(data, usedforsecurity=False).hexdigest()[:12]
    else:
        digest = "empty"

    log_event(
        "analyze",
        f"Analyzing {name}",
        filename=name,
        bytes=len(data),
        sha1_12=digest,
        suffix=suffix,
    )

    def _failure(description: str, fid: str = "analysis_failed") -> list[Finding]:
        # Wrap a user-facing message in a single error-severity Finding.
        return [
            Finding(
                id=fid,
                severity="error",
                tool="",
                count=1,
                description=description,
                confidence="high",
                fix_action="",
            )
        ]

    def _log_analyzed(frame, results, **extra) -> None:
        # Success entry shared by the spreadsheet and delimited-text paths.
        log_event(
            "analyze",
            f"Analyzed {name} ({len(results)} findings)",
            filename=name,
            bytes=len(data),
            sha1_12=digest,
            findings=len(results),
            rows=len(frame),
            cols=len(frame.columns),
            **extra,
        )

    if not data:
        log_event(
            "analyze",
            f"Skipping {name} — 0 bytes",
            level="warn",
            filename=name,
            outcome="empty_upload",
        )
        return _failure(
            f"`{name}` is empty (0 bytes). Please re-upload — the bytes "
            f"may not have transferred correctly from your browser.",
            fid="empty_upload",
        )

    try:
        if suffix in ("xlsx", "xls"):
            frame = pd.read_excel(io.BytesIO(data), dtype=str, keep_default_na=False)
            results = analyze(frame)
            _log_analyzed(frame, results)
            return results

        # CSV / TSV: run repair_bytes so the user sees csv_* findings.
        if suffix == "tsv":
            delim = "\t"
        else:
            head = data[:4096].decode("utf-8", errors="replace")
            threshold = head.count(",") * 1.5
            # First candidate (in this fixed order) that clearly outnumbers commas.
            dominant = [c for c in ("\t", ";", "|") if head.count(c) > threshold]
            delim = dominant[0] if dominant else ","

        repair = repair_bytes(data, encoding="utf-8", delimiter=delim)
        if not repair.repaired_bytes:
            log_event(
                "analyze",
                f"Skipping {name} — empty after repair",
                level="warn",
                filename=name,
                outcome="empty_after_repair",
            )
            return _failure(
                f"`{name}` is empty after pre-parse repair "
                f"(original was {len(data)} bytes — likely all NUL "
                f"bytes or stripped during a BOM/line-ending pass). "
                f"Open the file in a text editor to confirm it has "
                f"content.",
                fid="empty_after_repair",
            )

        frame = pd.read_csv(
            io.BytesIO(repair.repaired_bytes),
            encoding="utf-8",
            delimiter=delim,
            dtype=str,
            keep_default_na=False,
            on_bad_lines="warn",
        )
        results = analyze(frame, repair_result=repair)
        _log_analyzed(frame, results, delimiter=repr(delim))
        return results
    except pd.errors.EmptyDataError as e:
        log_exception(
            f"analyze({name})",
            e,
            filename=name,
            outcome="empty_after_repair",
        )
        return _failure(
            f"`{name}` could not be parsed — pandas reports no columns "
            f"in the file. Original size was {len(data)} bytes. Open "
            f"the file in a text editor to confirm the header row is "
            f"present and uses the same delimiter as the data rows.",
            fid="empty_after_repair",
        )
    except Exception as e:
        # Boundary handler: log the failure, then surface it as a finding.
        log_exception(
            f"analyze({name})",
            e,
            filename=name,
            outcome="analysis_failed",
        )
        return _failure(
            f"`{name}` could not be analyzed: {format_for_user(e)}",
        )
|
||
|
||
|
||
def findings_count_for_tool(tool_id: str) -> int:
    """Count the stored home-page findings whose ``tool`` equals *tool_id*.

    Reads ``st.session_state["home_findings"]``; a missing, ``None`` or
    empty entry counts as zero. The analyzer is never re-run here, so the
    home-page tool grid can use this to badge cards with actionable
    findings.
    """
    stored = st.session_state.get("home_findings")
    if not stored:
        return 0
    return len([item for item in stored if item.tool == tool_id])
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Cross-page upload pickup
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class _StashedUpload:
    """In-memory stand-in for Streamlit's ``UploadedFile``.

    Carries just enough surface for the tool pages: the ``name`` and
    ``size`` attributes plus ``getvalue()`` and ``read()``. Code written
    against a Streamlit ``UploadedFile`` that only touches those members
    can take this object in its place without changes.
    """

    __slots__ = ("name", "size", "_data")

    def __init__(self, name: str, data: bytes) -> None:
        self._data = data
        self.size = len(data)
        self.name = name

    def getvalue(self) -> bytes:
        """Return the complete stashed payload."""
        return self._data

    def read(self) -> bytes:
        """Return the complete payload; no read position is tracked."""
        return self.getvalue()
|
||
|
||
|
||
def pickup_or_upload(
    *,
    label: str,
    key: str,
    types: list[str],
    help: str | None = None,
):
    """Return an upload object, preferring the home-page upload when present.

    Behavior:

    - If ``st.session_state['home_uploaded_bytes']`` is set and the
      per-widget override flag (``f"{key}__override"``) is not, show an
      info banner naming the stashed file plus a "use a different file"
      button, and return a :class:`_StashedUpload` wrapping the stashed
      bytes. Clicking the button sets the override flag and reruns.
    - Otherwise render ``st.file_uploader`` with the supplied *label*,
      *key*, *types* and *help*, preceded by a caption when *types*
      includes any of csv / tsv / xlsx / xls, and return its result
      (``None`` while nothing has been uploaded).
    - While the override is active, a home-page upload exists and nothing
      has been uploaded on this page, a "switch back" button is offered;
      clicking it clears the override flag and reruns.

    The ``_StashedUpload`` shim exposes ``.name``, ``.size``,
    ``.getvalue()`` and ``.read()``.

    Args:
        label: Label passed to ``st.file_uploader``.
        key: Widget key; also the prefix of the override flag and of the
            two button keys.
        types: Accepted file extensions, forwarded as ``type=``.
        help: Optional tooltip forwarded to ``st.file_uploader``.

    Returns:
        A ``_StashedUpload``, the uploader's return value, or ``None``.
    """
    override_key = f"{key}__override"
    has_session_upload = st.session_state.get("home_uploaded_bytes") is not None
    overridden = bool(st.session_state.get(override_key, False))

    if has_session_upload and not overridden:
        name = st.session_state.get("home_uploaded_name") or _t("gate.default_name")
        st.info(_t("upload.using_session_file", name=name))
        if st.button(_t("upload.use_different_file"), key=f"{key}__pick_diff"):
            st.session_state[override_key] = True
            st.rerun()
        return _StashedUpload(name, st.session_state["home_uploaded_bytes"])

    if {"csv", "tsv", "xlsx", "xls"} & set(types):
        st.caption(_t("upload.pickup_caption"))
    uploaded = st.file_uploader(label, type=types, key=key, help=help)

    # The override flag is deliberately left set while a page-local file is
    # present: resetting it here would send the next rerun into the
    # session-upload branch above and return the home-page file instead of
    # the one the user just picked.
    if uploaded is None and overridden and has_session_upload:
        if st.button(_t("upload.switch_back"), key=f"{key}__switch_back"):
            st.session_state[override_key] = False
            st.rerun()
    return uploaded
|