fix(downloads): swap st.download_button for an HTML <a download> helper

Reported symptom: only the FIRST download button in a multi-button
row pops the browser save dialog. The second and third do nothing on
click. Affects every tool page that exposes (cleaned + audit + config)
downloads.

Root cause is ``st.download_button`` itself — when several render in
the same script pass, the click-to-bytes wiring on the browser side
mis-routes and only one button's data is actually exposed. Explicit
``key`` arguments don't fix it; ``use_container_width=True`` doesn't
help either; we confirmed this in the Text Cleaner reverts.

Replace the widget with a real ``<a download="file" href="data:...">``
anchor rendered via ``st.markdown(..., unsafe_allow_html=True)``.
Bypasses Streamlit's widget machinery entirely; behaves identically to
a native browser download. Side benefit: clicking it does NOT trigger
a script rerun, so other in-flight UI state survives.

New helper ``html_download_button`` lives in
``src/gui/components/_legacy.py`` (exported from ``components``). API:

    html_download_button(
        label, data,
        *, file_name, mime="application/octet-stream",
        disabled=False, help=None, use_container_width=True,
    )

Translation pattern applied across every tool page (and shared
``results_summary`` / ``config_panel`` widgets in ``_legacy.py``):

- ``st.download_button(`` -> ``html_download_button(``
- ``data=foo_bytes`` kwarg -> positional second arg (``str`` payloads,
  e.g. the dedup config JSON, are ``.encode("utf-8")``-ed at the call site)
- ``key="..."`` -> dropped (helper has no widget identity)
- ``use_container_width=True`` -> dropped (default)
- ``disabled=`` and ``help=`` pass through unchanged
- Pre-computed byte buffers kept where they were

Total: 21 sites replaced (3 in Text Cleaner, 3 in Format
Standardizer, 3 in Fix Missing Values, 3 in Map Columns, 3 in
Automated Workflows, 2 in Find Duplicates page + 4 in shared
_legacy.py widgets used by Find Duplicates).

Caveat: data: URLs balloon by 33% (base64). Fine for tool output
sizes we ship; if a future result topped a few hundred MB we'd want a
Blob-URL fallback.

The marketing demo at src/gui/app_demo.py keeps its single
st.download_button — single button, no collision, no need to switch.

2008 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-16 21:13:41 +00:00
parent 6415be8bf4
commit aeead05e4c
8 changed files with 135 additions and 97 deletions

View File

@@ -48,6 +48,7 @@ __all__ = [
# Shared chrome / pickup # Shared chrome / pickup
"back_to_home_link", "back_to_home_link",
"hide_streamlit_chrome", "hide_streamlit_chrome",
"html_download_button",
"shutdown_app", "shutdown_app",
"pickup_or_upload", "pickup_or_upload",
# License gate + activation form # License gate + activation form

View File

@@ -221,6 +221,70 @@ def _farewell_script() -> str:
) )
def html_download_button(
    label: str,
    data: bytes | str,
    *,
    file_name: str,
    mime: str = "application/octet-stream",
    disabled: bool = False,
    help: str | None = None,
    use_container_width: bool = True,
) -> None:
    """Render a download trigger as a real ``<a download>`` anchor.

    Replaces ``st.download_button`` for pages that stack multiple
    download triggers in one render pass. Streamlit's ``download_button``
    has been observed to fire only the first button on the page when
    several are rendered together; explicit ``key`` arguments were not
    sufficient to work around it (seen on Edge/Chrome on Windows in
    particular).

    Sidestepping the widget system entirely avoids the problem. The
    payload is base64-encoded into a ``data:`` URL on the anchor's
    ``href``; the browser's native ``download`` attribute handles the
    save. Because no Streamlit widget is involved, a click does not
    trigger a script rerun, so other in-flight UI state is left alone.

    Caveat: base64 inflates the payload by roughly 33%, and the whole
    encoded payload is embedded in the page markup. Fine for modest tool
    output; very large results would need a different mechanism.

    Args:
        label: Visible text of the button. HTML-escaped before rendering.
        data: Payload to download. ``bytes`` (or any bytes-like object)
            is used as-is; a ``str`` is encoded as UTF-8 first, matching
            what ``st.download_button`` callers could previously pass.
        file_name: Suggested file name, placed in the ``download``
            attribute. HTML-escaped before rendering.
        mime: MIME type written into the ``data:`` URL. HTML-escaped so
            it cannot break out of the ``href`` attribute.
        disabled: When true, render an inert, dimmed ``<span>`` instead
            of an anchor; ``data`` is not encoded at all in that case.
        help: Optional tooltip, rendered as the element's ``title``.
        use_container_width: When true, stretch the element to 100% width.
    """
    # Function-scope imports keep the helper self-contained within the
    # module; only ``st`` is expected from the module's own imports.
    import base64
    import html as _html

    width_css = "width:100%;" if use_container_width else ""
    # Inline styles approximating Streamlit's secondary button so the
    # anchor sits visually alongside native widgets.
    base_style = (
        "display:inline-block;text-align:center;"
        "padding:0.375rem 0.75rem;border-radius:0.5rem;"
        "border:1px solid rgba(49,51,63,0.2);"
        "background:rgb(240,242,246);color:rgb(38,39,48);"
        "text-decoration:none;font-weight:400;cursor:pointer;"
        "font-family:inherit;font-size:14px;"
        "box-sizing:border-box;line-height:1.6;"
        f"{width_css}"
    )
    safe_label = _html.escape(label)
    title_attr = f' title="{_html.escape(help)}"' if help else ""

    if disabled:
        # A <span> has no href, so there is nothing to click through to.
        # The trailing ``cursor`` declaration overrides the earlier
        # ``cursor:pointer`` from ``base_style``.
        disabled_style = base_style + "opacity:0.5;cursor:not-allowed;"
        st.markdown(
            f'<span{title_attr} style="{disabled_style}">{safe_label}</span>',
            unsafe_allow_html=True,
        )
        return

    # ``b64encode`` rejects ``str``; accept text payloads (e.g. the output
    # of ``json.dumps``) by encoding them as UTF-8.
    payload = data.encode("utf-8") if isinstance(data, str) else data
    b64 = base64.b64encode(payload).decode("ascii")
    safe_name = _html.escape(file_name, quote=True)
    # Every caller-supplied value that lands inside an attribute is
    # escaped, including the MIME type embedded in the href.
    safe_mime = _html.escape(mime, quote=True)
    st.markdown(
        f'<a download="{safe_name}" href="data:{safe_mime};base64,{b64}"'
        f'{title_attr} style="{base_style}">{safe_label}</a>',
        unsafe_allow_html=True,
    )
def back_to_home_link(*, key: str = "_back_to_home_link") -> None: def back_to_home_link(*, key: str = "_back_to_home_link") -> None:
"""Render a small "← Back to Home" affordance near the top of a tool page. """Render a small "← Back to Home" affordance near the top of a tool page.
@@ -398,9 +462,9 @@ def config_panel(df: pd.DataFrame) -> dict:
) )
cfg_json = cfg.to_dict() cfg_json = cfg.to_dict()
import json import json
st.download_button( html_download_button(
"Download config JSON", "Download config JSON",
data=json.dumps(cfg_json, indent=2), json.dumps(cfg_json, indent=2).encode("utf-8"),
file_name="dedup_config.json", file_name="dedup_config.json",
mime="application/json", mime="application/json",
) )
@@ -777,9 +841,9 @@ def results_summary(
with dl_left: with dl_left:
csv_bytes = result.deduplicated_df.to_csv(index=False).encode("utf-8-sig") csv_bytes = result.deduplicated_df.to_csv(index=False).encode("utf-8-sig")
st.download_button( html_download_button(
"Download Deduplicated CSV", "Download Deduplicated CSV",
data=csv_bytes, csv_bytes,
file_name="deduplicated.csv", file_name="deduplicated.csv",
mime="text/csv", mime="text/csv",
) )
@@ -787,9 +851,9 @@ def results_summary(
with dl_mid: with dl_mid:
if not result.removed_df.empty: if not result.removed_df.empty:
removed_bytes = result.removed_df.to_csv(index=False).encode("utf-8-sig") removed_bytes = result.removed_df.to_csv(index=False).encode("utf-8-sig")
st.download_button( html_download_button(
"Download Removed Rows", "Download Removed Rows",
data=removed_bytes, removed_bytes,
file_name="removed_rows.csv", file_name="removed_rows.csv",
mime="text/csv", mime="text/csv",
) )
@@ -797,9 +861,9 @@ def results_summary(
with dl_right: with dl_right:
if result.match_groups: if result.match_groups:
groups_data = _build_match_groups_csv(result, original_df) groups_data = _build_match_groups_csv(result, original_df)
st.download_button( html_download_button(
"Download Match Groups Report", "Download Match Groups Report",
data=groups_data, groups_data,
file_name="match_groups.csv", file_name="match_groups.csv",
mime="text/csv", mime="text/csv",
) )

View File

@@ -21,6 +21,7 @@ from src.gui.components import (
back_to_home_link, back_to_home_link,
config_panel, config_panel,
hide_streamlit_chrome, hide_streamlit_chrome,
html_download_button,
match_group_card, match_group_card,
pickup_or_upload, pickup_or_upload,
require_feature_or_render_upgrade, require_feature_or_render_upgrade,
@@ -364,20 +365,17 @@ if uploaded is not None:
else b"" else b""
) )
st.download_button( html_download_button(
"Download Reviewed & Deduplicated CSV", "Download Reviewed & Deduplicated CSV",
data=reviewed_bytes, reviewed_bytes,
file_name="deduplicated_reviewed.csv", file_name="deduplicated_reviewed.csv",
mime="text/csv", mime="text/csv",
key="dedup_dl_reviewed",
use_container_width=True,
) )
st.download_button( html_download_button(
"Download Reviewed Removed Rows", "Download Reviewed Removed Rows",
data=reviewed_removed_bytes, reviewed_removed_bytes,
file_name="removed_reviewed.csv", file_name="removed_reviewed.csv",
mime="text/csv", mime="text/csv",
key="dedup_dl_reviewed_removed",
disabled=reviewed_removed_empty, disabled=reviewed_removed_empty,
help=( help=(
"No rows were removed under the current " "No rows were removed under the current "
@@ -385,7 +383,6 @@ if uploaded is not None:
if reviewed_removed_empty if reviewed_removed_empty
else None else None
), ),
use_container_width=True,
) )
# Log entries # Log entries

View File

@@ -17,6 +17,7 @@ if str(_project_root) not in sys.path:
from src.gui.components import ( from src.gui.components import (
back_to_home_link, back_to_home_link,
hide_streamlit_chrome, hide_streamlit_chrome,
html_download_button,
pickup_or_upload, pickup_or_upload,
render_hidden_aware_preview, render_hidden_aware_preview,
require_feature_or_render_upgrade, require_feature_or_render_upgrade,
@@ -329,13 +330,12 @@ else:
# Downloads # Downloads
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# #
# All three byte buffers are prepared up front (outside the columns) so # Rendered via ``html_download_button`` (raw <a download> anchor) rather
# each ``st.download_button`` sees stable ``data`` across reruns and an # than ``st.download_button``. The latter has a long-standing bug where
# explicit ``key`` — without those, Streamlit auto-derived widget IDs # the second and third download_buttons rendered in the same script pass
# can collide for multiple download_buttons in adjacent columns and # fail to fire — only the first one's click reaches the browser save
# only the first one actually fires on click. The empty-changes case # dialog. The HTML helper bypasses the widget system entirely and works
# now renders a disabled button (rather than vanishing) so the layout # uniformly across all browsers.
# stays steady and the user understands why nothing's available.
st.divider() st.divider()
stem = Path(st.session_state.get("textclean_input_name", "input")).stem stem = Path(st.session_state.get("textclean_input_name", "input")).stem
@@ -350,33 +350,27 @@ config_bytes = json.dumps(options.to_dict(), indent=2).encode("utf-8")
dl_a, dl_b, dl_c = st.columns(3) dl_a, dl_b, dl_c = st.columns(3)
with dl_a: with dl_a:
st.download_button( html_download_button(
"Download cleaned CSV", "Download cleaned CSV",
data=cleaned_bytes, cleaned_bytes,
file_name=f"{stem}_cleaned.csv", file_name=f"{stem}_cleaned.csv",
mime="text/csv", mime="text/csv",
key="textclean_dl_cleaned",
use_container_width=True,
) )
with dl_b: with dl_b:
st.download_button( html_download_button(
"Download changes audit", "Download changes audit",
data=changes_bytes, changes_bytes,
file_name=f"{stem}_changes.csv", file_name=f"{stem}_changes.csv",
mime="text/csv", mime="text/csv",
key="textclean_dl_changes",
disabled=result.changes.empty, disabled=result.changes.empty,
help="No changes to audit." if result.changes.empty else None, help="No changes to audit." if result.changes.empty else None,
use_container_width=True,
) )
with dl_c: with dl_c:
st.download_button( html_download_button(
"Download config JSON", "Download config JSON",
data=config_bytes, config_bytes,
file_name="text_clean_config.json", file_name="text_clean_config.json",
mime="application/json", mime="application/json",
key="textclean_dl_config",
use_container_width=True,
) )
st.divider() st.divider()

View File

@@ -17,6 +17,7 @@ if str(_project_root) not in sys.path:
from src.gui.components import ( from src.gui.components import (
back_to_home_link, back_to_home_link,
hide_streamlit_chrome, hide_streamlit_chrome,
html_download_button,
pickup_or_upload, pickup_or_upload,
require_feature_or_render_upgrade, require_feature_or_render_upgrade,
) )
@@ -604,13 +605,14 @@ st.dataframe(result.standardized_df.head(10), use_container_width=True)
# Downloads # Downloads
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# #
# All three byte buffers are prepared up front (outside the columns) so # Rendered via ``html_download_button`` (raw <a download> anchor) rather
# each ``st.download_button`` sees stable ``data`` across reruns and an # than ``st.download_button``. The latter has a long-standing bug where
# explicit ``key`` — without those, Streamlit auto-derived widget IDs # the second and third download_buttons rendered in the same script pass
# can collide for multiple download_buttons in adjacent columns and # fail to fire — only the first one's click reaches the browser save
# only the first one actually fires on click. The empty-changes case # dialog. The HTML helper bypasses the widget system entirely and works
# now renders a disabled button (rather than vanishing) so the layout # uniformly across all browsers. The empty-changes case still renders a
# stays steady and the user understands why nothing's available. # disabled button (rather than vanishing) so the layout stays steady and
# the user understands why nothing's available.
st.divider() st.divider()
stem = Path(st.session_state.get("fmtstd_input_name", "input")).stem stem = Path(st.session_state.get("fmtstd_input_name", "input")).stem
@@ -625,33 +627,27 @@ config_bytes = json.dumps(options.to_dict(), indent=2).encode("utf-8")
dl_a, dl_b, dl_c = st.columns(3) dl_a, dl_b, dl_c = st.columns(3)
with dl_a: with dl_a:
st.download_button( html_download_button(
"Download standardized CSV", "Download standardized CSV",
data=standardized_bytes, standardized_bytes,
file_name=f"{stem}_standardized.csv", file_name=f"{stem}_standardized.csv",
mime="text/csv", mime="text/csv",
key="fmtstd_dl_standardized",
use_container_width=True,
) )
with dl_b: with dl_b:
st.download_button( html_download_button(
"Download changes audit", "Download changes audit",
data=changes_bytes, changes_bytes,
file_name=f"{stem}_changes.csv", file_name=f"{stem}_changes.csv",
mime="text/csv", mime="text/csv",
key="fmtstd_dl_changes",
disabled=result.changes.empty, disabled=result.changes.empty,
help="No changes to audit." if result.changes.empty else None, help="No changes to audit." if result.changes.empty else None,
use_container_width=True,
) )
with dl_c: with dl_c:
st.download_button( html_download_button(
"Download config JSON", "Download config JSON",
data=config_bytes, config_bytes,
file_name="format_standardize_config.json", file_name="format_standardize_config.json",
mime="application/json", mime="application/json",
key="fmtstd_dl_config",
use_container_width=True,
) )
st.divider() st.divider()

View File

@@ -17,6 +17,7 @@ if str(_project_root) not in sys.path:
from src.gui.components import ( from src.gui.components import (
back_to_home_link, back_to_home_link,
hide_streamlit_chrome, hide_streamlit_chrome,
html_download_button,
pickup_or_upload, pickup_or_upload,
require_feature_or_render_upgrade, require_feature_or_render_upgrade,
) )
@@ -364,13 +365,14 @@ st.dataframe(result.handled_df.head(10), use_container_width=True)
# Downloads # Downloads
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# #
# All three byte buffers are prepared up front (outside the columns) so # Rendered via ``html_download_button`` (raw <a download> anchor) rather
# each ``st.download_button`` sees stable ``data`` across reruns and an # than ``st.download_button``. The latter has a long-standing bug where
# explicit ``key`` — without those, Streamlit auto-derived widget IDs # the second and third download_buttons rendered in the same script pass
# can collide for multiple download_buttons in adjacent columns and # fail to fire — only the first one's click reaches the browser save
# only the first one actually fires on click. The empty-changes case # dialog. The HTML helper bypasses the widget system entirely and works
# now renders a disabled button (rather than vanishing) so the layout # uniformly across all browsers. The empty-changes case still renders a
# stays steady and the user understands why nothing's available. # disabled button (rather than vanishing) so the layout stays steady and
# the user understands why nothing's available.
st.divider() st.divider()
stem = Path(st.session_state.get("missing_input_name", "input")).stem stem = Path(st.session_state.get("missing_input_name", "input")).stem
@@ -387,33 +389,27 @@ config_bytes = json.dumps(
dl_a, dl_b, dl_c = st.columns(3) dl_a, dl_b, dl_c = st.columns(3)
with dl_a: with dl_a:
st.download_button( html_download_button(
"Download handled CSV", "Download handled CSV",
data=handled_bytes, handled_bytes,
file_name=f"{stem}_missing.csv", file_name=f"{stem}_missing.csv",
mime="text/csv", mime="text/csv",
key="missing_dl_handled",
use_container_width=True,
) )
with dl_b: with dl_b:
st.download_button( html_download_button(
"Download changes audit", "Download changes audit",
data=changes_bytes, changes_bytes,
file_name=f"{stem}_missing_changes.csv", file_name=f"{stem}_missing_changes.csv",
mime="text/csv", mime="text/csv",
key="missing_dl_changes",
disabled=result.changes.empty, disabled=result.changes.empty,
help="No changes to audit." if result.changes.empty else None, help="No changes to audit." if result.changes.empty else None,
use_container_width=True,
) )
with dl_c: with dl_c:
st.download_button( html_download_button(
"Download config JSON", "Download config JSON",
data=config_bytes, config_bytes,
file_name="missing_config.json", file_name="missing_config.json",
mime="application/json", mime="application/json",
key="missing_dl_config",
use_container_width=True,
) )
st.divider() st.divider()

View File

@@ -17,6 +17,7 @@ if str(_project_root) not in sys.path:
from src.gui.components import ( from src.gui.components import (
back_to_home_link, back_to_home_link,
hide_streamlit_chrome, hide_streamlit_chrome,
html_download_button,
pickup_or_upload, pickup_or_upload,
require_feature_or_render_upgrade, require_feature_or_render_upgrade,
) )
@@ -432,33 +433,27 @@ _no_mapping = not result.mapping
dl_a, dl_b, dl_c = st.columns(3) dl_a, dl_b, dl_c = st.columns(3)
with dl_a: with dl_a:
st.download_button( html_download_button(
"Download mapped CSV", "Download mapped CSV",
data=mapped_bytes, mapped_bytes,
file_name=f"{stem}_mapped.csv", file_name=f"{stem}_mapped.csv",
mime="text/csv", mime="text/csv",
key="colmap_dl_mapped",
use_container_width=True,
) )
with dl_b: with dl_b:
st.download_button( html_download_button(
"Download mapping audit", "Download mapping audit",
data=audit_bytes, audit_bytes,
file_name=f"{stem}_mapping.json", file_name=f"{stem}_mapping.json",
mime="application/json", mime="application/json",
key="colmap_dl_audit",
disabled=_no_mapping, disabled=_no_mapping,
help="No mapping was applied." if _no_mapping else None, help="No mapping was applied." if _no_mapping else None,
use_container_width=True,
) )
with dl_c: with dl_c:
st.download_button( html_download_button(
"Download config JSON", "Download config JSON",
data=config_bytes, config_bytes,
file_name="column_map_config.json", file_name="column_map_config.json",
mime="application/json", mime="application/json",
key="colmap_dl_config",
use_container_width=True,
) )
st.divider() st.divider()

View File

@@ -17,6 +17,7 @@ if str(_project_root) not in sys.path:
from src.gui.components import ( from src.gui.components import (
back_to_home_link, back_to_home_link,
hide_streamlit_chrome, hide_streamlit_chrome,
html_download_button,
pickup_or_upload, pickup_or_upload,
require_feature_or_render_upgrade, require_feature_or_render_upgrade,
) )
@@ -386,37 +387,31 @@ _pipeline_empty = current_pipeline is None or not current_pipeline.steps
dl_a, dl_b, dl_c = st.columns(3) dl_a, dl_b, dl_c = st.columns(3)
with dl_a: with dl_a:
st.download_button( html_download_button(
"Download cleaned CSV", "Download cleaned CSV",
data=cleaned_bytes, cleaned_bytes,
file_name=f"{stem}_pipeline.csv", file_name=f"{stem}_pipeline.csv",
mime="text/csv", mime="text/csv",
key="pipeline_dl_cleaned",
use_container_width=True,
) )
with dl_b: with dl_b:
st.download_button( html_download_button(
"Download pipeline JSON", "Download pipeline JSON",
data=pipeline_bytes, pipeline_bytes,
file_name="pipeline.json", file_name="pipeline.json",
mime="application/json", mime="application/json",
key="pipeline_dl_pipeline",
disabled=_pipeline_empty, disabled=_pipeline_empty,
help=( help=(
"No pipeline defined." "No pipeline defined."
if _pipeline_empty if _pipeline_empty
else "Save this and pass --pipeline pipeline.json to the CLI to re-run on next week's file." else "Save this and pass --pipeline pipeline.json to the CLI to re-run on next week's file."
), ),
use_container_width=True,
) )
with dl_c: with dl_c:
st.download_button( html_download_button(
"Download run audit", "Download run audit",
data=audit_bytes, audit_bytes,
file_name=f"{stem}_pipeline_audit.json", file_name=f"{stem}_pipeline_audit.json",
mime="application/json", mime="application/json",
key="pipeline_dl_audit",
use_container_width=True,
) )
st.divider() st.divider()