From aeead05e4c624d61badc7aaa7f1ce6cd311191c9 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 16 May 2026 21:13:41 +0000 Subject: [PATCH] fix(downloads): swap st.download_button for an HTML helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported symptom: only the FIRST download button in a multi-button row pops the browser save dialog. The second and third do nothing on click. Affects every tool page that exposes (cleaned + audit + config) downloads. Root cause is ``st.download_button`` itself — when several render in the same script pass, the click-to-bytes wiring on the browser side mis-routes and only one button's data is actually exposed. Explicit ``key`` arguments don't fix it; ``use_container_width=True`` doesn't help either; we confirmed this in the Text Cleaner reverts. Replace the widget with a real ``<a download>`` anchor rendered via ``st.markdown(..., unsafe_allow_html=True)``. Bypasses Streamlit's widget machinery entirely; behaves identically to a native browser download. Side benefit: clicking it does NOT trigger a script rerun, so other in-flight UI state survives. New helper ``html_download_button`` lives in ``src/gui/components/_legacy.py`` (exported from ``components``). 
API: html_download_button( label, data, *, file_name, mime="application/octet-stream", disabled=False, help=None, use_container_width=True, ) Translation pattern applied across every tool page (and shared ``results_summary`` / ``config_panel`` widgets in ``_legacy.py``): - ``st.download_button(`` -> ``html_download_button(`` - ``data=foo_bytes`` kwarg -> positional second arg - ``key="..."`` -> dropped (helper has no widget identity) - ``use_container_width=True`` -> dropped (default) - ``disabled=`` and ``help=`` pass through unchanged - Pre-computed byte buffers kept where they were Total: 21 sites replaced: 17 on the tool pages (3 in Text Cleaner, 3 in Format Standardizer, 3 in Fix Missing Values, 3 in Map Columns, 3 in Automated Workflows, 2 in Find Duplicates page) + 4 in shared _legacy.py widgets used by Find Duplicates (1 in ``config_panel``, 3 in ``results_summary``). Caveat: data: URLs balloon by 33% (base64). Fine for tool output sizes we ship; if a future result topped a few hundred MB we'd want a Blob-URL fallback. The marketing demo at src/gui/app_demo.py keeps its single st.download_button — single button, no collision, no need to switch. 2008 tests pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/gui/components/__init__.py | 1 + src/gui/components/_legacy.py | 80 +++++++++++++++++++++++--- src/gui/pages/1_Deduplicator.py | 13 ++--- src/gui/pages/2_Text_Cleaner.py | 32 +++++------ src/gui/pages/3_Format_Standardizer.py | 34 +++++------ src/gui/pages/4_Missing_Values.py | 34 +++++------ src/gui/pages/5_Column_Mapper.py | 19 +++--- src/gui/pages/9_Pipeline_Runner.py | 19 +++--- 8 files changed, 135 insertions(+), 97 deletions(-) diff --git a/src/gui/components/__init__.py b/src/gui/components/__init__.py index 2c36695..5337f7a 100644 --- a/src/gui/components/__init__.py +++ b/src/gui/components/__init__.py @@ -48,6 +48,7 @@ __all__ = [ # Shared chrome / pickup "back_to_home_link", "hide_streamlit_chrome", + "html_download_button", "shutdown_app", "pickup_or_upload", # License gate + activation form diff --git a/src/gui/components/_legacy.py b/src/gui/components/_legacy.py index aeb8d9b..f110129 100644 --- a/src/gui/components/_legacy.py +++ b/src/gui/components/_legacy.py @@ -221,6 +221,70 @@ def _farewell_script() -> str: ) +def html_download_button( + label: str, + data: bytes, + *, + file_name: str, + mime: str = "application/octet-stream", + disabled: bool = False, + help: str | None = None, + use_container_width: bool = True, +) -> None: + """Render a download trigger as a real ``<a download>`` anchor. + + Replaces ``st.download_button`` for pages that stack multiple + download triggers in one render pass. Streamlit's ``download_button`` + has a long-standing failure mode where only the first button in the + page actually fires when several are rendered together: explicit + ``key`` arguments are not sufficient, since the browser-side + bytes-to-Blob translation appears to share state across widgets in + some browsers (Edge/Chrome on Windows in particular). + + Sidestepping the widget system entirely fixes it. 
The bytes are + base64-encoded into a ``data:`` URL on the anchor's ``href``; the + browser's native ``download`` attribute pops the standard save + dialog. No script reruns happen on click — that's an upside, since + it avoids resetting any other in-flight UI state. + + Caveat: data: URLs balloon by 33% (base64). Fine up to a few tens + of MB. For 1 GB+ datasets a different mechanism would be needed, + but tool output is rarely that large. + """ + import base64 + import html as _html + + width_css = "width:100%;" if use_container_width else "" + base_style = ( + "display:inline-block;text-align:center;" + "padding:0.375rem 0.75rem;border-radius:0.5rem;" + "border:1px solid rgba(49,51,63,0.2);" + "background:rgb(240,242,246);color:rgb(38,39,48);" + "text-decoration:none;font-weight:400;cursor:pointer;" + "font-family:inherit;font-size:14px;" + "box-sizing:border-box;line-height:1.6;" + f"{width_css}" + ) + safe_label = _html.escape(label) + title_attr = f' title="{_html.escape(help)}"' if help else "" + + if disabled: + disabled_style = base_style + "opacity:0.5;cursor:not-allowed;" + st.markdown( + f'{safe_label}', + unsafe_allow_html=True, + ) + return + + b64 = base64.b64encode(data).decode("ascii") + safe_name = _html.escape(file_name, quote=True) + st.markdown( + f'{safe_label}', + unsafe_allow_html=True, + ) + + def back_to_home_link(*, key: str = "_back_to_home_link") -> None: """Render a small "← Back to Home" affordance near the top of a tool page. 
@@ -398,9 +462,9 @@ def config_panel(df: pd.DataFrame) -> dict: ) cfg_json = cfg.to_dict() import json - st.download_button( + html_download_button( "Download config JSON", - data=json.dumps(cfg_json, indent=2), + json.dumps(cfg_json, indent=2).encode("utf-8"), file_name="dedup_config.json", mime="application/json", ) @@ -777,9 +841,9 @@ def results_summary( with dl_left: csv_bytes = result.deduplicated_df.to_csv(index=False).encode("utf-8-sig") - st.download_button( + html_download_button( "Download Deduplicated CSV", - data=csv_bytes, + csv_bytes, file_name="deduplicated.csv", mime="text/csv", ) @@ -787,9 +851,9 @@ def results_summary( with dl_mid: if not result.removed_df.empty: removed_bytes = result.removed_df.to_csv(index=False).encode("utf-8-sig") - st.download_button( + html_download_button( "Download Removed Rows", - data=removed_bytes, + removed_bytes, file_name="removed_rows.csv", mime="text/csv", ) @@ -797,9 +861,9 @@ def results_summary( with dl_right: if result.match_groups: groups_data = _build_match_groups_csv(result, original_df) - st.download_button( + html_download_button( "Download Match Groups Report", - data=groups_data, + groups_data, file_name="match_groups.csv", mime="text/csv", ) diff --git a/src/gui/pages/1_Deduplicator.py b/src/gui/pages/1_Deduplicator.py index 8d5ca24..00f2eb8 100644 --- a/src/gui/pages/1_Deduplicator.py +++ b/src/gui/pages/1_Deduplicator.py @@ -21,6 +21,7 @@ from src.gui.components import ( back_to_home_link, config_panel, hide_streamlit_chrome, + html_download_button, match_group_card, pickup_or_upload, require_feature_or_render_upgrade, @@ -364,20 +365,17 @@ if uploaded is not None: else b"" ) - st.download_button( + html_download_button( "Download Reviewed & Deduplicated CSV", - data=reviewed_bytes, + reviewed_bytes, file_name="deduplicated_reviewed.csv", mime="text/csv", - key="dedup_dl_reviewed", - use_container_width=True, ) - st.download_button( + html_download_button( "Download Reviewed Removed Rows", - 
data=reviewed_removed_bytes, + reviewed_removed_bytes, file_name="removed_reviewed.csv", mime="text/csv", - key="dedup_dl_reviewed_removed", disabled=reviewed_removed_empty, help=( "No rows were removed under the current " @@ -385,7 +383,6 @@ if uploaded is not None: if reviewed_removed_empty else None ), - use_container_width=True, ) # Log entries diff --git a/src/gui/pages/2_Text_Cleaner.py b/src/gui/pages/2_Text_Cleaner.py index 8b9db5f..151739e 100644 --- a/src/gui/pages/2_Text_Cleaner.py +++ b/src/gui/pages/2_Text_Cleaner.py @@ -17,6 +17,7 @@ if str(_project_root) not in sys.path: from src.gui.components import ( back_to_home_link, hide_streamlit_chrome, + html_download_button, pickup_or_upload, render_hidden_aware_preview, require_feature_or_render_upgrade, @@ -329,13 +330,12 @@ else: # Downloads # --------------------------------------------------------------------------- # -# All three byte buffers are prepared up front (outside the columns) so -# each ``st.download_button`` sees stable ``data`` across reruns and an -# explicit ``key`` — without those, Streamlit auto-derived widget IDs -# can collide for multiple download_buttons in adjacent columns and -# only the first one actually fires on click. The empty-changes case -# now renders a disabled button (rather than vanishing) so the layout -# stays steady and the user understands why nothing's available. +# Rendered via ``html_download_button`` (raw anchor) rather +# than ``st.download_button``. The latter has a long-standing bug where +# the second and third download_buttons rendered in the same script pass +# fail to fire — only the first one's click reaches the browser save +# dialog. The HTML helper bypasses the widget system entirely and works +# uniformly across all browsers. 
st.divider() stem = Path(st.session_state.get("textclean_input_name", "input")).stem @@ -350,33 +350,27 @@ config_bytes = json.dumps(options.to_dict(), indent=2).encode("utf-8") dl_a, dl_b, dl_c = st.columns(3) with dl_a: - st.download_button( + html_download_button( "Download cleaned CSV", - data=cleaned_bytes, + cleaned_bytes, file_name=f"{stem}_cleaned.csv", mime="text/csv", - key="textclean_dl_cleaned", - use_container_width=True, ) with dl_b: - st.download_button( + html_download_button( "Download changes audit", - data=changes_bytes, + changes_bytes, file_name=f"{stem}_changes.csv", mime="text/csv", - key="textclean_dl_changes", disabled=result.changes.empty, help="No changes to audit." if result.changes.empty else None, - use_container_width=True, ) with dl_c: - st.download_button( + html_download_button( "Download config JSON", - data=config_bytes, + config_bytes, file_name="text_clean_config.json", mime="application/json", - key="textclean_dl_config", - use_container_width=True, ) st.divider() diff --git a/src/gui/pages/3_Format_Standardizer.py b/src/gui/pages/3_Format_Standardizer.py index 75d4d05..dda5741 100644 --- a/src/gui/pages/3_Format_Standardizer.py +++ b/src/gui/pages/3_Format_Standardizer.py @@ -17,6 +17,7 @@ if str(_project_root) not in sys.path: from src.gui.components import ( back_to_home_link, hide_streamlit_chrome, + html_download_button, pickup_or_upload, require_feature_or_render_upgrade, ) @@ -604,13 +605,14 @@ st.dataframe(result.standardized_df.head(10), use_container_width=True) # Downloads # --------------------------------------------------------------------------- # -# All three byte buffers are prepared up front (outside the columns) so -# each ``st.download_button`` sees stable ``data`` across reruns and an -# explicit ``key`` — without those, Streamlit auto-derived widget IDs -# can collide for multiple download_buttons in adjacent columns and -# only the first one actually fires on click. 
The empty-changes case -# now renders a disabled button (rather than vanishing) so the layout -# stays steady and the user understands why nothing's available. +# Rendered via ``html_download_button`` (raw anchor) rather +# than ``st.download_button``. The latter has a long-standing bug where +# the second and third download_buttons rendered in the same script pass +# fail to fire — only the first one's click reaches the browser save +# dialog. The HTML helper bypasses the widget system entirely and works +# uniformly across all browsers. The empty-changes case still renders a +# disabled button (rather than vanishing) so the layout stays steady and +# the user understands why nothing's available. st.divider() stem = Path(st.session_state.get("fmtstd_input_name", "input")).stem @@ -625,33 +627,27 @@ config_bytes = json.dumps(options.to_dict(), indent=2).encode("utf-8") dl_a, dl_b, dl_c = st.columns(3) with dl_a: - st.download_button( + html_download_button( "Download standardized CSV", - data=standardized_bytes, + standardized_bytes, file_name=f"{stem}_standardized.csv", mime="text/csv", - key="fmtstd_dl_standardized", - use_container_width=True, ) with dl_b: - st.download_button( + html_download_button( "Download changes audit", - data=changes_bytes, + changes_bytes, file_name=f"{stem}_changes.csv", mime="text/csv", - key="fmtstd_dl_changes", disabled=result.changes.empty, help="No changes to audit." 
if result.changes.empty else None, - use_container_width=True, ) with dl_c: - st.download_button( + html_download_button( "Download config JSON", - data=config_bytes, + config_bytes, file_name="format_standardize_config.json", mime="application/json", - key="fmtstd_dl_config", - use_container_width=True, ) st.divider() diff --git a/src/gui/pages/4_Missing_Values.py b/src/gui/pages/4_Missing_Values.py index 5701219..f499f59 100644 --- a/src/gui/pages/4_Missing_Values.py +++ b/src/gui/pages/4_Missing_Values.py @@ -17,6 +17,7 @@ if str(_project_root) not in sys.path: from src.gui.components import ( back_to_home_link, hide_streamlit_chrome, + html_download_button, pickup_or_upload, require_feature_or_render_upgrade, ) @@ -364,13 +365,14 @@ st.dataframe(result.handled_df.head(10), use_container_width=True) # Downloads # --------------------------------------------------------------------------- # -# All three byte buffers are prepared up front (outside the columns) so -# each ``st.download_button`` sees stable ``data`` across reruns and an -# explicit ``key`` — without those, Streamlit auto-derived widget IDs -# can collide for multiple download_buttons in adjacent columns and -# only the first one actually fires on click. The empty-changes case -# now renders a disabled button (rather than vanishing) so the layout -# stays steady and the user understands why nothing's available. +# Rendered via ``html_download_button`` (raw anchor) rather +# than ``st.download_button``. The latter has a long-standing bug where +# the second and third download_buttons rendered in the same script pass +# fail to fire — only the first one's click reaches the browser save +# dialog. The HTML helper bypasses the widget system entirely and works +# uniformly across all browsers. The empty-changes case still renders a +# disabled button (rather than vanishing) so the layout stays steady and +# the user understands why nothing's available. 
st.divider() stem = Path(st.session_state.get("missing_input_name", "input")).stem @@ -387,33 +389,27 @@ config_bytes = json.dumps( dl_a, dl_b, dl_c = st.columns(3) with dl_a: - st.download_button( + html_download_button( "Download handled CSV", - data=handled_bytes, + handled_bytes, file_name=f"{stem}_missing.csv", mime="text/csv", - key="missing_dl_handled", - use_container_width=True, ) with dl_b: - st.download_button( + html_download_button( "Download changes audit", - data=changes_bytes, + changes_bytes, file_name=f"{stem}_missing_changes.csv", mime="text/csv", - key="missing_dl_changes", disabled=result.changes.empty, help="No changes to audit." if result.changes.empty else None, - use_container_width=True, ) with dl_c: - st.download_button( + html_download_button( "Download config JSON", - data=config_bytes, + config_bytes, file_name="missing_config.json", mime="application/json", - key="missing_dl_config", - use_container_width=True, ) st.divider() diff --git a/src/gui/pages/5_Column_Mapper.py b/src/gui/pages/5_Column_Mapper.py index b6edda5..47f578b 100644 --- a/src/gui/pages/5_Column_Mapper.py +++ b/src/gui/pages/5_Column_Mapper.py @@ -17,6 +17,7 @@ if str(_project_root) not in sys.path: from src.gui.components import ( back_to_home_link, hide_streamlit_chrome, + html_download_button, pickup_or_upload, require_feature_or_render_upgrade, ) @@ -432,33 +433,27 @@ _no_mapping = not result.mapping dl_a, dl_b, dl_c = st.columns(3) with dl_a: - st.download_button( + html_download_button( "Download mapped CSV", - data=mapped_bytes, + mapped_bytes, file_name=f"{stem}_mapped.csv", mime="text/csv", - key="colmap_dl_mapped", - use_container_width=True, ) with dl_b: - st.download_button( + html_download_button( "Download mapping audit", - data=audit_bytes, + audit_bytes, file_name=f"{stem}_mapping.json", mime="application/json", - key="colmap_dl_audit", disabled=_no_mapping, help="No mapping was applied." 
if _no_mapping else None, - use_container_width=True, ) with dl_c: - st.download_button( + html_download_button( "Download config JSON", - data=config_bytes, + config_bytes, file_name="column_map_config.json", mime="application/json", - key="colmap_dl_config", - use_container_width=True, ) st.divider() diff --git a/src/gui/pages/9_Pipeline_Runner.py b/src/gui/pages/9_Pipeline_Runner.py index 9bafbfa..17ea272 100644 --- a/src/gui/pages/9_Pipeline_Runner.py +++ b/src/gui/pages/9_Pipeline_Runner.py @@ -17,6 +17,7 @@ if str(_project_root) not in sys.path: from src.gui.components import ( back_to_home_link, hide_streamlit_chrome, + html_download_button, pickup_or_upload, require_feature_or_render_upgrade, ) @@ -386,37 +387,31 @@ _pipeline_empty = current_pipeline is None or not current_pipeline.steps dl_a, dl_b, dl_c = st.columns(3) with dl_a: - st.download_button( + html_download_button( "Download cleaned CSV", - data=cleaned_bytes, + cleaned_bytes, file_name=f"{stem}_pipeline.csv", mime="text/csv", - key="pipeline_dl_cleaned", - use_container_width=True, ) with dl_b: - st.download_button( + html_download_button( "Download pipeline JSON", - data=pipeline_bytes, + pipeline_bytes, file_name="pipeline.json", mime="application/json", - key="pipeline_dl_pipeline", disabled=_pipeline_empty, help=( "No pipeline defined." if _pipeline_empty else "Save this and pass --pipeline pipeline.json to the CLI to re-run on next week's file." ), - use_container_width=True, ) with dl_c: - st.download_button( + html_download_button( "Download run audit", - data=audit_bytes, + audit_bytes, file_name=f"{stem}_pipeline_audit.json", mime="application/json", - key="pipeline_dl_audit", - use_container_width=True, ) st.divider()