From 3f007ef3d697118e28054d337f202ce0aa9c6458 Mon Sep 17 00:00:00 2001
From: Michael
Date: Wed, 29 Apr 2026 21:23:21 +0000
Subject: [PATCH] feat(gui): 1 GB upload cap + delimiter / encoding diversity caption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Streamlit's default file_uploader footer reads "Limit 200MB per file —
CSV, TSV, XLSX, XLS", which contradicts the 1 GB efficiency target
shipped in 438bc0f and codified in docs/REQUIREMENTS.md §1.1.

Three changes:

1. .streamlit/config.toml — set [server] maxUploadSize = 1024. Footer
   now reads "Limit 1024MB per file".

2. upload_and_analyze_section (home page) — adds an explicit caption
   above the uploader stating size limit, supported formats, the four
   auto-detected delimiters, and the 13 auto-detected encodings (with
   the Review-page override as the safety net). The uploader also gets
   a shorter help= tooltip summarizing the limit, the delimiters, and
   the encoding override.

3. pickup_or_upload (every tool page that falls back to its own
   uploader when no home-page upload is present) — a condensed version
   of the same caption, only rendered when the upload accepts
   CSV/TSV/XLSX/XLS so JSON schema / config uploaders aren't decorated.

Test suite: 765 passed, 17 xfailed (no regressions). Home + Review +
Deduplicator pages all serve HTTP 200 under the new config.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .streamlit/config.toml        |  6 ++++++
 src/gui/components/_legacy.py | 19 +++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/.streamlit/config.toml b/.streamlit/config.toml
index 337ace2..fa008c6 100644
--- a/.streamlit/config.toml
+++ b/.streamlit/config.toml
@@ -3,3 +3,9 @@ toolbarMode = "minimal"
 
 [browser]
 gatherUsageStats = false
+
+[server]
+# Lift the default 200 MB upload cap to 1 GB so file_uploader's footer
+# reads "Limit 1024MB per file" — matches the analyzer + gate's stated
+# 1 GB efficiency target. See docs/REQUIREMENTS.md §1.1.
+maxUploadSize = 1024 diff --git a/src/gui/components/_legacy.py b/src/gui/components/_legacy.py index f02b6a0..6892077 100644 --- a/src/gui/components/_legacy.py +++ b/src/gui/components/_legacy.py @@ -981,11 +981,23 @@ def upload_and_analyze_section() -> None: "Optional: scan an uploaded file for data quality issues and see " "which tools can fix each one. Skip if you already know what you need." ) + st.caption( + "**Up to 1 GB.** Formats: CSV, TSV, XLSX, XLS. " + "Delimiters auto-detected: comma, tab, semicolon, pipe. " + "Encodings auto-detected: UTF-8 (with/without BOM), UTF-16, " + "cp1252, Latin-1/9, cp1250, ISO-8859-2, cp1251, KOI8-R, " + "Mac Roman, Shift_JIS, GB18030, Big5, EUC-KR — and override on the Review page." + ) uploaded = st.file_uploader( "Upload CSV or Excel", type=["csv", "tsv", "xlsx", "xls"], key="home_upload", + help=( + "Up to 1 GB. Comma / tab / semicolon / pipe delimiters all " + "auto-detected. Encoding auto-detected with override on the " + "Review page if needed." + ), ) if uploaded is None: return @@ -1174,6 +1186,13 @@ def pickup_or_upload( st.rerun() return _StashedUpload(name, st.session_state["home_uploaded_bytes"]) + if {"csv", "tsv", "xlsx", "xls"} & set(types): + st.caption( + "Up to 1 GB. Delimiters auto-detected: comma, tab, semicolon, pipe. " + "Encoding auto-detected (UTF-8 / UTF-16 / cp1252 / Latin-1 family / " + "cp1250 / cp1251 / KOI8-R / Mac Roman / Shift_JIS / GB18030 / Big5 / " + "EUC-KR), with override on the Review page." + ) uploaded = st.file_uploader(label, type=types, key=key, help=help) if uploaded is not None and st.session_state.get(override_key): # User has uploaded their own file on this page; clear the override