diff --git a/src/gui/pages/10_PDF_Extractor.py b/src/gui/pages/10_PDF_Extractor.py index b70f302..8619463 100644 --- a/src/gui/pages/10_PDF_Extractor.py +++ b/src/gui/pages/10_PDF_Extractor.py @@ -174,19 +174,20 @@ pdf_uploads: dict = st.session_state.setdefault(K_UPLOADS, {}) upload_counter: int = st.session_state.setdefault(K_UPLOAD_COUNTER, 0) uploader_key = f"pdf_upload_v{upload_counter}" -# Hide the file_uploader's built-in file list (Streamlit shows -# tiny chips with X buttons under its dropzone). We render our own -# Home-style list below, so suppressing the native one leaves a -# single source of truth on screen. + +# Mirror the Home-page upload pattern: the Streamlit file_uploader +# is positioned off-screen via CSS (keeps its underlying ``<input>`` reachable to JS), and the page renders a Home-style +# bordered file list with an "Add more files" button at the +# bottom. A small iframe-injected script wires that button to +# programmatically click the hidden uploader so the OS file picker +# opens. Same approach as ``_sync_uploader_to_home_uploads`` in +# ``src/gui/_home.py``. st.markdown( - """""", + '', unsafe_allow_html=True, ) @@ -195,8 +196,6 @@ def _sync_pdf_uploads() -> None: """``on_change`` callback. Adds newly-uploaded files to the persistent stash. **Add-only** — removal happens through the custom X buttons + counter bump, NOT through this callback. - That way the widget's hidden native X buttons can't silently - drop files behind the user's back, and we can ignore them. """ widget_files = st.session_state.get(uploader_key) or [] for f in widget_files: @@ -220,81 +219,178 @@ st.file_uploader( accept_multiple_files=True, key=uploader_key, on_change=_sync_pdf_uploads, + label_visibility="collapsed", help="Drop one or more bank-statement PDFs. 
Multi-file batches " "are merged into a single table with a ``source_file`` column.", ) # --------------------------------------------------------------------------- -# Custom file list (Home-style: one row per file, X to remove) +# Files section (Home-style layout) # --------------------------------------------------------------------------- -if pdf_uploads: - n = len(pdf_uploads) - total = sum(m["size"] for m in pdf_uploads.values()) - word = "file" if n == 1 else "files" - st.markdown( - f"**{n} {word}** ยท {_format_size(total)} total", +import html as _html + +_DOC_SVG = ( + '' + '' + '' + '' +) +_PLUS_SVG = ( + '' + '' + '' +) + +n_files = len(pdf_uploads) +if n_files: + total_bytes = sum(m["size"] for m in pdf_uploads.values()) + files_word = "file" if n_files == 1 else "files" + meta_html = ( + f'{n_files} {files_word} ยท ' + f'{_html.escape(_format_size(total_bytes))} total' ) - to_remove: str | None = None - with st.container(border=True): - for name, meta in pdf_uploads.items(): - digest = hashlib.sha1( - name.encode("utf-8"), usedforsecurity=False, - ).hexdigest()[:10] - col_x, col_name, col_size = st.columns([0.55, 8, 1.6]) - if col_x.button( - "โœ•", - key=f"pdf_rm_{digest}", - help=f"Remove {name}", - type="tertiary", - ): - to_remove = name - col_name.markdown(f"๐Ÿ“„ **{name}**") - col_size.markdown( - f"
" - f"{_format_size(meta['size'])}
", - unsafe_allow_html=True, - ) +else: + meta_html = "No files imported yet" - c_scan, c_clear = st.columns([1, 4]) - with c_scan: - scan_clicked = st.button("Scan", type="primary") - with c_clear: - if st.button( - "Clear all files", - type="secondary", - help="Removes all uploaded files and the last scan result.", +st.markdown( + '
' + '

Files

' + f'{meta_html}' + '
', + unsafe_allow_html=True, +) + +# Single bordered card hosting the file rows + the in-card +# "Add more files" button at the bottom, matching the Home page. +# Two-phase remove pattern: walk all rows once, accumulate +# ``to_remove`` if any X was clicked, then mutate state + rerun +# ONCE after the loop so Streamlit doesn't see a half-mutated +# dict mid-render. +to_remove: str | None = None +with st.container(border=True): + for name, meta in pdf_uploads.items(): + digest = hashlib.sha1( + name.encode("utf-8"), usedforsecurity=False, + ).hexdigest()[:10] + col_x, col_name, col_size = st.columns([0.55, 8, 1.6]) + if col_x.button( + "โœ•", + key=f"pdf_rm_{digest}", + help=f"Remove {name}", + type="tertiary", ): - st.session_state[K_UPLOADS] = {} - st.session_state[K_UPLOAD_COUNTER] = upload_counter + 1 - for k in (K_ROWS, K_WARNINGS, K_SOURCE_COUNT): - st.session_state.pop(k, None) - log_event( - "upload", - "PDF list cleared", - page="10_PDF_Extractor", - count=n, - ) - st.rerun() + to_remove = name + col_name.markdown( + '
' + f'{_DOC_SVG}' + f'{_html.escape(name)}' + '
', + unsafe_allow_html=True, + ) + col_size.markdown( + f'
' + f'' + f'{_html.escape(_format_size(meta["size"]))}' + '
', + unsafe_allow_html=True, + ) + # In-card "Add more files" button. The HTML is rendered as-is + # — Streamlit's sanitiser strips inline ``onclick``, so the + # click wiring is done by the iframe script below. + st.markdown( + '', + unsafe_allow_html=True, + ) - if to_remove is not None: +# Wire the in-card "Add more files" button to the off-screen +# ``stFileUploaderDropzoneInput``. Identical pattern to the +# Home page (see ``src/gui/_home.py``); a ``MutationObserver`` +# re-wires after every Streamlit rerun in case the button got +# re-mounted. +st.iframe( + """ + +""", + height=1, +) + +if to_remove is not None: + log_event( + "upload", + f"PDF removed: {to_remove}", + filename=to_remove, + page="10_PDF_Extractor", + ) + del pdf_uploads[to_remove] + # Bump the uploader counter so the widget re-instantiates and + # forgets the removed file. + st.session_state[K_UPLOAD_COUNTER] = upload_counter + 1 + st.rerun() + + +# --------------------------------------------------------------------------- +# Action buttons (Scan + Clear all) live below the Files card +# --------------------------------------------------------------------------- + +c_scan, c_clear, _spacer = st.columns([1, 1, 4]) +with c_scan: + scan_clicked = st.button( + "Scan", + type="primary", + disabled=not pdf_uploads, + use_container_width=True, + ) +with c_clear: + if st.button( + "Clear all files", + type="secondary", + disabled=not pdf_uploads, + help="Removes all uploaded files and the last scan result.", + use_container_width=True, + ): + st.session_state[K_UPLOADS] = {} + st.session_state[K_UPLOAD_COUNTER] = upload_counter + 1 + for k in (K_ROWS, K_WARNINGS, K_SOURCE_COUNT): + st.session_state.pop(k, None) log_event( "upload", - f"PDF removed: {to_remove}", - filename=to_remove, + "PDF list cleared", page="10_PDF_Extractor", + count=n_files, ) - del pdf_uploads[to_remove] - # Bump the uploader counter so the widget re-instantiates - # and forgets the removed file. 
Without this, the user - # would have to click the widget's own X (which is hidden) - # OR re-upload to refresh the state. - st.session_state[K_UPLOAD_COUNTER] = upload_counter + 1 st.rerun() -else: - st.caption("No files uploaded yet.") - scan_clicked = False # ---------------------------------------------------------------------------