diff --git a/src/gui/pages/2_Text_Cleaner.py b/src/gui/pages/2_Text_Cleaner.py index 4d7e667..61ecf02 100644 --- a/src/gui/pages/2_Text_Cleaner.py +++ b/src/gui/pages/2_Text_Cleaner.py @@ -123,82 +123,86 @@ st.divider() # --------------------------------------------------------------------------- # Options # --------------------------------------------------------------------------- +# +# Wrapped in an outer expander whose default state mirrors the preview +# expander above: open before a result exists, folded once the user has +# clicked Clean Text. Together they push the Results section to the top +# of the visible area after a run. -st.subheader("Options") - -preset_label = st.radio( - "Preset", - ["excel-hygiene (recommended)", "minimal", "paranoid"], - index=0, - horizontal=True, - help=( - "excel-hygiene: trim, collapse whitespace, fold smart quotes, strip " - "invisible chars, normalize line endings, NFC. " - "minimal: only trim and collapse. " - "paranoid: everything including NFKC compat fold (lossy)." - ), -) -preset_key = preset_label.split(" ", 1)[0] -options = CleanOptions.from_preset(preset_key) - -with st.expander("Advanced options"): - col_a, col_b = st.columns(2) - with col_a: - options.trim = st.checkbox("Trim leading/trailing whitespace", value=options.trim) - options.collapse_whitespace = st.checkbox( - "Collapse internal whitespace", value=options.collapse_whitespace, - ) - options.normalize_line_endings = st.checkbox( - "Normalize line endings (\\r\\n → \\n)", value=options.normalize_line_endings, - ) - options.strip_control = st.checkbox( - "Strip control characters", value=options.strip_control, - ) - options.strip_bom = st.checkbox("Strip BOM", value=options.strip_bom) - with col_b: - options.fold_smart_chars = st.checkbox( - "Fold smart characters (curly quotes, em-dash, NBSP)", - value=options.fold_smart_chars, - ) - options.strip_zero_width = st.checkbox( - "Strip zero-width / invisible characters", value=options.strip_zero_width, - ) - options.nfc = st.checkbox("Unicode NFC normalization", value=options.nfc) - options.nfkc = st.checkbox( - "Unicode NFKC compat fold (lossy: ① → 1, fi → fi)", - value=options.nfkc, - ) - - st.markdown("**Scope**") - string_cols = [ - c for c in df.columns - if pd.api.types.is_object_dtype(df[c]) or pd.api.types.is_string_dtype(df[c]) - ] - selected_cols = st.multiselect( - "Columns to clean (default: all string columns)", - options=list(df.columns), - default=string_cols, - ) - skip_cols = st.multiselect( - "Columns to skip even if they look like text", - options=list(df.columns), - default=[], - ) - options.columns = selected_cols if selected_cols else None - options.skip_columns = list(skip_cols) - - st.markdown("**Case conversion**") - case_global = st.selectbox( - "Apply case conversion to selected columns", - ["None", "UPPER", "lower", "Title", "Sentence"], +with st.expander("Options", expanded=not _has_result): + preset_label = st.radio( + "Preset", + ["excel-hygiene (recommended)", "minimal", "paranoid"], index=0, + horizontal=True, + help=( + "excel-hygiene: trim, collapse whitespace, fold smart quotes, strip " + "invisible chars, normalize line endings, NFC. " + "minimal: only trim and collapse. " + "paranoid: everything including NFKC compat fold (lossy)." + ), ) - case_map = { - "UPPER": "upper", "lower": "lower", - "Title": "title", "Sentence": "sentence", - } - if case_global != "None": - options.case = case_map[case_global] # type: ignore[assignment] + preset_key = preset_label.split(" ", 1)[0] + options = CleanOptions.from_preset(preset_key) + + with st.expander("Advanced options"): + col_a, col_b = st.columns(2) + with col_a: + options.trim = st.checkbox("Trim leading/trailing whitespace", value=options.trim) + options.collapse_whitespace = st.checkbox( + "Collapse internal whitespace", value=options.collapse_whitespace, + ) + options.normalize_line_endings = st.checkbox( + "Normalize line endings (\\r\\n → \\n)", value=options.normalize_line_endings, + ) + options.strip_control = st.checkbox( + "Strip control characters", value=options.strip_control, + ) + options.strip_bom = st.checkbox("Strip BOM", value=options.strip_bom) + with col_b: + options.fold_smart_chars = st.checkbox( + "Fold smart characters (curly quotes, em-dash, NBSP)", + value=options.fold_smart_chars, + ) + options.strip_zero_width = st.checkbox( + "Strip zero-width / invisible characters", value=options.strip_zero_width, + ) + options.nfc = st.checkbox("Unicode NFC normalization", value=options.nfc) + options.nfkc = st.checkbox( + "Unicode NFKC compat fold (lossy: ① → 1, fi → fi)", + value=options.nfkc, + ) + + st.markdown("**Scope**") + string_cols = [ + c for c in df.columns + if pd.api.types.is_object_dtype(df[c]) or pd.api.types.is_string_dtype(df[c]) + ] + selected_cols = st.multiselect( + "Columns to clean (default: all string columns)", + options=list(df.columns), + default=string_cols, + ) + skip_cols = st.multiselect( + "Columns to skip even if they look like text", + options=list(df.columns), + default=[], + ) + options.columns = selected_cols if selected_cols else None + options.skip_columns = list(skip_cols) + + st.markdown("**Case conversion**") + case_global = st.selectbox( + "Apply case conversion to selected columns", + ["None", "UPPER", "lower", "Title", "Sentence"], + index=0, + ) + case_map = { + "UPPER": "upper", "lower": "lower", + "Title": "title", "Sentence": "sentence", + } + if case_global != "None": + options.case = case_map[case_global] # type: ignore[assignment] # --------------------------------------------------------------------------- # Run @@ -215,10 +219,13 @@ if st.button("Clean Text", type="primary", use_container_width=True): st.stop() st.session_state["textclean_result"] = result st.session_state["textclean_input_name"] = uploaded.name - # Force a second rerun so the preview-section expander above sees - # the new result on its NEXT script pass and collapses itself. - # Without this the preview stays expanded until the user touches - # any other widget. + # One-shot flag picked up on the next pass to scroll the parent + # document to the Results anchor (see scroll snippet below). + st.session_state["_textclean_scroll_to_results"] = True + # Force a second rerun so the preview and options expanders see + # the new result on the NEXT script pass and collapse themselves. + # Without this they stay expanded until the user touches any + # other widget. st.rerun() result = st.session_state.get("textclean_result") @@ -229,6 +236,16 @@ if result is None: # Results # --------------------------------------------------------------------------- +# Anchor target for the auto-scroll snippet at the end of this block. +# A bare ``
`` survives Streamlit's HTML sanitizer (only +# `` + """, + height=0, + )