feat(text-cleaner): collapse options + auto-scroll to Results on run
After clicking Clean Text, the user was left at the bottom of the
script's page with the Options block still expanded and no viewport
movement, so they had to scroll to find the Results.
- Wrap the whole Options block in an outer ``st.expander("Options",
expanded=not _has_result)``. After the Clean Text rerun, both
Preview AND Options collapse, leaving the primary action button +
Results as the only prominent elements above the fold. The inner
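Advanced-options expander is preserved as a nested expander
(supported in Streamlit 1.36+; NOTE: this repo pins 1.35+, so the
minimum pin does not by itself guarantee nested-expander support —
confirm against the Streamlit docs or raise the pin).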
- Add a 1px anchor div ``#textclean-results-anchor`` immediately
before the Results subheader.
- On Clean Text click, set a one-shot ``_textclean_scroll_to_results``
flag in session state; on the next render, pop the flag and inject
a tiny ``st.components.v1.html`` iframe whose ``<script>`` calls
``scrollIntoView`` on the parent document's anchor. One-shot so
re-renders triggered by other widgets (Show-hidden toggle, etc.)
don't jerk the viewport back to the top of Results.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -123,82 +123,86 @@ st.divider()
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Options
|
# Options
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Wrapped in an outer expander whose default state mirrors the preview
|
||||||
|
# expander above: open before a result exists, folded once the user has
|
||||||
|
# clicked Clean Text. Together they push the Results section to the top
|
||||||
|
# of the visible area after a run.
|
||||||
|
|
||||||
st.subheader("Options")
|
with st.expander("Options", expanded=not _has_result):
|
||||||
|
preset_label = st.radio(
|
||||||
preset_label = st.radio(
|
"Preset",
|
||||||
"Preset",
|
["excel-hygiene (recommended)", "minimal", "paranoid"],
|
||||||
["excel-hygiene (recommended)", "minimal", "paranoid"],
|
|
||||||
index=0,
|
|
||||||
horizontal=True,
|
|
||||||
help=(
|
|
||||||
"excel-hygiene: trim, collapse whitespace, fold smart quotes, strip "
|
|
||||||
"invisible chars, normalize line endings, NFC. "
|
|
||||||
"minimal: only trim and collapse. "
|
|
||||||
"paranoid: everything including NFKC compat fold (lossy)."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
preset_key = preset_label.split(" ", 1)[0]
|
|
||||||
options = CleanOptions.from_preset(preset_key)
|
|
||||||
|
|
||||||
with st.expander("Advanced options"):
|
|
||||||
col_a, col_b = st.columns(2)
|
|
||||||
with col_a:
|
|
||||||
options.trim = st.checkbox("Trim leading/trailing whitespace", value=options.trim)
|
|
||||||
options.collapse_whitespace = st.checkbox(
|
|
||||||
"Collapse internal whitespace", value=options.collapse_whitespace,
|
|
||||||
)
|
|
||||||
options.normalize_line_endings = st.checkbox(
|
|
||||||
"Normalize line endings (\\r\\n → \\n)", value=options.normalize_line_endings,
|
|
||||||
)
|
|
||||||
options.strip_control = st.checkbox(
|
|
||||||
"Strip control characters", value=options.strip_control,
|
|
||||||
)
|
|
||||||
options.strip_bom = st.checkbox("Strip BOM", value=options.strip_bom)
|
|
||||||
with col_b:
|
|
||||||
options.fold_smart_chars = st.checkbox(
|
|
||||||
"Fold smart characters (curly quotes, em-dash, NBSP)",
|
|
||||||
value=options.fold_smart_chars,
|
|
||||||
)
|
|
||||||
options.strip_zero_width = st.checkbox(
|
|
||||||
"Strip zero-width / invisible characters", value=options.strip_zero_width,
|
|
||||||
)
|
|
||||||
options.nfc = st.checkbox("Unicode NFC normalization", value=options.nfc)
|
|
||||||
options.nfkc = st.checkbox(
|
|
||||||
"Unicode NFKC compat fold (lossy: ① → 1, fi → fi)",
|
|
||||||
value=options.nfkc,
|
|
||||||
)
|
|
||||||
|
|
||||||
st.markdown("**Scope**")
|
|
||||||
string_cols = [
|
|
||||||
c for c in df.columns
|
|
||||||
if pd.api.types.is_object_dtype(df[c]) or pd.api.types.is_string_dtype(df[c])
|
|
||||||
]
|
|
||||||
selected_cols = st.multiselect(
|
|
||||||
"Columns to clean (default: all string columns)",
|
|
||||||
options=list(df.columns),
|
|
||||||
default=string_cols,
|
|
||||||
)
|
|
||||||
skip_cols = st.multiselect(
|
|
||||||
"Columns to skip even if they look like text",
|
|
||||||
options=list(df.columns),
|
|
||||||
default=[],
|
|
||||||
)
|
|
||||||
options.columns = selected_cols if selected_cols else None
|
|
||||||
options.skip_columns = list(skip_cols)
|
|
||||||
|
|
||||||
st.markdown("**Case conversion**")
|
|
||||||
case_global = st.selectbox(
|
|
||||||
"Apply case conversion to selected columns",
|
|
||||||
["None", "UPPER", "lower", "Title", "Sentence"],
|
|
||||||
index=0,
|
index=0,
|
||||||
|
horizontal=True,
|
||||||
|
help=(
|
||||||
|
"excel-hygiene: trim, collapse whitespace, fold smart quotes, strip "
|
||||||
|
"invisible chars, normalize line endings, NFC. "
|
||||||
|
"minimal: only trim and collapse. "
|
||||||
|
"paranoid: everything including NFKC compat fold (lossy)."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
case_map = {
|
preset_key = preset_label.split(" ", 1)[0]
|
||||||
"UPPER": "upper", "lower": "lower",
|
options = CleanOptions.from_preset(preset_key)
|
||||||
"Title": "title", "Sentence": "sentence",
|
|
||||||
}
|
with st.expander("Advanced options"):
|
||||||
if case_global != "None":
|
col_a, col_b = st.columns(2)
|
||||||
options.case = case_map[case_global] # type: ignore[assignment]
|
with col_a:
|
||||||
|
options.trim = st.checkbox("Trim leading/trailing whitespace", value=options.trim)
|
||||||
|
options.collapse_whitespace = st.checkbox(
|
||||||
|
"Collapse internal whitespace", value=options.collapse_whitespace,
|
||||||
|
)
|
||||||
|
options.normalize_line_endings = st.checkbox(
|
||||||
|
"Normalize line endings (\\r\\n → \\n)", value=options.normalize_line_endings,
|
||||||
|
)
|
||||||
|
options.strip_control = st.checkbox(
|
||||||
|
"Strip control characters", value=options.strip_control,
|
||||||
|
)
|
||||||
|
options.strip_bom = st.checkbox("Strip BOM", value=options.strip_bom)
|
||||||
|
with col_b:
|
||||||
|
options.fold_smart_chars = st.checkbox(
|
||||||
|
"Fold smart characters (curly quotes, em-dash, NBSP)",
|
||||||
|
value=options.fold_smart_chars,
|
||||||
|
)
|
||||||
|
options.strip_zero_width = st.checkbox(
|
||||||
|
"Strip zero-width / invisible characters", value=options.strip_zero_width,
|
||||||
|
)
|
||||||
|
options.nfc = st.checkbox("Unicode NFC normalization", value=options.nfc)
|
||||||
|
options.nfkc = st.checkbox(
|
||||||
|
"Unicode NFKC compat fold (lossy: ① → 1, fi → fi)",
|
||||||
|
value=options.nfkc,
|
||||||
|
)
|
||||||
|
|
||||||
|
st.markdown("**Scope**")
|
||||||
|
string_cols = [
|
||||||
|
c for c in df.columns
|
||||||
|
if pd.api.types.is_object_dtype(df[c]) or pd.api.types.is_string_dtype(df[c])
|
||||||
|
]
|
||||||
|
selected_cols = st.multiselect(
|
||||||
|
"Columns to clean (default: all string columns)",
|
||||||
|
options=list(df.columns),
|
||||||
|
default=string_cols,
|
||||||
|
)
|
||||||
|
skip_cols = st.multiselect(
|
||||||
|
"Columns to skip even if they look like text",
|
||||||
|
options=list(df.columns),
|
||||||
|
default=[],
|
||||||
|
)
|
||||||
|
options.columns = selected_cols if selected_cols else None
|
||||||
|
options.skip_columns = list(skip_cols)
|
||||||
|
|
||||||
|
st.markdown("**Case conversion**")
|
||||||
|
case_global = st.selectbox(
|
||||||
|
"Apply case conversion to selected columns",
|
||||||
|
["None", "UPPER", "lower", "Title", "Sentence"],
|
||||||
|
index=0,
|
||||||
|
)
|
||||||
|
case_map = {
|
||||||
|
"UPPER": "upper", "lower": "lower",
|
||||||
|
"Title": "title", "Sentence": "sentence",
|
||||||
|
}
|
||||||
|
if case_global != "None":
|
||||||
|
options.case = case_map[case_global] # type: ignore[assignment]
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Run
|
# Run
|
||||||
@@ -215,10 +219,13 @@ if st.button("Clean Text", type="primary", use_container_width=True):
|
|||||||
st.stop()
|
st.stop()
|
||||||
st.session_state["textclean_result"] = result
|
st.session_state["textclean_result"] = result
|
||||||
st.session_state["textclean_input_name"] = uploaded.name
|
st.session_state["textclean_input_name"] = uploaded.name
|
||||||
# Force a second rerun so the preview-section expander above sees
|
# One-shot flag picked up on the next pass to scroll the parent
|
||||||
# the new result on its NEXT script pass and collapses itself.
|
# document to the Results anchor (see scroll snippet below).
|
||||||
# Without this the preview stays expanded until the user touches
|
st.session_state["_textclean_scroll_to_results"] = True
|
||||||
# any other widget.
|
# Force a second rerun so the preview and options expanders see
|
||||||
|
# the new result on the NEXT script pass and collapse themselves.
|
||||||
|
# Without this they stay expanded until the user touches any
|
||||||
|
# other widget.
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
result = st.session_state.get("textclean_result")
|
result = st.session_state.get("textclean_result")
|
||||||
@@ -229,6 +236,16 @@ if result is None:
|
|||||||
# Results
|
# Results
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Anchor target for the auto-scroll snippet at the end of this block.
|
||||||
|
# A bare ``<div id="...">`` survives Streamlit's HTML sanitizer (only
|
||||||
|
# ``<script>`` is stripped), and a 1px-tall div doesn't visually shift
|
||||||
|
# anything. Placed before the subheader so the scrolled-to viewport
|
||||||
|
# starts a few pixels above the section heading rather than below it.
|
||||||
|
st.markdown(
|
||||||
|
'<div id="textclean-results-anchor" style="height:1px"></div>',
|
||||||
|
unsafe_allow_html=True,
|
||||||
|
)
|
||||||
|
|
||||||
st.subheader("Results")
|
st.subheader("Results")
|
||||||
|
|
||||||
pct = (result.cells_changed / result.cells_total * 100.0) if result.cells_total else 0.0
|
pct = (result.cells_changed / result.cells_total * 100.0) if result.cells_total else 0.0
|
||||||
@@ -344,3 +361,30 @@ with dl_c:
|
|||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
st.caption("Runs locally. Your data never leaves this computer. | DataTools v3.0")
|
st.caption("Runs locally. Your data never leaves this computer. | DataTools v3.0")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Post-run auto-scroll
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# When the user clicks Clean Text, the preview + options collapse but
|
||||||
|
# Streamlit by itself doesn't scroll — the Results section is at the
|
||||||
|
# bottom of a tall script so the user has to find it. Inject a tiny
|
||||||
|
# component-html iframe that calls ``scrollIntoView`` on the parent's
|
||||||
|
# Results anchor. Streamlit's main page is same-origin with component
|
||||||
|
# iframes so ``window.parent.document`` access is allowed.
|
||||||
|
#
|
||||||
|
# The flag is one-shot (``pop`` removes it) so re-renders triggered by
|
||||||
|
# unrelated widgets in the Results section (e.g., the Show-hidden
|
||||||
|
# toggle) don't yank the viewport back to the top of Results.
|
||||||
|
if st.session_state.pop("_textclean_scroll_to_results", False):
|
||||||
|
from streamlit.components.v1 import html as _components_html
|
||||||
|
_components_html(
|
||||||
|
"""
|
||||||
|
<script>
|
||||||
|
const doc = window.parent.document;
|
||||||
|
const target = doc.getElementById('textclean-results-anchor');
|
||||||
|
if (target) target.scrollIntoView({behavior: 'smooth', block: 'start'});
|
||||||
|
</script>
|
||||||
|
""",
|
||||||
|
height=0,
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user