feat(tools): unified post-run UX across all Ready tool pages
Apply the Clean Text page's post-run UX pattern to every other Ready
tool page (Find Duplicates, Standardize Formats, Fix Missing Values,
Map Columns, Automated Workflows) for consistency and ease of use.
Per page:
1. Preview wrapped in ``st.expander(f"Preview: {filename}",
expanded=not _has_result)``. Open before a result exists, folded
afterwards.
2. Options / configuration controls wrapped in
``st.expander("Options", expanded=not _has_result)``. Inner
sub-expanders preserved (Streamlit 1.36+ supports nesting).
3. After the primary action stashes the result, set a one-shot
``_<tool>_scroll_to_results`` flag in session state and call
``st.rerun()`` so the preview + options expanders see the new
state on the next pass and collapse themselves.
4. ``<div id="<tool>-results-anchor" style="height:1px">`` placed
immediately before the Results subheader.
5. End-of-page: pop the scroll flag and inject a tiny
``streamlit.components.v1.html`` iframe whose ``<script>`` calls
``scrollIntoView`` on the parent document's anchor. One-shot, so
unrelated reruns (toggling Show-hidden, etc.) don't yank the
viewport.
6. Download buttons hardened against the multi-button Streamlit
footgun: byte buffers pre-computed outside the column scopes,
explicit unique ``key="<tool>_dl_<purpose>"`` per button,
``use_container_width=True``, and previously-conditional buttons
now render unconditionally with ``disabled=True`` + a help
tooltip when the underlying data is empty so layout stays steady.
Per-page judgment calls (already noted in agent reports):
- Find Duplicates: sheet picker and delimiter selector kept OUTSIDE
expanders (the user still needs to see them when a file fails to
parse).
- Fix Missing Values: missingness profile wrapped INSIDE the Options
expander together with Strategy — the Results section already
shows a before/after missingness comparison that supersedes the
static input profile.
- Map Columns: all three subsections (Target schema, Strategy,
Mapping) wrapped under one outer Options expander, matching the
Text Cleaner pattern.
- Automated Workflows: inner "Recommended tool order" expander stays
nested inside the outer Options wrap; Run button stays outside
Options so the user can re-run after tweaking the (collapsed)
editor.
2008 tests pass.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -99,9 +99,13 @@ except Exception as e:
|
||||
)
|
||||
st.stop()
|
||||
|
||||
st.subheader(f"Preview: {uploaded.name}")
|
||||
st.caption(f"{len(df)} rows, {len(df.columns)} columns")
|
||||
st.dataframe(df.head(10), use_container_width=True)
|
||||
# Collapse the input preview once the user has clicked Standardize Formats
|
||||
# so the Results section below is the primary visual focus. The user can
|
||||
# re-expand the expander to re-inspect the source rows.
|
||||
_has_result = st.session_state.get("fmtstd_result") is not None
|
||||
with st.expander(f"Preview: {uploaded.name}", expanded=not _has_result):
|
||||
st.caption(f"{len(df)} rows, {len(df.columns)} columns")
|
||||
st.dataframe(df.head(10), use_container_width=True)
|
||||
st.divider()
|
||||
|
||||
|
||||
@@ -180,328 +184,335 @@ def _detect_field_type(col: str, samples: list[str]) -> FieldType | None:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Options
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
st.subheader("Column types")
|
||||
st.caption(
|
||||
"Assign each column to a field type. Auto-detected suggestions are "
|
||||
"pre-filled; pick **(skip)** to leave a column untouched."
|
||||
)
|
||||
|
||||
_FIELD_LABELS = {
|
||||
"(skip)": None,
|
||||
"Date": FieldType.DATE,
|
||||
"Phone": FieldType.PHONE,
|
||||
"Currency": FieldType.CURRENCY,
|
||||
"Name": FieldType.NAME,
|
||||
"Address": FieldType.ADDRESS,
|
||||
"Boolean": FieldType.BOOLEAN,
|
||||
}
|
||||
_LABEL_BY_TYPE = {v: k for k, v in _FIELD_LABELS.items()}
|
||||
_LABELS = list(_FIELD_LABELS.keys())
|
||||
|
||||
sample_size = min(len(df), 200)
|
||||
sample_df = df.head(sample_size)
|
||||
#
|
||||
# Wrapped in an outer expander whose default state mirrors the preview
|
||||
# expander above: open before a result exists, folded once the user has
|
||||
# clicked Standardize Formats. Together they push the Results section to
|
||||
# the top of the visible area after a run.
|
||||
|
||||
column_types: dict[str, FieldType] = {}
|
||||
cols_per_row = 3
|
||||
columns_iter = list(df.columns)
|
||||
for i in range(0, len(columns_iter), cols_per_row):
|
||||
cols_block = st.columns(cols_per_row)
|
||||
for j, col_name in enumerate(columns_iter[i:i + cols_per_row]):
|
||||
with cols_block[j]:
|
||||
detected = _detect_field_type(col_name, sample_df[col_name].tolist())
|
||||
default_label = _LABEL_BY_TYPE.get(detected, "(skip)")
|
||||
chosen = st.selectbox(
|
||||
col_name,
|
||||
_LABELS,
|
||||
index=_LABELS.index(default_label),
|
||||
key=f"fmtstd_type__{col_name}",
|
||||
)
|
||||
ft = _FIELD_LABELS[chosen]
|
||||
if ft is not None:
|
||||
column_types[col_name] = ft
|
||||
|
||||
st.divider()
|
||||
st.subheader("Format options")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Preset bundle picker
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Picking a preset rewrites every option below to that preset's defaults.
|
||||
# It does NOT touch column-type assignments — those are user-driven and
|
||||
# orthogonal. To make the rewrite stick across the rerun, we purge the
|
||||
# dependent per-option session keys on a preset switch; the widgets below
|
||||
# then fall back to the preset's defaults via ``index``/``value``.
|
||||
|
||||
_PRESET_LABELS = {
|
||||
"us-default": "US (default) — ISO 8601 dates · E.164 phones · USD",
|
||||
"european": "European — DMY input · INTL phones · EUR comma decimal",
|
||||
"uk": "UK — DD/MM/YYYY · GB phones · Yes/No booleans",
|
||||
"iso-strict": "ISO Strict — ISO 8601 · bare-number currency · true/false",
|
||||
"legacy-us": "Legacy US — MM/DD/YYYY · National phones · Yes/No",
|
||||
"custom": "Custom — keep current settings",
|
||||
}
|
||||
|
||||
preset_choice = st.radio(
|
||||
"Standards preset",
|
||||
list(_PRESET_LABELS.keys()),
|
||||
format_func=lambda k: _PRESET_LABELS[k],
|
||||
index=0,
|
||||
horizontal=False,
|
||||
key="fmtstd_preset",
|
||||
help=(
|
||||
"Pick a published standard or regional convention as the baseline. "
|
||||
"Every option below is still individually overridable; choose "
|
||||
"**Custom** to keep whatever you've manually adjusted."
|
||||
),
|
||||
)
|
||||
|
||||
# Detect a preset switch since the last rerun; when it changes (and the
|
||||
# new choice isn't ``custom``), purge the dependent widget keys so
|
||||
# Streamlit lets their ``index=``/``value=`` defaults take effect on the
|
||||
# new render. Without this clear, prior session_state pins the widget to
|
||||
# the previous preset's choice and the apparent picker becomes a no-op.
|
||||
_DEPENDENT_KEYS = [
|
||||
"fmtstd_date_format", "fmtstd_date_order",
|
||||
"fmtstd_phone_format", "fmtstd_phone_region",
|
||||
"fmtstd_currency_decimal", "fmtstd_currency_decimals",
|
||||
"fmtstd_currency_preserve", "fmtstd_currency_preserve_code",
|
||||
"fmtstd_name_case", "fmtstd_bool_style",
|
||||
]
|
||||
_last = st.session_state.get("fmtstd_preset_last")
|
||||
if _last != preset_choice:
|
||||
st.session_state["fmtstd_preset_last"] = preset_choice
|
||||
if preset_choice != "custom":
|
||||
for k in _DEPENDENT_KEYS:
|
||||
st.session_state.pop(k, None)
|
||||
st.rerun()
|
||||
|
||||
# Map preset → widget-state defaults. Done as labels so the radios/selects
|
||||
# below pick up the right index without us re-implementing each map twice.
|
||||
_PRESET_TO_WIDGETS: dict[str, dict[str, str]] = {
|
||||
"us-default": {
|
||||
"date_format": "YYYY-MM-DD (ISO)", "date_order": "MDY (US)",
|
||||
"phone_format": "E.164 (+15551234567)", "phone_region": "US",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": False,
|
||||
"name_case": "Title Case", "boolean_style": "True/False",
|
||||
},
|
||||
"european": {
|
||||
"date_format": "YYYY-MM-DD (ISO)", "date_order": "DMY (EU)",
|
||||
"phone_format": "International (+1 555-123-4567)", "phone_region": "DE",
|
||||
"currency_decimal": "comma (1.234,56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": True,
|
||||
"name_case": "Title Case", "boolean_style": "True/False",
|
||||
},
|
||||
"uk": {
|
||||
"date_format": "DD/MM/YYYY", "date_order": "DMY (EU)",
|
||||
"phone_format": "International (+1 555-123-4567)", "phone_region": "GB",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": False,
|
||||
"name_case": "Title Case", "boolean_style": "Yes/No",
|
||||
},
|
||||
"iso-strict": {
|
||||
"date_format": "YYYY-MM-DD (ISO)", "date_order": "MDY (US)",
|
||||
"phone_format": "E.164 (+15551234567)", "phone_region": "US",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 0,
|
||||
"currency_preserve_code": True,
|
||||
"name_case": "Title Case", "boolean_style": "true/false",
|
||||
},
|
||||
"legacy-us": {
|
||||
"date_format": "MM/DD/YYYY", "date_order": "MDY (US)",
|
||||
"phone_format": "National ((555) 123-4567)", "phone_region": "US",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": False,
|
||||
"name_case": "Title Case", "boolean_style": "Yes/No",
|
||||
},
|
||||
}
|
||||
|
||||
# ``iso-strict`` wants currency with no rounding; the GUI exposes that via
|
||||
# the "preserve original precision" checkbox rather than a sentinel value
|
||||
# in the number-input. Map that here.
|
||||
_PRESET_PRESERVE_DECIMALS: dict[str, bool] = {
|
||||
"iso-strict": True,
|
||||
}
|
||||
|
||||
|
||||
def _preset_default(key: str, fallback):
|
||||
"""Pull the preset-driven default for *key*, or *fallback* on Custom."""
|
||||
if preset_choice == "custom":
|
||||
return fallback
|
||||
return _PRESET_TO_WIDGETS[preset_choice].get(key, fallback)
|
||||
|
||||
|
||||
opt_cols = st.columns(2)
|
||||
with opt_cols[0]:
|
||||
st.markdown("**Dates**")
|
||||
_DATE_LABELS = ["YYYY-MM-DD (ISO)", "MM/DD/YYYY", "DD/MM/YYYY", "DD-Mon-YYYY", "Mon DD, YYYY"]
|
||||
date_format_label = st.selectbox(
|
||||
"Output format",
|
||||
_DATE_LABELS,
|
||||
index=_DATE_LABELS.index(_preset_default("date_format", "YYYY-MM-DD (ISO)")),
|
||||
key="fmtstd_date_format",
|
||||
)
|
||||
date_format_map = {
|
||||
"YYYY-MM-DD (ISO)": "%Y-%m-%d",
|
||||
"MM/DD/YYYY": "%m/%d/%Y",
|
||||
"DD/MM/YYYY": "%d/%m/%Y",
|
||||
"DD-Mon-YYYY": "%d-%b-%Y",
|
||||
"Mon DD, YYYY": "%b %d, %Y",
|
||||
}
|
||||
_DATE_ORDER_LABELS = ["MDY (US)", "DMY (EU)"]
|
||||
date_order = st.radio(
|
||||
"Ambiguous input order (e.g. 01/02/2024)",
|
||||
_DATE_ORDER_LABELS,
|
||||
index=_DATE_ORDER_LABELS.index(_preset_default("date_order", "MDY (US)")),
|
||||
horizontal=True,
|
||||
key="fmtstd_date_order",
|
||||
)
|
||||
|
||||
st.markdown("**Phones**")
|
||||
_PHONE_LABELS = [
|
||||
"E.164 (+15551234567)", "International (+1 555-123-4567)",
|
||||
"National ((555) 123-4567)", "Digits only",
|
||||
]
|
||||
phone_format_label = st.selectbox(
|
||||
"Output format",
|
||||
_PHONE_LABELS,
|
||||
index=_PHONE_LABELS.index(_preset_default("phone_format", "E.164 (+15551234567)")),
|
||||
key="fmtstd_phone_format",
|
||||
)
|
||||
phone_format_map = {
|
||||
"E.164 (+15551234567)": "E164",
|
||||
"International (+1 555-123-4567)": "INTERNATIONAL",
|
||||
"National ((555) 123-4567)": "NATIONAL",
|
||||
"Digits only": "DIGITS",
|
||||
}
|
||||
phone_region = st.text_input(
|
||||
"Default region (ISO-2)",
|
||||
value=_preset_default("phone_region", "US"),
|
||||
max_chars=2,
|
||||
help="Region used when the input has no country code. ``US``, ``GB``, ``DE``, etc.",
|
||||
key="fmtstd_phone_region",
|
||||
).upper() or "US"
|
||||
|
||||
with opt_cols[1]:
|
||||
st.markdown("**Currency**")
|
||||
_CURR_DECIMAL_LABELS = ["dot (1,234.56)", "comma (1.234,56)"]
|
||||
currency_decimal = st.radio(
|
||||
"Decimal separator in input",
|
||||
_CURR_DECIMAL_LABELS,
|
||||
index=_CURR_DECIMAL_LABELS.index(_preset_default("currency_decimal", "dot (1,234.56)")),
|
||||
horizontal=True,
|
||||
key="fmtstd_currency_decimal",
|
||||
)
|
||||
currency_decimals = st.number_input(
|
||||
"Round to decimals",
|
||||
min_value=0, max_value=8,
|
||||
value=int(_preset_default("currency_decimals", 2)),
|
||||
step=1,
|
||||
key="fmtstd_currency_decimals",
|
||||
)
|
||||
preserve_decimals = st.checkbox(
|
||||
"Preserve original precision (don't round)",
|
||||
value=_PRESET_PRESERVE_DECIMALS.get(preset_choice, False),
|
||||
key="fmtstd_currency_preserve",
|
||||
)
|
||||
currency_preserve_code = st.checkbox(
|
||||
"Preserve currency code (emit `USD 1234.56`, `EUR 99.00`, etc.)",
|
||||
value=bool(_preset_default("currency_preserve_code", False)),
|
||||
help=(
|
||||
"Detects an ISO 4217 code or symbol in the input ($/€/£/¥/USD/"
|
||||
"EUR/...) and re-emits it as a space-separated prefix on the "
|
||||
"standardized number. Cells without a currency marker emit "
|
||||
"just the number."
|
||||
),
|
||||
key="fmtstd_currency_preserve_code",
|
||||
)
|
||||
|
||||
st.markdown("**Names**")
|
||||
_NAME_CASE_LABELS = ["Title Case", "UPPER", "lower"]
|
||||
name_case_label = st.selectbox(
|
||||
"Casing",
|
||||
_NAME_CASE_LABELS,
|
||||
index=_NAME_CASE_LABELS.index(_preset_default("name_case", "Title Case")),
|
||||
key="fmtstd_name_case",
|
||||
)
|
||||
name_case_map = {"Title Case": "title", "UPPER": "upper", "lower": "lower"}
|
||||
|
||||
st.markdown("**Booleans**")
|
||||
_BOOL_LABELS = ["True/False", "true/false", "Yes/No", "Y/N", "1/0"]
|
||||
boolean_style = st.selectbox(
|
||||
"Output style",
|
||||
_BOOL_LABELS,
|
||||
index=_BOOL_LABELS.index(_preset_default("boolean_style", "True/False")),
|
||||
key="fmtstd_bool_style",
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Address abbreviations — custom rows extend/override the built-in USPS table
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Users with international addresses (German Strasse, Spanish-language
|
||||
# Avenida, French Boulevard variants) need to override the built-in
|
||||
# table. Show it in a data_editor so the override is visible — the table
|
||||
# is small, this is the right surface.
|
||||
|
||||
extra_abbreviations: dict[str, str] = {}
|
||||
if any(ft == FieldType.ADDRESS for ft in column_types.values()):
|
||||
with st.expander("Custom address abbreviations (advanced)", expanded=False):
|
||||
st.caption(
|
||||
"Add or override entries in the address abbreviation table. "
|
||||
"Each row maps a short form (case-insensitive, periods OK) to "
|
||||
"the long form the standardizer should emit. Built-in USPS "
|
||||
"Pub. 28 entries (`St` → `Street`, `Ave` → `Avenue`, …) apply "
|
||||
"automatically; rows here merge on top and can override them."
|
||||
)
|
||||
starter = pd.DataFrame(
|
||||
[
|
||||
{"abbreviation": "", "expansion": ""},
|
||||
{"abbreviation": "", "expansion": ""},
|
||||
{"abbreviation": "", "expansion": ""},
|
||||
]
|
||||
)
|
||||
edited = st.data_editor(
|
||||
starter,
|
||||
num_rows="dynamic",
|
||||
use_container_width=True,
|
||||
column_config={
|
||||
"abbreviation": st.column_config.TextColumn(
|
||||
"Short form",
|
||||
help="Case-insensitive, trailing period optional. e.g. ``Strasse``",
|
||||
),
|
||||
"expansion": st.column_config.TextColumn(
|
||||
"Long form",
|
||||
help="What the standardizer emits. e.g. ``Straße``",
|
||||
),
|
||||
},
|
||||
key="fmtstd_extra_abbrev",
|
||||
)
|
||||
for _, row in edited.iterrows():
|
||||
k = str(row.get("abbreviation") or "").strip()
|
||||
v = str(row.get("expansion") or "").strip()
|
||||
if k and v:
|
||||
extra_abbreviations[k] = v
|
||||
if extra_abbreviations:
|
||||
st.success(
|
||||
f"{len(extra_abbreviations)} custom mapping(s) will merge "
|
||||
"with the built-in table."
|
||||
)
|
||||
|
||||
options = StandardizeOptions(
|
||||
column_types=column_types,
|
||||
date_output_format=date_format_map[date_format_label],
|
||||
date_order="MDY" if date_order.startswith("MDY") else "DMY",
|
||||
phone_format=phone_format_map[phone_format_label], # type: ignore[arg-type]
|
||||
phone_region=phone_region,
|
||||
currency_decimal="dot" if currency_decimal.startswith("dot") else "comma",
|
||||
currency_decimals=None if preserve_decimals else int(currency_decimals),
|
||||
currency_preserve_code=currency_preserve_code,
|
||||
name_case=name_case_map[name_case_label], # type: ignore[arg-type]
|
||||
boolean_style=boolean_style, # type: ignore[arg-type]
|
||||
extra_abbreviations=extra_abbreviations,
|
||||
)
|
||||
with st.expander("Options", expanded=not _has_result):
|
||||
st.subheader("Column types")
|
||||
st.caption(
|
||||
"Assign each column to a field type. Auto-detected suggestions are "
|
||||
"pre-filled; pick **(skip)** to leave a column untouched."
|
||||
)
|
||||
|
||||
_FIELD_LABELS = {
|
||||
"(skip)": None,
|
||||
"Date": FieldType.DATE,
|
||||
"Phone": FieldType.PHONE,
|
||||
"Currency": FieldType.CURRENCY,
|
||||
"Name": FieldType.NAME,
|
||||
"Address": FieldType.ADDRESS,
|
||||
"Boolean": FieldType.BOOLEAN,
|
||||
}
|
||||
_LABEL_BY_TYPE = {v: k for k, v in _FIELD_LABELS.items()}
|
||||
_LABELS = list(_FIELD_LABELS.keys())
|
||||
|
||||
sample_size = min(len(df), 200)
|
||||
sample_df = df.head(sample_size)
|
||||
|
||||
cols_per_row = 3
|
||||
columns_iter = list(df.columns)
|
||||
for i in range(0, len(columns_iter), cols_per_row):
|
||||
cols_block = st.columns(cols_per_row)
|
||||
for j, col_name in enumerate(columns_iter[i:i + cols_per_row]):
|
||||
with cols_block[j]:
|
||||
detected = _detect_field_type(col_name, sample_df[col_name].tolist())
|
||||
default_label = _LABEL_BY_TYPE.get(detected, "(skip)")
|
||||
chosen = st.selectbox(
|
||||
col_name,
|
||||
_LABELS,
|
||||
index=_LABELS.index(default_label),
|
||||
key=f"fmtstd_type__{col_name}",
|
||||
)
|
||||
ft = _FIELD_LABELS[chosen]
|
||||
if ft is not None:
|
||||
column_types[col_name] = ft
|
||||
|
||||
st.divider()
|
||||
st.subheader("Format options")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Preset bundle picker
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Picking a preset rewrites every option below to that preset's defaults.
|
||||
# It does NOT touch column-type assignments — those are user-driven and
|
||||
# orthogonal. To make the rewrite stick across the rerun, we purge the
|
||||
# dependent per-option session keys on a preset switch; the widgets below
|
||||
# then fall back to the preset's defaults via ``index``/``value``.
|
||||
|
||||
_PRESET_LABELS = {
|
||||
"us-default": "US (default) — ISO 8601 dates · E.164 phones · USD",
|
||||
"european": "European — DMY input · INTL phones · EUR comma decimal",
|
||||
"uk": "UK — DD/MM/YYYY · GB phones · Yes/No booleans",
|
||||
"iso-strict": "ISO Strict — ISO 8601 · bare-number currency · true/false",
|
||||
"legacy-us": "Legacy US — MM/DD/YYYY · National phones · Yes/No",
|
||||
"custom": "Custom — keep current settings",
|
||||
}
|
||||
|
||||
preset_choice = st.radio(
|
||||
"Standards preset",
|
||||
list(_PRESET_LABELS.keys()),
|
||||
format_func=lambda k: _PRESET_LABELS[k],
|
||||
index=0,
|
||||
horizontal=False,
|
||||
key="fmtstd_preset",
|
||||
help=(
|
||||
"Pick a published standard or regional convention as the baseline. "
|
||||
"Every option below is still individually overridable; choose "
|
||||
"**Custom** to keep whatever you've manually adjusted."
|
||||
),
|
||||
)
|
||||
|
||||
# Detect a preset switch since the last rerun; when it changes (and the
|
||||
# new choice isn't ``custom``), purge the dependent widget keys so
|
||||
# Streamlit lets their ``index=``/``value=`` defaults take effect on the
|
||||
# new render. Without this clear, prior session_state pins the widget to
|
||||
# the previous preset's choice and the apparent picker becomes a no-op.
|
||||
_DEPENDENT_KEYS = [
|
||||
"fmtstd_date_format", "fmtstd_date_order",
|
||||
"fmtstd_phone_format", "fmtstd_phone_region",
|
||||
"fmtstd_currency_decimal", "fmtstd_currency_decimals",
|
||||
"fmtstd_currency_preserve", "fmtstd_currency_preserve_code",
|
||||
"fmtstd_name_case", "fmtstd_bool_style",
|
||||
]
|
||||
_last = st.session_state.get("fmtstd_preset_last")
|
||||
if _last != preset_choice:
|
||||
st.session_state["fmtstd_preset_last"] = preset_choice
|
||||
if preset_choice != "custom":
|
||||
for k in _DEPENDENT_KEYS:
|
||||
st.session_state.pop(k, None)
|
||||
st.rerun()
|
||||
|
||||
# Map preset → widget-state defaults. Done as labels so the radios/selects
|
||||
# below pick up the right index without us re-implementing each map twice.
|
||||
_PRESET_TO_WIDGETS: dict[str, dict[str, str]] = {
|
||||
"us-default": {
|
||||
"date_format": "YYYY-MM-DD (ISO)", "date_order": "MDY (US)",
|
||||
"phone_format": "E.164 (+15551234567)", "phone_region": "US",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": False,
|
||||
"name_case": "Title Case", "boolean_style": "True/False",
|
||||
},
|
||||
"european": {
|
||||
"date_format": "YYYY-MM-DD (ISO)", "date_order": "DMY (EU)",
|
||||
"phone_format": "International (+1 555-123-4567)", "phone_region": "DE",
|
||||
"currency_decimal": "comma (1.234,56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": True,
|
||||
"name_case": "Title Case", "boolean_style": "True/False",
|
||||
},
|
||||
"uk": {
|
||||
"date_format": "DD/MM/YYYY", "date_order": "DMY (EU)",
|
||||
"phone_format": "International (+1 555-123-4567)", "phone_region": "GB",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": False,
|
||||
"name_case": "Title Case", "boolean_style": "Yes/No",
|
||||
},
|
||||
"iso-strict": {
|
||||
"date_format": "YYYY-MM-DD (ISO)", "date_order": "MDY (US)",
|
||||
"phone_format": "E.164 (+15551234567)", "phone_region": "US",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 0,
|
||||
"currency_preserve_code": True,
|
||||
"name_case": "Title Case", "boolean_style": "true/false",
|
||||
},
|
||||
"legacy-us": {
|
||||
"date_format": "MM/DD/YYYY", "date_order": "MDY (US)",
|
||||
"phone_format": "National ((555) 123-4567)", "phone_region": "US",
|
||||
"currency_decimal": "dot (1,234.56)", "currency_decimals": 2,
|
||||
"currency_preserve_code": False,
|
||||
"name_case": "Title Case", "boolean_style": "Yes/No",
|
||||
},
|
||||
}
|
||||
|
||||
# ``iso-strict`` wants currency with no rounding; the GUI exposes that via
|
||||
# the "preserve original precision" checkbox rather than a sentinel value
|
||||
# in the number-input. Map that here.
|
||||
_PRESET_PRESERVE_DECIMALS: dict[str, bool] = {
|
||||
"iso-strict": True,
|
||||
}
|
||||
|
||||
|
||||
def _preset_default(key: str, fallback):
|
||||
"""Pull the preset-driven default for *key*, or *fallback* on Custom."""
|
||||
if preset_choice == "custom":
|
||||
return fallback
|
||||
return _PRESET_TO_WIDGETS[preset_choice].get(key, fallback)
|
||||
|
||||
|
||||
opt_cols = st.columns(2)
|
||||
with opt_cols[0]:
|
||||
st.markdown("**Dates**")
|
||||
_DATE_LABELS = ["YYYY-MM-DD (ISO)", "MM/DD/YYYY", "DD/MM/YYYY", "DD-Mon-YYYY", "Mon DD, YYYY"]
|
||||
date_format_label = st.selectbox(
|
||||
"Output format",
|
||||
_DATE_LABELS,
|
||||
index=_DATE_LABELS.index(_preset_default("date_format", "YYYY-MM-DD (ISO)")),
|
||||
key="fmtstd_date_format",
|
||||
)
|
||||
date_format_map = {
|
||||
"YYYY-MM-DD (ISO)": "%Y-%m-%d",
|
||||
"MM/DD/YYYY": "%m/%d/%Y",
|
||||
"DD/MM/YYYY": "%d/%m/%Y",
|
||||
"DD-Mon-YYYY": "%d-%b-%Y",
|
||||
"Mon DD, YYYY": "%b %d, %Y",
|
||||
}
|
||||
_DATE_ORDER_LABELS = ["MDY (US)", "DMY (EU)"]
|
||||
date_order = st.radio(
|
||||
"Ambiguous input order (e.g. 01/02/2024)",
|
||||
_DATE_ORDER_LABELS,
|
||||
index=_DATE_ORDER_LABELS.index(_preset_default("date_order", "MDY (US)")),
|
||||
horizontal=True,
|
||||
key="fmtstd_date_order",
|
||||
)
|
||||
|
||||
st.markdown("**Phones**")
|
||||
_PHONE_LABELS = [
|
||||
"E.164 (+15551234567)", "International (+1 555-123-4567)",
|
||||
"National ((555) 123-4567)", "Digits only",
|
||||
]
|
||||
phone_format_label = st.selectbox(
|
||||
"Output format",
|
||||
_PHONE_LABELS,
|
||||
index=_PHONE_LABELS.index(_preset_default("phone_format", "E.164 (+15551234567)")),
|
||||
key="fmtstd_phone_format",
|
||||
)
|
||||
phone_format_map = {
|
||||
"E.164 (+15551234567)": "E164",
|
||||
"International (+1 555-123-4567)": "INTERNATIONAL",
|
||||
"National ((555) 123-4567)": "NATIONAL",
|
||||
"Digits only": "DIGITS",
|
||||
}
|
||||
phone_region = st.text_input(
|
||||
"Default region (ISO-2)",
|
||||
value=_preset_default("phone_region", "US"),
|
||||
max_chars=2,
|
||||
help="Region used when the input has no country code. ``US``, ``GB``, ``DE``, etc.",
|
||||
key="fmtstd_phone_region",
|
||||
).upper() or "US"
|
||||
|
||||
with opt_cols[1]:
|
||||
st.markdown("**Currency**")
|
||||
_CURR_DECIMAL_LABELS = ["dot (1,234.56)", "comma (1.234,56)"]
|
||||
currency_decimal = st.radio(
|
||||
"Decimal separator in input",
|
||||
_CURR_DECIMAL_LABELS,
|
||||
index=_CURR_DECIMAL_LABELS.index(_preset_default("currency_decimal", "dot (1,234.56)")),
|
||||
horizontal=True,
|
||||
key="fmtstd_currency_decimal",
|
||||
)
|
||||
currency_decimals = st.number_input(
|
||||
"Round to decimals",
|
||||
min_value=0, max_value=8,
|
||||
value=int(_preset_default("currency_decimals", 2)),
|
||||
step=1,
|
||||
key="fmtstd_currency_decimals",
|
||||
)
|
||||
preserve_decimals = st.checkbox(
|
||||
"Preserve original precision (don't round)",
|
||||
value=_PRESET_PRESERVE_DECIMALS.get(preset_choice, False),
|
||||
key="fmtstd_currency_preserve",
|
||||
)
|
||||
currency_preserve_code = st.checkbox(
|
||||
"Preserve currency code (emit `USD 1234.56`, `EUR 99.00`, etc.)",
|
||||
value=bool(_preset_default("currency_preserve_code", False)),
|
||||
help=(
|
||||
"Detects an ISO 4217 code or symbol in the input ($/€/£/¥/USD/"
|
||||
"EUR/...) and re-emits it as a space-separated prefix on the "
|
||||
"standardized number. Cells without a currency marker emit "
|
||||
"just the number."
|
||||
),
|
||||
key="fmtstd_currency_preserve_code",
|
||||
)
|
||||
|
||||
st.markdown("**Names**")
|
||||
_NAME_CASE_LABELS = ["Title Case", "UPPER", "lower"]
|
||||
name_case_label = st.selectbox(
|
||||
"Casing",
|
||||
_NAME_CASE_LABELS,
|
||||
index=_NAME_CASE_LABELS.index(_preset_default("name_case", "Title Case")),
|
||||
key="fmtstd_name_case",
|
||||
)
|
||||
name_case_map = {"Title Case": "title", "UPPER": "upper", "lower": "lower"}
|
||||
|
||||
st.markdown("**Booleans**")
|
||||
_BOOL_LABELS = ["True/False", "true/false", "Yes/No", "Y/N", "1/0"]
|
||||
boolean_style = st.selectbox(
|
||||
"Output style",
|
||||
_BOOL_LABELS,
|
||||
index=_BOOL_LABELS.index(_preset_default("boolean_style", "True/False")),
|
||||
key="fmtstd_bool_style",
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Address abbreviations — custom rows extend/override the built-in USPS table
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Users with international addresses (German Strasse, Spanish-language
|
||||
# Avenida, French Boulevard variants) need to override the built-in
|
||||
# table. Show it in a data_editor so the override is visible — the table
|
||||
# is small, this is the right surface.
|
||||
|
||||
if any(ft == FieldType.ADDRESS for ft in column_types.values()):
|
||||
with st.expander("Custom address abbreviations (advanced)", expanded=False):
|
||||
st.caption(
|
||||
"Add or override entries in the address abbreviation table. "
|
||||
"Each row maps a short form (case-insensitive, periods OK) to "
|
||||
"the long form the standardizer should emit. Built-in USPS "
|
||||
"Pub. 28 entries (`St` → `Street`, `Ave` → `Avenue`, …) apply "
|
||||
"automatically; rows here merge on top and can override them."
|
||||
)
|
||||
starter = pd.DataFrame(
|
||||
[
|
||||
{"abbreviation": "", "expansion": ""},
|
||||
{"abbreviation": "", "expansion": ""},
|
||||
{"abbreviation": "", "expansion": ""},
|
||||
]
|
||||
)
|
||||
edited = st.data_editor(
|
||||
starter,
|
||||
num_rows="dynamic",
|
||||
use_container_width=True,
|
||||
column_config={
|
||||
"abbreviation": st.column_config.TextColumn(
|
||||
"Short form",
|
||||
help="Case-insensitive, trailing period optional. e.g. ``Strasse``",
|
||||
),
|
||||
"expansion": st.column_config.TextColumn(
|
||||
"Long form",
|
||||
help="What the standardizer emits. e.g. ``Straße``",
|
||||
),
|
||||
},
|
||||
key="fmtstd_extra_abbrev",
|
||||
)
|
||||
for _, row in edited.iterrows():
|
||||
k = str(row.get("abbreviation") or "").strip()
|
||||
v = str(row.get("expansion") or "").strip()
|
||||
if k and v:
|
||||
extra_abbreviations[k] = v
|
||||
if extra_abbreviations:
|
||||
st.success(
|
||||
f"{len(extra_abbreviations)} custom mapping(s) will merge "
|
||||
"with the built-in table."
|
||||
)
|
||||
|
||||
options = StandardizeOptions(
|
||||
column_types=column_types,
|
||||
date_output_format=date_format_map[date_format_label],
|
||||
date_order="MDY" if date_order.startswith("MDY") else "DMY",
|
||||
phone_format=phone_format_map[phone_format_label], # type: ignore[arg-type]
|
||||
phone_region=phone_region,
|
||||
currency_decimal="dot" if currency_decimal.startswith("dot") else "comma",
|
||||
currency_decimals=None if preserve_decimals else int(currency_decimals),
|
||||
currency_preserve_code=currency_preserve_code,
|
||||
name_case=name_case_map[name_case_label], # type: ignore[arg-type]
|
||||
boolean_style=boolean_style, # type: ignore[arg-type]
|
||||
extra_abbreviations=extra_abbreviations,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -528,6 +539,14 @@ if st.button(
|
||||
st.stop()
|
||||
st.session_state["fmtstd_result"] = result
|
||||
st.session_state["fmtstd_input_name"] = uploaded.name
|
||||
# One-shot flag picked up on the next pass to scroll the parent
|
||||
# document to the Results anchor (see scroll snippet below).
|
||||
st.session_state["_fmtstd_scroll_to_results"] = True
|
||||
# Force a second rerun so the preview and options expanders see
|
||||
# the new result on the NEXT script pass and collapse themselves.
|
||||
# Without this they stay expanded until the user touches any
|
||||
# other widget.
|
||||
st.rerun()
|
||||
|
||||
result = st.session_state.get("fmtstd_result")
|
||||
if result is None:
|
||||
@@ -538,6 +557,16 @@ if result is None:
|
||||
# Results
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Anchor target for the auto-scroll snippet at the end of this block.
|
||||
# A bare ``<div id="...">`` survives Streamlit's HTML sanitizer (only
|
||||
# ``<script>`` is stripped), and a 1px-tall div doesn't visually shift
|
||||
# anything. Placed before the subheader so the scrolled-to viewport
|
||||
# starts a few pixels above the section heading rather than below it.
|
||||
st.markdown(
|
||||
'<div id="fmtstd-results-anchor" style="height:1px"></div>',
|
||||
unsafe_allow_html=True,
|
||||
)
|
||||
|
||||
st.subheader("Results")
|
||||
|
||||
pct = (result.cells_changed / result.cells_total * 100.0) if result.cells_total else 0.0
|
||||
@@ -574,36 +603,83 @@ st.dataframe(result.standardized_df.head(10), use_container_width=True)
|
||||
# ---------------------------------------------------------------------------
# Downloads
# ---------------------------------------------------------------------------
#
# All three byte buffers are prepared up front (outside the columns) so
# each ``st.download_button`` sees stable ``data`` across reruns and an
# explicit ``key`` — without those, Streamlit auto-derived widget IDs
# can collide for multiple download_buttons in adjacent columns and
# only the first one actually fires on click. The empty-changes case
# renders a disabled button (rather than vanishing) so the layout stays
# steady and the user understands why nothing is available.
#
# Each buffer is computed exactly once and each button is rendered exactly
# once: recomputing the bytes inside a column, or emitting a second, keyless
# "changes audit" button, would defeat the purpose described above.

st.divider()

# Base name for the downloaded files, derived from the uploaded file's name
# that was stashed in session state when the run finished.
stem = Path(st.session_state.get("fmtstd_input_name", "input")).stem

# Evaluate the "nothing changed" condition once; it drives the payload, the
# disabled state, and the tooltip of the audit button.
no_changes = result.changes.empty

# ``utf-8-sig`` prepends a BOM to the CSV payloads.
standardized_bytes = result.standardized_df.to_csv(index=False).encode("utf-8-sig")
changes_bytes = (
    b"" if no_changes else result.changes.to_csv(index=False).encode("utf-8-sig")
)
config_bytes = json.dumps(options.to_dict(), indent=2).encode("utf-8")

dl_a, dl_b, dl_c = st.columns(3)

with dl_a:
    st.download_button(
        "Download standardized CSV",
        data=standardized_bytes,
        file_name=f"{stem}_standardized.csv",
        mime="text/csv",
        key="fmtstd_dl_standardized",
        use_container_width=True,
    )

with dl_b:
    # Always rendered; disabled with an explanatory tooltip when the run
    # produced no changes to audit.
    st.download_button(
        "Download changes audit",
        data=changes_bytes,
        file_name=f"{stem}_changes.csv",
        mime="text/csv",
        key="fmtstd_dl_changes",
        disabled=no_changes,
        help="No changes to audit." if no_changes else None,
        use_container_width=True,
    )

with dl_c:
    st.download_button(
        "Download config JSON",
        data=config_bytes,
        file_name="format_standardize_config.json",
        mime="application/json",
        key="fmtstd_dl_config",
        use_container_width=True,
    )
|
||||
|
||||
st.divider()
st.caption("Runs locally. Your data never leaves this computer. | DataTools v3.0")

# ---------------------------------------------------------------------------
# Post-run auto-scroll
# ---------------------------------------------------------------------------
#
# After a successful run the preview and options expanders collapse, but
# Streamlit does not move the viewport on its own, and the Results section
# sits far down a tall page. To bring it into view we inject a zero-height
# component-html iframe whose script calls ``scrollIntoView`` on the Results
# anchor in the parent document. Component iframes are same-origin with the
# main Streamlit page, so reaching ``window.parent.document`` is permitted.
#
# The session-state flag is consumed with ``pop`` so the scroll happens once
# per run: reruns caused by unrelated widgets in the Results section will not
# drag the viewport back to the top of Results.
_scroll_requested = st.session_state.pop("_fmtstd_scroll_to_results", False)
if _scroll_requested:
    from streamlit.components.v1 import html as _components_html

    _scroll_snippet = """
        <script>
          const doc = window.parent.document;
          const target = doc.getElementById('fmtstd-results-anchor');
          if (target) target.scrollIntoView({behavior: 'smooth', block: 'start'});
        </script>
        """
    _components_html(_scroll_snippet, height=0)
|
||||
|
||||
Reference in New Issue
Block a user