feat(gui): hidden-char-aware preview tables in Text Cleaner
The Text Cleaner had two st.dataframe previews — the initial upload
preview ("Preview: filename") and the post-clean "Cleaned preview"
table — that both rendered cells with the same browser-collapses-
whitespace, hides-invisibles problem the analyzer findings panel had
before commit 1049c03.
components.render_hidden_aware_preview(df, n_rows, caption) renders a
DataFrame as an HTML table where:
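- every string cell goes through visualize_hidden_html(mark_outer_whitespace=True),
so leading/trailing ASCII spaces appear as per-character "·" badges
(non-string scalars are stringified and visualized without outer-whitespace marking)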
- white-space: pre-wrap on every cell preserves internal multi-space
runs and embedded newlines visually
- headers route through the same visualizer so dirty column names
(NBSP padding, ZWSP, smart quotes) show their badges too
- NaN cells render as a faint "NaN" placeholder
- the header row is sticky and the table scrolls inside a container capped
at 26rem, so a 10-row preview doesn't push the rest of the UI off
screen
2_Text_Cleaner.py wires it into both previews:
- The upload preview gains its own "Show hidden characters in preview"
toggle (default on).
- The cleaned preview reuses the existing show_hidden toggle that
already governs the Examples changes table, so one switch controls
the whole results section.
Turning either toggle off falls back to the original st.dataframe view for that preview.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -797,6 +797,111 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
|
||||
_render_one_finding(f)
|
||||
|
||||
|
||||
_PREVIEW_TABLE_CSS = """
|
||||
<style>
|
||||
.hidden-aware-preview {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
.hidden-aware-preview th,
|
||||
.hidden-aware-preview td {
|
||||
padding: 4px 8px;
|
||||
border: 1px solid #eee;
|
||||
text-align: left;
|
||||
vertical-align: top;
|
||||
font-family: ui-monospace, SFMono-Regular, monospace;
|
||||
/* pre-wrap so internal ASCII whitespace and embedded newlines render
|
||||
as the user wrote them; otherwise browsers collapse adjacent spaces. */
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
max-width: 32em;
|
||||
}
|
||||
.hidden-aware-preview thead th {
|
||||
background: #f6f8fa;
|
||||
position: sticky;
|
||||
top: 0;
|
||||
}
|
||||
.hidden-aware-preview tbody tr:nth-child(even) { background: #fafafa; }
|
||||
.hidden-aware-preview .row-num {
|
||||
color: #888;
|
||||
font-family: inherit;
|
||||
background: #f6f8fa;
|
||||
text-align: right;
|
||||
}
|
||||
.hidden-aware-preview-wrap {
|
||||
max-height: 26rem;
|
||||
overflow: auto;
|
||||
border: 1px solid #eee;
|
||||
border-radius: 4px;
|
||||
}
|
||||
</style>
|
||||
"""
|
||||
|
||||
|
||||
def render_hidden_aware_preview(
    df,
    *,
    n_rows: int = 10,
    caption: str | None = None,
) -> None:
    """Render a DataFrame preview that shows hidden characters in every cell.

    Used for the Text Cleaner's "before" and "after" previews so the user
    can actually see the leading/trailing whitespace, NBSP padding,
    zero-width characters, and smart punctuation that the cleaner is going
    to remove (or just removed). A plain ``st.dataframe`` collapses outer
    ASCII whitespace and renders invisibles as nothing, defeating the
    point of a preview in a cleanup tool.

    Headers and string cell values are routed through
    :func:`visualize_hidden_html` with ``mark_outer_whitespace=True``;
    other non-missing values are stringified and passed through the same
    visualizer without outer-whitespace marking.

    Args:
        df: DataFrame to preview. ``None`` or an empty frame shows an info
            message instead of a table.
        n_rows: Maximum number of leading rows to render.
        caption: Optional caption shown above the table.

    Returns:
        None. Output is written to the page through ``st``.
    """
    import pandas as pd
    from src.core.text_clean import hidden_char_css, visualize_hidden_html

    if df is None or len(df) == 0:
        st.info("No rows to preview.")
        return

    sliced = df.head(n_rows) if len(df) > n_rows else df

    def _cell_html(value) -> str:
        """Return the HTML fragment for a single cell value."""
        if isinstance(value, str):
            return visualize_hidden_html(value, mark_outer_whitespace=True)
        # pd.isna() returns an array for list-like values, which cannot be
        # used in a boolean context; only treat true scalars as missing.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return "<span style='color:#aaa'>NaN</span>"
        # Non-string values (numerics, bools, containers) just stringify;
        # the visualizer is still used so the text is escaped consistently
        # with string cells (assumes it HTML-escapes its input — confirm in
        # src.core.text_clean).
        return visualize_hidden_html(str(value))

    st.markdown(hidden_char_css() + _PREVIEW_TABLE_CSS, unsafe_allow_html=True)
    if caption:
        st.caption(caption)

    header_cells = "".join(
        f"<th>{visualize_hidden_html(str(c), mark_outer_whitespace=True)}</th>"
        for c in sliced.columns
    )

    # Iterate positionally with itertuples rather than iterrows + row[col]:
    #  * duplicate column names would make row[col] return a Series and
    #    break the missing-value check;
    #  * iterrows upcasts each row to one dtype, so ints in a frame that
    #    also has float columns would be displayed as "1.0".
    body_rows: list[str] = []
    for row_num, values in enumerate(
        sliced.itertuples(index=False, name=None), start=1
    ):
        cells = [f"<td class='row-num'>{row_num}</td>"]
        cells.extend(f"<td>{_cell_html(value)}</td>" for value in values)
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    st.markdown(
        "<div class='hidden-aware-preview-wrap'>"
        "<table class='hidden-aware-preview'>"
        f"<thead><tr><th class='row-num'>#</th>{header_cells}</tr></thead>"
        f"<tbody>{''.join(body_rows)}</tbody>"
        "</table>"
        "</div>",
        unsafe_allow_html=True,
    )
|
||||
|
||||
|
||||
_SAMPLE_TABLE_CSS = """
|
||||
<style>
|
||||
.findings-sample-table {
|
||||
|
||||
@@ -14,7 +14,11 @@ _project_root = Path(__file__).resolve().parent.parent.parent.parent
|
||||
if str(_project_root) not in sys.path:
|
||||
sys.path.insert(0, str(_project_root))
|
||||
|
||||
from src.gui.components import hide_streamlit_chrome, pickup_or_upload
|
||||
from src.gui.components import (
|
||||
hide_streamlit_chrome,
|
||||
pickup_or_upload,
|
||||
render_hidden_aware_preview,
|
||||
)
|
||||
from src.core.text_clean import (
|
||||
PRESETS,
|
||||
CleanOptions,
|
||||
@@ -81,7 +85,16 @@ except Exception as e:
|
||||
|
||||
st.subheader(f"Preview: {uploaded.name}")
|
||||
st.caption(f"{len(df)} rows, {len(df.columns)} columns")
|
||||
st.dataframe(df.head(10), use_container_width=True)
|
||||
preview_show_hidden = st.toggle(
|
||||
"Show hidden characters in preview",
|
||||
value=True,
|
||||
help="Highlights NBSP, zero-width chars, smart quotes, and leading/trailing whitespace.",
|
||||
key="textclean_preview_show_hidden",
|
||||
)
|
||||
if preview_show_hidden:
|
||||
render_hidden_aware_preview(df, n_rows=10)
|
||||
else:
|
||||
st.dataframe(df.head(10), use_container_width=True)
|
||||
|
||||
st.divider()
|
||||
|
||||
@@ -257,7 +270,12 @@ if result.cells_changed:
|
||||
st.dataframe(examples, use_container_width=True, hide_index=True)
|
||||
|
||||
st.markdown("**Cleaned preview (first 10 rows)**")
|
||||
st.dataframe(result.cleaned_df.head(10), use_container_width=True)
|
||||
# Reuse the same toggle the Examples table uses so the user controls both
|
||||
# the changes audit and the cleaned preview with one switch.
|
||||
if show_hidden:
|
||||
render_hidden_aware_preview(result.cleaned_df, n_rows=10)
|
||||
else:
|
||||
st.dataframe(result.cleaned_df.head(10), use_container_width=True)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Downloads
|
||||
|
||||
Reference in New Issue
Block a user