# NOTE(review): the style rules that belong in this constant are not visible
# in the reviewed text (only the surrounding blank lines survived); confirm
# against the real file before relying on it.
_PREVIEW_TABLE_CSS = """

"""


def render_hidden_aware_preview(
    df,
    *,
    n_rows: int = 10,
    caption: str | None = None,
) -> None:
    """Render a DataFrame preview that shows hidden characters in every cell.

    Used for the Text Cleaner's "before" and "after" previews so the user
    can actually see the leading/trailing whitespace, NBSP padding,
    zero-width characters, and smart punctuation that the cleaner is going
    to remove (or just removed). A plain ``st.dataframe`` collapses outer
    ASCII whitespace and renders invisibles as nothing, defeating the
    point of a preview in a cleanup tool.

    Headers and string cell values are both routed through
    :func:`visualize_hidden_html` with ``mark_outer_whitespace=True``.
    Missing scalar values are shown as the literal text ``NaN``; every other
    value is stringified and passed through :func:`visualize_hidden_html`
    with its default options.

    Args:
        df: The DataFrame to preview. ``None`` or an empty frame shows an
            informational message instead of a table.
        n_rows: Number of leading rows to show; passed to ``df.head``.
        caption: Optional caption rendered above the table.

    Returns:
        None. The preview is written to the Streamlit page.
    """
    import pandas as pd
    from src.core.text_clean import hidden_char_css, visualize_hidden_html

    if df is None or len(df) == 0:
        st.info("No rows to preview.")
        return

    # ``head`` already returns every row when the frame is shorter than
    # ``n_rows``, so no explicit length comparison is needed.
    sliced = df.head(n_rows)

    st.markdown(hidden_char_css() + _PREVIEW_TABLE_CSS, unsafe_allow_html=True)
    if caption:
        st.caption(caption)

    # NOTE(review): the HTML tags in the literals below were missing from the
    # reviewed text. Only bare structural table tags are restored here; any
    # class attributes the stylesheet expects must be re-added from the real
    # file.
    header_cells = "".join(
        f"<th>{visualize_hidden_html(str(label), mark_outer_whitespace=True)}</th>"
        for label in sliced.columns
    )

    body_rows: list[str] = []
    # ``itertuples`` walks the frame column-wise, so each value keeps its own
    # column's dtype (``iterrows`` would upcast an int next to a float column
    # and show ``1.0``), and values are taken by position, so duplicate column
    # labels cannot turn a cell into a Series.
    row_values = sliced.itertuples(index=False, name=None)
    for row_number, values in enumerate(row_values, start=1):
        cells = ["<td>" + str(row_number) + "</td>"]
        for value in values:
            if isinstance(value, str):
                rendered = visualize_hidden_html(value, mark_outer_whitespace=True)
            elif pd.api.types.is_scalar(value) and pd.isna(value):
                # The scalar guard matters: ``pd.isna`` on a list-like cell
                # returns an array whose truth value is ambiguous.
                rendered = "NaN"
            else:
                # Non-string values (numerics, bools, containers) just
                # stringify; they are still routed through the visualizer,
                # which the original author relied on for HTML escaping.
                rendered = visualize_hidden_html(str(value))
            cells.append(f"<td>{rendered}</td>")
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    st.markdown(
        "<div>"
        "<table><thead><tr><th>#</th>"
        f"{header_cells}</tr></thead>"
        f"<tbody>{''.join(body_rows)}</tbody>"
        "</table>"
        "</div>",
        unsafe_allow_html=True,
    )