From 1049c033cb933fbed8835a94dd998b617ca7ff44 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 29 Apr 2026 16:21:39 +0000 Subject: [PATCH] feat(gui): visualize leading/trailing whitespace in analyzer findings The analyzer's "Run Analysis" panel rendered sample cells via st.dataframe, which (a) silently collapses leading/trailing ASCII whitespace and (b) displays NBSP/ZWSP/control chars as nothing. The user couldn't see the exact pollution they were being told about. visualize_hidden_html gains a mark_outer_whitespace=True option that wraps each leading and trailing ASCII space/tab in its own badge with a "SP LEAD" / "SP TRAIL" tooltip. The badges are per-character so the user can count exactly how much padding the cleaner will strip. components.render_findings_panel now: - injects hidden_char_css() once at the top of the panel - replaces st.dataframe(samples) with a custom HTML table - renders the value column with mark_outer_whitespace=True - applies white-space: pre-wrap on value cells so any internal ASCII whitespace also stays visible (browsers collapse runs by default) Four new tests cover: leading+trailing badge counts, default-off behaviour, leading tab badge, all-whitespace string treated entirely as leading. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/core/text_clean.py | 44 ++++++++++++++++++++++++++-- src/gui/components.py | 63 ++++++++++++++++++++++++++++++++++++++-- tests/test_text_clean.py | 25 ++++++++++++++++ 3 files changed, 126 insertions(+), 6 deletions(-) diff --git a/src/core/text_clean.py b/src/core/text_clean.py index 4eb2fc4..e7fbc71 100644 --- a/src/core/text_clean.py +++ b/src/core/text_clean.py @@ -680,7 +680,7 @@ def visualize_hidden_text(s: str) -> str: return "".join(out) -def visualize_hidden_html(s: str) -> str: +def visualize_hidden_html(s: str, *, mark_outer_whitespace: bool = False) -> str: """Return an HTML rendering of *s* with hidden characters highlighted. 
Each invisible/control/smart character is wrapped in a ``<span>`` with @@ -688,13 +688,50 @@ def visualize_hidden_html(s: str) -> str: so the user gets a tooltip on hover. ASCII printable text is HTML- escaped but otherwise left as-is. + When *mark_outer_whitespace* is True, leading and trailing runs of + plain ASCII space and tab are also wrapped in highlight spans. This + is essential for analyzer/audit views where browsers would otherwise + silently collapse the leading/trailing space and the user would never + see the padding the cleaner is going to strip. + Pair with :func:`hidden_char_css` to inject the matching styles into the page. """ if not isinstance(s, str): return "" - parts: list[str] = [] - for ch in s: + + leading = "" + trailing = "" + body = s + if mark_outer_whitespace and s: + i = 0 + while i < len(body) and body[i] in (" ", "\t"): + i += 1 + leading = body[:i] + body = body[i:] + j = len(body) + while j > 0 and body[j - 1] in (" ", "\t"): + j -= 1 + trailing = body[j:] + body = body[:j] + + def _render_outer(run: str, label: str) -> str: + if not run: + return "" + # Each character rendered as a discrete badge so the user sees the + # exact count of leading/trailing chars, not a single fused block. 
+ out: list[str] = [] + for ch in run: + glyph = "→" if ch == "\t" else "·" + char_label = "TAB" if ch == "\t" else f"SP {label}" + out.append( + f'{glyph}' + ) + return "".join(out) + + parts: list[str] = [_render_outer(leading, "LEAD")] + for ch in body: mapped = _VISIBLE_CHAR_MAP.get(ch) if mapped is not None: glyph, label = mapped @@ -721,6 +758,7 @@ def visualize_hidden_html(s: str) -> str: parts.append(">") else: parts.append(ch) + parts.append(_render_outer(trailing, "TRAIL")) return "".join(parts) diff --git a/src/gui/components.py b/src/gui/components.py index 0ca4739..25641d1 100644 --- a/src/gui/components.py +++ b/src/gui/components.py @@ -750,11 +750,16 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None: the user can decide which tool to open first. """ from src.core.analyze import findings_by_tool # local import to avoid cycle + from src.core.text_clean import hidden_char_css if not findings: st.success("No issues detected. Open any tool below to start working.") return + # Inject the hidden-char badge styles once so every sample value below + # can render leading/trailing whitespace and invisibles as visible badges. 
+ st.markdown(hidden_char_css() + _SAMPLE_TABLE_CSS, unsafe_allow_html=True) + by_sev: dict[str, int] = {} for f in findings: by_sev[f.severity] = by_sev.get(f.severity, 0) + 1 @@ -792,7 +797,35 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None: _render_one_finding(f) +_SAMPLE_TABLE_CSS = """ + +""" + + def _render_one_finding(f) -> None: + from src.core.text_clean import visualize_hidden_html + color = _SEVERITY_COLOR[f.severity] icon = _SEVERITY_ICON[f.severity] column_part = f" in `{f.column}`" if getattr(f, "column", None) else "" @@ -800,10 +833,34 @@ def _render_one_finding(f) -> None: f"{icon} :{color}[**{f.id}**]{column_part} — {f.description}" ) if f.samples: - sample_df = pd.DataFrame( - f.samples, columns=["row", "column", "value"], + # Render samples as an HTML table so leading/trailing whitespace + # and invisible characters in the value column show up as badges. + # A plain st.dataframe collapses outer whitespace and renders + # NBSP/ZWSP as nothing, defeating the point of the audit. + rows_html = [] + for row, col, value in f.samples: + rendered_value = visualize_hidden_html( + str(value), mark_outer_whitespace=True, + ) + rendered_col = visualize_hidden_html( + str(col), mark_outer_whitespace=True, + ) + rows_html.append( + "" + f"{int(row) + 1 if isinstance(row, int) else row}" + f"{rendered_col}" + f"{rendered_value}" + "" + ) + st.markdown( + "" + "" + "" + "" + f"{''.join(rows_html)}" + "
RowColumnValue
", + unsafe_allow_html=True, ) - st.dataframe(sample_df, use_container_width=True, hide_index=True) def upload_and_analyze_section() -> None: diff --git a/tests/test_text_clean.py b/tests/test_text_clean.py index 4c88cba..6d30fa1 100644 --- a/tests/test_text_clean.py +++ b/tests/test_text_clean.py @@ -539,3 +539,28 @@ class TestVisualizeHidden: from src.core.text_clean import visualize_hidden_text, visualize_hidden_html assert visualize_hidden_text(None) is None # type: ignore[arg-type] assert visualize_hidden_html(None) == "" + def test_html_marks_leading_trailing_ascii_space(self): + from src.core.text_clean import visualize_hidden_html + out = visualize_hidden_html(" Alice ", mark_outer_whitespace=True) + # Two leading and two trailing space badges + assert out.count("SP LEAD") == 2 + assert out.count("SP TRAIL") == 2 + # Inner "Alice" untouched + assert "Alice" in out + + def test_html_default_does_not_mark_outer_ascii_space(self): + from src.core.text_clean import visualize_hidden_html + out = visualize_hidden_html(" Alice ") + assert "SP LEAD" not in out and "SP TRAIL" not in out + + def test_html_marks_leading_tab(self): + from src.core.text_clean import visualize_hidden_html + out = visualize_hidden_html("\tAlice", mark_outer_whitespace=True) + assert "TAB" in out # tab gets a badge + + def test_html_only_whitespace_string_marked_as_leading(self): + from src.core.text_clean import visualize_hidden_html + out = visualize_hidden_html(" ", mark_outer_whitespace=True) + # All three chars treated as leading; trailing run is empty. + assert out.count("SP LEAD") == 3 +