feat(gui): visualize leading/trailing whitespace in analyzer findings

The analyzer's "Run Analysis" panel rendered sample cells via st.dataframe,
which (a) silently collapses leading/trailing ASCII whitespace and (b)
displays NBSP/ZWSP/control chars as nothing. The user couldn't see the
exact pollution they were being told about.

visualize_hidden_html gains a keyword-only mark_outer_whitespace option
(default False). When enabled, it wraps each leading and trailing ASCII
space/tab in its own badge: spaces get a "SP LEAD" / "SP TRAIL" tooltip,
tabs get a "TAB" tooltip. The badges are per-character so the user can
count exactly how much padding the cleaner will strip.

components.render_findings_panel now:
  - injects hidden_char_css() once at the top of the panel
  - replaces st.dataframe(samples) with a custom HTML table
  - renders the column-name and value cells with mark_outer_whitespace=True
  - applies white-space: pre-wrap on value cells so any internal ASCII
    whitespace also stays visible (browsers collapse runs by default)

Four new tests cover: leading+trailing badge counts, default-off
behaviour, leading tab badge, all-whitespace string treated entirely
as leading.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-29 16:21:39 +00:00
parent e12615357d
commit 1049c033cb
3 changed files with 126 additions and 6 deletions

View File

@@ -680,7 +680,7 @@ def visualize_hidden_text(s: str) -> str:
return "".join(out) return "".join(out)
def visualize_hidden_html(s: str) -> str: def visualize_hidden_html(s: str, *, mark_outer_whitespace: bool = False) -> str:
"""Return an HTML rendering of *s* with hidden characters highlighted. """Return an HTML rendering of *s* with hidden characters highlighted.
Each invisible/control/smart character is wrapped in a ``<span>`` with Each invisible/control/smart character is wrapped in a ``<span>`` with
@@ -688,13 +688,50 @@ def visualize_hidden_html(s: str) -> str:
so the user gets a tooltip on hover. ASCII printable text is HTML- so the user gets a tooltip on hover. ASCII printable text is HTML-
escaped but otherwise left as-is. escaped but otherwise left as-is.
When *mark_outer_whitespace* is True, leading and trailing runs of
plain ASCII space and tab are also wrapped in highlight spans. This
is essential for analyzer/audit views where browsers would otherwise
silently collapse the leading/trailing space and the user would never
see the padding the cleaner is going to strip.
Pair with :func:`hidden_char_css` to inject the matching styles into Pair with :func:`hidden_char_css` to inject the matching styles into
the page. the page.
""" """
if not isinstance(s, str): if not isinstance(s, str):
return "" return ""
parts: list[str] = []
for ch in s: leading = ""
trailing = ""
body = s
if mark_outer_whitespace and s:
i = 0
while i < len(body) and body[i] in (" ", "\t"):
i += 1
leading = body[:i]
body = body[i:]
j = len(body)
while j > 0 and body[j - 1] in (" ", "\t"):
j -= 1
trailing = body[j:]
body = body[:j]
def _render_outer(run: str, label: str) -> str:
if not run:
return ""
# Each character rendered as a discrete badge so the user sees the
# exact count of leading/trailing chars, not a single fused block.
out: list[str] = []
for ch in run:
glyph = "" if ch == "\t" else "·"
char_label = "TAB" if ch == "\t" else f"SP {label}"
out.append(
f'<span class="hidden-char hidden-whitespace" '
f'title="U+{ord(ch):04X} {char_label}">{glyph}</span>'
)
return "".join(out)
parts: list[str] = [_render_outer(leading, "LEAD")]
for ch in body:
mapped = _VISIBLE_CHAR_MAP.get(ch) mapped = _VISIBLE_CHAR_MAP.get(ch)
if mapped is not None: if mapped is not None:
glyph, label = mapped glyph, label = mapped
@@ -721,6 +758,7 @@ def visualize_hidden_html(s: str) -> str:
parts.append("&gt;") parts.append("&gt;")
else: else:
parts.append(ch) parts.append(ch)
parts.append(_render_outer(trailing, "TRAIL"))
return "".join(parts) return "".join(parts)

View File

@@ -750,11 +750,16 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
the user can decide which tool to open first. the user can decide which tool to open first.
""" """
from src.core.analyze import findings_by_tool # local import to avoid cycle from src.core.analyze import findings_by_tool # local import to avoid cycle
from src.core.text_clean import hidden_char_css
if not findings: if not findings:
st.success("No issues detected. Open any tool below to start working.") st.success("No issues detected. Open any tool below to start working.")
return return
# Inject the hidden-char badge styles once so every sample value below
# can render leading/trailing whitespace and invisibles as visible badges.
st.markdown(hidden_char_css() + _SAMPLE_TABLE_CSS, unsafe_allow_html=True)
by_sev: dict[str, int] = {} by_sev: dict[str, int] = {}
for f in findings: for f in findings:
by_sev[f.severity] = by_sev.get(f.severity, 0) + 1 by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
@@ -792,7 +797,35 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
_render_one_finding(f) _render_one_finding(f)
# Stylesheet for the per-finding HTML sample table (elements carrying the
# `findings-sample-table` class, with `td.value` for the value cell).
# render_findings_panel concatenates it with hidden_char_css() and injects
# the result through st.markdown(..., unsafe_allow_html=True).
_SAMPLE_TABLE_CSS = """
<style>
.findings-sample-table {
width: 100%;
border-collapse: collapse;
font-size: 0.9em;
}
.findings-sample-table th,
.findings-sample-table td {
padding: 4px 8px;
border-bottom: 1px solid #eee;
text-align: left;
vertical-align: top;
}
.findings-sample-table td.value {
font-family: ui-monospace, SFMono-Regular, monospace;
/* pre-wrap so any ASCII whitespace inside the value is preserved
visually (browsers collapse adjacent spaces by default). */
white-space: pre-wrap;
word-break: break-word;
}
.findings-sample-table tbody tr:hover { background: #fafafa; }
</style>
"""
def _render_one_finding(f) -> None: def _render_one_finding(f) -> None:
from src.core.text_clean import visualize_hidden_html
color = _SEVERITY_COLOR[f.severity] color = _SEVERITY_COLOR[f.severity]
icon = _SEVERITY_ICON[f.severity] icon = _SEVERITY_ICON[f.severity]
column_part = f" in `{f.column}`" if getattr(f, "column", None) else "" column_part = f" in `{f.column}`" if getattr(f, "column", None) else ""
@@ -800,10 +833,34 @@ def _render_one_finding(f) -> None:
f"{icon} :{color}[**{f.id}**]{column_part}{f.description}" f"{icon} :{color}[**{f.id}**]{column_part}{f.description}"
) )
if f.samples: if f.samples:
sample_df = pd.DataFrame( # Render samples as an HTML table so leading/trailing whitespace
f.samples, columns=["row", "column", "value"], # and invisible characters in the value column show up as badges.
# A plain st.dataframe collapses outer whitespace and renders
# NBSP/ZWSP as nothing, defeating the point of the audit.
rows_html = []
for row, col, value in f.samples:
rendered_value = visualize_hidden_html(
str(value), mark_outer_whitespace=True,
)
rendered_col = visualize_hidden_html(
str(col), mark_outer_whitespace=True,
)
rows_html.append(
"<tr>"
f"<td>{int(row) + 1 if isinstance(row, int) else row}</td>"
f"<td><code>{rendered_col}</code></td>"
f"<td class='value'>{rendered_value}</td>"
"</tr>"
)
st.markdown(
"<table class='findings-sample-table'>"
"<thead><tr>"
"<th>Row</th><th>Column</th><th>Value</th>"
"</tr></thead>"
f"<tbody>{''.join(rows_html)}</tbody>"
"</table>",
unsafe_allow_html=True,
) )
st.dataframe(sample_df, use_container_width=True, hide_index=True)
def upload_and_analyze_section() -> None: def upload_and_analyze_section() -> None:

View File

@@ -539,3 +539,28 @@ class TestVisualizeHidden:
from src.core.text_clean import visualize_hidden_text, visualize_hidden_html from src.core.text_clean import visualize_hidden_text, visualize_hidden_html
assert visualize_hidden_text(None) is None # type: ignore[arg-type] assert visualize_hidden_text(None) is None # type: ignore[arg-type]
assert visualize_hidden_html(None) == "" assert visualize_hidden_html(None) == ""
def test_html_marks_leading_trailing_ascii_space(self):
    """Every outer ASCII space gets its own LEAD / TRAIL badge."""
    from src.core.text_clean import visualize_hidden_html

    # Build the padding explicitly: the assertions below need exactly two
    # spaces on each side, and a literal with a run of spaces is easy to
    # lose to whitespace normalisation.
    padded = " " * 2 + "Alice" + " " * 2
    out = visualize_hidden_html(padded, mark_outer_whitespace=True)
    # Two leading and two trailing space badges
    assert out.count("SP LEAD") == 2
    assert out.count("SP TRAIL") == 2
    # Inner "Alice" untouched
    assert "Alice" in out
def test_html_default_does_not_mark_outer_ascii_space(self):
    """Outer spaces are not badged unless mark_outer_whitespace is set."""
    from src.core.text_clean import visualize_hidden_html

    rendered = visualize_hidden_html(" Alice ")
    # Neither position tag may appear when the option is left at its default.
    for marker in ("SP LEAD", "SP TRAIL"):
        assert marker not in rendered
def test_html_marks_leading_tab(self):
    """A leading tab is rendered as an outer-whitespace badge."""
    from src.core.text_clean import visualize_hidden_html

    out = visualize_hidden_html("\tAlice", mark_outer_whitespace=True)
    assert "TAB" in out  # tab gets a badge
    # Pin the badge's actual tooltip (code point plus label) so the test
    # cannot be satisfied by an unrelated occurrence of "TAB".
    assert 'title="U+0009 TAB"' in out
def test_html_only_whitespace_string_marked_as_leading(self):
    """An all-space value is consumed entirely by the leading run."""
    from src.core.text_clean import visualize_hidden_html

    # Three spaces, built explicitly so the count matches the assertion
    # even if runs of spaces in a literal get collapsed.
    out = visualize_hidden_html(" " * 3, mark_outer_whitespace=True)
    # All three chars treated as leading; trailing run is empty.
    assert out.count("SP LEAD") == 3
    assert "SP TRAIL" not in out