feat(gui): visualize leading/trailing whitespace in analyzer findings

The analyzer's "Run Analysis" panel rendered sample cells via st.dataframe,
which (a) silently collapses leading/trailing ASCII whitespace and (b)
displays NBSP/ZWSP/control chars as nothing. The user couldn't see the
exact pollution they were being told about.

visualize_hidden_html gains a keyword-only mark_outer_whitespace option
(default False). When enabled, it wraps each leading and trailing ASCII
space/tab in its own badge: spaces get a "SP LEAD" / "SP TRAIL" tooltip
and tabs get a "TAB" tooltip. The badges are per-character so the user
can count exactly how much padding the cleaner will strip.

components.render_findings_panel now:
  - injects hidden_char_css() once at the top of the panel
  - replaces st.dataframe(samples) with a custom HTML table
  - renders the column and value cells with mark_outer_whitespace=True
  - applies white-space: pre-wrap on value cells so any internal ASCII
    whitespace also stays visible (browsers collapse runs by default)

Four new tests cover: leading+trailing badge counts, default-off
behaviour, leading tab badge, all-whitespace string treated entirely
as leading.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-29 16:21:39 +00:00
parent e12615357d
commit 1049c033cb
3 changed files with 126 additions and 6 deletions

View File

@@ -680,7 +680,7 @@ def visualize_hidden_text(s: str) -> str:
return "".join(out)
def visualize_hidden_html(s: str) -> str:
def visualize_hidden_html(s: str, *, mark_outer_whitespace: bool = False) -> str:
"""Return an HTML rendering of *s* with hidden characters highlighted.
Each invisible/control/smart character is wrapped in a ``<span>`` with
@@ -688,13 +688,50 @@ def visualize_hidden_html(s: str) -> str:
so the user gets a tooltip on hover. ASCII printable text is HTML-
escaped but otherwise left as-is.
When *mark_outer_whitespace* is True, leading and trailing runs of
plain ASCII space and tab are also wrapped in highlight spans. This
is essential for analyzer/audit views where browsers would otherwise
silently collapse the leading/trailing space and the user would never
see the padding the cleaner is going to strip.
Pair with :func:`hidden_char_css` to inject the matching styles into
the page.
"""
if not isinstance(s, str):
return ""
parts: list[str] = []
for ch in s:
leading = ""
trailing = ""
body = s
if mark_outer_whitespace and s:
i = 0
while i < len(body) and body[i] in (" ", "\t"):
i += 1
leading = body[:i]
body = body[i:]
j = len(body)
while j > 0 and body[j - 1] in (" ", "\t"):
j -= 1
trailing = body[j:]
body = body[:j]
# Render one outer whitespace run (the leading or trailing slice split off
# above) as a string of HTML badge spans, one span per character.
#   run   -- the run of characters to render; an empty run yields "".
#   label -- suffix used in the tooltip of non-tab characters ("SP <label>");
#            the call sites in this function pass "LEAD" and "TRAIL".
def _render_outer(run: str, label: str) -> str:
if not run:
return ""
# Each character rendered as a discrete badge so the user sees the
# exact count of leading/trailing chars, not a single fused block.
out: list[str] = []
for ch in run:
# NOTE(review): the tab glyph reads as an empty string here, so a tab badge
# has no text content of its own -- confirm this is intended and that a
# glyph character was not lost.
glyph = "" if ch == "\t" else "·"
# NOTE(review): tabs are labelled plain "TAB"; unlike spaces, their tooltip
# does not include *label* (LEAD/TRAIL) -- confirm this is intended.
char_label = "TAB" if ch == "\t" else f"SP {label}"
# The tooltip carries the code point as U+XXXX followed by the label.
out.append(
f'<span class="hidden-char hidden-whitespace" '
f'title="U+{ord(ch):04X} {char_label}">{glyph}</span>'
)
return "".join(out)
parts: list[str] = [_render_outer(leading, "LEAD")]
for ch in body:
mapped = _VISIBLE_CHAR_MAP.get(ch)
if mapped is not None:
glyph, label = mapped
@@ -721,6 +758,7 @@ def visualize_hidden_html(s: str) -> str:
parts.append("&gt;")
else:
parts.append(ch)
parts.append(_render_outer(trailing, "TRAIL"))
return "".join(parts)

View File

@@ -750,11 +750,16 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
the user can decide which tool to open first.
"""
from src.core.analyze import findings_by_tool # local import to avoid cycle
from src.core.text_clean import hidden_char_css
if not findings:
st.success("No issues detected. Open any tool below to start working.")
return
# Inject the hidden-char badge styles once so every sample value below
# can render leading/trailing whitespace and invisibles as visible badges.
st.markdown(hidden_char_css() + _SAMPLE_TABLE_CSS, unsafe_allow_html=True)
by_sev: dict[str, int] = {}
for f in findings:
by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
@@ -792,7 +797,35 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
_render_one_finding(f)
# Stylesheet for the per-finding sample table (<table class='findings-sample-table'>).
# render_findings_panel appends it to hidden_char_css() and injects both with
# st.markdown(..., unsafe_allow_html=True). The `td.value` rule uses
# `white-space: pre-wrap` so whitespace inside a sample value is not collapsed
# by the browser.
_SAMPLE_TABLE_CSS = """
<style>
.findings-sample-table {
width: 100%;
border-collapse: collapse;
font-size: 0.9em;
}
.findings-sample-table th,
.findings-sample-table td {
padding: 4px 8px;
border-bottom: 1px solid #eee;
text-align: left;
vertical-align: top;
}
.findings-sample-table td.value {
font-family: ui-monospace, SFMono-Regular, monospace;
/* pre-wrap so any ASCII whitespace inside the value is preserved
visually (browsers collapse adjacent spaces by default). */
white-space: pre-wrap;
word-break: break-word;
}
.findings-sample-table tbody tr:hover { background: #fafafa; }
</style>
"""
def _render_one_finding(f) -> None:
from src.core.text_clean import visualize_hidden_html
color = _SEVERITY_COLOR[f.severity]
icon = _SEVERITY_ICON[f.severity]
column_part = f" in `{f.column}`" if getattr(f, "column", None) else ""
@@ -800,10 +833,34 @@ def _render_one_finding(f) -> None:
f"{icon} :{color}[**{f.id}**]{column_part}{f.description}"
)
if f.samples:
sample_df = pd.DataFrame(
f.samples, columns=["row", "column", "value"],
# Render samples as an HTML table so leading/trailing whitespace
# and invisible characters in the value column show up as badges.
# A plain st.dataframe collapses outer whitespace and renders
# NBSP/ZWSP as nothing, defeating the point of the audit.
rows_html = []
for row, col, value in f.samples:
rendered_value = visualize_hidden_html(
str(value), mark_outer_whitespace=True,
)
rendered_col = visualize_hidden_html(
str(col), mark_outer_whitespace=True,
)
rows_html.append(
"<tr>"
f"<td>{int(row) + 1 if isinstance(row, int) else row}</td>"
f"<td><code>{rendered_col}</code></td>"
f"<td class='value'>{rendered_value}</td>"
"</tr>"
)
st.markdown(
"<table class='findings-sample-table'>"
"<thead><tr>"
"<th>Row</th><th>Column</th><th>Value</th>"
"</tr></thead>"
f"<tbody>{''.join(rows_html)}</tbody>"
"</table>",
unsafe_allow_html=True,
)
st.dataframe(sample_df, use_container_width=True, hide_index=True)
def upload_and_analyze_section() -> None: