feat(gui): visualize leading/trailing whitespace in analyzer findings
The analyzer's "Run Analysis" panel rendered sample cells via st.dataframe,
which (a) silently collapses leading/trailing ASCII whitespace and (b)
displays NBSP/ZWSP/control chars as nothing. The user couldn't see the
exact pollution they were being told about.
visualize_hidden_html gains an opt-in, keyword-only mark_outer_whitespace
option (default False) that wraps each leading and trailing ASCII space/tab
in its own badge. Space badges carry a "U+0020 SP LEAD" / "U+0020 SP TRAIL"
tooltip; tab badges carry a "U+0009 TAB" tooltip in either position. The
badges are per-character so the user can count exactly how much padding the
cleaner will strip.
components.render_findings_panel now:
- injects hidden_char_css() once at the top of the panel
- replaces st.dataframe(samples) with a custom HTML table
- renders both the column-name and value cells with mark_outer_whitespace=True
- applies white-space: pre-wrap on value cells so any internal ASCII
whitespace also stays visible (browsers collapse runs by default)
Four new tests cover: leading+trailing badge counts, default-off
behaviour, leading tab badge, all-whitespace string treated entirely
as leading.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -680,7 +680,7 @@ def visualize_hidden_text(s: str) -> str:
|
|||||||
return "".join(out)
|
return "".join(out)
|
||||||
|
|
||||||
|
|
||||||
def visualize_hidden_html(s: str) -> str:
|
def visualize_hidden_html(s: str, *, mark_outer_whitespace: bool = False) -> str:
|
||||||
"""Return an HTML rendering of *s* with hidden characters highlighted.
|
"""Return an HTML rendering of *s* with hidden characters highlighted.
|
||||||
|
|
||||||
Each invisible/control/smart character is wrapped in a ``<span>`` with
|
Each invisible/control/smart character is wrapped in a ``<span>`` with
|
||||||
@@ -688,13 +688,50 @@ def visualize_hidden_html(s: str) -> str:
|
|||||||
so the user gets a tooltip on hover. ASCII printable text is HTML-
|
so the user gets a tooltip on hover. ASCII printable text is HTML-
|
||||||
escaped but otherwise left as-is.
|
escaped but otherwise left as-is.
|
||||||
|
|
||||||
|
When *mark_outer_whitespace* is True, leading and trailing runs of
|
||||||
|
plain ASCII space and tab are also wrapped in highlight spans. This
|
||||||
|
is essential for analyzer/audit views where browsers would otherwise
|
||||||
|
silently collapse the leading/trailing space and the user would never
|
||||||
|
see the padding the cleaner is going to strip.
|
||||||
|
|
||||||
Pair with :func:`hidden_char_css` to inject the matching styles into
|
Pair with :func:`hidden_char_css` to inject the matching styles into
|
||||||
the page.
|
the page.
|
||||||
"""
|
"""
|
||||||
if not isinstance(s, str):
|
if not isinstance(s, str):
|
||||||
return ""
|
return ""
|
||||||
parts: list[str] = []
|
|
||||||
for ch in s:
|
leading = ""
|
||||||
|
trailing = ""
|
||||||
|
body = s
|
||||||
|
if mark_outer_whitespace and s:
|
||||||
|
i = 0
|
||||||
|
while i < len(body) and body[i] in (" ", "\t"):
|
||||||
|
i += 1
|
||||||
|
leading = body[:i]
|
||||||
|
body = body[i:]
|
||||||
|
j = len(body)
|
||||||
|
while j > 0 and body[j - 1] in (" ", "\t"):
|
||||||
|
j -= 1
|
||||||
|
trailing = body[j:]
|
||||||
|
body = body[:j]
|
||||||
|
|
||||||
|
def _render_outer(run: str, label: str) -> str:
|
||||||
|
if not run:
|
||||||
|
return ""
|
||||||
|
# Each character rendered as a discrete badge so the user sees the
|
||||||
|
# exact count of leading/trailing chars, not a single fused block.
|
||||||
|
out: list[str] = []
|
||||||
|
for ch in run:
|
||||||
|
glyph = "→" if ch == "\t" else "·"
|
||||||
|
char_label = "TAB" if ch == "\t" else f"SP {label}"
|
||||||
|
out.append(
|
||||||
|
f'<span class="hidden-char hidden-whitespace" '
|
||||||
|
f'title="U+{ord(ch):04X} {char_label}">{glyph}</span>'
|
||||||
|
)
|
||||||
|
return "".join(out)
|
||||||
|
|
||||||
|
parts: list[str] = [_render_outer(leading, "LEAD")]
|
||||||
|
for ch in body:
|
||||||
mapped = _VISIBLE_CHAR_MAP.get(ch)
|
mapped = _VISIBLE_CHAR_MAP.get(ch)
|
||||||
if mapped is not None:
|
if mapped is not None:
|
||||||
glyph, label = mapped
|
glyph, label = mapped
|
||||||
@@ -721,6 +758,7 @@ def visualize_hidden_html(s: str) -> str:
|
|||||||
parts.append(">")
|
parts.append(">")
|
||||||
else:
|
else:
|
||||||
parts.append(ch)
|
parts.append(ch)
|
||||||
|
parts.append(_render_outer(trailing, "TRAIL"))
|
||||||
return "".join(parts)
|
return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -750,11 +750,16 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
|
|||||||
the user can decide which tool to open first.
|
the user can decide which tool to open first.
|
||||||
"""
|
"""
|
||||||
from src.core.analyze import findings_by_tool # local import to avoid cycle
|
from src.core.analyze import findings_by_tool # local import to avoid cycle
|
||||||
|
from src.core.text_clean import hidden_char_css
|
||||||
|
|
||||||
if not findings:
|
if not findings:
|
||||||
st.success("No issues detected. Open any tool below to start working.")
|
st.success("No issues detected. Open any tool below to start working.")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Inject the hidden-char badge styles once so every sample value below
|
||||||
|
# can render leading/trailing whitespace and invisibles as visible badges.
|
||||||
|
st.markdown(hidden_char_css() + _SAMPLE_TABLE_CSS, unsafe_allow_html=True)
|
||||||
|
|
||||||
by_sev: dict[str, int] = {}
|
by_sev: dict[str, int] = {}
|
||||||
for f in findings:
|
for f in findings:
|
||||||
by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
|
by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
|
||||||
@@ -792,7 +797,35 @@ def render_findings_panel(findings, *, header: str = "Detected issues") -> None:
|
|||||||
_render_one_finding(f)
|
_render_one_finding(f)
|
||||||
|
|
||||||
|
|
||||||
|
_SAMPLE_TABLE_CSS = """
|
||||||
|
<style>
|
||||||
|
.findings-sample-table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
.findings-sample-table th,
|
||||||
|
.findings-sample-table td {
|
||||||
|
padding: 4px 8px;
|
||||||
|
border-bottom: 1px solid #eee;
|
||||||
|
text-align: left;
|
||||||
|
vertical-align: top;
|
||||||
|
}
|
||||||
|
.findings-sample-table td.value {
|
||||||
|
font-family: ui-monospace, SFMono-Regular, monospace;
|
||||||
|
/* pre-wrap so any ASCII whitespace inside the value is preserved
|
||||||
|
visually (browsers collapse adjacent spaces by default). */
|
||||||
|
white-space: pre-wrap;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
.findings-sample-table tbody tr:hover { background: #fafafa; }
|
||||||
|
</style>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def _render_one_finding(f) -> None:
|
def _render_one_finding(f) -> None:
|
||||||
|
from src.core.text_clean import visualize_hidden_html
|
||||||
|
|
||||||
color = _SEVERITY_COLOR[f.severity]
|
color = _SEVERITY_COLOR[f.severity]
|
||||||
icon = _SEVERITY_ICON[f.severity]
|
icon = _SEVERITY_ICON[f.severity]
|
||||||
column_part = f" in `{f.column}`" if getattr(f, "column", None) else ""
|
column_part = f" in `{f.column}`" if getattr(f, "column", None) else ""
|
||||||
@@ -800,10 +833,34 @@ def _render_one_finding(f) -> None:
|
|||||||
f"{icon} :{color}[**{f.id}**]{column_part} — {f.description}"
|
f"{icon} :{color}[**{f.id}**]{column_part} — {f.description}"
|
||||||
)
|
)
|
||||||
if f.samples:
|
if f.samples:
|
||||||
sample_df = pd.DataFrame(
|
# Render samples as an HTML table so leading/trailing whitespace
|
||||||
f.samples, columns=["row", "column", "value"],
|
# and invisible characters in the value column show up as badges.
|
||||||
|
# A plain st.dataframe collapses outer whitespace and renders
|
||||||
|
# NBSP/ZWSP as nothing, defeating the point of the audit.
|
||||||
|
rows_html = []
|
||||||
|
for row, col, value in f.samples:
|
||||||
|
rendered_value = visualize_hidden_html(
|
||||||
|
str(value), mark_outer_whitespace=True,
|
||||||
|
)
|
||||||
|
rendered_col = visualize_hidden_html(
|
||||||
|
str(col), mark_outer_whitespace=True,
|
||||||
|
)
|
||||||
|
rows_html.append(
|
||||||
|
"<tr>"
|
||||||
|
f"<td>{int(row) + 1 if isinstance(row, int) else row}</td>"
|
||||||
|
f"<td><code>{rendered_col}</code></td>"
|
||||||
|
f"<td class='value'>{rendered_value}</td>"
|
||||||
|
"</tr>"
|
||||||
|
)
|
||||||
|
st.markdown(
|
||||||
|
"<table class='findings-sample-table'>"
|
||||||
|
"<thead><tr>"
|
||||||
|
"<th>Row</th><th>Column</th><th>Value</th>"
|
||||||
|
"</tr></thead>"
|
||||||
|
f"<tbody>{''.join(rows_html)}</tbody>"
|
||||||
|
"</table>",
|
||||||
|
unsafe_allow_html=True,
|
||||||
)
|
)
|
||||||
st.dataframe(sample_df, use_container_width=True, hide_index=True)
|
|
||||||
|
|
||||||
|
|
||||||
def upload_and_analyze_section() -> None:
|
def upload_and_analyze_section() -> None:
|
||||||
|
|||||||
@@ -539,3 +539,28 @@ class TestVisualizeHidden:
|
|||||||
from src.core.text_clean import visualize_hidden_text, visualize_hidden_html
|
from src.core.text_clean import visualize_hidden_text, visualize_hidden_html
|
||||||
assert visualize_hidden_text(None) is None # type: ignore[arg-type]
|
assert visualize_hidden_text(None) is None # type: ignore[arg-type]
|
||||||
assert visualize_hidden_html(None) == ""
|
assert visualize_hidden_html(None) == ""
|
||||||
|
def test_html_marks_leading_trailing_ascii_space(self):
|
||||||
|
from src.core.text_clean import visualize_hidden_html
|
||||||
|
out = visualize_hidden_html("  Alice  ", mark_outer_whitespace=True)
|
||||||
|
# Two leading and two trailing space badges
|
||||||
|
assert out.count("SP LEAD") == 2
|
||||||
|
assert out.count("SP TRAIL") == 2
|
||||||
|
# Inner "Alice" untouched
|
||||||
|
assert "Alice" in out
|
||||||
|
|
||||||
|
def test_html_default_does_not_mark_outer_ascii_space(self):
|
||||||
|
from src.core.text_clean import visualize_hidden_html
|
||||||
|
out = visualize_hidden_html(" Alice ")
|
||||||
|
assert "SP LEAD" not in out and "SP TRAIL" not in out
|
||||||
|
|
||||||
|
def test_html_marks_leading_tab(self):
|
||||||
|
from src.core.text_clean import visualize_hidden_html
|
||||||
|
out = visualize_hidden_html("\tAlice", mark_outer_whitespace=True)
|
||||||
|
assert "TAB" in out # tab gets a badge
|
||||||
|
|
||||||
|
def test_html_only_whitespace_string_marked_as_leading(self):
|
||||||
|
from src.core.text_clean import visualize_hidden_html
|
||||||
|
out = visualize_hidden_html("   ", mark_outer_whitespace=True)
|
||||||
|
# All three chars treated as leading; trailing run is empty.
|
||||||
|
assert out.count("SP LEAD") == 3
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user