test(gui): add Streamlit AppTest layer (139 tests)

Until now every test ran against core or the CLI; the Streamlit GUI
was verified by hand. This commit adds tests/gui/ — 139 AppTest-
driven tests behind a 'gui' marker so the quick loop
(``pytest -m 'not gui'``) stays at 1777 tests / ~10s while
``pytest`` runs everything (1916 / ~14s).

Coverage:
- test_smoke.py (59): every page renders in EN and ES, expected
  substring present, sidebar selector mounted.
- test_chrome.py (18): language selector flips session state and
  re-renders; quit button + farewell strings localize; tool-card
  names use the active language.
- test_gate.py (9): require_normalization_gate no-op / warning /
  short-circuit / hash-mismatch invariants; warning + button
  localized.
- test_workflows.py (14): happy path per Ready tool — stash
  upload, render, find primary action, verify result lands in
  session state.
- test_dedup_review.py (8): Accept All / Reject All / Clear
  Decisions wire through to review_decisions; apply_review_decisions
  semantics (keep-all, merge, column override).
- test_advanced_panels.py (15): config_panel widget defaults and
  options (algorithm, threshold, survivor rule, merge, multiselects,
  config save/load).
- test_errors.py (4): garbage / empty / single-column uploads don't
  crash; duplicate-target mapping raises InputValidationError.
- test_findings_panel.py (12): driven via a small standalone harness
  page so we test the component without faking a file_uploader. EN
  + ES strings, per-tool grouping, open-tool button label, untargeted
  expander, severity summary.

Shared infrastructure in tests/gui/conftest.py:
- ``stash_upload`` / ``stash_upload_without_gate`` — populate
  session_state to pre-pass or block the gate.
- ``with_language`` — set ``ui_lang`` before run().
- ``collected_text`` — flatten title/caption/markdown/etc. into
  one string for substring assertions.
- Auto-marking: every test in tests/gui/ gets ``@pytest.mark.gui``
  via ``pytest_collection_modifyitems``, so the marker isn't
  per-test boilerplate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 16:13:40 +00:00
parent d0423a8912
commit 35d46a0c1a
12 changed files with 1676 additions and 0 deletions

View File

@@ -0,0 +1,227 @@
"""Findings panel rendering tests.
``render_findings_panel`` is the central widget on the home page and
the Review page; failures here cascade into the user's first
impression. We drive it via a tiny test harness page
(``_findings_panel_harness.py``) so the test can inject findings
directly into session state — no file_uploader simulation needed.
We verify:
- Empty findings list → localized "no issues" success message.
- Findings with tool ids → one expander per tool, labeled in the
active language.
- Header + severity summary render at the top.
- Untargeted findings land in the "Other / file-level" expander.
Pack-key parity is already pinned by ``test_lang_packs.py``; this
file pins the call sites instead.
"""
from __future__ import annotations
from pathlib import Path
import pandas as pd
import pytest
from streamlit.testing.v1 import AppTest
from .conftest import PROJECT_ROOT, collected_text, with_language
HARNESS_PATH = Path(__file__).resolve().parent / "_findings_panel_harness.py"
def _harness(findings, lang: str = "en") -> AppTest:
    """Return an un-run AppTest of the harness page seeded with ``findings``.

    ``findings`` is stored under the ``test_findings`` session-state key.
    ``ui_lang`` is written only when ``lang`` is not ``"en"``; for English
    the key is left unset. The caller is responsible for calling ``run()``.
    """
    harness_app = AppTest.from_file(str(HARNESS_PATH))
    harness_app.session_state["test_findings"] = findings
    if lang == "en":
        # English: leave ``ui_lang`` untouched.
        return harness_app
    harness_app.session_state["ui_lang"] = lang
    return harness_app
def _make_finding(tool: str = "", **overrides):
    """Construct a ``Finding`` from test defaults.

    ``tool`` sets the finding's tool id (empty string by default). Any
    keyword in ``overrides`` replaces the default value of the same
    name before the ``Finding`` constructor is called.
    """
    # Imported inside the helper, so the core module loads on first call.
    from src.core.analyze import Finding

    defaults = {
        "id": "test_finding",
        "severity": "warn",
        "tool": tool,
        "count": 1,
        "description": "A test finding.",
        "column": None,
        "samples": [],
        "confidence": "medium",
        "fix_action": "",
    }
    # Overrides are merged last so they win over the defaults.
    return Finding(**{**defaults, **overrides})
# ---------------------------------------------------------------------------
# Empty findings → success message
# ---------------------------------------------------------------------------
class TestEmptyFindings:
    """An empty findings list renders the localized "no issues" message."""

    def test_empty_renders_no_issues_english(self):
        panel = _harness([])
        panel.run()
        assert "No issues detected" in collected_text(panel)

    def test_empty_renders_no_issues_spanish(self):
        panel = _harness([], lang="es")
        panel.run()
        assert "No se detectaron problemas" in collected_text(panel)
# ---------------------------------------------------------------------------
# Header text
# ---------------------------------------------------------------------------
class TestHeader:
    """The panel header text appears in the active language."""

    def test_header_english(self):
        panel = _harness([_make_finding(tool="02_text_cleaner")])
        panel.run()
        assert "Detected issues" in collected_text(panel)

    def test_header_spanish(self):
        single_finding = [_make_finding(tool="02_text_cleaner")]
        panel = _harness(single_finding, lang="es")
        panel.run()
        assert "Problemas detectados" in collected_text(panel)
# ---------------------------------------------------------------------------
# Per-tool grouping → one expander per tool id
# ---------------------------------------------------------------------------
class TestGrouping:
    """Findings are grouped into one expander per tool id.

    Each test inspects the labels of the expanders rendered by the
    harness page.
    """

    def test_findings_grouped_into_per_tool_expanders(self):
        findings = [
            _make_finding(tool="02_text_cleaner", id="whitespace_padding"),
            _make_finding(tool="02_text_cleaner", id="nbsp_padding"),
            _make_finding(tool="03_format_standardizer", id="mixed_case_email"),
        ]
        app = _harness(findings)
        app.run()
        labels = [e.label for e in app.expander]
        # Two unique tools → two expanders. Each label carries the
        # tool's display name + finding count.
        text_cleaner_expanders = [lbl for lbl in labels if "Text Cleaner" in lbl]
        format_expanders = [lbl for lbl in labels if "Format Standardizer" in lbl]
        assert len(text_cleaner_expanders) == 1, (
            f"expected one Text Cleaner expander; got: {labels}"
        )
        assert len(format_expanders) == 1, (
            f"expected one Format Standardizer expander; got: {labels}"
        )

    def test_tool_names_localize_in_spanish(self):
        findings = [_make_finding(tool="02_text_cleaner")]
        app = _harness(findings, lang="es")
        app.run()
        labels = [e.label for e in app.expander]
        assert any("Limpiador de texto" in lbl for lbl in labels), (
            f"Spanish tool name missing; expanders: {labels}"
        )

    def test_finding_count_in_expander_label(self):
        findings = [
            _make_finding(tool="02_text_cleaner", id=f"f{i}")
            for i in range(3)
        ]
        app = _harness(findings)
        app.run()
        labels = [e.label for e in app.expander]
        # Pack template: "{tool} — {n} finding(s)"
        # A default of None keeps a missing expander from surfacing as a
        # bare StopIteration; the assertion below reports the labels.
        text_cleaner_label = next(
            (lbl for lbl in labels if "Text Cleaner" in lbl), None
        )
        assert text_cleaner_label is not None, (
            f"expected a Text Cleaner expander; got: {labels}"
        )
        assert "3" in text_cleaner_label, (
            f"expected count '3' in expander label; got {text_cleaner_label!r}"
        )
# ---------------------------------------------------------------------------
# Open-tool button localizes
# ---------------------------------------------------------------------------
class TestOpenToolButton:
    """Each tool section links to that tool's page via ``st.page_link``.

    The link label is ``"Open {tool}"`` in English and ``"Abrir {tool}"``
    in Spanish. These tests look for the label in the flattened page
    text returned by ``collected_text``.
    NOTE(review): whether AppTest also surfaces page links under
    ``app.button`` is not checked here — confirm before relying on it.
    """

    def test_open_tool_label_english(self):
        panel = _harness([_make_finding(tool="02_text_cleaner")])
        panel.run()
        # Pack template: "Open {tool} →"
        assert "Open Text Cleaner" in collected_text(panel)

    def test_open_tool_label_spanish(self):
        panel = _harness([_make_finding(tool="02_text_cleaner")], lang="es")
        panel.run()
        # Pack template: "Abrir {tool} →"
        assert "Abrir Limpiador de texto" in collected_text(panel)
# ---------------------------------------------------------------------------
# Untargeted findings (file-level) go in the "Other" expander
# ---------------------------------------------------------------------------
class TestUntargetedFindings:
    """Findings with an empty tool id land in the "Other" expander."""

    def test_untargeted_goes_to_other_expander_en(self):
        mixed = [
            _make_finding(tool="", id="csv_bom_stripped"),
            _make_finding(tool="02_text_cleaner", id="nbsp_padding"),
        ]
        panel = _harness(mixed)
        panel.run()
        expander_labels = [exp.label for exp in panel.expander]
        # Pack template: "Other / file-level — {n} finding(s)"
        has_other = any(
            "Other / file-level" in label for label in expander_labels
        )
        assert has_other, f"untargeted expander missing; got: {expander_labels}"

    def test_untargeted_label_spanish(self):
        panel = _harness(
            [_make_finding(tool="", id="csv_bom_stripped")], lang="es"
        )
        panel.run()
        expander_labels = [exp.label for exp in panel.expander]
        # Spanish pack: "Otros / a nivel de archivo — {n} hallazgo(s)"
        has_other = any(
            "Otros / a nivel de archivo" in label for label in expander_labels
        )
        assert has_other, (
            f"Spanish 'Other' expander missing; got: {expander_labels}"
        )
# ---------------------------------------------------------------------------
# Severity summary
# ---------------------------------------------------------------------------
class TestSeveritySummary:
    """The panel renders a per-severity summary caption like
    ``⚠️ 2 warn · 1 info``. We pin the icon + count rendering."""

    def test_severity_icons_render(self):
        findings = [
            _make_finding(tool="02_text_cleaner", severity="warn"),
            _make_finding(tool="02_text_cleaner", severity="warn"),
            _make_finding(tool="03_format_standardizer", severity="info"),
        ]
        app = _harness(findings)
        app.run()
        text = collected_text(app)
        # Icons live in the per-language pack ("findings.severity_*").
        # The summary template is shared between languages.
        # Deliberately lenient: either the icon or the severity word
        # is enough to show the summary rendered.
        assert "⚠️" in text or "warn" in text
        # Counts present: two "warn" findings were injected above.
        assert "2 warn" in text, (
            f"expected '2 warn' in severity summary; got: {text!r}"
        )