Files
datatools-dev/tests/gui/conftest.py
Michael 35d46a0c1a test(gui): add Streamlit AppTest layer (139 tests)
Until now every test ran against core or the CLI; the Streamlit GUI
was verified by hand. This commit adds tests/gui/ — 139 AppTest-
driven tests behind a 'gui' marker so the quick loop
(``pytest -m 'not gui'``) stays at 1777 tests / ~10s while
``pytest`` runs everything (1916 / ~14s).

Coverage:
- test_smoke.py (59): every page renders in EN and ES, expected
  substring present, sidebar selector mounted.
- test_chrome.py (18): language selector flips session state and
  re-renders; quit button + farewell strings localize; tool-card
  names use the active language.
- test_gate.py (9): require_normalization_gate no-op / warning /
  short-circuit / hash-mismatch invariants; warning + button
  localized.
- test_workflows.py (14): happy path per Ready tool — stash
  upload, render, find primary action, verify result lands in
  session state.
- test_dedup_review.py (8): Accept All / Reject All / Clear
  Decisions wire through to review_decisions; apply_review_decisions
  semantics (keep-all, merge, column override).
- test_advanced_panels.py (15): config_panel widget defaults and
  options (algorithm, threshold, survivor rule, merge, multiselects,
  config save/load).
- test_errors.py (4): garbage / empty / single-column uploads don't
  crash; duplicate-target mapping raises InputValidationError.
- test_findings_panel.py (12): driven via a small standalone harness
  page so we test the component without faking a file_uploader. EN
  + ES strings, per-tool grouping, open-tool button label, untargeted
  expander, severity summary.

Shared infrastructure in tests/gui/conftest.py:
- ``stash_upload`` / ``stash_upload_without_gate`` — populate
  session_state to pre-pass or block the gate.
- ``with_language`` — set ``ui_lang`` before run().
- ``collected_text`` — flatten title/caption/markdown/etc. into
  one string for substring assertions.
- Auto-marking: every test in tests/gui/ gets ``@pytest.mark.gui``
  via ``pytest_collection_modifyitems``, so the marker isn't
  per-test boilerplate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 16:13:40 +00:00

205 lines
7.2 KiB
Python

"""Shared fixtures for the GUI test layer.
Streamlit's ``AppTest.from_file`` runs a page module in-process inside
its own ScriptRunContext. Each fixture here returns either bytes (for
the upload-session-state path) or a configured ``AppTest`` ready to
``.run()``. Tests should NOT call ``AppTest.from_file`` directly so the
project-root path and the default session-state stash live in one place.
Page paths are resolved relative to the repo root so the test suite
works from any cwd (e.g., ``pytest`` from a subdir, IDE runners).
"""
from __future__ import annotations
import hashlib
from pathlib import Path
from typing import Iterable
import pandas as pd
import pytest
from streamlit.testing.v1 import AppTest
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Repo root: three directory levels above this file.
PROJECT_ROOT: Path = Path(__file__).resolve().parent.parent.parent
# Locations of the GUI entry point, its page modules, and the sample data,
# all anchored on PROJECT_ROOT so they do not depend on the current cwd.
PAGES_DIR: Path = PROJECT_ROOT.joinpath("src", "gui", "pages")
APP_PATH: Path = PROJECT_ROOT.joinpath("src", "gui", "app.py")
SAMPLES_DIR: Path = PROJECT_ROOT.joinpath("samples")
# All GUI tests get the marker automatically so a single
# ``pytest -m 'not gui'`` skips the whole subdir.
def pytest_collection_modifyitems(config, items):
    """Attach the ``gui`` marker to every collected item under ``tests/gui/``.

    *config* is unused. *items* is the list of collected test items; each
    one whose file path contains ``tests/gui/`` gets ``pytest.mark.gui``.
    """
    for collected in items:
        # Normalise Windows separators so one substring check covers both
        # path styles.
        posix_path = str(collected.fspath).replace("\\", "/")
        if "tests/gui/" not in posix_path:
            continue
        collected.add_marker(pytest.mark.gui)
# ---------------------------------------------------------------------------
# Sample data
# ---------------------------------------------------------------------------
@pytest.fixture
def messy_sales_bytes() -> bytes:
    """Return the raw bytes of the ``messy_sales.csv`` sample file
    (mixed dup + text noise)."""
    sample_path = SAMPLES_DIR / "messy_sales.csv"
    return sample_path.read_bytes()
@pytest.fixture
def small_csv_bytes() -> bytes:
    """Return a tiny in-memory CSV: a ``name,email,phone`` header plus
    three data rows, two of which are near-identical "Alice" records.
    Small enough for happy-path workflow tests where wall-clock matters."""
    header = b"name,email,phone\n"
    records = (
        b"Alice,alice@gmail.com,5551234567\n",
        b"Alice,Alice@Gmail.com,(555) 123-4567\n",
        b"Bob,bob@example.com,5559876543\n",
    )
    return header + b"".join(records)
@pytest.fixture
def small_csv_df(small_csv_bytes) -> pd.DataFrame:
    """Return ``small_csv_bytes`` parsed into a DataFrame, for tests that
    need the tabular form (e.g., direct ``deduplicate()`` calls).

    Every column is read as ``str`` and empty cells stay empty strings
    rather than becoming NaN (``keep_default_na=False``).
    """
    import io

    buffer = io.BytesIO(small_csv_bytes)
    frame = pd.read_csv(buffer, dtype=str, keep_default_na=False)
    return frame
# ---------------------------------------------------------------------------
# AppTest builders
# ---------------------------------------------------------------------------
def _new_app(page_path: Path) -> AppTest:
    """Return an ``AppTest`` built from the script at *page_path*.

    Raises ``FileNotFoundError`` when *page_path* does not exist, so a
    mistyped page name fails here with a clear message. Centralising this
    keeps path-resolution boilerplate out of the test bodies.
    """
    if page_path.exists():
        return AppTest.from_file(str(page_path))
    raise FileNotFoundError(f"GUI test target missing: {page_path}")
@pytest.fixture
def home_app() -> AppTest:
    """Return a fresh ``AppTest`` for the home page (``src/gui/app.py``).

    The app is built but not yet run.
    """
    app = _new_app(APP_PATH)
    return app
@pytest.fixture
def app_factory():
    """Return a callable that builds a fresh ``AppTest`` for a page slug.

    The slug is the page module's file name without ``.py``; it is looked
    up inside ``PAGES_DIR``.

    Usage::

        app = app_factory("1_Deduplicator")
        app.run()
    """

    def _build(slug: str) -> AppTest:
        page_file = PAGES_DIR / f"{slug}.py"
        return _new_app(page_file)

    return _build
# ---------------------------------------------------------------------------
# Upload-session helpers
# ---------------------------------------------------------------------------
def stash_upload(app: AppTest, *, name: str, data: bytes) -> str:
    """Seed *app*'s session state with an upload that already passes the
    normalisation gate, so a tool page renders past
    ``require_normalization_gate()``.

    Returns the SHA-256 hex digest of *data* (the value stored as the
    gate key) in case the test wants to assert against it.

    The gate is expected to check::

        - ``home_uploaded_bytes`` is set
        - ``normalization_for == sha256(home_uploaded_bytes)``
        - ``normalization_result.passed is True``

    The passing result is a small stub exposing only ``.passed``. Tests
    that exercise gate-blocking behaviour should NOT use this helper;
    they should stash the bytes without a normalisation result.
    """
    digest = hashlib.sha256(data).hexdigest()
    seeded = {
        "home_uploaded_bytes": data,
        "home_uploaded_name": name,
        "home_uploaded_size": len(data),
        "normalization_for": digest,
        "normalization_result": _PassedGateResult(),
    }
    for key, value in seeded.items():
        app.session_state[key] = value
    return digest
class _PassedGateResult:
    """Stub standing in for a passing normalisation result.

    The gate is documented to read only ``.passed``, so that is the only
    attribute provided. A real NormalizationResult is deliberately not
    used: it would pull core.normalize into the GUI tests and couple
    them to its constructor surface.
    """

    # Class-level flag: every instance reports a passed gate.
    passed: bool = True
def stash_upload_without_gate(app: AppTest, *, name: str, data: bytes) -> None:
    """Store the upload bytes, file name and size in *app*'s session
    state without writing any normalisation keys, so the gate is NOT
    pre-passed. Used by gate tests that want the warning and the
    Go-to-Review button to appear."""
    seeded = {
        "home_uploaded_bytes": data,
        "home_uploaded_name": name,
        "home_uploaded_size": len(data),
    }
    for key, value in seeded.items():
        app.session_state[key] = value
# ---------------------------------------------------------------------------
# i18n helpers
# ---------------------------------------------------------------------------
def with_language(app: AppTest, lang: str) -> None:
    """Select the GUI language for *app*; call this *before* ``app.run()``.

    Writes *lang* to the canonical ``ui_lang`` session key (the one
    ``current_language()`` reads) rather than to the selector widget's
    own ``_ui_lang_select`` key, so the very first render is already in
    the chosen language.
    """
    language_key = "ui_lang"
    app.session_state[language_key] = lang
def collected_text(app: AppTest) -> str:
    """Join the text of a rendered page into one newline-separated string,
    so a test can assert ``"Maestría" in collected_text(app)`` without
    walking individual element collections.

    Collected, in this order: title, header, subheader, caption, markdown,
    info, warning, error, success and button elements of *app*; then, when
    *app* has a ``sidebar`` attribute, the sidebar's markdown, caption and
    button elements. Each element contributes at most one string: the
    first of its ``value``, ``label``, ``body`` attributes that is a
    non-empty ``str``.

    Other element types (code, metric, expander, ...) are not collected.
    If a test needs one, add it to the tuples below.
    """
    text_attrs = ("value", "label", "body")
    page_kinds = (
        "title",
        "header",
        "subheader",
        "caption",
        "markdown",
        "info",
        "warning",
        "error",
        "success",
        "button",
    )
    sidebar_kinds = ("markdown", "caption", "button")
    pieces: list[str] = []

    def _first_text(element):
        # Non-string values (e.g. a button's boolean ``value``) are skipped
        # so the next attribute can supply the text.
        for attr in text_attrs:
            candidate = getattr(element, attr, None)
            if isinstance(candidate, str) and candidate:
                return candidate
        return None

    def _harvest(owner, kinds) -> None:
        for kind in kinds:
            for element in getattr(owner, kind):
                text = _first_text(element)
                if text is not None:
                    pieces.append(text)

    _harvest(app, page_kinds)
    # The sidebar is read as a sub-tree exposing the same collections.
    # NOTE(review): if the page-level collections already include sidebar
    # elements, sidebar text appears twice; harmless for substring checks.
    if hasattr(app, "sidebar"):
        _harvest(app.sidebar, sidebar_kinds)
    return "\n".join(pieces)