"""Shared fixtures for the GUI test layer. Streamlit's ``AppTest.from_file`` runs a page module in-process inside its own ScriptRunContext. Each fixture here returns either bytes (for the upload-session-state path) or a configured ``AppTest`` ready to ``.run()``. Tests should NOT call ``AppTest.from_file`` directly so the project-root path and the default session-state stash live in one place. Page paths are resolved relative to the repo root so the test suite works from any cwd (e.g., ``pytest`` from a subdir, IDE runners). """ from __future__ import annotations import hashlib from pathlib import Path from typing import Iterable import pandas as pd import pytest from streamlit.testing.v1 import AppTest # --------------------------------------------------------------------------- # Paths # --------------------------------------------------------------------------- PROJECT_ROOT: Path = Path(__file__).resolve().parent.parent.parent PAGES_DIR: Path = PROJECT_ROOT / "src" / "gui" / "pages" APP_PATH: Path = PROJECT_ROOT / "src" / "gui" / "app.py" SAMPLES_DIR: Path = PROJECT_ROOT / "samples" # All GUI tests get the marker automatically so a single # ``pytest -m 'not gui'`` skips the whole subdir. def pytest_collection_modifyitems(config, items): for item in items: # Only mark items collected from this subdir. if "tests/gui/" in str(item.fspath).replace("\\", "/"): item.add_marker(pytest.mark.gui) # --------------------------------------------------------------------------- # Sample data # --------------------------------------------------------------------------- @pytest.fixture def messy_sales_bytes() -> bytes: """Raw bytes of the ``messy_sales.csv`` sample (mixed dup + text noise).""" return (SAMPLES_DIR / "messy_sales.csv").read_bytes() @pytest.fixture def small_csv_bytes() -> bytes: """A tiny CSV the tool pages can chew through fast — three rows, three columns, one obvious duplicate. Suitable for happy-path workflow tests where wall-clock matters.""" return ( b"name,email,phone\n" b"Alice,alice@gmail.com,5551234567\n" b"Alice,Alice@Gmail.com,(555) 123-4567\n" b"Bob,bob@example.com,5559876543\n" ) @pytest.fixture def small_csv_df(small_csv_bytes) -> pd.DataFrame: """The ``small_csv_bytes`` parsed — for tests that need the DataFrame form (e.g., direct ``deduplicate()`` calls within a test).""" import io return pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False) # --------------------------------------------------------------------------- # AppTest builders # --------------------------------------------------------------------------- def _new_app(page_path: Path) -> AppTest: """Build an AppTest pointed at *page_path*. Keeps the project-root / page-resolution boilerplate out of the test bodies.""" if not page_path.exists(): raise FileNotFoundError(f"GUI test target missing: {page_path}") return AppTest.from_file(str(page_path)) @pytest.fixture def home_app() -> AppTest: """A fresh AppTest pointed at the home page (``src/gui/app.py``).""" return _new_app(APP_PATH) @pytest.fixture def app_factory(): """Callable returning a fresh AppTest for any page slug. Usage:: app = app_factory("1_Deduplicator") app.run() """ def _make(slug: str) -> AppTest: return _new_app(PAGES_DIR / f"{slug}.py") return _make # --------------------------------------------------------------------------- # Upload-session helpers # --------------------------------------------------------------------------- def stash_upload(app: AppTest, *, name: str, data: bytes) -> str: """Pre-populate the home-screen upload stash + the gate's normalisation result so a tool page renders past ``require_normalization_gate()``. Returns the SHA-256 hex of *data* (used as the gate key) in case the test wants to assert against it. The gate checks:: - ``home_uploaded_bytes`` is set - ``normalization_for == sha256(home_uploaded_bytes)`` - ``normalization_result.passed is True`` We synthesise a passing result via a tiny stub object that satisfies the gate's only attribute access (``.passed``). Tests that want to exercise gate-blocking behaviour should NOT call this helper — they should stash bytes without the normalisation result. """ sha = hashlib.sha256(data).hexdigest() app.session_state["home_uploaded_bytes"] = data app.session_state["home_uploaded_name"] = name app.session_state["home_uploaded_size"] = len(data) app.session_state["normalization_for"] = sha app.session_state["normalization_result"] = _PassedGateResult() return sha class _PassedGateResult: """Minimal stand-in for the real NormalizationResult shape — the gate only reads ``.passed``. Using a real NormalizationResult here would pull in core.normalize and tie GUI tests to its constructor surface. """ passed: bool = True def stash_upload_without_gate(app: AppTest, *, name: str, data: bytes) -> None: """Stash the upload bytes but do NOT pre-pass the gate. Used by gate tests that want the warning + Go-to-Review button to appear.""" app.session_state["home_uploaded_bytes"] = data app.session_state["home_uploaded_name"] = name app.session_state["home_uploaded_size"] = len(data) # --------------------------------------------------------------------------- # i18n helpers # --------------------------------------------------------------------------- def with_language(app: AppTest, lang: str) -> None: """Set the active GUI language *before* ``app.run()``. The selector widget reads ``_ui_lang_select`` on first render; we set the canonical ``ui_lang`` key (what ``current_language()`` reads) directly so the first render is already in the chosen language.""" app.session_state["ui_lang"] = lang def collected_text(app: AppTest) -> str: """Flatten every text-bearing element on the page into one big string so a test can assert ``"Maestría" in collected_text(app)`` without juggling individual widget collections. Covers: title, header, subheader, caption, markdown, info, warning, error, success, code, metric labels/values, button labels, expander labels, page_link labels. Doesn't try to be exhaustive — if a widget type isn't here, add it (cheap, mechanical). """ chunks: list[str] = [] def _extend(seq: Iterable, attrs: tuple[str, ...] = ("value", "label", "body")) -> None: for el in seq: for a in attrs: v = getattr(el, a, None) if isinstance(v, str) and v: chunks.append(v) break # one value per element is enough _extend(app.title) _extend(app.header) _extend(app.subheader) _extend(app.caption) _extend(app.markdown) _extend(app.info) _extend(app.warning) _extend(app.error) _extend(app.success) _extend(app.button) # Sidebar caption / markdown / button (Streamlit exposes the sidebar # as a sub-tree with the same widget collections). if hasattr(app, "sidebar"): sb = app.sidebar _extend(sb.markdown) _extend(sb.caption) _extend(sb.button) return "\n".join(chunks)