# datatools-dev/tests/gui/conftest.py

"""Shared fixtures for the GUI test layer.

Streamlit's ``AppTest.from_file`` runs a page module in-process inside
its own ScriptRunContext. Each fixture here returns sample data (raw
bytes for the upload-session-state path, or a parsed DataFrame), a
configured ``AppTest`` ready to ``.run()``, or a factory that builds one.
Tests should NOT call ``AppTest.from_file`` directly, so that the
project-root path and the default session-state stash live in one place.

Page paths are resolved relative to the repo root so the test suite
works from any cwd (e.g., ``pytest`` from a subdir, IDE runners).
"""

from __future__ import annotations

import hashlib
from pathlib import Path
from typing import Iterable

import pandas as pd
import pytest

from streamlit.testing.v1 import AppTest


# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------

# Absolute location of this conftest; every other path is derived from it
# so the suite does not depend on the current working directory.
_THIS_FILE: Path = Path(__file__).resolve()

# Three levels up from this file (conftest.py -> gui/ -> tests/ -> root).
PROJECT_ROOT: Path = _THIS_FILE.parent.parent.parent

# GUI sources: the home page module and the directory of tool pages.
_GUI_SRC: Path = PROJECT_ROOT.joinpath("src", "gui")
PAGES_DIR: Path = _GUI_SRC / "pages"
APP_PATH: Path = _GUI_SRC / "app.py"

# Sample input files read by the data fixtures.
SAMPLES_DIR: Path = PROJECT_ROOT.joinpath("samples")


# All GUI tests get the marker automatically so a single
# ``pytest -m 'not gui'`` skips the whole subdir.
def pytest_collection_modifyitems(config, items):
    """Attach the ``gui`` marker to every test collected from this directory.

    The hook can be handed items that live outside this subdir, so each
    item is filtered by location. Membership is decided by comparing the
    item's resolved path against the directory holding this conftest,
    rather than by a ``"tests/gui/"`` substring test, which would also
    match unrelated directories such as ``othertests/gui/``.

    Args:
        config: The pytest config object (unused).
        items: Collected test items; matching ones are marked in place.
    """
    gui_dir = Path(__file__).resolve().parent
    for item in items:
        # Prefer the pathlib-based ``path`` attribute; fall back to the
        # legacy ``fspath`` for items that only provide that one.
        location = getattr(item, "path", None)
        if location is None:
            location = item.fspath
        item_path = Path(str(location)).resolve()
        # Only mark items collected from this subdir.
        if gui_dir in item_path.parents:
            item.add_marker(pytest.mark.gui)


# ---------------------------------------------------------------------------
# Sample data
# ---------------------------------------------------------------------------

@pytest.fixture
def messy_sales_bytes() -> bytes:
    """Return the unparsed contents of the ``messy_sales.csv`` sample
    (mixed dup + text noise), read from the samples directory."""
    sample_path = SAMPLES_DIR.joinpath("messy_sales.csv")
    with sample_path.open("rb") as handle:
        return handle.read()


@pytest.fixture
def small_csv_bytes() -> bytes:
    """Return a tiny in-memory CSV: a header plus three rows across three
    columns, with one obvious duplicate. Meant for happy-path workflow
    tests where wall-clock matters, so the tool pages finish quickly."""
    lines = (
        b"name,email,phone\n",
        b"Alice,alice@gmail.com,5551234567\n",
        b"Alice,Alice@Gmail.com,(555) 123-4567\n",
        b"Bob,bob@example.com,5559876543\n",
    )
    return b"".join(lines)


@pytest.fixture
def small_csv_df(small_csv_bytes) -> pd.DataFrame:
    """Return ``small_csv_bytes`` parsed into a DataFrame, for tests that
    need the tabular form (e.g., direct ``deduplicate()`` calls within a
    test). Every column is read as ``str`` and empty cells are kept as
    empty strings rather than converted to NaN."""
    from io import BytesIO

    buffer = BytesIO(small_csv_bytes)
    frame = pd.read_csv(buffer, dtype=str, keep_default_na=False)
    return frame


# ---------------------------------------------------------------------------
# AppTest builders
# ---------------------------------------------------------------------------

def _new_app(page_path: Path) -> AppTest:
    """Return an AppTest bound to the script at *page_path*.

    Centralises the existence check and the ``AppTest.from_file`` call so
    test bodies do not repeat that boilerplate.

    Raises:
        FileNotFoundError: if *page_path* does not exist.
    """
    if page_path.exists():
        return AppTest.from_file(str(page_path))
    raise FileNotFoundError(f"GUI test target missing: {page_path}")


@pytest.fixture
def home_app() -> AppTest:
    """Provide a new, not-yet-run AppTest for the home page
    (``src/gui/app.py``)."""
    app = _new_app(APP_PATH)
    return app


@pytest.fixture
def app_factory():
    """Provide a builder that turns a page slug into a fresh AppTest.

    The slug is the page's file name without the ``.py`` suffix, looked
    up inside the pages directory.

    Usage::

        app = app_factory("1_Deduplicator")
        app.run()
    """
    def build(slug: str) -> AppTest:
        target = PAGES_DIR.joinpath(f"{slug}.py")
        return _new_app(target)

    return build


# ---------------------------------------------------------------------------
# Upload-session helpers
# ---------------------------------------------------------------------------

def stash_upload(app: AppTest, *, name: str, data: bytes) -> str:
    """Seed *app*'s session state with a home-screen upload, so a tool
    page renders as if the user had uploaded *name* / *data* on the home
    screen.

    Writes three keys: ``home_uploaded_bytes``, ``home_uploaded_name`` and
    ``home_uploaded_size`` (the byte length of *data*).

    Returns:
        The SHA-256 hex digest of *data*, for tests that want to assert
        against it. The digest itself is not stored in session state.
    """
    # Hash first so bad *data* fails before any state is written.
    digest = hashlib.sha256(data).hexdigest()
    state = app.session_state
    entries = (
        ("home_uploaded_bytes", data),
        ("home_uploaded_name", name),
        ("home_uploaded_size", len(data)),
    )
    for key, value in entries:
        state[key] = value
    return digest


# ---------------------------------------------------------------------------
# i18n helpers
# ---------------------------------------------------------------------------

def with_language(app: AppTest, lang: str) -> None:
    """Select the GUI language for *app*; call this *before* ``app.run()``.

    The selector widget reads ``_ui_lang_select`` on first render. This
    helper instead writes the canonical ``ui_lang`` key (the one
    ``current_language()`` reads), so the very first render is already in
    the chosen language.
    """
    state = app.session_state
    state["ui_lang"] = lang


def collected_text(app: AppTest) -> str:
    """Flatten the text-bearing elements on the page into one
    newline-joined string, so a test can assert
    ``"Maestría" in collected_text(app)`` without juggling individual
    element collections.

    Covers, in this order: title, header, subheader, caption, markdown,
    info, warning, error, success, button labels, code, metric labels and
    values, expander labels, and finally the sidebar's markdown, caption
    and button collections. ``code``, ``metric`` and ``expander`` are
    looked up defensively and skipped when *app* has no such accessor.

    page_link labels are NOT collected. NOTE(review): no ``page_link``
    accessor is used here; add one if the installed Streamlit exposes it.

    Doesn't try to be exhaustive — if an element type isn't here, add it
    (cheap, mechanical).

    Returns:
        The collected strings joined with ``"\\n"``; an empty string when
        nothing text-bearing was found.
    """
    chunks: list[str] = []

    def _extend(
        seq: Iterable,
        attrs: tuple[str, ...] = ("value", "label", "body"),
        *,
        every: bool = False,
    ) -> None:
        # Append the first non-empty string attribute of each element, or
        # all of them when ``every`` is set (metrics carry both a label
        # and a value worth asserting on).
        for el in seq:
            for a in attrs:
                v = getattr(el, a, None)
                if isinstance(v, str) and v:
                    chunks.append(v)
                    if not every:
                        break  # one value per element is enough

    # Element collections that have always been part of the flattened text.
    for kind in (
        "title",
        "header",
        "subheader",
        "caption",
        "markdown",
        "info",
        "warning",
        "error",
        "success",
        "button",
    ):
        _extend(getattr(app, kind))

    # Collections the docstring promised but the body never read. Looked
    # up with a default so an app object lacking one is simply skipped.
    _extend(getattr(app, "code", ()))
    _extend(getattr(app, "metric", ()), ("label", "value"), every=True)
    _extend(getattr(app, "expander", ()), ("label",))

    # Sidebar caption / markdown / button (Streamlit exposes the sidebar
    # as a sub-tree with the same widget collections).
    if hasattr(app, "sidebar"):
        sb = app.sidebar
        for kind in ("markdown", "caption", "button"):
            _extend(getattr(sb, kind))
    return "\n".join(chunks)