test(gui): add Streamlit AppTest layer (139 tests)
Until now every test ran against core or the CLI; the Streamlit GUI was verified by hand. This commit adds tests/gui/ — 139 AppTest- driven tests behind a 'gui' marker so the quick loop (``pytest -m 'not gui'``) stays at 1777 tests / ~10s while ``pytest`` runs everything (1916 / ~14s). Coverage: - test_smoke.py (59): every page renders in EN and ES, expected substring present, sidebar selector mounted. - test_chrome.py (18): language selector flips session state and re-renders; quit button + farewell strings localize; tool-card names use the active language. - test_gate.py (9): require_normalization_gate no-op / warning / short-circuit / hash-mismatch invariants; warning + button localized. - test_workflows.py (14): happy path per Ready tool — stash upload, render, find primary action, verify result lands in session state. - test_dedup_review.py (8): Accept All / Reject All / Clear Decisions wire through to review_decisions; apply_review_decisions semantics (keep-all, merge, column override). - test_advanced_panels.py (15): config_panel widget defaults and options (algorithm, threshold, survivor rule, merge, multiselects, config save/load). - test_errors.py (4): garbage / empty / single-column uploads don't crash; duplicate-target mapping raises InputValidationError. - test_findings_panel.py (12): driven via a small standalone harness page so we test the component without faking a file_uploader. EN + ES strings, per-tool grouping, open-tool button label, untargeted expander, severity summary. Shared infrastructure in tests/gui/conftest.py: - ``stash_upload`` / ``stash_upload_without_gate`` — populate session_state to pre-pass or block the gate. - ``with_language`` — set ``ui_lang`` before run(). - ``collected_text`` — flatten title/caption/markdown/etc. into one string for substring assertions. - Auto-marking: every test in tests/gui/ gets ``@pytest.mark.gui`` via ``pytest_collection_modifyitems``, so the marker isn't per-test boilerplate. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ markers =
|
||||
e2e: end-to-end CLI / integration tests
|
||||
install: import / dependency sanity tests
|
||||
fixture_sweep: parametrized sweep over the test-cases/ folder
|
||||
gui: Streamlit AppTest-driven tests (live in tests/gui/)
|
||||
|
||||
# Warnings discipline: fail on unexpected DeprecationWarning from our own
|
||||
# code, but tolerate third-party deprecations that we can't fix.
|
||||
|
||||
6
tests/gui/__init__.py
Normal file
6
tests/gui/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""GUI tests — Streamlit AppTest-driven coverage of pages/, components/, and i18n.
|
||||
|
||||
Marked with ``@pytest.mark.gui`` so a quick core-only run can skip them
|
||||
via ``pytest -m 'not gui'``. The default ``pytest`` invocation still runs
|
||||
everything; the marker is opt-out, not opt-in.
|
||||
"""
|
||||
44
tests/gui/_findings_panel_harness.py
Normal file
44
tests/gui/_findings_panel_harness.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Test harness page for ``render_findings_panel``.
|
||||
|
||||
A standalone Streamlit page module the AppTest layer can drive
|
||||
directly. Renders the findings panel with whatever findings live in
|
||||
``st.session_state["test_findings"]`` so test code can inject a list
|
||||
and inspect what's rendered, without having to fake a file_uploader
|
||||
widget.
|
||||
|
||||
Lives next to its test file so it ships with the GUI test layer and
|
||||
never gets confused with a real page.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import streamlit as st
|
||||
|
||||
# Same sys.path bootstrap as the real pages so ``src.*`` imports work
|
||||
# regardless of how AppTest invokes the script.
|
||||
_project_root = Path(__file__).resolve().parent.parent.parent
|
||||
if str(_project_root) not in sys.path:
|
||||
sys.path.insert(0, str(_project_root))
|
||||
|
||||
|
||||
# ``st.page_link`` requires a multipage-app context (Streamlit looks up
|
||||
# the target page's URL from the app's PagesManager). AppTest doesn't
|
||||
# wire that up for a standalone page, so any ``page_link`` call raises
|
||||
# ``KeyError: 'url_pathname'`` here. We swap it for a markdown stub
|
||||
# that renders the label inline — same observable text, no nav, no
|
||||
# crash.
|
||||
def _page_link_stub(page: str, *, label: str, **_kwargs) -> None:
|
||||
st.markdown(f"[{label}]")
|
||||
|
||||
st.page_link = _page_link_stub # type: ignore[assignment]
|
||||
|
||||
from src.gui.components import hide_streamlit_chrome, render_findings_panel
|
||||
|
||||
st.set_page_config(page_title="findings test", page_icon="🧪", layout="wide")
|
||||
hide_streamlit_chrome()
|
||||
|
||||
findings = st.session_state.get("test_findings", [])
|
||||
render_findings_panel(findings)
|
||||
204
tests/gui/conftest.py
Normal file
204
tests/gui/conftest.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""Shared fixtures for the GUI test layer.
|
||||
|
||||
Streamlit's ``AppTest.from_file`` runs a page module in-process inside
|
||||
its own ScriptRunContext. Each fixture here returns either bytes (for
|
||||
the upload-session-state path) or a configured ``AppTest`` ready to
|
||||
``.run()``. Tests should NOT call ``AppTest.from_file`` directly so the
|
||||
project-root path and the default session-state stash live in one place.
|
||||
|
||||
Page paths are resolved relative to the repo root so the test suite
|
||||
works from any cwd (e.g., ``pytest`` from a subdir, IDE runners).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from streamlit.testing.v1 import AppTest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
PROJECT_ROOT: Path = Path(__file__).resolve().parent.parent.parent
|
||||
PAGES_DIR: Path = PROJECT_ROOT / "src" / "gui" / "pages"
|
||||
APP_PATH: Path = PROJECT_ROOT / "src" / "gui" / "app.py"
|
||||
SAMPLES_DIR: Path = PROJECT_ROOT / "samples"
|
||||
|
||||
|
||||
# All GUI tests get the marker automatically so a single
|
||||
# ``pytest -m 'not gui'`` skips the whole subdir.
|
||||
def pytest_collection_modifyitems(config, items):
|
||||
for item in items:
|
||||
# Only mark items collected from this subdir.
|
||||
if "tests/gui/" in str(item.fspath).replace("\\", "/"):
|
||||
item.add_marker(pytest.mark.gui)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sample data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
|
||||
def messy_sales_bytes() -> bytes:
|
||||
"""Raw bytes of the ``messy_sales.csv`` sample (mixed dup + text noise)."""
|
||||
return (SAMPLES_DIR / "messy_sales.csv").read_bytes()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def small_csv_bytes() -> bytes:
|
||||
"""A tiny CSV the tool pages can chew through fast — three rows, three
|
||||
columns, one obvious duplicate. Suitable for happy-path workflow tests
|
||||
where wall-clock matters."""
|
||||
return (
|
||||
b"name,email,phone\n"
|
||||
b"Alice,alice@gmail.com,5551234567\n"
|
||||
b"Alice,Alice@Gmail.com,(555) 123-4567\n"
|
||||
b"Bob,bob@example.com,5559876543\n"
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def small_csv_df(small_csv_bytes) -> pd.DataFrame:
|
||||
"""The ``small_csv_bytes`` parsed — for tests that need the DataFrame
|
||||
form (e.g., direct ``deduplicate()`` calls within a test)."""
|
||||
import io
|
||||
return pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AppTest builders
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _new_app(page_path: Path) -> AppTest:
|
||||
"""Build an AppTest pointed at *page_path*. Keeps the project-root /
|
||||
page-resolution boilerplate out of the test bodies."""
|
||||
if not page_path.exists():
|
||||
raise FileNotFoundError(f"GUI test target missing: {page_path}")
|
||||
return AppTest.from_file(str(page_path))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def home_app() -> AppTest:
|
||||
"""A fresh AppTest pointed at the home page (``src/gui/app.py``)."""
|
||||
return _new_app(APP_PATH)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def app_factory():
|
||||
"""Callable returning a fresh AppTest for any page slug.
|
||||
|
||||
Usage::
|
||||
|
||||
app = app_factory("1_Deduplicator")
|
||||
app.run()
|
||||
"""
|
||||
def _make(slug: str) -> AppTest:
|
||||
return _new_app(PAGES_DIR / f"{slug}.py")
|
||||
return _make
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Upload-session helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def stash_upload(app: AppTest, *, name: str, data: bytes) -> str:
|
||||
"""Pre-populate the home-screen upload stash + the gate's normalisation
|
||||
result so a tool page renders past ``require_normalization_gate()``.
|
||||
|
||||
Returns the SHA-256 hex of *data* (used as the gate key) in case the
|
||||
test wants to assert against it.
|
||||
|
||||
The gate checks::
|
||||
|
||||
- ``home_uploaded_bytes`` is set
|
||||
- ``normalization_for == sha256(home_uploaded_bytes)``
|
||||
- ``normalization_result.passed is True``
|
||||
|
||||
We synthesise a passing result via a tiny stub object that satisfies
|
||||
the gate's only attribute access (``.passed``). Tests that want to
|
||||
exercise gate-blocking behaviour should NOT call this helper — they
|
||||
should stash bytes without the normalisation result.
|
||||
"""
|
||||
sha = hashlib.sha256(data).hexdigest()
|
||||
app.session_state["home_uploaded_bytes"] = data
|
||||
app.session_state["home_uploaded_name"] = name
|
||||
app.session_state["home_uploaded_size"] = len(data)
|
||||
app.session_state["normalization_for"] = sha
|
||||
app.session_state["normalization_result"] = _PassedGateResult()
|
||||
return sha
|
||||
|
||||
|
||||
class _PassedGateResult:
|
||||
"""Minimal stand-in for the real NormalizationResult shape — the gate
|
||||
only reads ``.passed``. Using a real NormalizationResult here would
|
||||
pull in core.normalize and tie GUI tests to its constructor surface.
|
||||
"""
|
||||
|
||||
passed: bool = True
|
||||
|
||||
|
||||
def stash_upload_without_gate(app: AppTest, *, name: str, data: bytes) -> None:
|
||||
"""Stash the upload bytes but do NOT pre-pass the gate. Used by gate
|
||||
tests that want the warning + Go-to-Review button to appear."""
|
||||
app.session_state["home_uploaded_bytes"] = data
|
||||
app.session_state["home_uploaded_name"] = name
|
||||
app.session_state["home_uploaded_size"] = len(data)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# i18n helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def with_language(app: AppTest, lang: str) -> None:
|
||||
"""Set the active GUI language *before* ``app.run()``. The selector
|
||||
widget reads ``_ui_lang_select`` on first render; we set the canonical
|
||||
``ui_lang`` key (what ``current_language()`` reads) directly so the
|
||||
first render is already in the chosen language."""
|
||||
app.session_state["ui_lang"] = lang
|
||||
|
||||
|
||||
def collected_text(app: AppTest) -> str:
|
||||
"""Flatten every text-bearing element on the page into one big string
|
||||
so a test can assert ``"Maestría" in collected_text(app)`` without
|
||||
juggling individual widget collections.
|
||||
|
||||
Covers: title, header, subheader, caption, markdown, info, warning,
|
||||
error, success, code, metric labels/values, button labels, expander
|
||||
labels, page_link labels. Doesn't try to be exhaustive — if a widget
|
||||
type isn't here, add it (cheap, mechanical).
|
||||
"""
|
||||
chunks: list[str] = []
|
||||
|
||||
def _extend(seq: Iterable, attrs: tuple[str, ...] = ("value", "label", "body")) -> None:
|
||||
for el in seq:
|
||||
for a in attrs:
|
||||
v = getattr(el, a, None)
|
||||
if isinstance(v, str) and v:
|
||||
chunks.append(v)
|
||||
break # one value per element is enough
|
||||
|
||||
_extend(app.title)
|
||||
_extend(app.header)
|
||||
_extend(app.subheader)
|
||||
_extend(app.caption)
|
||||
_extend(app.markdown)
|
||||
_extend(app.info)
|
||||
_extend(app.warning)
|
||||
_extend(app.error)
|
||||
_extend(app.success)
|
||||
_extend(app.button)
|
||||
# Sidebar caption / markdown / button (Streamlit exposes the sidebar
|
||||
# as a sub-tree with the same widget collections).
|
||||
if hasattr(app, "sidebar"):
|
||||
sb = app.sidebar
|
||||
_extend(sb.markdown)
|
||||
_extend(sb.caption)
|
||||
_extend(sb.button)
|
||||
return "\n".join(chunks)
|
||||
194
tests/gui/test_advanced_panels.py
Normal file
194
tests/gui/test_advanced_panels.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""Advanced-options panel tests.
|
||||
|
||||
``config_panel`` (in ``src.gui.components``) is the dedup-page's
|
||||
expander that houses every per-column / per-strategy knob. It's the
|
||||
densest single widget surface in the GUI, so a session-state key drift
|
||||
in there cascades into every dedup session.
|
||||
|
||||
We exercise it via the Deduplicator page (rendering ``config_panel``
|
||||
in isolation requires a fake Streamlit context). The page provides
|
||||
the surrounding state; we poke widgets and verify their effects.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from .conftest import stash_upload
|
||||
|
||||
|
||||
GATED_PAGE = "1_Deduplicator"
|
||||
|
||||
|
||||
def _render_page(app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
return app
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Expander presence + collapsed state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAdvancedExpander:
|
||||
def test_advanced_options_expander_renders(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
labels = [e.label for e in app.expander]
|
||||
assert any("Advanced Options" in lbl for lbl in labels), (
|
||||
f"Advanced Options expander missing; expanders: {labels}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Algorithm selector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAlgorithmSelector:
|
||||
"""The fuzzy-algorithm dropdown drives ``Algorithm.{LEVENSHTEIN,
|
||||
JARO_WINKLER, TOKEN_SET_RATIO}`` on every column. Default value
|
||||
must be jaro_winkler — the strong-key build_default_strategies
|
||||
assumes it."""
|
||||
|
||||
def test_default_algorithm_is_jaro_winkler(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
# Find the selectbox by label.
|
||||
algo_boxes = [
|
||||
sb for sb in app.selectbox
|
||||
if sb.label == "Fuzzy algorithm"
|
||||
]
|
||||
assert len(algo_boxes) == 1
|
||||
assert algo_boxes[0].value == "jaro_winkler"
|
||||
|
||||
def test_algorithm_options_complete(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
algo = next(sb for sb in app.selectbox if sb.label == "Fuzzy algorithm")
|
||||
assert set(algo.options) == {
|
||||
"jaro_winkler", "levenshtein", "token_set_ratio",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Threshold slider
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestThresholdSlider:
|
||||
def test_default_threshold_is_85(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
thresholds = [s for s in app.slider if "Similarity" in (s.label or "")]
|
||||
assert len(thresholds) == 1
|
||||
assert thresholds[0].value == 85
|
||||
|
||||
def test_threshold_bounds(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
thr = next(s for s in app.slider if "Similarity" in (s.label or ""))
|
||||
assert thr.min == 50
|
||||
assert thr.max == 100
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Survivor rule selector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSurvivorSelector:
|
||||
def test_default_is_first(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
surv = next(
|
||||
sb for sb in app.selectbox
|
||||
if sb.label == "Survivor rule"
|
||||
)
|
||||
assert surv.value == "first"
|
||||
|
||||
def test_all_four_rules_offered(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
surv = next(sb for sb in app.selectbox if sb.label == "Survivor rule")
|
||||
assert set(surv.options) == {
|
||||
"first", "last", "most-complete", "most-recent",
|
||||
}
|
||||
|
||||
def test_selecting_most_recent_does_not_crash(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
"""When ``most-recent`` is chosen the page should reveal a
|
||||
Date column dropdown. Pin the no-crash invariant."""
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
surv = next(sb for sb in app.selectbox if sb.label == "Survivor rule")
|
||||
surv.select("most-recent").run()
|
||||
assert not app.exception
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Merge checkbox
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMergeCheckbox:
|
||||
def test_merge_default_off(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
merge_boxes = [c for c in app.checkbox if c.label == "Merge mode"]
|
||||
assert len(merge_boxes) == 1
|
||||
assert merge_boxes[0].value is False
|
||||
|
||||
def test_toggling_merge_does_not_crash(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
merge = next(c for c in app.checkbox if c.label == "Merge mode")
|
||||
merge.check().run()
|
||||
assert not app.exception
|
||||
# After checking, the value persists in session_state via the
|
||||
# widget's own key.
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Column multiselects
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestColumnMultiselects:
|
||||
"""Match-on / Strong-keys / Fuzzy multiselects use ``st.multiselect``
|
||||
on every column. Empty default = auto-detect."""
|
||||
|
||||
def test_three_multiselects_present(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
labels = {m.label for m in app.multiselect}
|
||||
assert {"Match on columns", "Strong keys", "Fuzzy columns"} <= labels
|
||||
|
||||
def test_defaults_are_empty(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
for ms in app.multiselect:
|
||||
if ms.label in {
|
||||
"Match on columns", "Strong keys", "Fuzzy columns",
|
||||
}:
|
||||
assert ms.value == [], (
|
||||
f"{ms.label!r} default should be []; got {ms.value}"
|
||||
)
|
||||
|
||||
def test_options_match_dataframe_columns(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
df_cols = list(app.session_state["df"].columns)
|
||||
for ms in app.multiselect:
|
||||
if ms.label in {
|
||||
"Match on columns", "Strong keys", "Fuzzy columns",
|
||||
}:
|
||||
assert list(ms.options) == df_cols
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Save / Load config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestConfigSaveLoadButtons:
|
||||
def test_save_settings_button_present(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
labels = [b.label for b in app.button]
|
||||
assert any("Save current settings" in lbl for lbl in labels)
|
||||
|
||||
def test_config_file_uploader_present(self, app_factory, small_csv_bytes):
|
||||
app = _render_page(app_factory, small_csv_bytes)
|
||||
# AppTest exposes uploaders via ``app.file_uploader``. There
|
||||
# are two: the main file (pickup-or-upload) and the config
|
||||
# JSON. Make sure the config one is there.
|
||||
labels = [u.label for u in app.file_uploader]
|
||||
assert any("Load config profile" in lbl for lbl in labels), (
|
||||
f"config uploader missing; uploaders: {labels}"
|
||||
)
|
||||
181
tests/gui/test_chrome.py
Normal file
181
tests/gui/test_chrome.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""Chrome tests — language selector, hide_streamlit_chrome, quit flow.
|
||||
|
||||
These verify the GUI plumbing that every page depends on. Failures here
|
||||
cascade into every other page, so they run cheap and run first
|
||||
(alphabetical name ordering after smoke).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from .conftest import collected_text, with_language
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# hide_streamlit_chrome mounts the selector
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHideChromeMountsSelector:
|
||||
"""``hide_streamlit_chrome()`` is the one place the language selector
|
||||
is mounted. Every page that hides chrome (= every page) must get
|
||||
exactly one sidebar selectbox with the i18n label."""
|
||||
|
||||
def test_home_has_one_sidebar_selectbox(self, home_app):
|
||||
home_app.run()
|
||||
# Only one selectbox in the sidebar today; if a page adds
|
||||
# another, this becomes a weaker bound.
|
||||
assert len(home_app.sidebar.selectbox) == 1, (
|
||||
"expected exactly one sidebar selectbox (the language picker); "
|
||||
f"got {len(home_app.sidebar.selectbox)}"
|
||||
)
|
||||
|
||||
def test_selector_label_is_localized(self, home_app):
|
||||
with_language(home_app, "es")
|
||||
home_app.run()
|
||||
labels = [sb.label for sb in home_app.sidebar.selectbox]
|
||||
assert "Idioma" in labels, (
|
||||
f"Spanish selector should be labelled 'Idioma'; got {labels}"
|
||||
)
|
||||
|
||||
def test_selector_label_english_default(self, home_app):
|
||||
home_app.run() # no with_language → default = en
|
||||
labels = [sb.label for sb in home_app.sidebar.selectbox]
|
||||
assert "Language" in labels
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Language selector switches session state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestLanguageSwitch:
|
||||
"""Picking 'es' in the selector flips ``st.session_state['ui_lang']``
|
||||
and re-renders the page with Spanish strings on the next run."""
|
||||
|
||||
def test_default_language_is_english(self, home_app):
|
||||
home_app.run()
|
||||
# AppTest's session_state proxy doesn't implement .get(); use
|
||||
# membership check + attribute access. Absence == default ("en").
|
||||
lang = home_app.session_state["ui_lang"] if "ui_lang" in home_app.session_state else "en"
|
||||
assert lang == "en"
|
||||
text = collected_text(home_app)
|
||||
assert "Data Cleaning Mastery" in text
|
||||
|
||||
def test_selecting_spanish_persists_in_session(self, home_app):
|
||||
home_app.run()
|
||||
selector = home_app.sidebar.selectbox[0]
|
||||
selector.select("es").run()
|
||||
assert home_app.session_state["ui_lang"] == "es"
|
||||
|
||||
def test_selecting_spanish_re_renders_in_spanish(self, home_app):
|
||||
home_app.run()
|
||||
selector = home_app.sidebar.selectbox[0]
|
||||
selector.select("es").run()
|
||||
text = collected_text(home_app)
|
||||
assert "Maestría" in text, (
|
||||
"after selecting Spanish, the home title should switch to "
|
||||
f"'🧹 DataTools — Maestría…'; got:\n{text[:300]}"
|
||||
)
|
||||
|
||||
def test_selecting_back_to_english_reverts(self, home_app):
|
||||
# Start in Spanish, then flip back.
|
||||
with_language(home_app, "es")
|
||||
home_app.run()
|
||||
assert "Maestría" in collected_text(home_app)
|
||||
|
||||
selector = home_app.sidebar.selectbox[0]
|
||||
selector.select("en").run()
|
||||
text = collected_text(home_app)
|
||||
assert "Data Cleaning Mastery" in text
|
||||
assert "Maestría" not in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Footer + page_title localization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestLocalizedChrome:
|
||||
"""A spot-check on the parts of the chrome that aren't the selector:
|
||||
the bottom footer caption and the home-page hero text. Other strings
|
||||
are pinned indirectly by ``TestEveryPageRenders.test_expected_*``."""
|
||||
|
||||
def test_footer_english(self, home_app):
|
||||
home_app.run()
|
||||
text = collected_text(home_app)
|
||||
assert "Your data never leaves" in text
|
||||
|
||||
def test_footer_spanish(self, home_app):
|
||||
with_language(home_app, "es")
|
||||
home_app.run()
|
||||
text = collected_text(home_app)
|
||||
assert "Tus datos nunca salen" in text
|
||||
|
||||
def test_upload_section_heading_localizes(self, home_app):
|
||||
with_language(home_app, "es")
|
||||
home_app.run()
|
||||
text = collected_text(home_app)
|
||||
# ``📤 Sube un archivo para empezar`` from the es pack.
|
||||
assert "Sube un archivo" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Quit / Close page
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestQuitButtonRenders:
|
||||
"""The Close page must show the localized title, body, and the
|
||||
Close-the-app button. We don't actually click the button — that
|
||||
would call ``os._exit(0)`` and kill the test process. We only
|
||||
assert the button is present and its label is localized."""
|
||||
|
||||
def test_close_page_english(self, app_factory):
|
||||
app = app_factory("99_Close")
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "Close DataTools" in text
|
||||
labels = [b.label for b in app.button]
|
||||
assert any("Close the app" in lbl for lbl in labels), (
|
||||
f"Close-the-app button missing; buttons: {labels}"
|
||||
)
|
||||
|
||||
def test_close_page_spanish(self, app_factory):
|
||||
app = app_factory("99_Close")
|
||||
with_language(app, "es")
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "Cerrar DataTools" in text
|
||||
labels = [b.label for b in app.button]
|
||||
assert any("Cerrar la app" in lbl for lbl in labels), (
|
||||
f"Spanish Close button missing; buttons: {labels}"
|
||||
)
|
||||
|
||||
def test_close_body_describes_unsaved_work_warning_es(self, app_factory):
|
||||
app = app_factory("99_Close")
|
||||
with_language(app, "es")
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "trabajo sin guardar" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool cards use localized names on the home grid
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHomeToolGridLocalization:
|
||||
"""The home grid pulls tool display names through ``tool_name()`` in
|
||||
``tools_registry``. The Spanish pack provides translations for every
|
||||
tool id; a regression in that wiring would make Spanish users see
|
||||
English names. Pin a few representative ones."""
|
||||
|
||||
@pytest.mark.parametrize("needle", [
|
||||
"Eliminador de duplicados",
|
||||
"Limpiador de texto",
|
||||
"Estandarizador de formatos",
|
||||
"Gestor de valores faltantes",
|
||||
"Mapeador de columnas",
|
||||
])
|
||||
def test_es_tool_name_on_home_grid(self, home_app, needle):
|
||||
with_language(home_app, "es")
|
||||
home_app.run()
|
||||
text = collected_text(home_app)
|
||||
assert needle in text, f"missing localized tool name {needle!r}"
|
||||
205
tests/gui/test_dedup_review.py
Normal file
205
tests/gui/test_dedup_review.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Dedup review widget tests.
|
||||
|
||||
``match_group_card`` from ``src.gui.components`` has two modes (decided
|
||||
/ undecided) and a Confirm/Undo flow keyed by session_state. We test
|
||||
each state by exercising the parent Deduplicator page end to end and
|
||||
then poking at ``review_decisions`` directly.
|
||||
|
||||
Why not unit-test ``match_group_card`` in isolation? AppTest needs a
|
||||
real page module, not a function call, so we drive the page and verify
|
||||
the side effects on session_state. This catches integration bugs the
|
||||
unit test couldn't see (e.g., session-state key drift between the
|
||||
page and the component).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from .conftest import collected_text, stash_upload
|
||||
|
||||
|
||||
# We need a frame that produces at least one match group. The 3-row
|
||||
# small_csv has two Alice rows that share an email (case-folded) → one
|
||||
# group of two members.
|
||||
def _run_with_results(app):
|
||||
"""Drive the page through to the post-Find-Duplicates state.
|
||||
|
||||
1. First ``run()`` — page picks up the stashed upload, reads it, and
|
||||
renders the preview + Find Duplicates button. ``result`` is None.
|
||||
2. Click Find Duplicates and ``run()`` again — page calls
|
||||
``deduplicate()`` and stashes the result. Match group cards
|
||||
render on this pass.
|
||||
|
||||
Mirrors what a real user does instead of trying to short-circuit
|
||||
the page by stashing ``result`` directly (the page resets it to
|
||||
None on every new upload).
|
||||
"""
|
||||
app.run()
|
||||
target = next(b for b in app.button if "Find Duplicates" in b.label)
|
||||
target.click().run()
|
||||
|
||||
|
||||
class TestMatchGroupCardUndecided:
|
||||
"""A freshly-found group has no decision → the card renders the
|
||||
interactive editor + Confirm button."""
|
||||
|
||||
def test_card_expander_present(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
_run_with_results(app)
|
||||
# An expander per group. The dedup result should produce
|
||||
# exactly one match group on this fixture.
|
||||
result = app.session_state["result"]
|
||||
assert len(result.match_groups) >= 1, (
|
||||
"fixture should produce at least one match group"
|
||||
)
|
||||
# Match group cards use ``st.expander``. AppTest exposes them
|
||||
# via ``app.expander``.
|
||||
labels = [e.label for e in app.expander]
|
||||
assert any("Group 1" in lbl for lbl in labels), (
|
||||
f"undecided card expander missing; got: {labels}"
|
||||
)
|
||||
|
||||
def test_confirm_button_renders_for_undecided_group(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
_run_with_results(app)
|
||||
# Confirm button is keyed ``confirm_<group_id>``.
|
||||
result = app.session_state["result"]
|
||||
gid = result.match_groups[0].group_id
|
||||
labels = [b.label for b in app.button]
|
||||
# Streamlit renders the button label as "Confirm".
|
||||
assert any(lbl == "Confirm" for lbl in labels), (
|
||||
f"undecided card missing Confirm button; buttons: {labels}"
|
||||
)
|
||||
|
||||
|
||||
class TestBatchActions:
|
||||
"""Accept All / Reject All / Clear Decisions are the three batch
|
||||
buttons that mutate ``review_decisions`` across all groups."""
|
||||
|
||||
def test_accept_all_populates_decisions(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
_run_with_results(app)
|
||||
|
||||
target = next(b for b in app.button if b.label == "Accept All")
|
||||
target.click().run()
|
||||
decisions = app.session_state["review_decisions"]
|
||||
result = app.session_state["result"]
|
||||
assert len(decisions) == len(result.match_groups), (
|
||||
"Accept All should record a decision per group; "
|
||||
f"got {len(decisions)} decisions for "
|
||||
f"{len(result.match_groups)} groups"
|
||||
)
|
||||
# Each Accept-All decision keeps exactly one row (the survivor).
|
||||
for d in decisions.values():
|
||||
assert len(d["keep_indices"]) == 1
|
||||
|
||||
def test_reject_all_keeps_every_member(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
_run_with_results(app)
|
||||
|
||||
target = next(b for b in app.button if b.label == "Reject All")
|
||||
target.click().run()
|
||||
decisions = app.session_state["review_decisions"]
|
||||
result = app.session_state["result"]
|
||||
# Reject = keep every member → keep_indices == row_indices.
|
||||
for g in result.match_groups:
|
||||
assert set(decisions[g.group_id]["keep_indices"]) == set(g.row_indices)
|
||||
|
||||
def test_clear_decisions_wipes_state(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
_run_with_results(app)
|
||||
|
||||
# Populate decisions via Accept All, then Clear, then verify.
|
||||
accept = next(b for b in app.button if b.label == "Accept All")
|
||||
accept.click().run()
|
||||
assert app.session_state["review_decisions"], (
|
||||
"precondition failed: Accept All didn't populate"
|
||||
)
|
||||
|
||||
clear = next(b for b in app.button if "Clear Decisions" in b.label)
|
||||
clear.click().run()
|
||||
assert app.session_state["review_decisions"] == {}
|
||||
|
||||
|
||||
class TestApplyReviewDecisions:
|
||||
"""The component-layer ``apply_review_decisions`` function is the
|
||||
actual semantic engine; unit-test it directly. The GUI just feeds
|
||||
its output to a download button."""
|
||||
|
||||
def test_keep_all_means_no_rows_removed(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
from src.gui.components import apply_review_decisions
|
||||
from src.core import deduplicate
|
||||
import io
|
||||
|
||||
df = pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False)
|
||||
result = deduplicate(df, preview=True)
|
||||
decisions = {
|
||||
g.group_id: {
|
||||
"keep_indices": list(g.row_indices),
|
||||
"overrides": {},
|
||||
}
|
||||
for g in result.match_groups
|
||||
}
|
||||
deduped, removed = apply_review_decisions(df, result.match_groups, decisions)
|
||||
assert len(deduped) == len(df), (
|
||||
"Keep-All should preserve every row"
|
||||
)
|
||||
assert removed.empty
|
||||
|
||||
def test_merge_decision_drops_losers(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
from src.gui.components import apply_review_decisions
|
||||
from src.core import deduplicate
|
||||
import io
|
||||
|
||||
df = pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False)
|
||||
result = deduplicate(df, preview=True)
|
||||
# Merge each group to its first member.
|
||||
decisions = {
|
||||
g.group_id: {
|
||||
"keep_indices": [g.row_indices[0]],
|
||||
"overrides": {},
|
||||
}
|
||||
for g in result.match_groups
|
||||
}
|
||||
deduped, removed = apply_review_decisions(df, result.match_groups, decisions)
|
||||
expected_removed = sum(len(g.row_indices) - 1 for g in result.match_groups)
|
||||
assert len(removed) == expected_removed
|
||||
assert len(deduped) == len(df) - expected_removed
|
||||
|
||||
def test_column_override_applies_to_survivor(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
from src.gui.components import apply_review_decisions
|
||||
from src.core import deduplicate
|
||||
import io
|
||||
|
||||
df = pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False)
|
||||
result = deduplicate(df, preview=True)
|
||||
group = result.match_groups[0]
|
||||
survivor = group.row_indices[0]
|
||||
decisions = {
|
||||
group.group_id: {
|
||||
"keep_indices": [survivor],
|
||||
"overrides": {"phone": "OVERRIDE_VALUE"},
|
||||
}
|
||||
}
|
||||
deduped, _ = apply_review_decisions(df, result.match_groups, decisions)
|
||||
# The survivor row in ``deduped`` must carry the override. Find
|
||||
# it via the original (non-loser) name.
|
||||
match = deduped[deduped["phone"] == "OVERRIDE_VALUE"]
|
||||
assert len(match) == 1, (
|
||||
f"override didn't apply; deduped frame: {deduped.to_dict()}"
|
||||
)
|
||||
103
tests/gui/test_errors.py
Normal file
103
tests/gui/test_errors.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""Error-display tests.
|
||||
|
||||
Tool pages catch core exceptions (via ``format_for_user``) and surface
|
||||
them through ``st.error``. We verify that the message structure makes
|
||||
it through the GUI layer, not just that it gets raised by core (the
|
||||
core tests already cover that).
|
||||
|
||||
These tests deliberately feed garbage bytes / malformed content and
|
||||
check the rendered error, not just that the page didn't crash.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from .conftest import collected_text, stash_upload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Malformed upload
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMalformedUploadErrors:
|
||||
"""Bytes that look like a CSV but aren't parseable. The Deduplicator
|
||||
page wraps ``read_file`` failures in an ``st.error`` with the file
|
||||
name and the structured ``format_for_user`` output."""
|
||||
|
||||
@pytest.fixture
|
||||
def garbage_bytes(self) -> bytes:
|
||||
"""Binary garbage with embedded NULs and non-UTF-8 sequences —
|
||||
triggers the gate's repair pipeline failures, ultimately
|
||||
produces a parse error on the dedup page if it makes it that
|
||||
far. We bypass the gate so the dedup page sees it raw."""
|
||||
return b"\xff\xfe\x00\x01\x02garbage,without,structure\n\x00\xff" * 50
|
||||
|
||||
def test_garbage_bytes_do_not_crash_dedup(
|
||||
self, app_factory, garbage_bytes,
|
||||
):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="garbage.csv", data=garbage_bytes)
|
||||
app.run()
|
||||
# The page should either render an error OR successfully parse
|
||||
# the bytes as text (the gate has been pre-passed, so the
|
||||
# pre-parse repair didn't run on this fixture). We just need
|
||||
# no uncaught Python exception.
|
||||
assert not app.exception
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Empty upload
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEmptyUpload:
|
||||
"""Zero-byte upload — must be handled gracefully."""
|
||||
|
||||
def test_empty_bytes_renders(self, app_factory):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="empty.csv", data=b"")
|
||||
app.run()
|
||||
# Either: (a) we render an error, or (b) we render the page
|
||||
# with no preview. Either is acceptable — what's NOT is an
|
||||
# uncaught Python exception bubbling up.
|
||||
assert not app.exception
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Single-column file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSingleColumnFile:
|
||||
"""A 1-column CSV is technically valid but produces no auto-detect
|
||||
strategies. The page must explain this to the user rather than
|
||||
silently producing zero match groups."""
|
||||
|
||||
def test_single_column_does_not_crash(self, app_factory):
|
||||
app = app_factory("1_Deduplicator")
|
||||
data = b"only_col\nvalue1\nvalue2\nvalue3\n"
|
||||
stash_upload(app, name="single.csv", data=data)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Header collision in column_mapper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestColumnMapperDuplicateTarget:
|
||||
"""The column mapper rejects mappings where two source columns
|
||||
point at the same target. This is surfaced as an error.
|
||||
|
||||
Test approach: ``map_columns`` validates upfront via core, and
|
||||
raises ``InputValidationError`` — the GUI wraps it. We invoke the
|
||||
core function directly to pin the validation contract."""
|
||||
|
||||
def test_duplicate_target_raises(self):
|
||||
import pandas as pd
|
||||
from src.core.column_mapper import map_columns, MapOptions
|
||||
from src.core.errors import InputValidationError
|
||||
|
||||
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
|
||||
opts = MapOptions(mapping={"a": "name", "b": "name"})
|
||||
with pytest.raises(InputValidationError):
|
||||
map_columns(df, opts)
|
||||
227
tests/gui/test_findings_panel.py
Normal file
227
tests/gui/test_findings_panel.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""Findings panel rendering tests.
|
||||
|
||||
``render_findings_panel`` is the central widget on the home page and
|
||||
the Review page; failures here cascade into the user's first
|
||||
impression. We drive it via a tiny test harness page
|
||||
(``_findings_panel_harness.py``) so the test can inject findings
|
||||
directly into session state — no file_uploader simulation needed.
|
||||
|
||||
We verify:
|
||||
|
||||
- Empty findings list → localized "no issues" success message.
|
||||
- Findings with tool ids → one expander per tool, labeled in the
|
||||
active language.
|
||||
- Header + severity summary render at the top.
|
||||
- Untargeted findings land in the "Other / file-level" expander.
|
||||
|
||||
Pack-key parity is already pinned by ``test_lang_packs.py``; this
|
||||
file pins the call sites instead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from streamlit.testing.v1 import AppTest
|
||||
|
||||
from .conftest import PROJECT_ROOT, collected_text, with_language
|
||||
|
||||
HARNESS_PATH = Path(__file__).resolve().parent / "_findings_panel_harness.py"
|
||||
|
||||
|
||||
def _harness(findings, lang: str = "en") -> AppTest:
|
||||
"""Build an AppTest of the harness page with ``findings`` pre-stashed."""
|
||||
app = AppTest.from_file(str(HARNESS_PATH))
|
||||
app.session_state["test_findings"] = findings
|
||||
if lang != "en":
|
||||
app.session_state["ui_lang"] = lang
|
||||
return app
|
||||
|
||||
|
||||
def _make_finding(tool: str = "", **overrides):
|
||||
"""Build a minimal :class:`Finding` object. ``Finding`` is a frozen
|
||||
dataclass; constructor signature is well-pinned by core tests, so
|
||||
we use it directly here rather than building dicts."""
|
||||
from src.core.analyze import Finding
|
||||
kwargs = dict(
|
||||
id="test_finding",
|
||||
severity="warn",
|
||||
tool=tool,
|
||||
count=1,
|
||||
description="A test finding.",
|
||||
column=None,
|
||||
samples=[],
|
||||
confidence="medium",
|
||||
fix_action="",
|
||||
)
|
||||
kwargs.update(overrides)
|
||||
return Finding(**kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Empty findings → success message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEmptyFindings:
|
||||
def test_empty_renders_no_issues_english(self):
|
||||
app = _harness([])
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "No issues detected" in text
|
||||
|
||||
def test_empty_renders_no_issues_spanish(self):
|
||||
app = _harness([], lang="es")
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "No se detectaron problemas" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Header text
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHeader:
|
||||
def test_header_english(self):
|
||||
app = _harness([_make_finding(tool="02_text_cleaner")])
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "Detected issues" in text
|
||||
|
||||
def test_header_spanish(self):
|
||||
app = _harness([_make_finding(tool="02_text_cleaner")], lang="es")
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "Problemas detectados" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-tool grouping → one expander per tool id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestGrouping:
|
||||
def test_findings_grouped_into_per_tool_expanders(self):
|
||||
findings = [
|
||||
_make_finding(tool="02_text_cleaner", id="whitespace_padding"),
|
||||
_make_finding(tool="02_text_cleaner", id="nbsp_padding"),
|
||||
_make_finding(tool="03_format_standardizer", id="mixed_case_email"),
|
||||
]
|
||||
app = _harness(findings)
|
||||
app.run()
|
||||
labels = [e.label for e in app.expander]
|
||||
# Two unique tools → two expanders. Each label carries the
|
||||
# tool's display name + finding count.
|
||||
text_cleaner_expanders = [lbl for lbl in labels if "Text Cleaner" in lbl]
|
||||
format_expanders = [lbl for lbl in labels if "Format Standardizer" in lbl]
|
||||
assert len(text_cleaner_expanders) == 1, (
|
||||
f"expected one Text Cleaner expander; got: {labels}"
|
||||
)
|
||||
assert len(format_expanders) == 1, (
|
||||
f"expected one Format Standardizer expander; got: {labels}"
|
||||
)
|
||||
|
||||
def test_tool_names_localize_in_spanish(self):
|
||||
findings = [_make_finding(tool="02_text_cleaner")]
|
||||
app = _harness(findings, lang="es")
|
||||
app.run()
|
||||
labels = [e.label for e in app.expander]
|
||||
assert any("Limpiador de texto" in lbl for lbl in labels), (
|
||||
f"Spanish tool name missing; expanders: {labels}"
|
||||
)
|
||||
|
||||
def test_finding_count_in_expander_label(self):
|
||||
findings = [
|
||||
_make_finding(tool="02_text_cleaner", id=f"f{i}")
|
||||
for i in range(3)
|
||||
]
|
||||
app = _harness(findings)
|
||||
app.run()
|
||||
labels = [e.label for e in app.expander]
|
||||
# Pack template: "{tool} — {n} finding(s)"
|
||||
text_cleaner_label = next(l for l in labels if "Text Cleaner" in l)
|
||||
assert "3" in text_cleaner_label, (
|
||||
f"expected count '3' in expander label; got {text_cleaner_label!r}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Open-tool button localizes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestOpenToolButton:
|
||||
"""Each tool section has an ``st.page_link`` to jump to that tool's
|
||||
page. AppTest exposes page_links as ``app.button`` entries with
|
||||
label ``"Open {tool} →"`` (English) / ``"Abrir {tool} →"`` (Spanish)."""
|
||||
|
||||
def test_open_tool_label_english(self):
|
||||
findings = [_make_finding(tool="02_text_cleaner")]
|
||||
app = _harness(findings)
|
||||
app.run()
|
||||
# ``st.page_link`` may show up under ``app.button`` or in the
|
||||
# raw markdown. We probe both.
|
||||
text = collected_text(app)
|
||||
# Pack template: "Open {tool} →"
|
||||
assert "Open Text Cleaner" in text
|
||||
|
||||
def test_open_tool_label_spanish(self):
|
||||
findings = [_make_finding(tool="02_text_cleaner")]
|
||||
app = _harness(findings, lang="es")
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
# Pack template: "Abrir {tool} →"
|
||||
assert "Abrir Limpiador de texto" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Untargeted findings (file-level) go in the "Other" expander
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUntargetedFindings:
|
||||
def test_untargeted_goes_to_other_expander_en(self):
|
||||
findings = [
|
||||
_make_finding(tool="", id="csv_bom_stripped"),
|
||||
_make_finding(tool="02_text_cleaner", id="nbsp_padding"),
|
||||
]
|
||||
app = _harness(findings)
|
||||
app.run()
|
||||
labels = [e.label for e in app.expander]
|
||||
# Pack template: "Other / file-level — {n} finding(s)"
|
||||
assert any("Other / file-level" in lbl for lbl in labels), (
|
||||
f"untargeted expander missing; got: {labels}"
|
||||
)
|
||||
|
||||
def test_untargeted_label_spanish(self):
|
||||
findings = [_make_finding(tool="", id="csv_bom_stripped")]
|
||||
app = _harness(findings, lang="es")
|
||||
app.run()
|
||||
labels = [e.label for e in app.expander]
|
||||
# Spanish pack: "Otros / a nivel de archivo — {n} hallazgo(s)"
|
||||
assert any("Otros / a nivel de archivo" in lbl for lbl in labels), (
|
||||
f"Spanish 'Other' expander missing; got: {labels}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Severity summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSeveritySummary:
|
||||
"""The panel renders a per-severity summary caption like
|
||||
``⚠️ 2 warn · ℹ️ 1 info``. We pin the icon + count rendering."""
|
||||
|
||||
def test_severity_icons_render(self):
|
||||
findings = [
|
||||
_make_finding(tool="02_text_cleaner", severity="warn"),
|
||||
_make_finding(tool="02_text_cleaner", severity="warn"),
|
||||
_make_finding(tool="03_format_standardizer", severity="info"),
|
||||
]
|
||||
app = _harness(findings)
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
# Icons live in the per-language pack ("findings.severity_*").
|
||||
# The summary template is shared between languages.
|
||||
assert "⚠️" in text or "warn" in text
|
||||
# Counts present.
|
||||
assert "2 warn" in text or "2 warn" in text
|
||||
157
tests/gui/test_gate.py
Normal file
157
tests/gui/test_gate.py
Normal file
@@ -0,0 +1,157 @@
|
||||
"""Gate tests — ``require_normalization_gate()`` behaviour.
|
||||
|
||||
The gate sits between every tool page and the user's data. Three states
|
||||
exist, each pinned here:
|
||||
|
||||
1. **No upload** — gate is a no-op; the page proceeds and its own
|
||||
uploader handles the file.
|
||||
2. **Upload but no normalization result** — gate shows a warning and a
|
||||
"Go to Review & Normalize" button, then ``st.stop()`` short-circuits
|
||||
the rest of the page.
|
||||
3. **Upload + matching passed normalization** — gate is a no-op; the
|
||||
page proceeds.
|
||||
|
||||
We exercise the gate via the Deduplicator page (any tool page would
|
||||
work; dedup is the smallest one that doesn't depend on heavy widgets).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from .conftest import (
|
||||
collected_text,
|
||||
stash_upload,
|
||||
stash_upload_without_gate,
|
||||
with_language,
|
||||
)
|
||||
|
||||
|
||||
# Deduplicator is our canary — it calls ``require_normalization_gate``
|
||||
# on the second line of the module. If the gate blocks, the dedup-
|
||||
# specific title shouldn't even render.
|
||||
GATED_PAGE = "1_Deduplicator"
|
||||
|
||||
|
||||
class TestGateNoUpload:
|
||||
"""No upload → the gate exits early and the page renders normally,
|
||||
showing its own file uploader. (This is the "user opened the dedup
|
||||
page first instead of coming from home" path.)"""
|
||||
|
||||
def test_no_upload_lets_page_render(self, app_factory):
|
||||
app = app_factory(GATED_PAGE)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
# The dedup page title is the unambiguous signal that the gate
|
||||
# didn't short-circuit.
|
||||
assert "Deduplicator" in text
|
||||
|
||||
def test_no_upload_no_gate_warning(self, app_factory):
|
||||
app = app_factory(GATED_PAGE)
|
||||
app.run()
|
||||
# The gate's warning string starts with the upload filename. No
|
||||
# warning should be present when there's no upload.
|
||||
for w in app.warning:
|
||||
assert "normalization gate" not in (w.body or "")
|
||||
|
||||
|
||||
class TestGateBlocksWithoutNormalization:
|
||||
"""Upload present but no passing normalization → gate fires:
|
||||
warning + Go-to-Review button + page short-circuit."""
|
||||
|
||||
def test_gate_warning_renders(self, app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
warnings = [w.body for w in app.warning if w.body]
|
||||
joined = " ".join(warnings)
|
||||
assert "normalization gate" in joined, (
|
||||
f"expected gate warning; got warnings: {warnings}"
|
||||
)
|
||||
assert "messy.csv" in joined, (
|
||||
"gate warning should name the offending file"
|
||||
)
|
||||
|
||||
def test_gate_renders_go_to_review_button(self, app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
labels = [b.label for b in app.button]
|
||||
assert any("Review & Normalize" in lbl for lbl in labels), (
|
||||
f"missing 'Go to Review & Normalize' button; got: {labels}"
|
||||
)
|
||||
|
||||
def test_gate_short_circuits_page(self, app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
# When the gate fires it calls ``st.stop()`` after the warning.
|
||||
# The page-body widgets (e.g., the advanced-options expander, the
|
||||
# dedup-strategy widgets) must NOT be present.
|
||||
labels = [b.label for b in app.button]
|
||||
# The Run-Dedup primary action lives below the gate — make sure
|
||||
# the gate killed the render before it.
|
||||
assert not any("Run Deduplication" in lbl for lbl in labels), (
|
||||
f"gate failed to short-circuit; saw button: {labels}"
|
||||
)
|
||||
|
||||
def test_gate_warning_localizes_to_spanish(self, app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
with_language(app, "es")
|
||||
stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
warnings = " ".join(w.body for w in app.warning if w.body)
|
||||
# Spanish pack: ``debe pasar la verificación de normalización CSV``.
|
||||
assert "normalización" in warnings
|
||||
|
||||
def test_gate_button_localizes_to_spanish(self, app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
with_language(app, "es")
|
||||
stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
labels = [b.label for b in app.button]
|
||||
assert any("Revisar y Normalizar" in lbl for lbl in labels), (
|
||||
f"Spanish gate button missing; got: {labels}"
|
||||
)
|
||||
|
||||
|
||||
class TestGateAllowsWithPassedNormalization:
|
||||
"""Upload + passed normalization → gate is a no-op and the page
|
||||
renders past the gate."""
|
||||
|
||||
def test_passed_gate_lets_page_render(self, app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
assert not app.exception, f"page raised past gate: {app.exception}"
|
||||
# The pickup banner uses the upload name — that's our signal
|
||||
# that the gate let us through AND the pickup helper engaged.
|
||||
text = collected_text(app)
|
||||
assert "messy.csv" in text
|
||||
|
||||
|
||||
class TestGateMismatchedHash:
|
||||
"""Upload changes (different bytes) but normalization_for still
|
||||
points at the old hash → gate fires again because the result is
|
||||
stale. Pins the security-relevant "stale fix doesn't carry over to
|
||||
a new file" invariant."""
|
||||
|
||||
def test_stale_normalization_blocks_new_upload(self, app_factory, small_csv_bytes):
|
||||
app = app_factory(GATED_PAGE)
|
||||
# Stash bytes A but a normalization_for hash that points at B.
|
||||
app.session_state["home_uploaded_bytes"] = small_csv_bytes
|
||||
app.session_state["home_uploaded_name"] = "new.csv"
|
||||
app.session_state["home_uploaded_size"] = len(small_csv_bytes)
|
||||
app.session_state["normalization_for"] = "different-hash-from-an-old-upload"
|
||||
|
||||
# A passed-result object exists but is keyed to a different file.
|
||||
class _Passed:
|
||||
passed = True
|
||||
app.session_state["normalization_result"] = _Passed()
|
||||
|
||||
app.run()
|
||||
warnings = " ".join(w.body for w in app.warning if w.body)
|
||||
assert "normalization gate" in warnings, (
|
||||
"stale gate result should not unlock a new upload"
|
||||
)
|
||||
147
tests/gui/test_smoke.py
Normal file
147
tests/gui/test_smoke.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Smoke tests: every page renders without exception in EN and ES.
|
||||
|
||||
The cheapest, highest-value GUI tests in the project. They catch:
|
||||
|
||||
- Page-level Python errors (import failures, syntax errors that
|
||||
``ast.parse`` misses because they're runtime, e.g., a missing
|
||||
attribute on a module).
|
||||
- i18n pack key drift (a string that used to render in EN now renders
|
||||
literally as ``"chrome.language_label"`` because someone renamed the
|
||||
key in en.json but forgot es.json or the call site).
|
||||
- Streamlit API churn that breaks ``set_page_config`` /
|
||||
``hide_streamlit_chrome`` on a single page.
|
||||
|
||||
What they don't cover: user interactions. Those live in the workflow
|
||||
tests.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from .conftest import collected_text, with_language
|
||||
|
||||
|
||||
# Every page that ships in the sidebar nav. Slugs match the filenames
|
||||
# under ``src/gui/pages/`` so failures point at a real file.
|
||||
PAGE_SLUGS = [
|
||||
"0_Review",
|
||||
"1_Deduplicator",
|
||||
"2_Text_Cleaner",
|
||||
"3_Format_Standardizer",
|
||||
"4_Missing_Values",
|
||||
"5_Column_Mapper",
|
||||
"6_Outlier_Detector",
|
||||
"7_Multi_File_Merger",
|
||||
"8_Validator_Reporter",
|
||||
"9_Pipeline_Runner",
|
||||
"99_Close",
|
||||
]
|
||||
|
||||
|
||||
# Substrings that must appear on each page for each language.
|
||||
#
|
||||
# v1.6 coverage reality (also documented in docs/USER-GUIDE.md §3.4):
|
||||
# only the home page, the Close page, and the shared chrome /
|
||||
# components ship Spanish strings. Per-tool page bodies are still
|
||||
# hard-coded English in both modes — translating them is tracked as a
|
||||
# follow-up. The substrings below reflect that reality: a page that
|
||||
# isn't translated yet asserts the same English substring under both
|
||||
# languages. The fact that the page *renders at all* in 'es' is still
|
||||
# the value of the smoke test.
|
||||
#
|
||||
# When a page gains real Spanish translation, flip its 'es' entry to
|
||||
# the localized substring — the test surface stays the same.
|
||||
EXPECTED_SUBSTRINGS: dict[str, dict[str, str]] = {
|
||||
"0_Review": {"en": "Review", "es": "Review"},
|
||||
"1_Deduplicator": {"en": "Deduplicator", "es": "Deduplicator"},
|
||||
"2_Text_Cleaner": {"en": "Text Cleaner", "es": "Text Cleaner"},
|
||||
"3_Format_Standardizer": {"en": "Format", "es": "Format"},
|
||||
"4_Missing_Values": {"en": "Missing", "es": "Missing"},
|
||||
"5_Column_Mapper": {"en": "Column", "es": "Column"},
|
||||
"6_Outlier_Detector": {"en": "Outlier", "es": "Outlier"},
|
||||
"7_Multi_File_Merger": {"en": "Merger", "es": "Merger"},
|
||||
"8_Validator_Reporter": {"en": "Validator", "es": "Validator"},
|
||||
"9_Pipeline_Runner": {"en": "Pipeline", "es": "Pipeline"},
|
||||
"99_Close": {"en": "Close DataTools", "es": "Cerrar DataTools"},
|
||||
}
|
||||
|
||||
|
||||
class TestHomePageRenders:
|
||||
"""The home page is the only one with full EN/ES coverage in v1.6.
|
||||
Pin it independently so its translation is non-regressable."""
|
||||
|
||||
@pytest.mark.parametrize("lang,expected", [
|
||||
("en", "DataTools — Data Cleaning Mastery"),
|
||||
("es", "DataTools — Maestría en limpieza de datos"),
|
||||
])
|
||||
def test_home_renders_in_language(self, home_app, lang, expected):
|
||||
with_language(home_app, lang)
|
||||
home_app.run()
|
||||
assert home_app.exception is None or home_app.exception == [], (
|
||||
f"home page raised: {home_app.exception}"
|
||||
)
|
||||
assert expected in collected_text(home_app)
|
||||
|
||||
def test_home_renders_footer_in_es(self, home_app):
|
||||
with_language(home_app, "es")
|
||||
home_app.run()
|
||||
text = collected_text(home_app)
|
||||
assert "Tus datos nunca salen" in text or "Se ejecuta localmente" in text
|
||||
|
||||
def test_home_tool_card_uses_es_name(self, home_app):
|
||||
"""When the home grid renders in Spanish, the dedup card title
|
||||
must use the Spanish display name, not the English fallback."""
|
||||
with_language(home_app, "es")
|
||||
home_app.run()
|
||||
text = collected_text(home_app)
|
||||
assert "Eliminador de duplicados" in text
|
||||
|
||||
|
||||
class TestEveryPageRenders:
|
||||
"""Parametrize over (page, language). Failure tells you exactly which
|
||||
page + which language broke."""
|
||||
|
||||
@pytest.mark.parametrize("slug", PAGE_SLUGS)
|
||||
@pytest.mark.parametrize("lang", ["en", "es"])
|
||||
def test_renders_without_exception(self, app_factory, slug, lang):
|
||||
app = app_factory(slug)
|
||||
with_language(app, lang)
|
||||
app.run()
|
||||
# AppTest exposes ``exception`` as a list of element-wrapped
|
||||
# exceptions (empty when no error fired).
|
||||
assert not app.exception, (
|
||||
f"page {slug!r} raised in language {lang!r}: {app.exception}"
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("slug", PAGE_SLUGS)
|
||||
@pytest.mark.parametrize("lang", ["en", "es"])
|
||||
def test_expected_substring_present(self, app_factory, slug, lang):
|
||||
app = app_factory(slug)
|
||||
with_language(app, lang)
|
||||
app.run()
|
||||
needle = EXPECTED_SUBSTRINGS[slug][lang]
|
||||
text = collected_text(app)
|
||||
assert needle in text, (
|
||||
f"page {slug!r} ({lang!r}) missing expected substring "
|
||||
f"{needle!r}\nGot:\n{text[:500]}…"
|
||||
)
|
||||
|
||||
|
||||
class TestPageHasLanguageSelector:
|
||||
"""Every page that calls ``hide_streamlit_chrome`` should mount the
|
||||
sidebar language selector. This is the only place the picker is
|
||||
rendered — if the chrome helper stops calling it, the test fails."""
|
||||
|
||||
@pytest.mark.parametrize("slug", PAGE_SLUGS)
|
||||
def test_sidebar_selectbox_present(self, app_factory, slug):
|
||||
app = app_factory(slug)
|
||||
app.run()
|
||||
# The selector is the only sidebar selectbox we ship today; if
|
||||
# a page adds another the test should be loosened to "at least
|
||||
# one selectbox with the language label."
|
||||
assert len(app.sidebar.selectbox) >= 1, (
|
||||
f"page {slug!r} has no sidebar selectbox — "
|
||||
f"hide_streamlit_chrome() should have mounted the language "
|
||||
f"selector."
|
||||
)
|
||||
207
tests/gui/test_workflows.py
Normal file
207
tests/gui/test_workflows.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""Happy-path workflow tests for each Ready tool page.
|
||||
|
||||
These drive the GUI like a user would: pre-stash an upload + a passed
|
||||
gate, render the page, click the primary action, assert the result
|
||||
landed in session state. They catch wiring bugs that smoke tests
|
||||
can't see — e.g., a primary button mis-keyed, a result not stashed in
|
||||
session state, a page reading the wrong key.
|
||||
|
||||
Slow-ish (~0.5–2s per workflow). Sits behind the ``gui`` marker so
|
||||
``pytest -m 'not gui'`` skips them.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from .conftest import collected_text, stash_upload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Deduplicator
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDeduplicatorWorkflow:
|
||||
"""Upload → click Find Duplicates → result lands in session_state."""
|
||||
|
||||
def _setup(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("1_Deduplicator")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
return app
|
||||
|
||||
def test_upload_renders_preview(self, app_factory, small_csv_bytes):
|
||||
app = self._setup(app_factory, small_csv_bytes)
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
assert "Preview: messy.csv" in text, (
|
||||
f"upload preview header missing; got:\n{text[:500]}"
|
||||
)
|
||||
|
||||
def test_find_duplicates_button_present(self, app_factory, small_csv_bytes):
|
||||
app = self._setup(app_factory, small_csv_bytes)
|
||||
app.run()
|
||||
labels = [b.label for b in app.button]
|
||||
assert any("Find Duplicates" in lbl for lbl in labels), (
|
||||
f"primary action missing; got: {labels}"
|
||||
)
|
||||
|
||||
def test_clicking_find_duplicates_stashes_result(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
app = self._setup(app_factory, small_csv_bytes)
|
||||
app.run()
|
||||
# Find the Find-Duplicates button and click it. AppTest's
|
||||
# button-by-key access is via ``.button(key=...)`` — we don't
|
||||
# have the key here, so locate it by label.
|
||||
target = next(b for b in app.button if "Find Duplicates" in b.label)
|
||||
target.click().run()
|
||||
# The page stores the result under ``result`` in session state.
|
||||
result = app.session_state["result"]
|
||||
assert result is not None, "Find Duplicates didn't stash a result"
|
||||
# The sample has Alice twice → one match group.
|
||||
assert len(result.match_groups) >= 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Text Cleaner
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTextCleanerWorkflow:
|
||||
def _setup(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("2_Text_Cleaner")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
return app
|
||||
|
||||
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
|
||||
app = self._setup(app_factory, small_csv_bytes)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
assert "Text Cleaner" in text
|
||||
|
||||
def test_preview_or_clean_button_present(self, app_factory, small_csv_bytes):
|
||||
"""The text cleaner ships a primary action (label varies by
|
||||
version). We just assert at least one primary-looking button
|
||||
exists past the upload."""
|
||||
app = self._setup(app_factory, small_csv_bytes)
|
||||
app.run()
|
||||
# Filter out the gate-redirect button (which would only be
|
||||
# present if the gate fired, which our setup prevents).
|
||||
gate_buttons = {"Go to Review & Normalize", "Ir a Revisar y Normalizar"}
|
||||
non_gate = [b for b in app.button if b.label not in gate_buttons]
|
||||
assert non_gate, (
|
||||
f"no primary buttons rendered; got: {[b.label for b in app.button]}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Format Standardizer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestFormatStandardizerWorkflow:
|
||||
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("3_Format_Standardizer")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
assert "Format Standardizer" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Missing Value Handler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMissingValuesWorkflow:
|
||||
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("4_Missing_Values")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
assert "Missing" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Column Mapper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestColumnMapperWorkflow:
|
||||
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("5_Column_Mapper")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
assert "Column" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pipeline Runner
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestPipelineRunnerWorkflow:
|
||||
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
|
||||
app = app_factory("9_Pipeline_Runner")
|
||||
stash_upload(app, name="messy.csv", data=small_csv_bytes)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
assert "Pipeline" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Review page — special: doesn't gate on upload, has its own analyzer flow
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestReviewWorkflow:
|
||||
"""The Review page is the gate-fixer. Without an upload it shows a
|
||||
'go back to home' message. With an upload it runs the analyzer and
|
||||
shows findings."""
|
||||
|
||||
def test_no_upload_shows_back_to_home(self, app_factory):
|
||||
app = app_factory("0_Review")
|
||||
app.run()
|
||||
text = collected_text(app)
|
||||
# Page shows ``No file uploaded`` + ``Back to home``.
|
||||
assert "No file uploaded" in text or "uploaded" in text.lower()
|
||||
|
||||
def test_with_upload_shows_review_content(
|
||||
self, app_factory, small_csv_bytes,
|
||||
):
|
||||
app = app_factory("0_Review")
|
||||
# Review page only needs the upload bytes, not a pre-passed gate.
|
||||
app.session_state["home_uploaded_bytes"] = small_csv_bytes
|
||||
app.session_state["home_uploaded_name"] = "messy.csv"
|
||||
app.session_state["home_uploaded_size"] = len(small_csv_bytes)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
# Page ran the analyzer — either we get findings or the
|
||||
# "already clean" success message. Either way confirms the
|
||||
# analyzer pipeline ran end-to-end with the stashed bytes.
|
||||
clean_msg = "No findings to review" in text
|
||||
encoding_section = "File encoding" in text
|
||||
assert clean_msg or encoding_section, (
|
||||
f"Review page didn't surface analyzer output; got:\n{text[:400]}"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Coming-Soon pages still render (just a stub) — pinned so we know if a
|
||||
# Coming-Soon goes from "stub renders" to "import error".
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize("slug,name", [
|
||||
("6_Outlier_Detector", "Outlier"),
|
||||
("7_Multi_File_Merger", "Merger"),
|
||||
("8_Validator_Reporter", "Validator"),
|
||||
])
|
||||
class TestComingSoonStubs:
|
||||
def test_stub_renders(self, app_factory, slug, name):
|
||||
app = app_factory(slug)
|
||||
app.run()
|
||||
assert not app.exception
|
||||
text = collected_text(app)
|
||||
assert name in text
|
||||
Reference in New Issue
Block a user