diff --git a/pytest.ini b/pytest.ini index e93a5da..09177d9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -12,6 +12,7 @@ markers = e2e: end-to-end CLI / integration tests install: import / dependency sanity tests fixture_sweep: parametrized sweep over the test-cases/ folder + gui: Streamlit AppTest-driven tests (live in tests/gui/) # Warnings discipline: fail on unexpected DeprecationWarning from our own # code, but tolerate third-party deprecations that we can't fix. diff --git a/tests/gui/__init__.py b/tests/gui/__init__.py new file mode 100644 index 0000000..52c3274 --- /dev/null +++ b/tests/gui/__init__.py @@ -0,0 +1,6 @@ +"""GUI tests — Streamlit AppTest-driven coverage of pages/, components/, and i18n. + +Marked with ``@pytest.mark.gui`` so a quick core-only run can skip them +via ``pytest -m 'not gui'``. The default ``pytest`` invocation still runs +everything; the marker is opt-out, not opt-in. +""" diff --git a/tests/gui/_findings_panel_harness.py b/tests/gui/_findings_panel_harness.py new file mode 100644 index 0000000..0513b8c --- /dev/null +++ b/tests/gui/_findings_panel_harness.py @@ -0,0 +1,44 @@ +"""Test harness page for ``render_findings_panel``. + +A standalone Streamlit page module the AppTest layer can drive +directly. Renders the findings panel with whatever findings live in +``st.session_state["test_findings"]`` so test code can inject a list +and inspect what's rendered, without having to fake a file_uploader +widget. + +Lives next to its test file so it ships with the GUI test layer and +never gets confused with a real page. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import streamlit as st + +# Same sys.path bootstrap as the real pages so ``src.*`` imports work +# regardless of how AppTest invokes the script. 
+_project_root = Path(__file__).resolve().parent.parent.parent
+if str(_project_root) not in sys.path:
+    sys.path.insert(0, str(_project_root))
+
+
+# ``st.page_link`` requires a multipage-app context (Streamlit looks up
+# the target page's URL from the app's PagesManager). AppTest doesn't
+# wire that up for a standalone page, so any ``page_link`` call raises
+# ``KeyError: 'url_pathname'`` here. We swap it for a markdown stub
+# that renders the label inline — same observable text, no nav, no
+# crash.
+def _page_link_stub(page: str, *, label: str | None = None, **_kwargs) -> None:
+    """Render a ``page_link`` as inert bracketed text instead of a nav link.
+
+    ``label`` is optional in the real ``st.page_link`` signature, so the
+    stub accepts calls that omit it and falls back to the *page* target
+    rather than raising ``TypeError``. Every other keyword argument is
+    accepted and ignored.
+    """
+    text = page if label is None else label
+    st.markdown(f"[{text}]")
+
+# NOTE(review): this rebinds the attribute on the imported ``streamlit``
+# module and is never undone. If other tests share the interpreter with
+# this harness the stub would outlive this page — confirm that is
+# acceptable.
+st.page_link = _page_link_stub  # type: ignore[assignment]
+
+from src.gui.components import hide_streamlit_chrome, render_findings_panel
+
+st.set_page_config(page_title="findings test", page_icon="🧪", layout="wide")
+hide_streamlit_chrome()
+
+# Tests inject the findings list through session state before ``run()``;
+# an empty list is rendered when nothing was injected.
+findings = st.session_state.get("test_findings", [])
+render_findings_panel(findings)
diff --git a/tests/gui/conftest.py b/tests/gui/conftest.py
new file mode 100644
index 0000000..5e98ef9
--- /dev/null
+++ b/tests/gui/conftest.py
@@ -0,0 +1,204 @@
+"""Shared fixtures for the GUI test layer.
+
+Streamlit's ``AppTest.from_file`` runs a page module in-process inside
+its own ScriptRunContext. Each fixture here returns either bytes (for
+the upload-session-state path) or a configured ``AppTest`` ready to
+``.run()``. Tests should NOT call ``AppTest.from_file`` directly so the
+project-root path and the default session-state stash live in one place.
+
+Page paths are resolved relative to the repo root so the test suite
+works from any cwd (e.g., ``pytest`` from a subdir, IDE runners).
+"""
+
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+from typing import Iterable
+
+import pandas as pd
+import pytest
+
+from streamlit.testing.v1 import AppTest
+
+
+# ---------------------------------------------------------------------------
+# Paths
+# ---------------------------------------------------------------------------
+
+PROJECT_ROOT: Path = Path(__file__).resolve().parent.parent.parent
+PAGES_DIR: Path = PROJECT_ROOT / "src" / "gui" / "pages"
+APP_PATH: Path = PROJECT_ROOT / "src" / "gui" / "app.py"
+SAMPLES_DIR: Path = PROJECT_ROOT / "samples"
+
+
+# All GUI tests get the marker automatically so a single
+# ``pytest -m 'not gui'`` skips the whole subdir.
+def pytest_collection_modifyitems(config, items):
+    """Add the ``gui`` marker to every test collected from this directory.
+
+    Each item is checked individually because *items* is not limited to
+    this subdir. *config* is part of the hook signature and is unused.
+    """
+    gui_dir = Path(__file__).resolve().parent
+    for item in items:
+        # ``item.path`` is the pathlib successor of the legacy ``fspath``;
+        # fall back to ``fspath`` when the attribute is not available.
+        item_path = Path(str(getattr(item, "path", None) or item.fspath))
+        # Compare real directories rather than substring-matching
+        # "tests/gui/", which would also tag e.g. "contests/gui/..." or
+        # any checkout that happens to live below a ".../tests/gui/" dir.
+        if gui_dir in item_path.resolve().parents:
+            item.add_marker(pytest.mark.gui)
+
+
+# ---------------------------------------------------------------------------
+# Sample data
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def messy_sales_bytes() -> bytes:
+    """Raw bytes of the ``messy_sales.csv`` sample (mixed dup + text noise)."""
+    return (SAMPLES_DIR / "messy_sales.csv").read_bytes()
+
+
+@pytest.fixture
+def small_csv_bytes() -> bytes:
+    """A tiny CSV the tool pages can chew through fast — three rows, three
+    columns, one obvious duplicate.
Suitable for happy-path workflow tests + where wall-clock matters.""" + return ( + b"name,email,phone\n" + b"Alice,alice@gmail.com,5551234567\n" + b"Alice,Alice@Gmail.com,(555) 123-4567\n" + b"Bob,bob@example.com,5559876543\n" + ) + + +@pytest.fixture +def small_csv_df(small_csv_bytes) -> pd.DataFrame: + """The ``small_csv_bytes`` parsed — for tests that need the DataFrame + form (e.g., direct ``deduplicate()`` calls within a test).""" + import io + return pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False) + + +# --------------------------------------------------------------------------- +# AppTest builders +# --------------------------------------------------------------------------- + +def _new_app(page_path: Path) -> AppTest: + """Build an AppTest pointed at *page_path*. Keeps the project-root / + page-resolution boilerplate out of the test bodies.""" + if not page_path.exists(): + raise FileNotFoundError(f"GUI test target missing: {page_path}") + return AppTest.from_file(str(page_path)) + + +@pytest.fixture +def home_app() -> AppTest: + """A fresh AppTest pointed at the home page (``src/gui/app.py``).""" + return _new_app(APP_PATH) + + +@pytest.fixture +def app_factory(): + """Callable returning a fresh AppTest for any page slug. + + Usage:: + + app = app_factory("1_Deduplicator") + app.run() + """ + def _make(slug: str) -> AppTest: + return _new_app(PAGES_DIR / f"{slug}.py") + return _make + + +# --------------------------------------------------------------------------- +# Upload-session helpers +# --------------------------------------------------------------------------- + +def stash_upload(app: AppTest, *, name: str, data: bytes) -> str: + """Pre-populate the home-screen upload stash + the gate's normalisation + result so a tool page renders past ``require_normalization_gate()``. + + Returns the SHA-256 hex of *data* (used as the gate key) in case the + test wants to assert against it. 
+ + The gate checks:: + + - ``home_uploaded_bytes`` is set + - ``normalization_for == sha256(home_uploaded_bytes)`` + - ``normalization_result.passed is True`` + + We synthesise a passing result via a tiny stub object that satisfies + the gate's only attribute access (``.passed``). Tests that want to + exercise gate-blocking behaviour should NOT call this helper — they + should stash bytes without the normalisation result. + """ + sha = hashlib.sha256(data).hexdigest() + app.session_state["home_uploaded_bytes"] = data + app.session_state["home_uploaded_name"] = name + app.session_state["home_uploaded_size"] = len(data) + app.session_state["normalization_for"] = sha + app.session_state["normalization_result"] = _PassedGateResult() + return sha + + +class _PassedGateResult: + """Minimal stand-in for the real NormalizationResult shape — the gate + only reads ``.passed``. Using a real NormalizationResult here would + pull in core.normalize and tie GUI tests to its constructor surface. + """ + + passed: bool = True + + +def stash_upload_without_gate(app: AppTest, *, name: str, data: bytes) -> None: + """Stash the upload bytes but do NOT pre-pass the gate. Used by gate + tests that want the warning + Go-to-Review button to appear.""" + app.session_state["home_uploaded_bytes"] = data + app.session_state["home_uploaded_name"] = name + app.session_state["home_uploaded_size"] = len(data) + + +# --------------------------------------------------------------------------- +# i18n helpers +# --------------------------------------------------------------------------- + +def with_language(app: AppTest, lang: str) -> None: + """Set the active GUI language *before* ``app.run()``. 
The selector
+    widget reads ``_ui_lang_select`` on first render; we set the canonical
+    ``ui_lang`` key (what ``current_language()`` reads) directly so the
+    first render is already in the chosen language."""
+    app.session_state["ui_lang"] = lang
+
+
+def collected_text(app: AppTest) -> str:
+    """Flatten every text-bearing element on the page into one big string
+    so a test can assert ``"Maestría" in collected_text(app)`` without
+    juggling individual widget collections.
+
+    Covers: title, header, subheader, caption, markdown, info, warning,
+    error, success, button labels, code, metric labels and values, and
+    expander labels, followed by the sidebar's markdown, caption and
+    button elements. ``page_link`` elements are not collected. Doesn't
+    try to be exhaustive — if a widget type isn't here, add it (cheap,
+    mechanical).
+
+    Returns the collected strings joined with newlines, in the order
+    listed above.
+    """
+    chunks: list[str] = []
+
+    def _extend(
+        seq: Iterable,
+        attrs: tuple[str, ...] = ("value", "label", "body"),
+        *,
+        first_only: bool = True,
+    ) -> None:
+        # Append the non-empty string attributes of each element. With
+        # ``first_only`` only the first matching attribute is taken, so a
+        # widget contributes one string; non-string values (e.g. a
+        # button's boolean ``value``) are skipped in favour of the next
+        # attribute.
+        for el in seq:
+            for a in attrs:
+                v = getattr(el, a, None)
+                if isinstance(v, str) and v:
+                    chunks.append(v)
+                    if first_only:
+                        break  # one value per element is enough
+
+    _extend(app.title)
+    _extend(app.header)
+    _extend(app.subheader)
+    _extend(app.caption)
+    _extend(app.markdown)
+    _extend(app.info)
+    _extend(app.warning)
+    _extend(app.error)
+    _extend(app.success)
+    _extend(app.button)
+    # Collections the docstring promises. Looked up defensively (same
+    # spirit as the ``hasattr`` guard on the sidebar below) so a missing
+    # accessor contributes nothing instead of raising.
+    _extend(getattr(app, "code", ()))
+    _extend(getattr(app, "metric", ()), ("label", "value"), first_only=False)
+    _extend(getattr(app, "expander", ()), ("label",))
+    # Sidebar caption / markdown / button (Streamlit exposes the sidebar
+    # as a sub-tree with the same widget collections).
+    if hasattr(app, "sidebar"):
+        sb = app.sidebar
+        _extend(sb.markdown)
+        _extend(sb.caption)
+        _extend(sb.button)
+    return "\n".join(chunks)
diff --git a/tests/gui/test_advanced_panels.py b/tests/gui/test_advanced_panels.py
new file mode 100644
index 0000000..ae0b200
--- /dev/null
+++ b/tests/gui/test_advanced_panels.py
@@ -0,0 +1,194 @@
+"""Advanced-options panel tests.
+
+``config_panel`` (in ``src.gui.components``) is the dedup-page's
+expander that houses every per-column / per-strategy knob. It's the
+densest single widget surface in the GUI, so a session-state key drift
+in there cascades into every dedup session.
+ +We exercise it via the Deduplicator page (rendering ``config_panel`` +in isolation requires a fake Streamlit context). The page provides +the surrounding state; we poke widgets and verify their effects. +""" + +from __future__ import annotations + +import pandas as pd +import pytest + +from .conftest import stash_upload + + +GATED_PAGE = "1_Deduplicator" + + +def _render_page(app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + stash_upload(app, name="messy.csv", data=small_csv_bytes) + app.run() + return app + + +# --------------------------------------------------------------------------- +# Expander presence + collapsed state +# --------------------------------------------------------------------------- + +class TestAdvancedExpander: + def test_advanced_options_expander_renders(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + labels = [e.label for e in app.expander] + assert any("Advanced Options" in lbl for lbl in labels), ( + f"Advanced Options expander missing; expanders: {labels}" + ) + + +# --------------------------------------------------------------------------- +# Algorithm selector +# --------------------------------------------------------------------------- + +class TestAlgorithmSelector: + """The fuzzy-algorithm dropdown drives ``Algorithm.{LEVENSHTEIN, + JARO_WINKLER, TOKEN_SET_RATIO}`` on every column. Default value + must be jaro_winkler — the strong-key build_default_strategies + assumes it.""" + + def test_default_algorithm_is_jaro_winkler(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + # Find the selectbox by label. 
+ algo_boxes = [ + sb for sb in app.selectbox + if sb.label == "Fuzzy algorithm" + ] + assert len(algo_boxes) == 1 + assert algo_boxes[0].value == "jaro_winkler" + + def test_algorithm_options_complete(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + algo = next(sb for sb in app.selectbox if sb.label == "Fuzzy algorithm") + assert set(algo.options) == { + "jaro_winkler", "levenshtein", "token_set_ratio", + } + + +# --------------------------------------------------------------------------- +# Threshold slider +# --------------------------------------------------------------------------- + +class TestThresholdSlider: + def test_default_threshold_is_85(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + thresholds = [s for s in app.slider if "Similarity" in (s.label or "")] + assert len(thresholds) == 1 + assert thresholds[0].value == 85 + + def test_threshold_bounds(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + thr = next(s for s in app.slider if "Similarity" in (s.label or "")) + assert thr.min == 50 + assert thr.max == 100 + + +# --------------------------------------------------------------------------- +# Survivor rule selector +# --------------------------------------------------------------------------- + +class TestSurvivorSelector: + def test_default_is_first(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + surv = next( + sb for sb in app.selectbox + if sb.label == "Survivor rule" + ) + assert surv.value == "first" + + def test_all_four_rules_offered(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + surv = next(sb for sb in app.selectbox if sb.label == "Survivor rule") + assert set(surv.options) == { + "first", "last", "most-complete", "most-recent", + } + + def test_selecting_most_recent_does_not_crash( + self, app_factory, small_csv_bytes, + ): + """When 
``most-recent`` is chosen the page should reveal a + Date column dropdown. Pin the no-crash invariant.""" + app = _render_page(app_factory, small_csv_bytes) + surv = next(sb for sb in app.selectbox if sb.label == "Survivor rule") + surv.select("most-recent").run() + assert not app.exception + + +# --------------------------------------------------------------------------- +# Merge checkbox +# --------------------------------------------------------------------------- + +class TestMergeCheckbox: + def test_merge_default_off(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + merge_boxes = [c for c in app.checkbox if c.label == "Merge mode"] + assert len(merge_boxes) == 1 + assert merge_boxes[0].value is False + + def test_toggling_merge_does_not_crash( + self, app_factory, small_csv_bytes, + ): + app = _render_page(app_factory, small_csv_bytes) + merge = next(c for c in app.checkbox if c.label == "Merge mode") + merge.check().run() + assert not app.exception + # After checking, the value persists in session_state via the + # widget's own key. + + +# --------------------------------------------------------------------------- +# Column multiselects +# --------------------------------------------------------------------------- + +class TestColumnMultiselects: + """Match-on / Strong-keys / Fuzzy multiselects use ``st.multiselect`` + on every column. 
Empty default = auto-detect.""" + + def test_three_multiselects_present(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + labels = {m.label for m in app.multiselect} + assert {"Match on columns", "Strong keys", "Fuzzy columns"} <= labels + + def test_defaults_are_empty(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + for ms in app.multiselect: + if ms.label in { + "Match on columns", "Strong keys", "Fuzzy columns", + }: + assert ms.value == [], ( + f"{ms.label!r} default should be []; got {ms.value}" + ) + + def test_options_match_dataframe_columns(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + df_cols = list(app.session_state["df"].columns) + for ms in app.multiselect: + if ms.label in { + "Match on columns", "Strong keys", "Fuzzy columns", + }: + assert list(ms.options) == df_cols + + +# --------------------------------------------------------------------------- +# Save / Load config +# --------------------------------------------------------------------------- + +class TestConfigSaveLoadButtons: + def test_save_settings_button_present(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + labels = [b.label for b in app.button] + assert any("Save current settings" in lbl for lbl in labels) + + def test_config_file_uploader_present(self, app_factory, small_csv_bytes): + app = _render_page(app_factory, small_csv_bytes) + # AppTest exposes uploaders via ``app.file_uploader``. There + # are two: the main file (pickup-or-upload) and the config + # JSON. Make sure the config one is there. 
+ labels = [u.label for u in app.file_uploader] + assert any("Load config profile" in lbl for lbl in labels), ( + f"config uploader missing; uploaders: {labels}" + ) diff --git a/tests/gui/test_chrome.py b/tests/gui/test_chrome.py new file mode 100644 index 0000000..537f3ea --- /dev/null +++ b/tests/gui/test_chrome.py @@ -0,0 +1,181 @@ +"""Chrome tests — language selector, hide_streamlit_chrome, quit flow. + +These verify the GUI plumbing that every page depends on. Failures here +cascade into every other page, so they run cheap and run first +(alphabetical name ordering after smoke). +""" + +from __future__ import annotations + +import pytest + +from .conftest import collected_text, with_language + + +# --------------------------------------------------------------------------- +# hide_streamlit_chrome mounts the selector +# --------------------------------------------------------------------------- + +class TestHideChromeMountsSelector: + """``hide_streamlit_chrome()`` is the one place the language selector + is mounted. Every page that hides chrome (= every page) must get + exactly one sidebar selectbox with the i18n label.""" + + def test_home_has_one_sidebar_selectbox(self, home_app): + home_app.run() + # Only one selectbox in the sidebar today; if a page adds + # another, this becomes a weaker bound. 
+ assert len(home_app.sidebar.selectbox) == 1, ( + "expected exactly one sidebar selectbox (the language picker); " + f"got {len(home_app.sidebar.selectbox)}" + ) + + def test_selector_label_is_localized(self, home_app): + with_language(home_app, "es") + home_app.run() + labels = [sb.label for sb in home_app.sidebar.selectbox] + assert "Idioma" in labels, ( + f"Spanish selector should be labelled 'Idioma'; got {labels}" + ) + + def test_selector_label_english_default(self, home_app): + home_app.run() # no with_language → default = en + labels = [sb.label for sb in home_app.sidebar.selectbox] + assert "Language" in labels + + +# --------------------------------------------------------------------------- +# Language selector switches session state +# --------------------------------------------------------------------------- + +class TestLanguageSwitch: + """Picking 'es' in the selector flips ``st.session_state['ui_lang']`` + and re-renders the page with Spanish strings on the next run.""" + + def test_default_language_is_english(self, home_app): + home_app.run() + # AppTest's session_state proxy doesn't implement .get(); use + # membership check + attribute access. Absence == default ("en"). 
+ lang = home_app.session_state["ui_lang"] if "ui_lang" in home_app.session_state else "en" + assert lang == "en" + text = collected_text(home_app) + assert "Data Cleaning Mastery" in text + + def test_selecting_spanish_persists_in_session(self, home_app): + home_app.run() + selector = home_app.sidebar.selectbox[0] + selector.select("es").run() + assert home_app.session_state["ui_lang"] == "es" + + def test_selecting_spanish_re_renders_in_spanish(self, home_app): + home_app.run() + selector = home_app.sidebar.selectbox[0] + selector.select("es").run() + text = collected_text(home_app) + assert "Maestría" in text, ( + "after selecting Spanish, the home title should switch to " + f"'🧹 DataTools — Maestría…'; got:\n{text[:300]}" + ) + + def test_selecting_back_to_english_reverts(self, home_app): + # Start in Spanish, then flip back. + with_language(home_app, "es") + home_app.run() + assert "Maestría" in collected_text(home_app) + + selector = home_app.sidebar.selectbox[0] + selector.select("en").run() + text = collected_text(home_app) + assert "Data Cleaning Mastery" in text + assert "Maestría" not in text + + +# --------------------------------------------------------------------------- +# Footer + page_title localization +# --------------------------------------------------------------------------- + +class TestLocalizedChrome: + """A spot-check on the parts of the chrome that aren't the selector: + the bottom footer caption and the home-page hero text. 
Other strings + are pinned indirectly by ``TestEveryPageRenders.test_expected_*``.""" + + def test_footer_english(self, home_app): + home_app.run() + text = collected_text(home_app) + assert "Your data never leaves" in text + + def test_footer_spanish(self, home_app): + with_language(home_app, "es") + home_app.run() + text = collected_text(home_app) + assert "Tus datos nunca salen" in text + + def test_upload_section_heading_localizes(self, home_app): + with_language(home_app, "es") + home_app.run() + text = collected_text(home_app) + # ``📤 Sube un archivo para empezar`` from the es pack. + assert "Sube un archivo" in text + + +# --------------------------------------------------------------------------- +# Quit / Close page +# --------------------------------------------------------------------------- + +class TestQuitButtonRenders: + """The Close page must show the localized title, body, and the + Close-the-app button. We don't actually click the button — that + would call ``os._exit(0)`` and kill the test process. 
We only + assert the button is present and its label is localized.""" + + def test_close_page_english(self, app_factory): + app = app_factory("99_Close") + app.run() + text = collected_text(app) + assert "Close DataTools" in text + labels = [b.label for b in app.button] + assert any("Close the app" in lbl for lbl in labels), ( + f"Close-the-app button missing; buttons: {labels}" + ) + + def test_close_page_spanish(self, app_factory): + app = app_factory("99_Close") + with_language(app, "es") + app.run() + text = collected_text(app) + assert "Cerrar DataTools" in text + labels = [b.label for b in app.button] + assert any("Cerrar la app" in lbl for lbl in labels), ( + f"Spanish Close button missing; buttons: {labels}" + ) + + def test_close_body_describes_unsaved_work_warning_es(self, app_factory): + app = app_factory("99_Close") + with_language(app, "es") + app.run() + text = collected_text(app) + assert "trabajo sin guardar" in text + + +# --------------------------------------------------------------------------- +# Tool cards use localized names on the home grid +# --------------------------------------------------------------------------- + +class TestHomeToolGridLocalization: + """The home grid pulls tool display names through ``tool_name()`` in + ``tools_registry``. The Spanish pack provides translations for every + tool id; a regression in that wiring would make Spanish users see + English names. 
Pin a few representative ones.""" + + @pytest.mark.parametrize("needle", [ + "Eliminador de duplicados", + "Limpiador de texto", + "Estandarizador de formatos", + "Gestor de valores faltantes", + "Mapeador de columnas", + ]) + def test_es_tool_name_on_home_grid(self, home_app, needle): + with_language(home_app, "es") + home_app.run() + text = collected_text(home_app) + assert needle in text, f"missing localized tool name {needle!r}" diff --git a/tests/gui/test_dedup_review.py b/tests/gui/test_dedup_review.py new file mode 100644 index 0000000..bcb4ae0 --- /dev/null +++ b/tests/gui/test_dedup_review.py @@ -0,0 +1,205 @@ +"""Dedup review widget tests. + +``match_group_card`` from ``src.gui.components`` has two modes (decided +/ undecided) and a Confirm/Undo flow keyed by session_state. We test +each state by exercising the parent Deduplicator page end to end and +then poking at ``review_decisions`` directly. + +Why not unit-test ``match_group_card`` in isolation? AppTest needs a +real page module, not a function call, so we drive the page and verify +the side effects on session_state. This catches integration bugs the +unit test couldn't see (e.g., session-state key drift between the +page and the component). +""" + +from __future__ import annotations + +import pandas as pd +import pytest + +from .conftest import collected_text, stash_upload + + +# We need a frame that produces at least one match group. The 3-row +# small_csv has two Alice rows that share an email (case-folded) → one +# group of two members. +def _run_with_results(app): + """Drive the page through to the post-Find-Duplicates state. + + 1. First ``run()`` — page picks up the stashed upload, reads it, and + renders the preview + Find Duplicates button. ``result`` is None. + 2. Click Find Duplicates and ``run()`` again — page calls + ``deduplicate()`` and stashes the result. Match group cards + render on this pass. 
+
+    Mirrors what a real user does instead of trying to short-circuit
+    the page by stashing ``result`` directly (the page resets it to
+    None on every new upload).
+    """
+    app.run()
+    target = next(b for b in app.button if "Find Duplicates" in b.label)
+    target.click().run()
+
+
+class TestMatchGroupCardUndecided:
+    """A freshly-found group has no decision → the card renders the
+    interactive editor + Confirm button."""
+
+    def test_card_expander_present(self, app_factory, small_csv_bytes):
+        app = app_factory("1_Deduplicator")
+        stash_upload(app, name="messy.csv", data=small_csv_bytes)
+        _run_with_results(app)
+        # An expander per group. The dedup result should produce
+        # at least one match group on this fixture.
+        result = app.session_state["result"]
+        assert len(result.match_groups) >= 1, (
+            "fixture should produce at least one match group"
+        )
+        # Match group cards use ``st.expander``. AppTest exposes them
+        # via ``app.expander``.
+        labels = [e.label for e in app.expander]
+        assert any("Group 1" in lbl for lbl in labels), (
+            f"undecided card expander missing; got: {labels}"
+        )
+
+    def test_confirm_button_renders_for_undecided_group(
+        self, app_factory, small_csv_bytes,
+    ):
+        app = app_factory("1_Deduplicator")
+        stash_upload(app, name="messy.csv", data=small_csv_bytes)
+        _run_with_results(app)
+        # The Confirm button's widget key is presumably derived from the
+        # group id (``confirm_<group_id>`` — TODO confirm against
+        # ``match_group_card``); this test matches on the visible label
+        # only. Guard that a group exists first, so a fixture regression
+        # fails here instead of showing up as a missing button.
+        result = app.session_state["result"]
+        assert result.match_groups, (
+            "fixture should produce at least one match group"
+        )
+        labels = [b.label for b in app.button]
+        # Streamlit renders the button label as "Confirm".
+ assert any(lbl == "Confirm" for lbl in labels), ( + f"undecided card missing Confirm button; buttons: {labels}" + ) + + +class TestBatchActions: + """Accept All / Reject All / Clear Decisions are the three batch + buttons that mutate ``review_decisions`` across all groups.""" + + def test_accept_all_populates_decisions(self, app_factory, small_csv_bytes): + app = app_factory("1_Deduplicator") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + _run_with_results(app) + + target = next(b for b in app.button if b.label == "Accept All") + target.click().run() + decisions = app.session_state["review_decisions"] + result = app.session_state["result"] + assert len(decisions) == len(result.match_groups), ( + "Accept All should record a decision per group; " + f"got {len(decisions)} decisions for " + f"{len(result.match_groups)} groups" + ) + # Each Accept-All decision keeps exactly one row (the survivor). + for d in decisions.values(): + assert len(d["keep_indices"]) == 1 + + def test_reject_all_keeps_every_member(self, app_factory, small_csv_bytes): + app = app_factory("1_Deduplicator") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + _run_with_results(app) + + target = next(b for b in app.button if b.label == "Reject All") + target.click().run() + decisions = app.session_state["review_decisions"] + result = app.session_state["result"] + # Reject = keep every member → keep_indices == row_indices. + for g in result.match_groups: + assert set(decisions[g.group_id]["keep_indices"]) == set(g.row_indices) + + def test_clear_decisions_wipes_state(self, app_factory, small_csv_bytes): + app = app_factory("1_Deduplicator") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + _run_with_results(app) + + # Populate decisions via Accept All, then Clear, then verify. 
+ accept = next(b for b in app.button if b.label == "Accept All") + accept.click().run() + assert app.session_state["review_decisions"], ( + "precondition failed: Accept All didn't populate" + ) + + clear = next(b for b in app.button if "Clear Decisions" in b.label) + clear.click().run() + assert app.session_state["review_decisions"] == {} + + +class TestApplyReviewDecisions: + """The component-layer ``apply_review_decisions`` function is the + actual semantic engine; unit-test it directly. The GUI just feeds + its output to a download button.""" + + def test_keep_all_means_no_rows_removed( + self, app_factory, small_csv_bytes, + ): + from src.gui.components import apply_review_decisions + from src.core import deduplicate + import io + + df = pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False) + result = deduplicate(df, preview=True) + decisions = { + g.group_id: { + "keep_indices": list(g.row_indices), + "overrides": {}, + } + for g in result.match_groups + } + deduped, removed = apply_review_decisions(df, result.match_groups, decisions) + assert len(deduped) == len(df), ( + "Keep-All should preserve every row" + ) + assert removed.empty + + def test_merge_decision_drops_losers( + self, app_factory, small_csv_bytes, + ): + from src.gui.components import apply_review_decisions + from src.core import deduplicate + import io + + df = pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False) + result = deduplicate(df, preview=True) + # Merge each group to its first member. 
+ decisions = { + g.group_id: { + "keep_indices": [g.row_indices[0]], + "overrides": {}, + } + for g in result.match_groups + } + deduped, removed = apply_review_decisions(df, result.match_groups, decisions) + expected_removed = sum(len(g.row_indices) - 1 for g in result.match_groups) + assert len(removed) == expected_removed + assert len(deduped) == len(df) - expected_removed + + def test_column_override_applies_to_survivor( + self, app_factory, small_csv_bytes, + ): + from src.gui.components import apply_review_decisions + from src.core import deduplicate + import io + + df = pd.read_csv(io.BytesIO(small_csv_bytes), dtype=str, keep_default_na=False) + result = deduplicate(df, preview=True) + group = result.match_groups[0] + survivor = group.row_indices[0] + decisions = { + group.group_id: { + "keep_indices": [survivor], + "overrides": {"phone": "OVERRIDE_VALUE"}, + } + } + deduped, _ = apply_review_decisions(df, result.match_groups, decisions) + # The survivor row in ``deduped`` must carry the override. Find + # it via the original (non-loser) name. + match = deduped[deduped["phone"] == "OVERRIDE_VALUE"] + assert len(match) == 1, ( + f"override didn't apply; deduped frame: {deduped.to_dict()}" + ) diff --git a/tests/gui/test_errors.py b/tests/gui/test_errors.py new file mode 100644 index 0000000..22abde4 --- /dev/null +++ b/tests/gui/test_errors.py @@ -0,0 +1,103 @@ +"""Error-display tests. + +Tool pages catch core exceptions (via ``format_for_user``) and surface +them through ``st.error``. We verify that the message structure makes +it through the GUI layer, not just that it gets raised by core (the +core tests already cover that). + +These tests deliberately feed garbage bytes / malformed content and +check the rendered error, not just that the page didn't crash. 
+""" + +from __future__ import annotations + +import pytest + +from .conftest import collected_text, stash_upload + + +# --------------------------------------------------------------------------- +# Malformed upload +# --------------------------------------------------------------------------- + +class TestMalformedUploadErrors: + """Bytes that look like a CSV but aren't parseable. The Deduplicator + page wraps ``read_file`` failures in an ``st.error`` with the file + name and the structured ``format_for_user`` output.""" + + @pytest.fixture + def garbage_bytes(self) -> bytes: + """Binary garbage with embedded NULs and non-UTF-8 sequences — + triggers the gate's repair pipeline failures, ultimately + produces a parse error on the dedup page if it makes it that + far. We bypass the gate so the dedup page sees it raw.""" + return b"\xff\xfe\x00\x01\x02garbage,without,structure\n\x00\xff" * 50 + + def test_garbage_bytes_do_not_crash_dedup( + self, app_factory, garbage_bytes, + ): + app = app_factory("1_Deduplicator") + stash_upload(app, name="garbage.csv", data=garbage_bytes) + app.run() + # The page should either render an error OR successfully parse + # the bytes as text (the gate has been pre-passed, so the + # pre-parse repair didn't run on this fixture). We just need + # no uncaught Python exception. + assert not app.exception + + +# --------------------------------------------------------------------------- +# Empty upload +# --------------------------------------------------------------------------- + +class TestEmptyUpload: + """Zero-byte upload — must be handled gracefully.""" + + def test_empty_bytes_renders(self, app_factory): + app = app_factory("1_Deduplicator") + stash_upload(app, name="empty.csv", data=b"") + app.run() + # Either: (a) we render an error, or (b) we render the page + # with no preview. Either is acceptable — what's NOT is an + # uncaught Python exception bubbling up. 
+ assert not app.exception + + +# --------------------------------------------------------------------------- +# Single-column file +# --------------------------------------------------------------------------- + +class TestSingleColumnFile: + """A 1-column CSV is technically valid but produces no auto-detect + strategies. The page must explain this to the user rather than + silently producing zero match groups.""" + + def test_single_column_does_not_crash(self, app_factory): + app = app_factory("1_Deduplicator") + data = b"only_col\nvalue1\nvalue2\nvalue3\n" + stash_upload(app, name="single.csv", data=data) + app.run() + assert not app.exception + + +# --------------------------------------------------------------------------- +# Header collision in column_mapper +# --------------------------------------------------------------------------- + +class TestColumnMapperDuplicateTarget: + """The column mapper rejects mappings where two source columns + point at the same target. This is surfaced as an error. + + Test approach: ``map_columns`` validates upfront via core, and + raises ``InputValidationError`` — the GUI wraps it. We invoke the + core function directly to pin the validation contract.""" + + def test_duplicate_target_raises(self): + import pandas as pd + from src.core.column_mapper import map_columns, MapOptions + from src.core.errors import InputValidationError + + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + opts = MapOptions(mapping={"a": "name", "b": "name"}) + with pytest.raises(InputValidationError): + map_columns(df, opts) diff --git a/tests/gui/test_findings_panel.py b/tests/gui/test_findings_panel.py new file mode 100644 index 0000000..7a82751 --- /dev/null +++ b/tests/gui/test_findings_panel.py @@ -0,0 +1,227 @@ +"""Findings panel rendering tests. + +``render_findings_panel`` is the central widget on the home page and +the Review page; failures here cascade into the user's first +impression. 
We drive it via a tiny test harness page +(``_findings_panel_harness.py``) so the test can inject findings +directly into session state — no file_uploader simulation needed. + +We verify: + +- Empty findings list → localized "no issues" success message. +- Findings with tool ids → one expander per tool, labeled in the + active language. +- Header + severity summary render at the top. +- Untargeted findings land in the "Other / file-level" expander. + +Pack-key parity is already pinned by ``test_lang_packs.py``; this +file pins the call sites instead. +""" + +from __future__ import annotations + +from pathlib import Path + +import pandas as pd +import pytest + +from streamlit.testing.v1 import AppTest + +from .conftest import PROJECT_ROOT, collected_text, with_language + +HARNESS_PATH = Path(__file__).resolve().parent / "_findings_panel_harness.py" + + +def _harness(findings, lang: str = "en") -> AppTest: + """Build an AppTest of the harness page with ``findings`` pre-stashed.""" + app = AppTest.from_file(str(HARNESS_PATH)) + app.session_state["test_findings"] = findings + if lang != "en": + app.session_state["ui_lang"] = lang + return app + + +def _make_finding(tool: str = "", **overrides): + """Build a minimal :class:`Finding` object. 
``Finding`` is a frozen + dataclass; constructor signature is well-pinned by core tests, so + we use it directly here rather than building dicts.""" + from src.core.analyze import Finding + kwargs = dict( + id="test_finding", + severity="warn", + tool=tool, + count=1, + description="A test finding.", + column=None, + samples=[], + confidence="medium", + fix_action="", + ) + kwargs.update(overrides) + return Finding(**kwargs) + + +# --------------------------------------------------------------------------- +# Empty findings → success message +# --------------------------------------------------------------------------- + +class TestEmptyFindings: + def test_empty_renders_no_issues_english(self): + app = _harness([]) + app.run() + text = collected_text(app) + assert "No issues detected" in text + + def test_empty_renders_no_issues_spanish(self): + app = _harness([], lang="es") + app.run() + text = collected_text(app) + assert "No se detectaron problemas" in text + + +# --------------------------------------------------------------------------- +# Header text +# --------------------------------------------------------------------------- + +class TestHeader: + def test_header_english(self): + app = _harness([_make_finding(tool="02_text_cleaner")]) + app.run() + text = collected_text(app) + assert "Detected issues" in text + + def test_header_spanish(self): + app = _harness([_make_finding(tool="02_text_cleaner")], lang="es") + app.run() + text = collected_text(app) + assert "Problemas detectados" in text + + +# --------------------------------------------------------------------------- +# Per-tool grouping → one expander per tool id +# --------------------------------------------------------------------------- + +class TestGrouping: + def test_findings_grouped_into_per_tool_expanders(self): + findings = [ + _make_finding(tool="02_text_cleaner", id="whitespace_padding"), + _make_finding(tool="02_text_cleaner", id="nbsp_padding"), + 
_make_finding(tool="03_format_standardizer", id="mixed_case_email"), + ] + app = _harness(findings) + app.run() + labels = [e.label for e in app.expander] + # Two unique tools → two expanders. Each label carries the + # tool's display name + finding count. + text_cleaner_expanders = [lbl for lbl in labels if "Text Cleaner" in lbl] + format_expanders = [lbl for lbl in labels if "Format Standardizer" in lbl] + assert len(text_cleaner_expanders) == 1, ( + f"expected one Text Cleaner expander; got: {labels}" + ) + assert len(format_expanders) == 1, ( + f"expected one Format Standardizer expander; got: {labels}" + ) + + def test_tool_names_localize_in_spanish(self): + findings = [_make_finding(tool="02_text_cleaner")] + app = _harness(findings, lang="es") + app.run() + labels = [e.label for e in app.expander] + assert any("Limpiador de texto" in lbl for lbl in labels), ( + f"Spanish tool name missing; expanders: {labels}" + ) + + def test_finding_count_in_expander_label(self): + findings = [ + _make_finding(tool="02_text_cleaner", id=f"f{i}") + for i in range(3) + ] + app = _harness(findings) + app.run() + labels = [e.label for e in app.expander] + # Pack template: "{tool} — {n} finding(s)" + text_cleaner_label = next(l for l in labels if "Text Cleaner" in l) + assert "3" in text_cleaner_label, ( + f"expected count '3' in expander label; got {text_cleaner_label!r}" + ) + + +# --------------------------------------------------------------------------- +# Open-tool button localizes +# --------------------------------------------------------------------------- + +class TestOpenToolButton: + """Each tool section has an ``st.page_link`` to jump to that tool's + page. 
+ The harness stubs ``st.page_link`` as markdown, so it renders as text with + label ``"Open {tool} →"`` (English) / ``"Abrir {tool} →"`` (Spanish).""" + + def test_open_tool_label_english(self): + findings = [_make_finding(tool="02_text_cleaner")] + app = _harness(findings) + app.run() + # The harness renders ``st.page_link`` as markdown, so the label + # is looked up in the collected page text. + text = collected_text(app) + # Pack template: "Open {tool} →" + assert "Open Text Cleaner" in text + + def test_open_tool_label_spanish(self): + findings = [_make_finding(tool="02_text_cleaner")] + app = _harness(findings, lang="es") + app.run() + text = collected_text(app) + # Pack template: "Abrir {tool} →" + assert "Abrir Limpiador de texto" in text + + +# --------------------------------------------------------------------------- +# Untargeted findings (file-level) go in the "Other" expander +# --------------------------------------------------------------------------- + +class TestUntargetedFindings: + def test_untargeted_goes_to_other_expander_en(self): + findings = [ + _make_finding(tool="", id="csv_bom_stripped"), + _make_finding(tool="02_text_cleaner", id="nbsp_padding"), + ] + app = _harness(findings) + app.run() + labels = [e.label for e in app.expander] + # Pack template: "Other / file-level — {n} finding(s)" + assert any("Other / file-level" in lbl for lbl in labels), ( + f"untargeted expander missing; got: {labels}" + ) + + def test_untargeted_label_spanish(self): + findings = [_make_finding(tool="", id="csv_bom_stripped")] + app = _harness(findings, lang="es") + app.run() + labels = [e.label for e in app.expander] + # Spanish pack: "Otros / a nivel de archivo — {n} hallazgo(s)" + assert any("Otros / a nivel de archivo" in lbl for lbl in labels), ( + f"Spanish 'Other' expander missing; got: {labels}" + ) + + +# --------------------------------------------------------------------------- +# Severity summary +# --------------------------------------------------------------------------- +
+class TestSeveritySummary: + """The panel renders a per-severity summary caption like + ``⚠️ 2 warn · ℹ️ 1 info``. We pin the icon + count rendering.""" + + def test_severity_icons_render(self): + findings = [ + _make_finding(tool="02_text_cleaner", severity="warn"), + _make_finding(tool="02_text_cleaner", severity="warn"), + _make_finding(tool="03_format_standardizer", severity="info"), + ] + app = _harness(findings) + app.run() + text = collected_text(app) + # Icons live in the per-language pack ("findings.severity_*"). + # The summary template is shared between languages. + assert "⚠️" in text or "warn" in text + # Counts present. + assert "2 warn" in text or "2 warn" in text diff --git a/tests/gui/test_gate.py b/tests/gui/test_gate.py new file mode 100644 index 0000000..27cbe77 --- /dev/null +++ b/tests/gui/test_gate.py @@ -0,0 +1,157 @@ +"""Gate tests — ``require_normalization_gate()`` behaviour. + +The gate sits between every tool page and the user's data. Three states +exist, each pinned here: + +1. **No upload** — gate is a no-op; the page proceeds and its own + uploader handles the file. +2. **Upload but no normalization result** — gate shows a warning and a + "Go to Review & Normalize" button, then ``st.stop()`` short-circuits + the rest of the page. +3. **Upload + matching passed normalization** — gate is a no-op; the + page proceeds. + +We exercise the gate via the Deduplicator page (any tool page would +work; dedup is the smallest one that doesn't depend on heavy widgets). +""" + +from __future__ import annotations + +import pytest + +from .conftest import ( + collected_text, + stash_upload, + stash_upload_without_gate, + with_language, +) + + +# Deduplicator is our canary — it calls ``require_normalization_gate`` +# on the second line of the module. If the gate blocks, the dedup- +# specific title shouldn't even render. 
+GATED_PAGE = "1_Deduplicator" + + +class TestGateNoUpload: + """No upload → the gate exits early and the page renders normally, + showing its own file uploader. (This is the "user opened the dedup + page first instead of coming from home" path.)""" + + def test_no_upload_lets_page_render(self, app_factory): + app = app_factory(GATED_PAGE) + app.run() + assert not app.exception + text = collected_text(app) + # The dedup page title is the unambiguous signal that the gate + # didn't short-circuit. + assert "Deduplicator" in text + + def test_no_upload_no_gate_warning(self, app_factory): + app = app_factory(GATED_PAGE) + app.run() + # The gate's warning string starts with the upload filename. No + # warning should be present when there's no upload. + for w in app.warning: + assert "normalization gate" not in (w.body or "") + + +class TestGateBlocksWithoutNormalization: + """Upload present but no passing normalization → gate fires: + warning + Go-to-Review button + page short-circuit.""" + + def test_gate_warning_renders(self, app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes) + app.run() + warnings = [w.body for w in app.warning if w.body] + joined = " ".join(warnings) + assert "normalization gate" in joined, ( + f"expected gate warning; got warnings: {warnings}" + ) + assert "messy.csv" in joined, ( + "gate warning should name the offending file" + ) + + def test_gate_renders_go_to_review_button(self, app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes) + app.run() + labels = [b.label for b in app.button] + assert any("Review & Normalize" in lbl for lbl in labels), ( + f"missing 'Go to Review & Normalize' button; got: {labels}" + ) + + def test_gate_short_circuits_page(self, app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + stash_upload_without_gate(app, name="messy.csv", 
data=small_csv_bytes) + app.run() + # When the gate fires it calls ``st.stop()`` after the warning. + # The page-body widgets (e.g., the advanced-options expander, the + # dedup-strategy widgets) must NOT be present. + labels = [b.label for b in app.button] + # The Run-Dedup primary action lives below the gate — make sure + # the gate killed the render before it. + assert not any("Run Deduplication" in lbl for lbl in labels), ( + f"gate failed to short-circuit; saw button: {labels}" + ) + + def test_gate_warning_localizes_to_spanish(self, app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + with_language(app, "es") + stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes) + app.run() + warnings = " ".join(w.body for w in app.warning if w.body) + # Spanish pack: ``debe pasar la verificación de normalización CSV``. + assert "normalización" in warnings + + def test_gate_button_localizes_to_spanish(self, app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + with_language(app, "es") + stash_upload_without_gate(app, name="messy.csv", data=small_csv_bytes) + app.run() + labels = [b.label for b in app.button] + assert any("Revisar y Normalizar" in lbl for lbl in labels), ( + f"Spanish gate button missing; got: {labels}" + ) + + +class TestGateAllowsWithPassedNormalization: + """Upload + passed normalization → gate is a no-op and the page + renders past the gate.""" + + def test_passed_gate_lets_page_render(self, app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + stash_upload(app, name="messy.csv", data=small_csv_bytes) + app.run() + assert not app.exception, f"page raised past gate: {app.exception}" + # The pickup banner uses the upload name — that's our signal + # that the gate let us through AND the pickup helper engaged. 
+ text = collected_text(app) + assert "messy.csv" in text + + +class TestGateMismatchedHash: + """Upload changes (different bytes) but normalization_for still + points at the old hash → gate fires again because the result is + stale. Pins the security-relevant "stale fix doesn't carry over to + a new file" invariant.""" + + def test_stale_normalization_blocks_new_upload(self, app_factory, small_csv_bytes): + app = app_factory(GATED_PAGE) + # Stash bytes A but a normalization_for hash that points at B. + app.session_state["home_uploaded_bytes"] = small_csv_bytes + app.session_state["home_uploaded_name"] = "new.csv" + app.session_state["home_uploaded_size"] = len(small_csv_bytes) + app.session_state["normalization_for"] = "different-hash-from-an-old-upload" + + # A passed-result object exists but is keyed to a different file. + class _Passed: + passed = True + app.session_state["normalization_result"] = _Passed() + + app.run() + warnings = " ".join(w.body for w in app.warning if w.body) + assert "normalization gate" in warnings, ( + "stale gate result should not unlock a new upload" + ) diff --git a/tests/gui/test_smoke.py b/tests/gui/test_smoke.py new file mode 100644 index 0000000..0f6d79f --- /dev/null +++ b/tests/gui/test_smoke.py @@ -0,0 +1,147 @@ +"""Smoke tests: every page renders without exception in EN and ES. + +The cheapest, highest-value GUI tests in the project. They catch: + +- Page-level Python errors (import failures, syntax errors that + ``ast.parse`` misses because they're runtime, e.g., a missing + attribute on a module). +- i18n pack key drift (a string that used to render in EN now renders + literally as ``"chrome.language_label"`` because someone renamed the + key in en.json but forgot es.json or the call site). +- Streamlit API churn that breaks ``set_page_config`` / + ``hide_streamlit_chrome`` on a single page. + +What they don't cover: user interactions. Those live in the workflow +tests. 
+""" + +from __future__ import annotations + +import pytest + +from .conftest import collected_text, with_language + + +# Every page that ships in the sidebar nav. Slugs match the filenames +# under ``src/gui/pages/`` so failures point at a real file. +PAGE_SLUGS = [ + "0_Review", + "1_Deduplicator", + "2_Text_Cleaner", + "3_Format_Standardizer", + "4_Missing_Values", + "5_Column_Mapper", + "6_Outlier_Detector", + "7_Multi_File_Merger", + "8_Validator_Reporter", + "9_Pipeline_Runner", + "99_Close", +] + + +# Substrings that must appear on each page for each language. +# +# v1.6 coverage reality (also documented in docs/USER-GUIDE.md §3.4): +# only the home page, the Close page, and the shared chrome / +# components ship Spanish strings. Per-tool page bodies are still +# hard-coded English in both modes — translating them is tracked as a +# follow-up. The substrings below reflect that reality: a page that +# isn't translated yet asserts the same English substring under both +# languages. The fact that the page *renders at all* in 'es' is still +# the value of the smoke test. +# +# When a page gains real Spanish translation, flip its 'es' entry to +# the localized substring — the test surface stays the same. 
+EXPECTED_SUBSTRINGS: dict[str, dict[str, str]] = { + "0_Review": {"en": "Review", "es": "Review"}, + "1_Deduplicator": {"en": "Deduplicator", "es": "Deduplicator"}, + "2_Text_Cleaner": {"en": "Text Cleaner", "es": "Text Cleaner"}, + "3_Format_Standardizer": {"en": "Format", "es": "Format"}, + "4_Missing_Values": {"en": "Missing", "es": "Missing"}, + "5_Column_Mapper": {"en": "Column", "es": "Column"}, + "6_Outlier_Detector": {"en": "Outlier", "es": "Outlier"}, + "7_Multi_File_Merger": {"en": "Merger", "es": "Merger"}, + "8_Validator_Reporter": {"en": "Validator", "es": "Validator"}, + "9_Pipeline_Runner": {"en": "Pipeline", "es": "Pipeline"}, + "99_Close": {"en": "Close DataTools", "es": "Cerrar DataTools"}, +} + + +class TestHomePageRenders: + """The home page is the only one with full EN/ES coverage in v1.6. + Pin it independently so its translation is non-regressable.""" + + @pytest.mark.parametrize("lang,expected", [ + ("en", "DataTools — Data Cleaning Mastery"), + ("es", "DataTools — Maestría en limpieza de datos"), + ]) + def test_home_renders_in_language(self, home_app, lang, expected): + with_language(home_app, lang) + home_app.run() + assert home_app.exception is None or home_app.exception == [], ( + f"home page raised: {home_app.exception}" + ) + assert expected in collected_text(home_app) + + def test_home_renders_footer_in_es(self, home_app): + with_language(home_app, "es") + home_app.run() + text = collected_text(home_app) + assert "Tus datos nunca salen" in text or "Se ejecuta localmente" in text + + def test_home_tool_card_uses_es_name(self, home_app): + """When the home grid renders in Spanish, the dedup card title + must use the Spanish display name, not the English fallback.""" + with_language(home_app, "es") + home_app.run() + text = collected_text(home_app) + assert "Eliminador de duplicados" in text + + +class TestEveryPageRenders: + """Parametrize over (page, language). 
Failure tells you exactly which + page + which language broke.""" + + @pytest.mark.parametrize("slug", PAGE_SLUGS) + @pytest.mark.parametrize("lang", ["en", "es"]) + def test_renders_without_exception(self, app_factory, slug, lang): + app = app_factory(slug) + with_language(app, lang) + app.run() + # AppTest exposes ``exception`` as a list of element-wrapped + # exceptions (empty when no error fired). + assert not app.exception, ( + f"page {slug!r} raised in language {lang!r}: {app.exception}" + ) + + @pytest.mark.parametrize("slug", PAGE_SLUGS) + @pytest.mark.parametrize("lang", ["en", "es"]) + def test_expected_substring_present(self, app_factory, slug, lang): + app = app_factory(slug) + with_language(app, lang) + app.run() + needle = EXPECTED_SUBSTRINGS[slug][lang] + text = collected_text(app) + assert needle in text, ( + f"page {slug!r} ({lang!r}) missing expected substring " + f"{needle!r}\nGot:\n{text[:500]}…" + ) + + +class TestPageHasLanguageSelector: + """Every page that calls ``hide_streamlit_chrome`` should mount the + sidebar language selector. This is the only place the picker is + rendered — if the chrome helper stops calling it, the test fails.""" + + @pytest.mark.parametrize("slug", PAGE_SLUGS) + def test_sidebar_selectbox_present(self, app_factory, slug): + app = app_factory(slug) + app.run() + # The selector is the only sidebar selectbox we ship today; if + # a page adds another the test should be loosened to "at least + # one selectbox with the language label." + assert len(app.sidebar.selectbox) >= 1, ( + f"page {slug!r} has no sidebar selectbox — " + f"hide_streamlit_chrome() should have mounted the language " + f"selector." + ) diff --git a/tests/gui/test_workflows.py b/tests/gui/test_workflows.py new file mode 100644 index 0000000..a67c41a --- /dev/null +++ b/tests/gui/test_workflows.py @@ -0,0 +1,207 @@ +"""Happy-path workflow tests for each Ready tool page. 
+ +These drive the GUI like a user would: pre-stash an upload + a passed +gate, render the page, click the primary action, assert the result +landed in session state. They catch wiring bugs that smoke tests +can't see — e.g., a primary button mis-keyed, a result not stashed in +session state, a page reading the wrong key. + +Slow-ish (~0.5–2s per workflow). Sits behind the ``gui`` marker so +``pytest -m 'not gui'`` skips them. +""" + +from __future__ import annotations + +import pandas as pd +import pytest + +from .conftest import collected_text, stash_upload + + +# --------------------------------------------------------------------------- +# Deduplicator +# --------------------------------------------------------------------------- + +class TestDeduplicatorWorkflow: + """Upload → click Find Duplicates → result lands in session_state.""" + + def _setup(self, app_factory, small_csv_bytes): + app = app_factory("1_Deduplicator") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + return app + + def test_upload_renders_preview(self, app_factory, small_csv_bytes): + app = self._setup(app_factory, small_csv_bytes) + app.run() + text = collected_text(app) + assert "Preview: messy.csv" in text, ( + f"upload preview header missing; got:\n{text[:500]}" + ) + + def test_find_duplicates_button_present(self, app_factory, small_csv_bytes): + app = self._setup(app_factory, small_csv_bytes) + app.run() + labels = [b.label for b in app.button] + assert any("Find Duplicates" in lbl for lbl in labels), ( + f"primary action missing; got: {labels}" + ) + + def test_clicking_find_duplicates_stashes_result( + self, app_factory, small_csv_bytes, + ): + app = self._setup(app_factory, small_csv_bytes) + app.run() + # Find the Find-Duplicates button and click it. AppTest's + # button-by-key access is via ``.button(key=...)`` — we don't + # have the key here, so locate it by label. 
+ target = next(b for b in app.button if "Find Duplicates" in b.label) + target.click().run() + # The page stores the result under ``result`` in session state. + result = app.session_state["result"] + assert result is not None, "Find Duplicates didn't stash a result" + # The sample has Alice twice → one match group. + assert len(result.match_groups) >= 1 + + +# --------------------------------------------------------------------------- +# Text Cleaner +# --------------------------------------------------------------------------- + +class TestTextCleanerWorkflow: + def _setup(self, app_factory, small_csv_bytes): + app = app_factory("2_Text_Cleaner") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + return app + + def test_page_renders_with_upload(self, app_factory, small_csv_bytes): + app = self._setup(app_factory, small_csv_bytes) + app.run() + assert not app.exception + text = collected_text(app) + assert "Text Cleaner" in text + + def test_preview_or_clean_button_present(self, app_factory, small_csv_bytes): + """The text cleaner ships a primary action (label varies by + version). We just assert at least one primary-looking button + exists past the upload.""" + app = self._setup(app_factory, small_csv_bytes) + app.run() + # Filter out the gate-redirect button (which would only be + # present if the gate fired, which our setup prevents). 
+ gate_buttons = {"Go to Review & Normalize", "Ir a Revisar y Normalizar"} + non_gate = [b for b in app.button if b.label not in gate_buttons] + assert non_gate, ( + f"no primary buttons rendered; got: {[b.label for b in app.button]}" + ) + + +# --------------------------------------------------------------------------- +# Format Standardizer +# --------------------------------------------------------------------------- + +class TestFormatStandardizerWorkflow: + def test_page_renders_with_upload(self, app_factory, small_csv_bytes): + app = app_factory("3_Format_Standardizer") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + app.run() + assert not app.exception + text = collected_text(app) + assert "Format Standardizer" in text + + +# --------------------------------------------------------------------------- +# Missing Value Handler +# --------------------------------------------------------------------------- + +class TestMissingValuesWorkflow: + def test_page_renders_with_upload(self, app_factory, small_csv_bytes): + app = app_factory("4_Missing_Values") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + app.run() + assert not app.exception + text = collected_text(app) + assert "Missing" in text + + +# --------------------------------------------------------------------------- +# Column Mapper +# --------------------------------------------------------------------------- + +class TestColumnMapperWorkflow: + def test_page_renders_with_upload(self, app_factory, small_csv_bytes): + app = app_factory("5_Column_Mapper") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + app.run() + assert not app.exception + text = collected_text(app) + assert "Column" in text + + +# --------------------------------------------------------------------------- +# Pipeline Runner +# --------------------------------------------------------------------------- + +class TestPipelineRunnerWorkflow: + def test_page_renders_with_upload(self, app_factory, 
small_csv_bytes): + app = app_factory("9_Pipeline_Runner") + stash_upload(app, name="messy.csv", data=small_csv_bytes) + app.run() + assert not app.exception + text = collected_text(app) + assert "Pipeline" in text + + +# --------------------------------------------------------------------------- +# Review page — special: doesn't gate on upload, has its own analyzer flow +# --------------------------------------------------------------------------- + +class TestReviewWorkflow: + """The Review page is the gate-fixer. Without an upload it shows a + 'go back to home' message. With an upload it runs the analyzer and + shows findings.""" + + def test_no_upload_shows_back_to_home(self, app_factory): + app = app_factory("0_Review") + app.run() + text = collected_text(app) + # Page shows ``No file uploaded`` + ``Back to home``. + assert "No file uploaded" in text or "uploaded" in text.lower() + + def test_with_upload_shows_review_content( + self, app_factory, small_csv_bytes, + ): + app = app_factory("0_Review") + # Review page only needs the upload bytes, not a pre-passed gate. + app.session_state["home_uploaded_bytes"] = small_csv_bytes + app.session_state["home_uploaded_name"] = "messy.csv" + app.session_state["home_uploaded_size"] = len(small_csv_bytes) + app.run() + assert not app.exception + text = collected_text(app) + # Page ran the analyzer — either we get findings or the + # "already clean" success message. Either way confirms the + # analyzer pipeline ran end-to-end with the stashed bytes. + clean_msg = "No findings to review" in text + encoding_section = "File encoding" in text + assert clean_msg or encoding_section, ( + f"Review page didn't surface analyzer output; got:\n{text[:400]}" + ) + + +# --------------------------------------------------------------------------- +# Coming-Soon pages still render (just a stub) — pinned so we know if a +# Coming-Soon goes from "stub renders" to "import error". 
+# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("slug,name", [ + ("6_Outlier_Detector", "Outlier"), + ("7_Multi_File_Merger", "Merger"), + ("8_Validator_Reporter", "Validator"), +]) +class TestComingSoonStubs: + def test_stub_renders(self, app_factory, slug, name): + app = app_factory(slug) + app.run() + assert not app.exception + text = collected_text(app) + assert name in text