Files
datatools-dev/tests/gui/test_advanced_panels.py
Michael 35d46a0c1a test(gui): add Streamlit AppTest layer (139 tests)
Until now every test ran against core or the CLI; the Streamlit GUI
was verified by hand. This commit adds tests/gui/ — 139 AppTest-
driven tests behind a 'gui' marker so the quick loop
(``pytest -m 'not gui'``) stays at 1777 tests / ~10s while
``pytest`` runs everything (1916 / ~14s).

Coverage:
- test_smoke.py (59): every page renders in EN and ES, expected
  substring present, sidebar selector mounted.
- test_chrome.py (18): language selector flips session state and
  re-renders; quit button + farewell strings localize; tool-card
  names use the active language.
- test_gate.py (9): require_normalization_gate no-op / warning /
  short-circuit / hash-mismatch invariants; warning + button
  localized.
- test_workflows.py (14): happy path per Ready tool — stash
  upload, render, find primary action, verify result lands in
  session state.
- test_dedup_review.py (8): Accept All / Reject All / Clear
  Decisions wire through to review_decisions; apply_review_decisions
  semantics (keep-all, merge, column override).
- test_advanced_panels.py (15): config_panel widget defaults and
  options (algorithm, threshold, survivor rule, merge, multiselects,
  config save/load).
- test_errors.py (4): garbage / empty / single-column uploads don't
  crash; duplicate-target mapping raises InputValidationError.
- test_findings_panel.py (12): driven via a small standalone harness
  page so we test the component without faking a file_uploader. EN
  + ES strings, per-tool grouping, open-tool button label, untargeted
  expander, severity summary.

Shared infrastructure in tests/gui/conftest.py:
- ``stash_upload`` / ``stash_upload_without_gate`` — populate
  session_state to pre-pass or block the gate.
- ``with_language`` — set ``ui_lang`` before run().
- ``collected_text`` — flatten title/caption/markdown/etc. into
  one string for substring assertions.
- Auto-marking: every test in tests/gui/ gets ``@pytest.mark.gui``
  via ``pytest_collection_modifyitems``, so the marker isn't
  per-test boilerplate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 16:13:40 +00:00

195 lines
7.6 KiB
Python

"""Advanced-options panel tests.

``config_panel`` (in ``src.gui.components``) is the dedup page's
expander that houses every per-column / per-strategy knob. It's the
densest single widget surface in the GUI, so a session-state key drift
in there cascades into every dedup session.

We exercise it via the Deduplicator page (rendering ``config_panel``
in isolation requires a fake Streamlit context). The page provides
the surrounding state; we poke widgets and verify their effects.
"""
from __future__ import annotations
import pandas as pd
import pytest
from .conftest import stash_upload
GATED_PAGE = "1_Deduplicator"
def _render_page(app_factory, small_csv_bytes):
    """Return the Deduplicator page after one run with an upload stashed.

    ``app_factory`` is called with ``GATED_PAGE`` to build the page object,
    ``stash_upload`` is given ``small_csv_bytes`` under the file name
    ``messy.csv``, and the page is run once before being handed back.
    """
    page = app_factory(GATED_PAGE)
    # Stash the upload before the first run so the page renders with data.
    stash_upload(page, name="messy.csv", data=small_csv_bytes)
    page.run()
    return page
# ---------------------------------------------------------------------------
# Expander presence + collapsed state
# ---------------------------------------------------------------------------
class TestAdvancedExpander:
    """Presence check for the page's "Advanced Options" expander."""

    def test_advanced_options_expander_renders(self, app_factory, small_csv_bytes):
        """At least one expander label contains ``Advanced Options``."""
        page = _render_page(app_factory, small_csv_bytes)
        labels = [expander.label for expander in page.expander]
        found = any("Advanced Options" in label for label in labels)
        assert found, f"Advanced Options expander missing; expanders: {labels}"
# ---------------------------------------------------------------------------
# Algorithm selector
# ---------------------------------------------------------------------------
class TestAlgorithmSelector:
    """Checks on the "Fuzzy algorithm" selectbox.

    Per the original notes, this dropdown drives
    ``Algorithm.{LEVENSHTEIN, JARO_WINKLER, TOKEN_SET_RATIO}`` on every
    column, and the default has to stay ``jaro_winkler`` because the
    strong-key ``build_default_strategies`` assumes it.
    """

    def test_default_algorithm_is_jaro_winkler(self, app_factory, small_csv_bytes):
        """Exactly one matching selectbox exists and it starts on ``jaro_winkler``."""
        page = _render_page(app_factory, small_csv_bytes)
        # Match on the exact label; anything but a single hit is a failure.
        matches = []
        for box in page.selectbox:
            if box.label == "Fuzzy algorithm":
                matches.append(box)
        assert len(matches) == 1
        assert matches[0].value == "jaro_winkler"

    def test_algorithm_options_complete(self, app_factory, small_csv_bytes):
        """The selectbox offers exactly the three algorithm names."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Fuzzy algorithm"
        )
        expected = {"jaro_winkler", "levenshtein", "token_set_ratio"}
        assert set(selector.options) == expected
# ---------------------------------------------------------------------------
# Threshold slider
# ---------------------------------------------------------------------------
class TestThresholdSlider:
    """Default value and bounds of the slider whose label contains ``Similarity``."""

    def test_default_threshold_is_85(self, app_factory, small_csv_bytes):
        """Exactly one similarity slider exists and its initial value is 85."""
        page = _render_page(app_factory, small_csv_bytes)
        # ``label or ""`` keeps the substring test safe for a falsy label.
        matches = [
            slider
            for slider in page.slider
            if "Similarity" in (slider.label or "")
        ]
        assert len(matches) == 1
        assert matches[0].value == 85

    def test_threshold_bounds(self, app_factory, small_csv_bytes):
        """The similarity slider ranges from 50 to 100."""
        page = _render_page(app_factory, small_csv_bytes)
        slider = next(
            candidate
            for candidate in page.slider
            if "Similarity" in (candidate.label or "")
        )
        assert slider.min == 50
        assert slider.max == 100
# ---------------------------------------------------------------------------
# Survivor rule selector
# ---------------------------------------------------------------------------
class TestSurvivorSelector:
    """Checks on the "Survivor rule" selectbox: default, option set, switching."""

    def test_default_is_first(self, app_factory, small_csv_bytes):
        """The selectbox starts on ``first``."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Survivor rule"
        )
        assert selector.value == "first"

    def test_all_four_rules_offered(self, app_factory, small_csv_bytes):
        """The option set is exactly the four survivor rules."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Survivor rule"
        )
        expected_rules = {"first", "last", "most-complete", "most-recent"}
        assert set(selector.options) == expected_rules

    def test_selecting_most_recent_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """Switching to ``most-recent`` must not raise.

        The page is expected to reveal a Date column dropdown for this
        rule; this test only pins the no-crash invariant.
        """
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Survivor rule"
        )
        selector.select("most-recent").run()
        assert not page.exception
# ---------------------------------------------------------------------------
# Merge checkbox
# ---------------------------------------------------------------------------
class TestMergeCheckbox:
    """Checks on the "Merge mode" checkbox: default state and toggling."""

    def test_merge_default_off(self, app_factory, small_csv_bytes):
        """Exactly one "Merge mode" checkbox exists and it starts unchecked."""
        app = _render_page(app_factory, small_csv_bytes)
        merge_boxes = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(merge_boxes) == 1
        assert merge_boxes[0].value is False

    def test_toggling_merge_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """Checking "Merge mode" reruns cleanly and the checked state sticks."""
        app = _render_page(app_factory, small_csv_bytes)
        merge = next(c for c in app.checkbox if c.label == "Merge mode")
        merge.check().run()
        assert not app.exception
        # After checking, the value persists in session_state via the
        # widget's own key. Look the checkbox up again after the rerun
        # (rather than reusing the pre-run handle) and verify that claim.
        rerun_boxes = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(rerun_boxes) == 1
        assert rerun_boxes[0].value is True, (
            "Merge mode checkbox did not stay checked after rerun"
        )
# ---------------------------------------------------------------------------
# Column multiselects
# ---------------------------------------------------------------------------
class TestColumnMultiselects:
    """Match-on / Strong-keys / Fuzzy multiselects use ``st.multiselect``
    on every column. Empty default = auto-detect."""

    # Labels of the three column pickers every test in this class targets.
    _COLUMN_LABELS = frozenset(
        {"Match on columns", "Strong keys", "Fuzzy columns"}
    )

    def _column_multiselects(self, app):
        """Return the multiselects labelled with one of ``_COLUMN_LABELS``.

        Fails the calling test if any of the three labels is missing, so
        the per-widget loops below can never pass without checking
        anything (for example after a label rename).
        """
        found = [ms for ms in app.multiselect if ms.label in self._COLUMN_LABELS]
        found_labels = {ms.label for ms in found}
        assert found_labels == self._COLUMN_LABELS, (
            f"column multiselects missing; found: {sorted(found_labels)}"
        )
        return found

    def test_three_multiselects_present(self, app_factory, small_csv_bytes):
        """All three column multiselect labels appear on the page."""
        app = _render_page(app_factory, small_csv_bytes)
        labels = {m.label for m in app.multiselect}
        assert self._COLUMN_LABELS <= labels

    def test_defaults_are_empty(self, app_factory, small_csv_bytes):
        """Each column multiselect starts with an empty selection."""
        app = _render_page(app_factory, small_csv_bytes)
        for ms in self._column_multiselects(app):
            assert ms.value == [], (
                f"{ms.label!r} default should be []; got {ms.value}"
            )

    def test_options_match_dataframe_columns(self, app_factory, small_csv_bytes):
        """Each column multiselect offers the session ``df`` columns, in order."""
        app = _render_page(app_factory, small_csv_bytes)
        df_cols = list(app.session_state["df"].columns)
        for ms in self._column_multiselects(app):
            assert list(ms.options) == df_cols
# ---------------------------------------------------------------------------
# Save / Load config
# ---------------------------------------------------------------------------
class TestConfigSaveLoadButtons:
    """Presence checks for the config save button and the config uploader."""

    def test_save_settings_button_present(self, app_factory, small_csv_bytes):
        """Some button label contains ``Save current settings``."""
        page = _render_page(app_factory, small_csv_bytes)
        button_labels = [button.label for button in page.button]
        assert any("Save current settings" in text for text in button_labels)

    def test_config_file_uploader_present(self, app_factory, small_csv_bytes):
        """Some file-uploader label contains ``Load config profile``."""
        page = _render_page(app_factory, small_csv_bytes)
        # Uploaders are read from ``page.file_uploader``. Per the original
        # note the page has two: the main file (pickup-or-upload) and the
        # config JSON. Only the config one is checked here.
        labels = [uploader.label for uploader in page.file_uploader]
        has_config_uploader = any(
            "Load config profile" in label for label in labels
        )
        assert has_config_uploader, f"config uploader missing; uploaders: {labels}"