Until now every test ran against core or the CLI; the Streamlit GUI was verified by hand. This commit adds tests/gui/ — 139 AppTest-driven tests behind a 'gui' marker so the quick loop (``pytest -m 'not gui'``) stays at 1777 tests / ~10s while ``pytest`` runs everything (1916 / ~14s). Coverage: - test_smoke.py (59): every page renders in EN and ES, expected substring present, sidebar selector mounted. - test_chrome.py (18): language selector flips session state and re-renders; quit button + farewell strings localize; tool-card names use the active language. - test_gate.py (9): require_normalization_gate no-op / warning / short-circuit / hash-mismatch invariants; warning + button localized. - test_workflows.py (14): happy path per Ready tool — stash upload, render, find primary action, verify result lands in session state. - test_dedup_review.py (8): Accept All / Reject All / Clear Decisions wire through to review_decisions; apply_review_decisions semantics (keep-all, merge, column override). - test_advanced_panels.py (15): config_panel widget defaults and options (algorithm, threshold, survivor rule, merge, multiselects, config save/load). - test_errors.py (4): garbage / empty / single-column uploads don't crash; duplicate-target mapping raises InputValidationError. - test_findings_panel.py (12): driven via a small standalone harness page so we test the component without faking a file_uploader. EN + ES strings, per-tool grouping, open-tool button label, untargeted expander, severity summary. Shared infrastructure in tests/gui/conftest.py: - ``stash_upload`` / ``stash_upload_without_gate`` — populate session_state to pre-pass or block the gate. - ``with_language`` — set ``ui_lang`` before run(). - ``collected_text`` — flatten title/caption/markdown/etc. into one string for substring assertions. - Auto-marking: every test in tests/gui/ gets ``@pytest.mark.gui`` via ``pytest_collection_modifyitems``, so the marker isn't per-test boilerplate.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
195 lines
7.6 KiB
Python
195 lines
7.6 KiB
Python
"""Advanced-options panel tests.
|
|
|
|
``config_panel`` (in ``src.gui.components``) is the dedup-page's
|
|
expander that houses every per-column / per-strategy knob. It's the
|
|
densest single widget surface in the GUI, so a session-state key drift
|
|
in there cascades into every dedup session.
|
|
|
|
We exercise it via the Deduplicator page (rendering ``config_panel``
|
|
in isolation requires a fake Streamlit context). The page provides
|
|
the surrounding state; we poke widgets and verify their effects.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from .conftest import stash_upload
|
|
|
|
|
|
# Page identifier handed to ``app_factory`` by ``_render_page``; every test
# in this module renders this one page.
# NOTE(review): "gated" presumably refers to the normalization gate that
# ``stash_upload`` pre-passes — confirm against tests/gui/conftest.py.
GATED_PAGE = "1_Deduplicator"
|
|
|
|
|
|
def _render_page(app_factory, small_csv_bytes):
    """Build the ``GATED_PAGE`` app, stash an upload, run it once, return it.

    ``small_csv_bytes`` is stashed under the file name ``messy.csv`` via
    ``stash_upload`` before the single ``run()`` call, so callers receive
    an already-rendered app whose widgets can be inspected directly.
    """
    page = app_factory(GATED_PAGE)
    stash_upload(page, name="messy.csv", data=small_csv_bytes)
    page.run()
    return page
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Expander presence + collapsed state
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestAdvancedExpander:
    """Presence check for the "Advanced Options" expander."""

    def test_advanced_options_expander_renders(self, app_factory, small_csv_bytes):
        """At least one rendered expander label contains ``Advanced Options``."""
        page = _render_page(app_factory, small_csv_bytes)
        labels = [expander.label for expander in page.expander]
        found = any("Advanced Options" in text for text in labels)
        assert found, (
            f"Advanced Options expander missing; expanders: {labels}"
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Algorithm selector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestAlgorithmSelector:
    """Checks on the "Fuzzy algorithm" selectbox.

    Per the original author's note, this dropdown drives
    ``Algorithm.{LEVENSHTEIN, JARO_WINKLER, TOKEN_SET_RATIO}`` on every
    column, and its default must be ``jaro_winkler`` because the
    strong-key ``build_default_strategies`` assumes it.
    """

    def test_default_algorithm_is_jaro_winkler(self, app_factory, small_csv_bytes):
        """Exactly one such selectbox renders and it starts on ``jaro_winkler``."""
        page = _render_page(app_factory, small_csv_bytes)
        # Locate the widget by its visible label.
        candidates = [
            box for box in page.selectbox if box.label == "Fuzzy algorithm"
        ]
        assert len(candidates) == 1
        assert candidates[0].value == "jaro_winkler"

    def test_algorithm_options_complete(self, app_factory, small_csv_bytes):
        """The offered options are exactly the three known algorithm names."""
        expected = {"jaro_winkler", "levenshtein", "token_set_ratio"}
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Fuzzy algorithm"
        )
        assert set(selector.options) == expected
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Threshold slider
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestThresholdSlider:
    """Default value and bounds of the slider whose label mentions "Similarity"."""

    def test_default_threshold_is_85(self, app_factory, small_csv_bytes):
        """Exactly one matching slider renders and it starts at 85."""
        page = _render_page(app_factory, small_csv_bytes)
        # ``label or ""`` keeps the substring test safe for unlabeled sliders.
        matches = [
            slider for slider in page.slider
            if "Similarity" in (slider.label or "")
        ]
        assert len(matches) == 1
        assert matches[0].value == 85

    def test_threshold_bounds(self, app_factory, small_csv_bytes):
        """The slider spans 50 to 100 inclusive."""
        page = _render_page(app_factory, small_csv_bytes)
        slider = next(
            candidate for candidate in page.slider
            if "Similarity" in (candidate.label or "")
        )
        assert slider.min == 50
        assert slider.max == 100
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Survivor rule selector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSurvivorSelector:
    """Default, options and selection behaviour of the "Survivor rule" selectbox."""

    def test_default_is_first(self, app_factory, small_csv_bytes):
        """The selectbox starts on ``first``."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Survivor rule"
        )
        assert selector.value == "first"

    def test_all_four_rules_offered(self, app_factory, small_csv_bytes):
        """The offered options are exactly the four survivor rules."""
        expected_rules = {"first", "last", "most-complete", "most-recent"}
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Survivor rule"
        )
        assert set(selector.options) == expected_rules

    def test_selecting_most_recent_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """Choosing ``most-recent`` and rerunning raises no exception.

        The page is expected to reveal a Date column dropdown for this
        rule; only the no-crash invariant is pinned here.
        """
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Survivor rule"
        )
        chosen = selector.select("most-recent")
        chosen.run()
        assert not page.exception
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Merge checkbox
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMergeCheckbox:
    """Default state and toggle behaviour of the "Merge mode" checkbox."""

    def test_merge_default_off(self, app_factory, small_csv_bytes):
        """Exactly one "Merge mode" checkbox renders and it starts unchecked."""
        app = _render_page(app_factory, small_csv_bytes)
        merge_boxes = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(merge_boxes) == 1
        assert merge_boxes[0].value is False

    def test_toggling_merge_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """Checking "Merge mode" reruns cleanly and the checked state sticks."""
        app = _render_page(app_factory, small_csv_bytes)
        merge = next(c for c in app.checkbox if c.label == "Merge mode")
        merge.check().run()
        assert not app.exception
        # After checking, the value persists via the widget's own key.
        # Re-query the checkbox from the rerun's element tree (instead of
        # reusing the pre-run handle) so the claim is actually verified.
        rerendered = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(rerendered) == 1, (
            f"expected one 'Merge mode' checkbox after rerun; "
            f"got {len(rerendered)}"
        )
        assert rerendered[0].value is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Column multiselects
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestColumnMultiselects:
    """Checks on the Match-on / Strong-keys / Fuzzy column multiselects.

    Per the original author's note, these use ``st.multiselect`` on every
    column and an empty default means auto-detect.
    """

    # Labels of the three column pickers under test, kept in one place so
    # the tests cannot drift apart.
    _COLUMN_LABELS = frozenset(
        {"Match on columns", "Strong keys", "Fuzzy columns"}
    )

    @classmethod
    def _column_multiselects(cls, app):
        """Return the targeted multiselects from ``app``.

        Fails if any expected label is absent, so the per-widget
        assertions in the callers can never pass vacuously over an empty
        selection.
        """
        found = [ms for ms in app.multiselect if ms.label in cls._COLUMN_LABELS]
        missing = cls._COLUMN_LABELS - {ms.label for ms in found}
        assert not missing, f"multiselects missing: {sorted(missing)}"
        return found

    def test_three_multiselects_present(self, app_factory, small_csv_bytes):
        """All three column multiselects are rendered."""
        app = _render_page(app_factory, small_csv_bytes)
        labels = {m.label for m in app.multiselect}
        assert self._COLUMN_LABELS <= labels

    def test_defaults_are_empty(self, app_factory, small_csv_bytes):
        """Each column multiselect starts with an empty selection."""
        app = _render_page(app_factory, small_csv_bytes)
        for ms in self._column_multiselects(app):
            assert ms.value == [], (
                f"{ms.label!r} default should be []; got {ms.value}"
            )

    def test_options_match_dataframe_columns(self, app_factory, small_csv_bytes):
        """Each multiselect offers the ``df`` columns in the same order."""
        app = _render_page(app_factory, small_csv_bytes)
        df_cols = list(app.session_state["df"].columns)
        for ms in self._column_multiselects(app):
            assert list(ms.options) == df_cols, (
                f"{ms.label!r} options {list(ms.options)} != "
                f"dataframe columns {df_cols}"
            )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Save / Load config
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestConfigSaveLoadButtons:
    """Presence checks for the config save button and config-profile uploader."""

    def test_save_settings_button_present(self, app_factory, small_csv_bytes):
        """Some button label contains ``Save current settings``."""
        page = _render_page(app_factory, small_csv_bytes)
        button_labels = [button.label for button in page.button]
        has_save = any("Save current settings" in text for text in button_labels)
        assert has_save

    def test_config_file_uploader_present(self, app_factory, small_csv_bytes):
        """Some file-uploader label contains ``Load config profile``."""
        page = _render_page(app_factory, small_csv_bytes)
        # Uploaders are read from ``page.file_uploader``. Two are expected:
        # the main data file (pickup-or-upload) and the config JSON. Only
        # the config one is checked here.
        labels = [uploader.label for uploader in page.file_uploader]
        has_config_uploader = any("Load config profile" in text for text in labels)
        assert has_config_uploader, (
            f"config uploader missing; uploaders: {labels}"
        )
|