Files
datatools-dev/tests/gui/test_advanced_panels.py
Michael db5ec084da docs+code: rename tool labels everywhere
Sweep follow-up to 93e43fc. Display labels now consistent across docs,
landing pages, CLI output, code comments, docstrings, and test prose.
Five parallel surfaces touched:

- docs (EN + ES): README, USER-GUIDE, CLI-REFERENCE, and 11 internal
  design/planning docs
- landing pages: index + bookkeeper/revops/shopify-pet
- src: CLI module docstrings, _TOOL_DISPLAY dicts in cli_analyze.py
  and gui/components/_legacy.py, core module headers, every tool
  page's module docstring
- tests: class/method/module docstrings and section-header comments
- test-cases READMEs

Page slugs (1_Deduplicator etc.), tool_id strings (01_deduplicator
etc.), Python class names (TestDeduplicatorWorkflow, FeatureFlag.*),
URL paths, anchor IDs, CSS classes, and asset filenames were left
intact since they're code identifiers / structural references.

All 2033 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 19:50:09 +00:00

195 lines
7.6 KiB
Python

"""Advanced-options panel tests.
``config_panel`` (in ``src.gui.components``) is the dedup-page's
expander that houses every per-column / per-strategy knob. It's the
densest single widget surface in the GUI, so a session-state key drift
in there cascades into every dedup session.
We exercise it via the Find Duplicates page (rendering ``config_panel``
in isolation requires a fake Streamlit context). The page provides
the surrounding state; we poke widgets and verify their effects.
"""
from __future__ import annotations
import pandas as pd
import pytest
from .conftest import stash_upload
# Page slug handed to ``app_factory`` by ``_render_page``; the tests below
# all render this page and then inspect the widgets it produces.
GATED_PAGE = "1_Deduplicator"
def _render_page(app_factory, small_csv_bytes):
    """Build the ``GATED_PAGE`` app, stash ``messy.csv`` as its upload, run it.

    ``small_csv_bytes`` is passed to ``stash_upload`` as the file contents.
    Returns the app object after a single ``run()`` so the caller can
    inspect the rendered widgets.
    """
    page = app_factory(GATED_PAGE)
    stash_upload(page, name="messy.csv", data=small_csv_bytes)
    page.run()
    return page
# ---------------------------------------------------------------------------
# Expander presence + collapsed state
# ---------------------------------------------------------------------------
class TestAdvancedExpander:
    """Presence check for the "Advanced Options" expander."""

    def test_advanced_options_expander_renders(self, app_factory, small_csv_bytes):
        """At least one rendered expander label contains "Advanced Options"."""
        app = _render_page(app_factory, small_csv_bytes)
        labels = [exp.label for exp in app.expander]
        has_advanced = any("Advanced Options" in text for text in labels)
        assert has_advanced, f"Advanced Options expander missing; expanders: {labels}"
# ---------------------------------------------------------------------------
# Algorithm selector
# ---------------------------------------------------------------------------
class TestAlgorithmSelector:
    """Checks on the "Fuzzy algorithm" selectbox.

    Per the original note, this dropdown drives ``Algorithm.{LEVENSHTEIN,
    JARO_WINKLER, TOKEN_SET_RATIO}`` on every column, and the default has
    to be jaro_winkler because the strong-key build_default_strategies
    assumes it.
    """

    @staticmethod
    def _algo_boxes(app):
        """Return every selectbox whose label is exactly "Fuzzy algorithm"."""
        return [box for box in app.selectbox if box.label == "Fuzzy algorithm"]

    def test_default_algorithm_is_jaro_winkler(self, app_factory, small_csv_bytes):
        """Exactly one such selectbox exists and it starts on jaro_winkler."""
        app = _render_page(app_factory, small_csv_bytes)
        found = self._algo_boxes(app)
        assert len(found) == 1
        assert found[0].value == "jaro_winkler"

    def test_algorithm_options_complete(self, app_factory, small_csv_bytes):
        """The first matching selectbox offers exactly the three algorithms."""
        app = _render_page(app_factory, small_csv_bytes)
        # ``next`` on a plain iterator keeps the StopIteration-on-missing
        # behaviour of a bare generator lookup.
        selector = next(box for box in app.selectbox if box.label == "Fuzzy algorithm")
        expected = {"jaro_winkler", "levenshtein", "token_set_ratio"}
        assert set(selector.options) == expected
# ---------------------------------------------------------------------------
# Threshold slider
# ---------------------------------------------------------------------------
class TestThresholdSlider:
    """Default value and bounds of the slider whose label mentions "Similarity"."""

    @staticmethod
    def _is_similarity(slider):
        """True when the slider label (``None`` treated as "") contains "Similarity"."""
        text = slider.label or ""
        return "Similarity" in text

    def test_default_threshold_is_85(self, app_factory, small_csv_bytes):
        """Exactly one similarity slider is rendered and its value is 85."""
        app = _render_page(app_factory, small_csv_bytes)
        matching = list(filter(self._is_similarity, app.slider))
        assert len(matching) == 1
        assert matching[0].value == 85

    def test_threshold_bounds(self, app_factory, small_csv_bytes):
        """The first similarity slider spans 50..100."""
        app = _render_page(app_factory, small_csv_bytes)
        slider = next(filter(self._is_similarity, app.slider))
        assert slider.min == 50
        assert slider.max == 100
# ---------------------------------------------------------------------------
# Survivor rule selector
# ---------------------------------------------------------------------------
class TestSurvivorSelector:
    """Checks on the "Survivor rule" selectbox: default, option set, and a
    smoke test for switching to ``most-recent``."""

    @staticmethod
    def _survivor_box(app):
        """First selectbox labelled "Survivor rule" (StopIteration if none)."""
        return next(box for box in app.selectbox if box.label == "Survivor rule")

    def test_default_is_first(self, app_factory, small_csv_bytes):
        """The rule starts on ``first``."""
        app = _render_page(app_factory, small_csv_bytes)
        assert self._survivor_box(app).value == "first"

    def test_all_four_rules_offered(self, app_factory, small_csv_bytes):
        """The option set is exactly the four known rules."""
        app = _render_page(app_factory, small_csv_bytes)
        offered = set(self._survivor_box(app).options)
        expected = {"first", "last", "most-complete", "most-recent"}
        assert offered == expected

    def test_selecting_most_recent_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """Switch the rule to ``most-recent`` and rerun.

        The page is expected to reveal a Date column dropdown in this
        mode; only the no-exception invariant is asserted here.
        """
        app = _render_page(app_factory, small_csv_bytes)
        self._survivor_box(app).select("most-recent").run()
        assert not app.exception
# ---------------------------------------------------------------------------
# Merge checkbox
# ---------------------------------------------------------------------------
class TestMergeCheckbox:
    """The "Merge mode" checkbox: off by default, and stays on once checked."""

    def test_merge_default_off(self, app_factory, small_csv_bytes):
        """Exactly one "Merge mode" checkbox is rendered and it is unchecked."""
        app = _render_page(app_factory, small_csv_bytes)
        merge_boxes = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(merge_boxes) == 1
        assert merge_boxes[0].value is False

    def test_toggling_merge_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """Check the box, rerun, and verify no exception and a persisted value."""
        app = _render_page(app_factory, small_csv_bytes)
        merge = next(c for c in app.checkbox if c.label == "Merge mode")
        merge.check().run()
        assert not app.exception, f"page raised after toggling merge: {app.exception}"
        # After checking, the value persists in session_state via the
        # widget's own key. Look the checkbox up again on the rerun's
        # output instead of reusing the pre-run handle, and actually
        # assert the persisted state rather than only describing it.
        rerendered = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(rerendered) == 1
        assert rerendered[0].value is True
# ---------------------------------------------------------------------------
# Column multiselects
# ---------------------------------------------------------------------------
class TestColumnMultiselects:
    """Match-on / Strong-keys / Fuzzy multiselects use ``st.multiselect``
    on every column. Empty default = auto-detect."""

    # Labels of the three column pickers under test; kept in one place so
    # the individual tests cannot drift apart.
    _LABELS = frozenset({"Match on columns", "Strong keys", "Fuzzy columns"})

    @classmethod
    def _column_pickers(cls, app):
        """Return the multiselects whose label is in ``_LABELS``.

        Fails if any of the three labels is absent, so the per-widget
        loops in the tests below can never pass without checking anything.
        """
        pickers = [ms for ms in app.multiselect if ms.label in cls._LABELS]
        found = {ms.label for ms in pickers}
        missing = cls._LABELS - found
        assert not missing, f"multiselects missing: {sorted(missing)}"
        return pickers

    def test_three_multiselects_present(self, app_factory, small_csv_bytes):
        """All three column pickers are rendered."""
        app = _render_page(app_factory, small_csv_bytes)
        labels = {m.label for m in app.multiselect}
        assert self._LABELS <= labels

    def test_defaults_are_empty(self, app_factory, small_csv_bytes):
        """Each column picker starts with an empty selection."""
        app = _render_page(app_factory, small_csv_bytes)
        for ms in self._column_pickers(app):
            assert ms.value == [], (
                f"{ms.label!r} default should be []; got {ms.value}"
            )

    def test_options_match_dataframe_columns(self, app_factory, small_csv_bytes):
        """Each column picker offers exactly the columns of the session's ``df``, in order."""
        app = _render_page(app_factory, small_csv_bytes)
        df_cols = list(app.session_state["df"].columns)
        for ms in self._column_pickers(app):
            assert list(ms.options) == df_cols, (
                f"{ms.label!r} options {list(ms.options)} != columns {df_cols}"
            )
# ---------------------------------------------------------------------------
# Save / Load config
# ---------------------------------------------------------------------------
class TestConfigSaveLoadButtons:
    """Presence checks for the save-settings button and the config uploader."""

    def test_save_settings_button_present(self, app_factory, small_csv_bytes):
        """Some button label contains "Save current settings"."""
        app = _render_page(app_factory, small_csv_bytes)
        button_labels = [btn.label for btn in app.button]
        has_save = any("Save current settings" in text for text in button_labels)
        assert has_save

    def test_config_file_uploader_present(self, app_factory, small_csv_bytes):
        """Some uploader label contains "Load config profile"."""
        app = _render_page(app_factory, small_csv_bytes)
        # Uploaders are read from ``app.file_uploader``. Two are expected
        # on this page, the main file (pickup-or-upload) and the config
        # JSON; only the config one is checked here.
        labels = [up.label for up in app.file_uploader]
        has_config = any("Load config profile" in text for text in labels)
        assert has_config, f"config uploader missing; uploaders: {labels}"