Sweep follow-up to 93e43fc. Display labels are now consistent across docs,
landing pages, CLI output, code comments, docstrings, and test prose.
Five parallel surfaces were touched:
- docs (EN + ES): README, USER-GUIDE, CLI-REFERENCE, and 11 internal
  design/planning docs
- landing pages: index + bookkeeper/revops/shopify-pet
- src: CLI module docstrings, _TOOL_DISPLAY dicts in cli_analyze.py
  and gui/components/_legacy.py, core module headers, every tool
  page's module docstring
- tests: class/method/module docstrings and section-header comments
- test-cases READMEs
Page slugs (1_Deduplicator etc.), tool_id strings (01_deduplicator
etc.), Python class names (TestDeduplicatorWorkflow, FeatureFlag.*),
URL paths, anchor IDs, CSS classes, and asset filenames were left
intact since they're code identifiers / structural references.
All 2033 tests pass.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
195 lines
7.6 KiB
Python
195 lines
7.6 KiB
Python
"""Advanced-options panel tests.
|
|
|
|
``config_panel`` (in ``src.gui.components``) is the dedup page's
expander that houses every per-column / per-strategy knob.  It's the
densest single widget surface in the GUI, so a session-state key drift
in there cascades into every dedup session.

We exercise it via the Find Duplicates page (rendering ``config_panel``
in isolation requires a fake Streamlit context).  The page provides
the surrounding state; we poke widgets and verify their effects.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from .conftest import stash_upload
|
|
|
|
|
|
# Page slug handed to ``app_factory`` by ``_render_page``.  The module
# docstring calls the page under test "Find Duplicates" -- presumably that
# is this page's display label, while the slug stays a code identifier.
GATED_PAGE = "1_Deduplicator"
|
|
|
|
|
|
def _render_page(app_factory, small_csv_bytes):
    """Build the page under test with a CSV pre-staged, run it once, return it.

    ``app_factory`` is called with ``GATED_PAGE``.  ``small_csv_bytes`` is
    handed to ``stash_upload`` under the file name ``messy.csv`` before the
    first ``run()``, so the page renders with that upload already in place.
    """
    page = app_factory(GATED_PAGE)
    stash_upload(page, name="messy.csv", data=small_csv_bytes)
    page.run()
    return page
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Expander presence + collapsed state
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestAdvancedExpander:
    """Presence check for the ``Advanced Options`` expander."""

    def test_advanced_options_expander_renders(self, app_factory, small_csv_bytes):
        """At least one expander label must contain ``Advanced Options``."""
        page = _render_page(app_factory, small_csv_bytes)
        labels = [exp.label for exp in page.expander]
        found = any("Advanced Options" in text for text in labels)
        assert found, f"Advanced Options expander missing; expanders: {labels}"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Algorithm selector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestAlgorithmSelector:
    """Checks on the ``Fuzzy algorithm`` selectbox.

    The dropdown drives ``Algorithm.{LEVENSHTEIN, JARO_WINKLER,
    TOKEN_SET_RATIO}`` on every column.  Its default must be
    ``jaro_winkler`` because the strong-key ``build_default_strategies``
    assumes it.
    """

    def test_default_algorithm_is_jaro_winkler(self, app_factory, small_csv_bytes):
        """Exactly one selector renders and it starts on ``jaro_winkler``."""
        page = _render_page(app_factory, small_csv_bytes)
        # Widgets are located by label.
        matches = [box for box in page.selectbox if box.label == "Fuzzy algorithm"]
        assert len(matches) == 1
        (selector,) = matches
        assert selector.value == "jaro_winkler"

    def test_algorithm_options_complete(self, app_factory, small_csv_bytes):
        """The selector offers exactly the three known algorithms."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = next(
            box for box in page.selectbox if box.label == "Fuzzy algorithm"
        )
        expected = {"jaro_winkler", "levenshtein", "token_set_ratio"}
        assert set(selector.options) == expected
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Threshold slider
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestThresholdSlider:
    """Default and bounds of the slider whose label contains ``Similarity``."""

    def test_default_threshold_is_85(self, app_factory, small_csv_bytes):
        """Exactly one such slider renders and it starts at 85."""
        page = _render_page(app_factory, small_csv_bytes)
        # ``label or ""`` guards against sliders rendered without a label.
        sliders = [sl for sl in page.slider if "Similarity" in (sl.label or "")]
        assert len(sliders) == 1
        (slider,) = sliders
        assert slider.value == 85

    def test_threshold_bounds(self, app_factory, small_csv_bytes):
        """The slider ranges from 50 to 100."""
        page = _render_page(app_factory, small_csv_bytes)
        slider = next(sl for sl in page.slider if "Similarity" in (sl.label or ""))
        lower = slider.min
        assert lower == 50
        upper = slider.max
        assert upper == 100
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Survivor rule selector
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSurvivorSelector:
    """Checks on the ``Survivor rule`` selectbox: default, options, smoke switch."""

    @staticmethod
    def _survivor_box(page):
        """Return the first selectbox on ``page`` labelled ``Survivor rule``."""
        return next(box for box in page.selectbox if box.label == "Survivor rule")

    def test_default_is_first(self, app_factory, small_csv_bytes):
        """The selector starts on ``first``."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = self._survivor_box(page)
        assert selector.value == "first"

    def test_all_four_rules_offered(self, app_factory, small_csv_bytes):
        """The selector offers exactly the four survivor rules."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = self._survivor_box(page)
        expected = {"first", "last", "most-complete", "most-recent"}
        assert set(selector.options) == expected

    def test_selecting_most_recent_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """When ``most-recent`` is chosen the page should reveal a
        Date column dropdown.  Only the no-crash invariant is pinned here."""
        page = _render_page(app_factory, small_csv_bytes)
        selector = self._survivor_box(page)
        selector.select("most-recent").run()
        assert not page.exception
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Merge checkbox
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMergeCheckbox:
    """Default state and toggle behaviour of the ``Merge mode`` checkbox."""

    def test_merge_default_off(self, app_factory, small_csv_bytes):
        """Exactly one ``Merge mode`` checkbox renders, and it starts unchecked."""
        app = _render_page(app_factory, small_csv_bytes)
        merge_boxes = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(merge_boxes) == 1
        assert merge_boxes[0].value is False

    def test_toggling_merge_does_not_crash(
        self, app_factory, small_csv_bytes,
    ):
        """Checking ``Merge mode`` and rerunning raises nothing and sticks."""
        app = _render_page(app_factory, small_csv_bytes)
        merge = next(c for c in app.checkbox if c.label == "Merge mode")
        merge.check().run()
        assert not app.exception
        # After checking, the value persists in session_state via the
        # widget's own key.  The rerun rebuilds the element tree, so look
        # the checkbox up again rather than trusting the pre-run handle.
        rerendered = [c for c in app.checkbox if c.label == "Merge mode"]
        assert len(rerendered) == 1
        assert rerendered[0].value is True, (
            "Merge mode should stay checked after the rerun"
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Column multiselects
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestColumnMultiselects:
    """Match-on / Strong-keys / Fuzzy multiselects use ``st.multiselect``
    on every column.  Empty default = auto-detect."""

    # Labels of the three column pickers these tests care about.
    _COLUMN_LABELS = frozenset({"Match on columns", "Strong keys", "Fuzzy columns"})

    def test_three_multiselects_present(self, app_factory, small_csv_bytes):
        """All three column pickers are rendered (extra multiselects are fine)."""
        page = _render_page(app_factory, small_csv_bytes)
        rendered = {picker.label for picker in page.multiselect}
        assert self._COLUMN_LABELS <= rendered

    def test_defaults_are_empty(self, app_factory, small_csv_bytes):
        """Each column picker that renders starts with nothing selected."""
        page = _render_page(app_factory, small_csv_bytes)
        pickers = [
            picker for picker in page.multiselect
            if picker.label in self._COLUMN_LABELS
        ]
        for picker in pickers:
            assert picker.value == [], (
                f"{picker.label!r} default should be []; got {picker.value}"
            )

    def test_options_match_dataframe_columns(self, app_factory, small_csv_bytes):
        """Each column picker lists the ``df`` columns, in the same order."""
        page = _render_page(app_factory, small_csv_bytes)
        expected = list(page.session_state["df"].columns)
        pickers = [
            picker for picker in page.multiselect
            if picker.label in self._COLUMN_LABELS
        ]
        for picker in pickers:
            assert list(picker.options) == expected
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Save / Load config
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestConfigSaveLoadButtons:
    """Presence checks for the save-settings button and the config uploader."""

    def test_save_settings_button_present(self, app_factory, small_csv_bytes):
        """Some button label contains ``Save current settings``."""
        page = _render_page(app_factory, small_csv_bytes)
        found = any("Save current settings" in btn.label for btn in page.button)
        assert found

    def test_config_file_uploader_present(self, app_factory, small_csv_bytes):
        """Some file-uploader label contains ``Load config profile``."""
        page = _render_page(app_factory, small_csv_bytes)
        # AppTest exposes uploaders via ``app.file_uploader``.  Two are
        # expected: the main file (pickup-or-upload) and the config JSON.
        # Only the config one is checked here.
        labels = [uploader.label for uploader in page.file_uploader]
        found = any("Load config profile" in text for text in labels)
        assert found, f"config uploader missing; uploaders: {labels}"
|