"""Advanced-options panel tests. ``config_panel`` (in ``src.gui.components``) is the dedup-page's expander that houses every per-column / per-strategy knob. It's the densest single widget surface in the GUI, so a session-state key drift in there cascades into every dedup session. We exercise it via the Find Duplicates page (rendering ``config_panel`` in isolation requires a fake Streamlit context). The page provides the surrounding state; we poke widgets and verify their effects. """ from __future__ import annotations import pandas as pd import pytest from .conftest import stash_upload GATED_PAGE = "1_Deduplicator" def _render_page(app_factory, small_csv_bytes): app = app_factory(GATED_PAGE) stash_upload(app, name="messy.csv", data=small_csv_bytes) app.run() return app # --------------------------------------------------------------------------- # Expander presence + collapsed state # --------------------------------------------------------------------------- class TestAdvancedExpander: def test_advanced_options_expander_renders(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) labels = [e.label for e in app.expander] assert any("Advanced Options" in lbl for lbl in labels), ( f"Advanced Options expander missing; expanders: {labels}" ) # --------------------------------------------------------------------------- # Algorithm selector # --------------------------------------------------------------------------- class TestAlgorithmSelector: """The fuzzy-algorithm dropdown drives ``Algorithm.{LEVENSHTEIN, JARO_WINKLER, TOKEN_SET_RATIO}`` on every column. Default value must be jaro_winkler — the strong-key build_default_strategies assumes it.""" def test_default_algorithm_is_jaro_winkler(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) # Find the selectbox by label. algo_boxes = [ sb for sb in app.selectbox if sb.label == "Fuzzy algorithm" ] assert len(algo_boxes) == 1 assert algo_boxes[0].value == "jaro_winkler" def test_algorithm_options_complete(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) algo = next(sb for sb in app.selectbox if sb.label == "Fuzzy algorithm") assert set(algo.options) == { "jaro_winkler", "levenshtein", "token_set_ratio", } # --------------------------------------------------------------------------- # Threshold slider # --------------------------------------------------------------------------- class TestThresholdSlider: def test_default_threshold_is_85(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) thresholds = [s for s in app.slider if "Similarity" in (s.label or "")] assert len(thresholds) == 1 assert thresholds[0].value == 85 def test_threshold_bounds(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) thr = next(s for s in app.slider if "Similarity" in (s.label or "")) assert thr.min == 50 assert thr.max == 100 # --------------------------------------------------------------------------- # Survivor rule selector # --------------------------------------------------------------------------- class TestSurvivorSelector: def test_default_is_first(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) surv = next( sb for sb in app.selectbox if sb.label == "Survivor rule" ) assert surv.value == "first" def test_all_four_rules_offered(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) surv = next(sb for sb in app.selectbox if sb.label == "Survivor rule") assert set(surv.options) == { "first", "last", "most-complete", "most-recent", } def test_selecting_most_recent_does_not_crash( self, app_factory, small_csv_bytes, ): """When ``most-recent`` is chosen the page should reveal a Date column dropdown. Pin the no-crash invariant.""" app = _render_page(app_factory, small_csv_bytes) surv = next(sb for sb in app.selectbox if sb.label == "Survivor rule") surv.select("most-recent").run() assert not app.exception # --------------------------------------------------------------------------- # Merge checkbox # --------------------------------------------------------------------------- class TestMergeCheckbox: def test_merge_default_off(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) merge_boxes = [c for c in app.checkbox if c.label == "Merge mode"] assert len(merge_boxes) == 1 assert merge_boxes[0].value is False def test_toggling_merge_does_not_crash( self, app_factory, small_csv_bytes, ): app = _render_page(app_factory, small_csv_bytes) merge = next(c for c in app.checkbox if c.label == "Merge mode") merge.check().run() assert not app.exception # After checking, the value persists in session_state via the # widget's own key. # --------------------------------------------------------------------------- # Column multiselects # --------------------------------------------------------------------------- class TestColumnMultiselects: """Match-on / Strong-keys / Fuzzy multiselects use ``st.multiselect`` on every column. Empty default = auto-detect.""" def test_three_multiselects_present(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) labels = {m.label for m in app.multiselect} assert {"Match on columns", "Strong keys", "Fuzzy columns"} <= labels def test_defaults_are_empty(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) for ms in app.multiselect: if ms.label in { "Match on columns", "Strong keys", "Fuzzy columns", }: assert ms.value == [], ( f"{ms.label!r} default should be []; got {ms.value}" ) def test_options_match_dataframe_columns(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) df_cols = list(app.session_state["df"].columns) for ms in app.multiselect: if ms.label in { "Match on columns", "Strong keys", "Fuzzy columns", }: assert list(ms.options) == df_cols # --------------------------------------------------------------------------- # Save / Load config # --------------------------------------------------------------------------- class TestConfigSaveLoadButtons: def test_save_settings_button_present(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) labels = [b.label for b in app.button] assert any("Save current settings" in lbl for lbl in labels) def test_config_file_uploader_present(self, app_factory, small_csv_bytes): app = _render_page(app_factory, small_csv_bytes) # AppTest exposes uploaders via ``app.file_uploader``. There # are two: the main file (pickup-or-upload) and the config # JSON. Make sure the config one is there. labels = [u.label for u in app.file_uploader] assert any("Load config profile" in lbl for lbl in labels), ( f"config uploader missing; uploaders: {labels}" )