"""Pure-function tests for pipeline_modules phrasing helpers.

These cover the adapter-key → tool bridge, the plain-English ``step_phrase``
wording, ``step_status`` pill levels, and the column-prose / pluralization
helpers (``_fmt_cols`` / ``_n``).

No Streamlit / AppTest needed — every symbol under test is a pure function
over plain dicts/lists.
"""

from __future__ import annotations

import pytest

from src.core.pipeline import TOOL_NAMES
from src.gui.components.pipeline_modules import (
    CONFIG_RENDERERS,
    PIPELINE_TOOL_META,
    _fmt_cols,
    _n,
    step_label,
    step_phrase,
    step_status,
)


# ---------------------------------------------------------------------------
# Bridge completeness
# ---------------------------------------------------------------------------


@pytest.mark.parametrize("tool", TOOL_NAMES)
def test_pipeline_tool_meta_covers_every_tool(tool):
    """Each name in TOOL_NAMES is a key of PIPELINE_TOOL_META with a truthy value."""
    assert tool in PIPELINE_TOOL_META
    tool_id = PIPELINE_TOOL_META[tool]
    assert tool_id  # non-empty tool_id


@pytest.mark.parametrize("tool", TOOL_NAMES)
def test_step_label_is_friendly_and_not_the_raw_key(tool):
    """``step_label`` yields a non-empty string that differs from the raw key."""
    friendly = step_label(tool)
    assert isinstance(friendly, str)
    assert friendly
    assert friendly != tool


@pytest.mark.parametrize("tool", TOOL_NAMES)
def test_every_tool_has_a_config_renderer(tool):
    """Each name in TOOL_NAMES has a callable entry in CONFIG_RENDERERS."""
    assert tool in CONFIG_RENDERERS
    renderer = CONFIG_RENDERERS[tool]
    assert callable(renderer)


def test_step_label_falls_back_to_raw_key_for_unknown_tool():
    """An unrecognised key is echoed back unchanged by ``step_label``."""
    unknown_key = "not_a_tool"
    assert step_label(unknown_key) == unknown_key


# ---------------------------------------------------------------------------
# step_phrase — populated + no-op cases for all five tools
# ---------------------------------------------------------------------------


def test_step_phrase_text_clean_populated_and_noop():
    """``text_clean`` phrasing for a populated result and for the no-op inputs."""
    populated = {
        "cells_changed": 1204,
        "columns_processed": ["name", "city"],
    }
    assert step_phrase("text_clean", populated) == "1,204 cells cleaned in name & city"

    # A zero count and an empty stats dict both read as "nothing happened".
    for noop_stats in ({"cells_changed": 0}, {}):
        assert step_phrase("text_clean", noop_stats) == "No changes needed."
def test_step_phrase_format_standardize_populated_and_noop():
    """``format_standardize`` phrasing: populated, unparseable tail, and no-op."""
    changed_only = {
        "cells_changed": 50,
        "columns_processed": ["phone"],
    }
    assert (
        step_phrase("format_standardize", changed_only)
        == "50 cells standardized in phone"
    )

    # unparseable cells append a "left unchanged" tail
    with_unparseable = {
        "cells_changed": 50,
        "cells_unparseable": 3,
        "columns_processed": ["phone"],
    }
    assert (
        step_phrase("format_standardize", with_unparseable)
        == "50 cells standardized in phone (3 left unchanged)"
    )

    # Empty stats and all-zero stats share the same no-op wording.
    for noop_stats in ({}, {"cells_changed": 0, "cells_unparseable": 0}):
        assert step_phrase("format_standardize", noop_stats) == "Nothing to standardize."


def test_step_phrase_missing_populated_and_noop():
    """``missing`` phrasing: combined counts, no-op, and sentinel-only input."""
    populated = {
        "cells_filled": 12,
        "rows_dropped": 4,
        "columns_dropped": ["x", "y"],
    }
    expected = "12 cells filled, 4 rows dropped, 2 columns dropped"
    assert step_phrase("missing", populated) == expected

    assert step_phrase("missing", {}) == "No missing values to handle."

    # sentinel-only flagging path
    sentinel_only = {"sentinels_standardized": 7}
    assert step_phrase("missing", sentinel_only) == "7 blank cells flagged"


def test_step_phrase_column_map_populated_and_noop():
    """``column_map`` phrasing for rename/add/drop counts and for empty stats."""
    populated = {
        "columns_renamed": 3,
        "columns_added": ["new"],
        "columns_dropped": ["old", "gone"],
    }
    expected = "3 columns renamed, 1 column added, 2 columns dropped"
    assert step_phrase("column_map", populated) == expected

    assert step_phrase("column_map", {}) == "Columns already aligned."


def test_step_phrase_dedup_mockup_case():
    """``dedup`` phrasing for the large-number example, with thousands separators."""
    stats = {
        "input_rows": 18442,
        "output_rows": 18130,
        "duplicates_removed": 312,
        "groups": 147,
    }
    expected = "312 duplicates removed across 147 groups (18,442 → 18,130 rows)"
    assert step_phrase("dedup", stats) == expected


def test_step_phrase_dedup_noop():
    """``dedup`` with zero removals or empty stats reports no duplicates."""
    for noop_stats in ({"duplicates_removed": 0}, {}):
        assert step_phrase("dedup", noop_stats) == "No duplicates found."
# ---------------------------------------------------------------------------
# Pluralization (_n) through step_phrase
# ---------------------------------------------------------------------------


def test_step_phrase_dedup_singular():
    """Counts of one use the singular nouns in the ``dedup`` phrase."""
    stats = {
        "input_rows": 10,
        "output_rows": 9,
        "duplicates_removed": 1,
        "groups": 1,
    }
    expected = "1 duplicate removed across 1 group (10 → 9 rows)"
    assert step_phrase("dedup", stats) == expected


def test_step_phrase_missing_singular():
    """Counts of one use the singular nouns in the ``missing`` phrase."""
    stats = {
        "rows_dropped": 1,
        "columns_dropped": ["x"],
    }
    assert step_phrase("missing", stats) == "1 row dropped, 1 column dropped"


def test_n_singular_vs_plural_every_noun():
    """``_n`` renders singular for 1 and plural otherwise, for each noun used."""
    cases = [
        (1, "cell", "1 cell"),
        (2, "cell", "2 cells"),
        (1, "row", "1 row"),
        (3, "row", "3 rows"),
        (1, "column", "1 column"),
        (5, "column", "5 columns"),
        (1, "duplicate", "1 duplicate"),
        (9, "duplicate", "9 duplicates"),
        (1, "group", "1 group"),
        (4, "group", "4 groups"),
    ]
    for count, noun, expected in cases:
        assert _n(count, noun) == expected


def test_n_thousands_separator():
    """``_n`` inserts a comma thousands separator into large counts."""
    cases = [
        (1204, "cell", "1,204 cells"),
        (18442, "row", "18,442 rows"),
    ]
    for count, noun, expected in cases:
        assert _n(count, noun) == expected


# ---------------------------------------------------------------------------
# Column prose (_fmt_cols)
# ---------------------------------------------------------------------------


def test_fmt_cols_zero():
    """No columns gives an empty string."""
    no_columns = []
    assert _fmt_cols(no_columns) == ""


def test_fmt_cols_one():
    """A single column is returned as-is."""
    single = ["name"]
    assert _fmt_cols(single) == "name"


def test_fmt_cols_two():
    """Two columns are joined with an ampersand."""
    pair = ["name", "city"]
    assert _fmt_cols(pair) == "name & city"


def test_fmt_cols_three():
    """Three columns use a comma, then an ampersand before the last."""
    triple = ["a", "b", "c"]
    assert _fmt_cols(triple) == "a, b & c"


def test_fmt_cols_four_or_more():
    """Beyond three columns, the first two are named and the rest are counted."""
    cases = [
        (["a", "b", "c", "d"], "a, b & 2 more"),
        (["a", "b", "c", "d", "e"], "a, b & 3 more"),
    ]
    for columns, expected in cases:
        assert _fmt_cols(columns) == expected


def test_fmt_cols_coerces_non_strings():
    """Non-string column names are rendered via their string form."""
    numeric_columns = [1, 2]
    assert _fmt_cols(numeric_columns) == "1 & 2"


# ---------------------------------------------------------------------------
# step_status — pill levels + details
# ---------------------------------------------------------------------------


def test_step_status_clean_is_ok():
    """A plain ``text_clean`` result yields the ok triple with no detail."""
    outcome = step_status("text_clean", {"cells_changed": 5})
    assert outcome == ("✓ ok", "ok", "")


def test_step_status_skipped():
    """``skipped=True`` yields the skipped level, an empty detail, and a skipped label."""
    outcome = step_status("text_clean", {"cells_changed": 5}, skipped=True)
    label, level, detail = outcome
    assert level == "skipped"
    assert detail == ""
    assert "skipped" in label


def test_step_status_error_uses_first_line_only():
    """Only the first line of a multi-line error string is kept as the detail."""
    multiline_error = "X: msg\nline2\nline3"
    label, level, detail = step_status("dedup", {}, error=multiline_error)
    assert level == "error"
    assert detail == "X: msg"
    assert "error" in label


def test_step_status_error_takes_precedence_over_skipped():
    """When both ``skipped`` and ``error`` are given, the error level is reported."""
    outcome = step_status("text_clean", {}, skipped=True, error="boom\nsecond")
    _label, level, detail = outcome
    assert level == "error"
    assert detail == "boom"


def test_step_status_format_standardize_unparseable_warns():
    """Unparseable cells in ``format_standardize`` raise the level to warn."""
    stats = {"cells_changed": 100, "cells_unparseable": 141}
    label, level, detail = step_status("format_standardize", stats)
    assert level == "warn"
    assert "141 skipped" in label
    assert detail  # non-empty inline detail


def test_step_status_format_standardize_no_unparseable_is_ok():
    """Without unparseable cells, ``format_standardize`` yields the ok triple."""
    outcome = step_status("format_standardize", {"cells_changed": 100})
    assert outcome == ("✓ ok", "ok", "")


def test_step_status_column_map_coercion_failures_warn():
    """Coercion failures in ``column_map`` raise the level to warn."""
    stats = {"coercion_failures": {"age": 4}}
    label, level, detail = step_status("column_map", stats)
    assert level == "warn"
    assert "4 not coerced" in label
    assert detail


def test_step_status_column_map_missing_required_targets_warn():
    """Missing required targets in ``column_map`` warn and name the target."""
    stats = {"missing_required_targets": ["email"]}
    label, level, detail = step_status("column_map", stats)
    assert level == "warn"
    assert "missing targets" in label
    assert "email" in detail


def test_step_status_column_map_missing_targets_take_precedence_over_coercion():
    """With both problems present, the label reports the missing targets."""
    # both present → missing-targets branch wins
    stats = {
        "missing_required_targets": ["email"],
        "coercion_failures": {"age": 4},
    }
    label, level, _detail = step_status("column_map", stats)
    assert level == "warn"
    assert "missing targets" in label


def test_step_status_unknown_tool_is_ok():
    """An unrecognised tool name yields the ok triple."""
    outcome = step_status("mystery", {"foo": 1})
    assert outcome == ("✓ ok", "ok", "")