Files
datatools-dev/tests/gui/test_pipeline_phrasing.py
Michael 38616d69e2 test(pipeline): complete automated test suite for the pipeline feature
Adds ~115 tests pinning the Automated Workflows feature end to end:

- tests/test_pipeline.py (+43): per-adapter summary correctness on known
  inputs, multi-step data flow, error stop/continue contract, empty /
  single-column / all-disabled edges, dict+file serialization round-trips,
  recommended_pipeline(include=…), and a synthesized demo integration run.
- tests/test_cli_pipeline.py (new, 21): --recommend, dry-run-by-default,
  --apply output CSV + audit JSON, --steps, --strict abort, arg validation,
  --continue-on-error vs halt, and a save→load round-trip. Invokes the Typer
  app directly to bypass the license guard (house pattern).
- tests/gui/test_pipeline_builder.py (+9): reorder ▲/▼, disabled edge
  buttons, disabled-step persistence across reorder, restore-recommended,
  Advanced JSON export/import, and per-tool Configure panels emitting the
  correct option dicts (AppTest).
- tests/gui/test_pipeline_phrasing.py (new, 30): step_phrase/step_status and
  the adapter-key→friendly-name bridge as pure functions, incl. pluralization,
  column prose, and warn/error status derivation.

Full suite: 2565 passed, 91 skipped. No product bugs surfaced. Documents the
coverage in docs/DEVELOPER.md (test tree + a pipeline-coverage note).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-22 18:31:15 +00:00

255 lines
8.1 KiB
Python

"""Pure-function tests for pipeline_modules phrasing helpers.
These cover the adapter-key → tool bridge, the plain-English ``step_phrase``
wording, ``step_status`` pill levels, and the column-prose / pluralization
helpers (``_fmt_cols`` / ``_n``). No Streamlit / AppTest needed — every symbol
under test is a pure function over plain dicts/lists.
"""
from __future__ import annotations
import pytest
from src.core.pipeline import TOOL_NAMES
from src.gui.components.pipeline_modules import (
CONFIG_RENDERERS,
PIPELINE_TOOL_META,
_fmt_cols,
_n,
step_label,
step_phrase,
step_status,
)
# ---------------------------------------------------------------------------
# Bridge completeness
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("tool", TOOL_NAMES)
def test_pipeline_tool_meta_covers_every_tool(tool):
    """Every name in TOOL_NAMES is a key of PIPELINE_TOOL_META with a truthy value."""
    assert tool in PIPELINE_TOOL_META
    tool_id = PIPELINE_TOOL_META[tool]
    assert tool_id  # non-empty tool_id
@pytest.mark.parametrize("tool", TOOL_NAMES)
def test_step_label_is_friendly_and_not_the_raw_key(tool):
    """step_label returns a non-empty string that differs from the raw tool key."""
    friendly = step_label(tool)
    assert isinstance(friendly, str)
    assert friendly
    assert friendly != tool
@pytest.mark.parametrize("tool", TOOL_NAMES)
def test_every_tool_has_a_config_renderer(tool):
    """Every name in TOOL_NAMES has a callable registered in CONFIG_RENDERERS."""
    assert tool in CONFIG_RENDERERS
    renderer = CONFIG_RENDERERS[tool]
    assert callable(renderer)
def test_step_label_falls_back_to_raw_key_for_unknown_tool():
    """An unrecognised key is expected to come back from step_label unchanged."""
    unknown_key = "not_a_tool"
    assert step_label(unknown_key) == unknown_key
# ---------------------------------------------------------------------------
# step_phrase — populated + no-op cases for all five tools
# ---------------------------------------------------------------------------
def test_step_phrase_text_clean_populated_and_noop():
    """Pin the text_clean phrase for a populated summary and for two no-op summaries."""
    tool = "text_clean"
    populated = {"cells_changed": 1204, "columns_processed": ["name", "city"]}
    assert step_phrase(tool, populated) == "1,204 cells cleaned in name & city"

    # A zero count and an empty summary must both read as "nothing happened".
    for quiet_summary in ({"cells_changed": 0}, {}):
        assert step_phrase(tool, quiet_summary) == "No changes needed."
def test_step_phrase_format_standardize_populated_and_noop():
    """Pin the format_standardize phrase: populated, with unparseable tail, and no-op."""
    tool = "format_standardize"

    changed_only = {"cells_changed": 50, "columns_processed": ["phone"]}
    assert step_phrase(tool, changed_only) == "50 cells standardized in phone"

    # unparseable cells append a "left unchanged" tail
    with_unparseable = {
        "cells_changed": 50,
        "cells_unparseable": 3,
        "columns_processed": ["phone"],
    }
    assert step_phrase(tool, with_unparseable) == (
        "50 cells standardized in phone (3 left unchanged)"
    )

    # Empty summary and explicit zeros share the same no-op wording.
    for quiet_summary in ({}, {"cells_changed": 0, "cells_unparseable": 0}):
        assert step_phrase(tool, quiet_summary) == "Nothing to standardize."
def test_step_phrase_missing_populated_and_noop():
    """Pin the missing-values phrase: populated, empty, and sentinel-only summaries."""
    tool = "missing"

    populated = {"cells_filled": 12, "rows_dropped": 4, "columns_dropped": ["x", "y"]}
    assert step_phrase(tool, populated) == (
        "12 cells filled, 4 rows dropped, 2 columns dropped"
    )

    assert step_phrase(tool, {}) == "No missing values to handle."

    # sentinel-only flagging path
    sentinel_only = {"sentinels_standardized": 7}
    assert step_phrase(tool, sentinel_only) == "7 blank cells flagged"
def test_step_phrase_column_map_populated_and_noop():
    """Pin the column_map phrase for a populated summary and for an empty one."""
    tool = "column_map"
    populated = {
        "columns_renamed": 3,
        "columns_added": ["new"],
        "columns_dropped": ["old", "gone"],
    }
    assert step_phrase(tool, populated) == (
        "3 columns renamed, 1 column added, 2 columns dropped"
    )
    assert step_phrase(tool, {}) == "Columns already aligned."
def test_step_phrase_dedup_mockup_case():
    """Pin the dedup phrase, including thousands separators and the row-count tail."""
    summary = dict(
        input_rows=18442,
        output_rows=18130,
        duplicates_removed=312,
        groups=147,
    )
    expected = "312 duplicates removed across 147 groups (18,442 → 18,130 rows)"
    assert step_phrase("dedup", summary) == expected
def test_step_phrase_dedup_noop():
    """A zero duplicate count and an empty summary both yield the no-op dedup phrase."""
    for quiet_summary in ({"duplicates_removed": 0}, {}):
        assert step_phrase("dedup", quiet_summary) == "No duplicates found."
# ---------------------------------------------------------------------------
# Pluralization (_n) — exercised through step_phrase and by calling _n directly
# ---------------------------------------------------------------------------
def test_step_phrase_dedup_singular():
    """Counts of one produce singular "duplicate" and "group" in the dedup phrase."""
    summary = dict(input_rows=10, output_rows=9, duplicates_removed=1, groups=1)
    expected = "1 duplicate removed across 1 group (10 → 9 rows)"
    assert step_phrase("dedup", summary) == expected
def test_step_phrase_missing_singular():
    """Counts of one produce singular "row" and "column" in the missing phrase."""
    summary = {"rows_dropped": 1, "columns_dropped": ["x"]}
    phrase = step_phrase("missing", summary)
    assert phrase == "1 row dropped, 1 column dropped"
def test_n_singular_vs_plural_every_noun():
    """_n renders "<count> <noun>" with the noun singular at 1 and plural otherwise."""
    # (count, noun, expected) — checked in this order, singular then plural per noun.
    cases = (
        (1, "cell", "1 cell"),
        (2, "cell", "2 cells"),
        (1, "row", "1 row"),
        (3, "row", "3 rows"),
        (1, "column", "1 column"),
        (5, "column", "5 columns"),
        (1, "duplicate", "1 duplicate"),
        (9, "duplicate", "9 duplicates"),
        (1, "group", "1 group"),
        (4, "group", "4 groups"),
    )
    for count, noun, expected in cases:
        assert _n(count, noun) == expected
def test_n_thousands_separator():
    """_n groups large counts with a comma thousands separator."""
    for count, noun, expected in (
        (1204, "cell", "1,204 cells"),
        (18442, "row", "18,442 rows"),
    ):
        assert _n(count, noun) == expected
# ---------------------------------------------------------------------------
# Column prose (_fmt_cols)
# ---------------------------------------------------------------------------
def test_fmt_cols_zero():
    """An empty column list formats to the empty string."""
    rendered = _fmt_cols([])
    assert rendered == ""
def test_fmt_cols_one():
    """A single column formats to just its name."""
    rendered = _fmt_cols(["name"])
    assert rendered == "name"
def test_fmt_cols_two():
    """Two columns are joined with " & "."""
    rendered = _fmt_cols(["name", "city"])
    assert rendered == "name & city"
def test_fmt_cols_three():
    """Three columns are listed in full: comma, then " & " before the last."""
    rendered = _fmt_cols(["a", "b", "c"])
    assert rendered == "a, b & c"
def test_fmt_cols_four_or_more():
    """With four or five columns, the first two are named and the rest are counted."""
    four = ["a", "b", "c", "d"]
    five = ["a", "b", "c", "d", "e"]
    assert _fmt_cols(four) == "a, b & 2 more"
    assert _fmt_cols(five) == "a, b & 3 more"
def test_fmt_cols_coerces_non_strings():
    """Integer column names are rendered as text in the joined output."""
    rendered = _fmt_cols([1, 2])
    assert rendered == "1 & 2"
# ---------------------------------------------------------------------------
# step_status — pill levels + details
# ---------------------------------------------------------------------------
def test_step_status_clean_is_ok():
    """A plain text_clean summary yields the ok triple with an empty detail."""
    outcome = step_status("text_clean", {"cells_changed": 5})
    assert outcome == ("✓ ok", "ok", "")
def test_step_status_skipped():
    """skipped=True gives level "skipped", an empty detail, and "skipped" in the label."""
    pill_text, pill_level, pill_detail = step_status(
        "text_clean",
        {"cells_changed": 5},
        skipped=True,
    )
    assert pill_level == "skipped"
    assert pill_detail == ""
    assert "skipped" in pill_text
def test_step_status_error_uses_first_line_only():
    """For a multi-line error string, the detail is only the first line."""
    multiline_error = "X: msg\nline2\nline3"
    pill_text, pill_level, pill_detail = step_status("dedup", {}, error=multiline_error)
    assert pill_level == "error"
    assert pill_detail == "X: msg"
    assert "error" in pill_text
def test_step_status_error_takes_precedence_over_skipped():
    """When both skipped and error are passed, the result is the error level."""
    # The label is unpacked only to enforce the 3-item shape; it is not checked here.
    _pill_text, pill_level, pill_detail = step_status(
        "text_clean",
        {},
        skipped=True,
        error="boom\nsecond",
    )
    assert pill_level == "error"
    assert pill_detail == "boom"
def test_step_status_format_standardize_unparseable_warns():
    """Unparseable cells in a format_standardize summary produce a warn status."""
    summary = {"cells_changed": 100, "cells_unparseable": 141}
    pill_text, pill_level, pill_detail = step_status("format_standardize", summary)
    assert pill_level == "warn"
    assert "141 skipped" in pill_text
    assert pill_detail  # non-empty inline detail
def test_step_status_format_standardize_no_unparseable_is_ok():
    """A format_standardize summary with no unparseable key yields the ok triple."""
    outcome = step_status("format_standardize", {"cells_changed": 100})
    assert outcome == ("✓ ok", "ok", "")
def test_step_status_column_map_coercion_failures_warn():
    """Coercion failures in a column_map summary produce a warn status with a detail."""
    summary = {"coercion_failures": {"age": 4}}
    pill_text, pill_level, pill_detail = step_status("column_map", summary)
    assert pill_level == "warn"
    assert "4 not coerced" in pill_text
    assert pill_detail
def test_step_status_column_map_missing_required_targets_warn():
    """Missing required targets produce a warn status whose detail names the target."""
    summary = {"missing_required_targets": ["email"]}
    pill_text, pill_level, pill_detail = step_status("column_map", summary)
    assert pill_level == "warn"
    assert "missing targets" in pill_text
    assert "email" in pill_detail
def test_step_status_column_map_missing_targets_take_precedence_over_coercion():
    """With both warning sources present, the label reports the missing targets."""
    # both present → missing-targets branch wins
    summary = {
        "missing_required_targets": ["email"],
        "coercion_failures": {"age": 4},
    }
    # The detail is unpacked only to enforce the 3-item shape; it is not checked here.
    pill_text, pill_level, _pill_detail = step_status("column_map", summary)
    assert pill_level == "warn"
    assert "missing targets" in pill_text
def test_step_status_unknown_tool_is_ok():
    """A tool key outside the known set still yields the ok triple."""
    outcome = step_status("mystery", {"foo": 1})
    assert outcome == ("✓ ok", "ok", "")