Adds ~115 tests pinning the Automated Workflows feature end to end: - tests/test_pipeline.py (+43): per-adapter summary correctness on known inputs, multi-step data flow, error stop/continue contract, empty / single-column / all-disabled edges, dict+file serialization round-trips, recommended_pipeline(include=…), and a synthesized demo integration run. - tests/test_cli_pipeline.py (new, 21): --recommend, dry-run-by-default, --apply output CSV + audit JSON, --steps, --strict abort, arg validation, --continue-on-error vs halt, and a save→load round-trip. Invokes the Typer app directly to bypass the license guard (house pattern). - tests/gui/test_pipeline_builder.py (+9): reorder ▲/▼, disabled edge buttons, disabled-step persistence across reorder, restore-recommended, Advanced JSON export/import, and per-tool Configure panels emitting the correct option dicts (AppTest). - tests/gui/test_pipeline_phrasing.py (new, 30): step_phrase/step_status and the adapter-key→friendly-name bridge as pure functions, incl. pluralization, column prose, and warn/error status derivation. Full suite: 2565 passed, 91 skipped. No product bugs surfaced. Documents the coverage in docs/DEVELOPER.md (test tree + a pipeline-coverage note). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
282 lines
11 KiB
Python
282 lines
11 KiB
Python
"""Pipeline Runner — visual module-card builder contract (AppTest).
|
|
|
|
Pins the behaviors the JSON-table → module-card rewrite introduced:
|
|
recommended steps seed as cards with friendly names, each step exposes a
|
|
plain-language Configure panel (no raw per-row JSON), steps can be toggled /
|
|
added / removed, JSON lives only under Advanced, and a run produces results
|
|
with friendly step names. The page's bare initial-render contract across junk
|
|
files is covered separately in ``tests/test_junk_corpus_tool_pages.py``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from streamlit.testing.v1 import AppTest
|
|
|
|
# Path to the Streamlit page under test: three directories above this test
# module, then src/gui/pages/9_Pipeline_Runner.py.
_PAGE = Path(__file__).resolve().parent.parent.parent.joinpath(
    "src", "gui", "pages", "9_Pipeline_Runner.py"
)

# Tiny CSV fixture: a header plus three data rows. The first two rows are the
# "Jane" records that the run test expects to collapse into one.
_CSV = b"".join(
    (
        b"name,email,phone,signup_date\n",
        b" Jane Doe ,jane@acme.io,512-555-0190,2024-01-04\n",
        b"jane doe,JANE@ACME.IO,(512) 555-0190,01/04/2024\n",
        b"Bob Smith,bob@globex.com,720.555.7781,2024-02-11\n",
    )
)
|
|
|
|
|
|
def _app() -> AppTest:
    """Build the Pipeline Runner page with the sample CSV seeded and run it once.

    The three ``home_uploaded_*`` session keys are populated before the first
    script run (presumably mirroring what the home-page uploader stores —
    confirm against the page). Returns the result of ``AppTest.run()``.
    """
    harness = AppTest.from_file(str(_PAGE), default_timeout=30)
    seeded_state = {
        "home_uploaded_bytes": _CSV,
        "home_uploaded_name": "customers.csv",
        "home_uploaded_size": len(_CSV),
    }
    for key, value in seeded_state.items():
        harness.session_state[key] = value
    return harness.run()
|
|
|
|
|
|
def test_recommended_steps_seed_as_named_cards():
    """A fresh page seeds four default steps and shows their friendly names."""
    at = _app()
    assert not at.exception

    seeded_tools = [step["tool"] for step in at.session_state["pipeline_steps"]]
    assert seeded_tools == ["text_clean", "format_standardize", "missing", "dedup"]

    # Join every markdown element so a name can match wherever it is rendered.
    rendered = " ".join(element.value for element in at.markdown)
    expected_names = (
        "Clean Text",
        "Standardize Formats",
        "Fix Missing Values",
        "Find Duplicates",
    )
    for name in expected_names:
        assert name in rendered
|
|
|
|
|
|
def test_each_step_has_a_configure_panel_and_json_is_advanced_only():
    """Configure expanders exist per step; JSON appears under Advanced."""
    at = _app()
    expander_labels = [expander.label for expander in at.get("expander")]

    def _some_label_starts_with(prefix):
        return any(label.startswith(prefix) for label in expander_labels)

    assert _some_label_starts_with("Configure: Clean Text")
    assert _some_label_starts_with("Configure: Find Duplicates")
    # Raw JSON is import/export only — never a per-step editing surface.
    assert any("Advanced — import / export" in label for label in expander_labels)
|
|
|
|
|
|
def test_toggle_disables_step_and_persists():
    """Switching the first toggle off stores ``enabled=False`` on the first step."""
    at = _app()
    first_toggle = at.toggle[0]
    first_toggle.set_value(False).run()

    first_step = at.session_state["pipeline_steps"][0]
    assert first_step["enabled"] is False
|
|
|
|
|
|
def test_add_step_appends_a_working_config_panel():
    """Adding ``column_map`` appends it as the last step with its own Configure panel."""
    at = _app()

    tool_pickers = [box for box in at.selectbox if box.key == "pipeline_add_tool"]
    tool_pickers[0].set_value("column_map").run()

    # Buttons are looked up only after the selectbox rerun, as in the flow above.
    add_buttons = [button for button in at.button if "Add step" in button.label]
    add_buttons[0].click().run()

    assert not at.exception
    newest_step = at.session_state["pipeline_steps"][-1]
    assert newest_step["tool"] == "column_map"

    expander_labels = [expander.label for expander in at.get("expander")]
    assert any(
        label.startswith("Configure: Map Columns") for label in expander_labels
    )
|
|
|
|
|
|
def test_remove_step_drops_it():
    """Clicking a remove button shrinks the step list by exactly one."""
    at = _app()
    starting_count = len(at.session_state["pipeline_steps"])

    # The first ✕ remove button in the card stack.
    remove_buttons = [button for button in at.button if button.label == "✕"]
    remove_buttons[0].click().run()

    assert not at.exception
    remaining_count = len(at.session_state["pipeline_steps"])
    assert remaining_count == starting_count - 1
|
|
|
|
|
|
def test_run_produces_results_with_friendly_names():
    """Running the default pipeline stores an error-free result (3 rows in, 2 out)."""
    at = _app()
    run_buttons = [button for button in at.button if button.label == "Run Pipeline"]
    run_buttons[0].click().run()

    assert not at.exception, at.exception
    assert "pipeline_result" in at.session_state

    outcome = at.session_state["pipeline_result"]
    # the two Jane rows merge
    assert outcome.initial_rows == 3
    assert outcome.final_rows == 2
    assert all(step_result.error is None for step_result in outcome.step_results)
|
|
|
|
|
|
def test_step_phrase_is_plain_english_not_json():
    """``step_phrase`` / ``step_status`` turn summary dicts into readable prose."""
    from src.gui.components.pipeline_modules import step_phrase, step_status

    # dedup phrasing mirrors the design mockup wording exactly.
    dedup_summary = {
        "input_rows": 18442,
        "output_rows": 18130,
        "duplicates_removed": 312,
        "groups": 147,
    }
    dedup_phrase = step_phrase("dedup", dedup_summary)
    assert dedup_phrase == "312 duplicates removed across 147 groups (18,442 → 18,130 rows)"

    # text_clean lists affected columns in prose, with thousands separators.
    text_clean_summary = {
        "cells_changed": 1204,
        "columns_processed": ["name", "city"],
    }
    assert step_phrase("text_clean", text_clean_summary) == "1,204 cells cleaned in name & city"

    # singular nouns pluralize correctly
    missing_summary = {"rows_dropped": 1, "columns_dropped": ["x"]}
    assert step_phrase("missing", missing_summary) == "1 row dropped, 1 column dropped"

    # unparseable cells downgrade the pill to warn with an inline detail
    label, level, detail = step_status(
        "format_standardize",
        {"cells_changed": 100, "cells_unparseable": 141},
    )
    assert level == "warn"
    assert "141 skipped" in label
    assert detail

    # a clean step is "ok" with no detail
    clean_status = step_status("text_clean", {"cells_changed": 5})
    assert clean_status[1] == "ok"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers for the reorder / config tests below
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _ids(at) -> dict:
    """Return ``{tool: id}`` for the steps in ``at.session_state["pipeline_steps"]``.

    Assumes each tool appears at most once; if a tool repeats, the id of its
    last occurrence wins.
    """
    id_by_tool = {}
    for step in at.session_state["pipeline_steps"]:
        id_by_tool[step["tool"]] = step["id"]
    return id_by_tool
|
|
|
|
|
|
def _tools(at) -> list:
    """Return the ``tool`` key of every pipeline step, in stored order."""
    ordered_tools = []
    for step in at.session_state["pipeline_steps"]:
        ordered_tools.append(step["tool"])
    return ordered_tools
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Reorder
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_reorder_down_swaps_with_next_step():
    """Clicking the first step's ``_down`` button swaps it with the second step."""
    at = _app()
    step_id = _ids(at)["text_clean"]
    initial_order = _tools(at)
    assert initial_order == ["text_clean", "format_standardize", "missing", "dedup"]

    down_key = f"text_clean_{step_id}_down"
    down_buttons = [button for button in at.button if button.key == down_key]
    down_buttons[0].click().run()

    assert not at.exception
    assert _tools(at) == ["format_standardize", "text_clean", "missing", "dedup"]
|
|
|
|
|
|
def test_reorder_up_swaps_with_previous_step():
    """Clicking the third step's ``_up`` button swaps it with the second step."""
    at = _app()
    step_id = _ids(at)["missing"]

    up_key = f"missing_{step_id}_up"
    up_buttons = [button for button in at.button if button.key == up_key]
    up_buttons[0].click().run()

    assert not at.exception
    assert _tools(at) == ["text_clean", "missing", "format_standardize", "dedup"]
|
|
|
|
|
|
def test_first_up_and_last_down_buttons_are_disabled():
    """The edge reorder buttons are disabled; an interior ``_up`` button is not."""
    at = _app()
    ids = _ids(at)

    first_up_key = f"text_clean_{ids['text_clean']}_up"
    last_down_key = f"dedup_{ids['dedup']}_down"
    first_up = [button for button in at.button if button.key == first_up_key][0]
    last_down = [button for button in at.button if button.key == last_down_key][0]
    assert first_up.disabled is True
    assert last_down.disabled is True

    # interior steps are freely movable
    mid_up_key = f"missing_{ids['missing']}_up"
    mid_up = [button for button in at.button if button.key == mid_up_key][0]
    assert mid_up.disabled is False
|
|
|
|
|
|
def test_disabled_step_stays_disabled_after_reorder():
    """A step switched off before being moved is still off after the move."""
    at = _app()
    step_id = _ids(at)["text_clean"]

    at.toggle[0].set_value(False).run()
    assert at.session_state["pipeline_steps"][0]["enabled"] is False

    # move the now-disabled first step down one slot
    down_key = f"text_clean_{step_id}_down"
    down_buttons = [button for button in at.button if button.key == down_key]
    down_buttons[0].click().run()
    assert not at.exception

    steps = at.session_state["pipeline_steps"]
    text_clean_steps = [step for step in steps if step["tool"] == "text_clean"]
    moved = text_clean_steps[0]
    assert steps.index(moved) == 1  # it moved
    assert moved["enabled"] is False  # ...and stayed disabled
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Restore recommended steps
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_restore_recommended_steps_button():
    """After a step is removed, one restore button appears and resets the list."""
    at = _app()

    # Diverge from the recommended default by removing a step.
    remove_buttons = [button for button in at.button if button.label == "✕"]
    remove_buttons[0].click().run()
    assert _tools(at) == ["format_standardize", "missing", "dedup"]

    restore_buttons = [
        button for button in at.button
        if "Restore recommended steps" in button.label
    ]
    assert len(restore_buttons) == 1

    restore_buttons[0].click().run()
    assert not at.exception
    assert _tools(at) == ["text_clean", "format_standardize", "missing", "dedup"]
|
|
|
|
|
|
def test_restore_button_absent_when_steps_match_default():
    """With the seeded steps untouched, no restore button is rendered."""
    at = _app()

    # Untouched recommended steps → no restore prompt.
    restore_buttons = [
        button for button in at.button
        if "Restore recommended steps" in button.label
    ]
    assert not restore_buttons
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Advanced JSON export / import
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_advanced_json_export_reflects_current_steps():
    """The first code element's JSON lists the current steps, before and after a removal."""
    at = _app()

    payload = json.loads(at.code[0].value)
    exported_tools = [step["tool"] for step in payload["steps"]]
    assert exported_tools == ["text_clean", "format_standardize", "missing", "dedup"]

    # Remove a step and confirm the exported JSON drops it too.
    remove_buttons = [button for button in at.button if button.label == "✕"]
    remove_buttons[0].click().run()

    payload = json.loads(at.code[0].value)
    exported_tools = [step["tool"] for step in payload["steps"]]
    assert exported_tools == ["format_standardize", "missing", "dedup"]
|
|
|
|
|
|
def test_load_pasted_json_replaces_the_step_list():
    """Loading a pasted one-step pipeline replaces the seeded step list."""
    at = _app()

    single_step_pipeline = {
        "steps": [{"tool": "dedup", "options": {}, "enabled": True}],
    }
    pasted_text = json.dumps(single_step_pipeline)

    paste_areas = [area for area in at.text_area if area.key == "pipeline_json_paste"]
    paste_areas[0].set_value(pasted_text).run()

    load_buttons = [
        button for button in at.button if button.label == "Load pasted JSON"
    ]
    load_buttons[0].click().run()

    assert not at.exception
    assert _tools(at) == ["dedup"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config renderers emit the right options
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_format_standardize_config_emits_column_types():
    """Picking "Phone number" for ``phone`` stores ``column_types["phone"] == "phone"``."""
    at = _app()
    step_id = _ids(at)["format_standardize"]

    picker_key = f"format_standardize_{step_id}_fmt__phone"
    pickers = [box for box in at.selectbox if box.key == picker_key]
    pickers[0].set_value("Phone number").run()

    run_buttons = [button for button in at.button if button.label == "Run Pipeline"]
    run_buttons[0].click().run()
    assert not at.exception

    matching_steps = [
        step for step in at.session_state["pipeline_steps"]
        if step["tool"] == "format_standardize"
    ]
    column_types = matching_steps[0]["options"]["column_types"]
    assert column_types.get("phone") == "phone"
|
|
|
|
|
|
def test_missing_config_drop_radio_emits_drop_row_strategy():
    """Choosing the drop-rows radio option stores ``strategy == "drop_row"``."""
    at = _app()
    step_id = _ids(at)["missing"]

    radio_key = f"missing_{step_id}_strategy"
    strategy_radios = [radio for radio in at.radio if radio.key == radio_key]
    strategy_radios[0].set_value("Drop rows that have any blank").run()

    run_buttons = [button for button in at.button if button.label == "Run Pipeline"]
    run_buttons[0].click().run()
    assert not at.exception

    matching_steps = [
        step for step in at.session_state["pipeline_steps"]
        if step["tool"] == "missing"
    ]
    assert matching_steps[0]["options"]["strategy"] == "drop_row"
|
|
|
|
|
|
def test_dedup_config_multiselect_builds_strategies():
    """Selecting ``email`` as the match column yields one exact-match strategy column."""
    at = _app()
    step_id = _ids(at)["dedup"]

    multiselect_key = f"dedup_{step_id}_matchcols"
    column_pickers = [
        picker for picker in at.multiselect if picker.key == multiselect_key
    ]
    column_pickers[0].set_value(["email"]).run()

    run_buttons = [button for button in at.button if button.label == "Run Pipeline"]
    run_buttons[0].click().run()
    assert not at.exception

    matching_steps = [
        step for step in at.session_state["pipeline_steps"]
        if step["tool"] == "dedup"
    ]
    strategies = matching_steps[0]["options"]["strategies"]
    first_strategy_columns = strategies[0]["columns"]

    matched_names = [entry["column"] for entry in first_strategy_columns]
    assert matched_names == ["email"]
    assert first_strategy_columns[0]["algorithm"] == "exact"
|