"""Pipeline Runner — visual module-card builder contract (AppTest).

Pins the behaviors the JSON-table → module-card rewrite introduced:
recommended steps seed as cards with friendly names, each step exposes a
plain-language Configure panel (no raw per-row JSON), steps can be toggled /
added / removed, JSON lives only under Advanced, and a run produces results
with friendly step names.

The page's bare initial-render contract across junk files is covered
separately in ``tests/test_junk_corpus_tool_pages.py``.
"""

from __future__ import annotations

import json
from pathlib import Path

import pytest
from streamlit.testing.v1 import AppTest

_PAGE = (
    Path(__file__).resolve().parent.parent.parent
    / "src"
    / "gui"
    / "pages"
    / "9_Pipeline_Runner.py"
)

_CSV = (
    b"name,email,phone,signup_date\n"
    b" Jane Doe ,jane@acme.io,512-555-0190,2024-01-04\n"
    b"jane doe,JANE@ACME.IO,(512) 555-0190,01/04/2024\n"
    b"Bob Smith,bob@globex.com,720.555.7781,2024-02-11\n"
)

# Tool order the page seeds for ``_CSV`` before the user edits anything.
_RECOMMENDED_TOOLS = ["text_clean", "format_standardize", "missing", "dedup"]


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _app() -> AppTest:
    """Run the page once with ``_CSV`` pre-loaded as the home upload."""
    at = AppTest.from_file(str(_PAGE), default_timeout=30)
    at.session_state["home_uploaded_bytes"] = _CSV
    at.session_state["home_uploaded_name"] = "customers.csv"
    at.session_state["home_uploaded_size"] = len(_CSV)
    return at.run()


def _ids(at) -> dict:
    """Map tool name → that step's stable id (assumes unique tools)."""
    return {s["tool"]: s["id"] for s in at.session_state["pipeline_steps"]}


def _tools(at) -> list:
    """Return the tool names of the current steps, in pipeline order."""
    return [s["tool"] for s in at.session_state["pipeline_steps"]]


def _expander_labels(at) -> list:
    """Return the label of every expander currently rendered."""
    return [e.label for e in at.get("expander")]


def _by_key(widgets, key):
    """Return the first widget in *widgets* whose ``key`` equals *key*.

    Fails with the list of keys actually present instead of a bare
    ``IndexError``, so a renamed or missing widget is obvious from the report.
    """
    matches = [w for w in widgets if w.key == key]
    assert matches, (
        f"no widget with key {key!r}; keys present: {[w.key for w in widgets]}"
    )
    return matches[0]


def _buttons(at, label, *, exact=True) -> list:
    """Return every button whose label equals (or, if not *exact*, contains) *label*."""
    if exact:
        return [b for b in at.button if b.label == label]
    return [b for b in at.button if label in b.label]


def _button(at, label, *, exact=True):
    """Return the first button matching *label*, failing readably if there is none."""
    matches = _buttons(at, label, exact=exact)
    assert matches, (
        f"no button labelled {label!r}; "
        f"labels present: {[b.label for b in at.button]}"
    )
    return matches[0]


# ---------------------------------------------------------------------------
# Cards, configure panels, toggle / add / remove, run
# ---------------------------------------------------------------------------
def test_recommended_steps_seed_as_named_cards():
    at = _app()
    assert not at.exception
    assert _tools(at) == _RECOMMENDED_TOOLS
    md = " ".join(m.value for m in at.markdown)
    for friendly in (
        "Clean Text",
        "Standardize Formats",
        "Fix Missing Values",
        "Find Duplicates",
    ):
        assert friendly in md


def test_each_step_has_a_configure_panel_and_json_is_advanced_only():
    at = _app()
    labels = _expander_labels(at)
    assert any(label.startswith("Configure: Clean Text") for label in labels)
    assert any(label.startswith("Configure: Find Duplicates") for label in labels)
    # Raw JSON is import/export only — never a per-step editing surface.
    assert any("Advanced — import / export" in label for label in labels)


def test_toggle_disables_step_and_persists():
    at = _app()
    at.toggle[0].set_value(False).run()
    assert at.session_state["pipeline_steps"][0]["enabled"] is False
    # "Persists" means the flag survives a further rerun with no new input.
    at.run()
    assert not at.exception
    assert at.session_state["pipeline_steps"][0]["enabled"] is False


def test_add_step_appends_a_working_config_panel():
    at = _app()
    _by_key(at.selectbox, "pipeline_add_tool").set_value("column_map").run()
    _button(at, "Add step", exact=False).click().run()
    assert not at.exception
    assert at.session_state["pipeline_steps"][-1]["tool"] == "column_map"
    labels = _expander_labels(at)
    assert any(label.startswith("Configure: Map Columns") for label in labels)


def test_remove_step_drops_it():
    at = _app()
    before = len(at.session_state["pipeline_steps"])
    # The first ✕ remove button in the card stack.
    _button(at, "✕").click().run()
    assert not at.exception
    assert len(at.session_state["pipeline_steps"]) == before - 1


def test_run_produces_results_with_friendly_names():
    at = _app()
    _button(at, "Run Pipeline").click().run()
    assert not at.exception, at.exception
    assert "pipeline_result" in at.session_state
    res = at.session_state["pipeline_result"]
    assert res.initial_rows == 3 and res.final_rows == 2  # the two Jane rows merge
    assert all(sr.error is None for sr in res.step_results)


def test_step_phrase_is_plain_english_not_json():
    from src.gui.components.pipeline_modules import step_phrase, step_status

    # dedup phrasing mirrors the design mockup wording exactly.
    phrase = step_phrase("dedup", {
        "input_rows": 18442,
        "output_rows": 18130,
        "duplicates_removed": 312,
        "groups": 147,
    })
    assert phrase == "312 duplicates removed across 147 groups (18,442 → 18,130 rows)"

    # text_clean lists affected columns in prose, with thousands separators.
    assert step_phrase("text_clean", {
        "cells_changed": 1204,
        "columns_processed": ["name", "city"],
    }) == "1,204 cells cleaned in name & city"

    # singular nouns pluralize correctly
    assert step_phrase("missing", {"rows_dropped": 1, "columns_dropped": ["x"]}) == \
        "1 row dropped, 1 column dropped"

    # unparseable cells downgrade the pill to warn with an inline detail
    label, level, detail = step_status(
        "format_standardize",
        {"cells_changed": 100, "cells_unparseable": 141},
    )
    assert level == "warn" and "141 skipped" in label and detail

    # a clean step is "ok" with no detail
    assert step_status("text_clean", {"cells_changed": 5})[1] == "ok"


# ---------------------------------------------------------------------------
# Reorder
# ---------------------------------------------------------------------------
def test_reorder_down_swaps_with_next_step():
    at = _app()
    sid = _ids(at)["text_clean"]
    before = _tools(at)
    assert before == _RECOMMENDED_TOOLS
    _by_key(at.button, f"text_clean_{sid}_down").click().run()
    assert not at.exception
    assert _tools(at) == ["format_standardize", "text_clean", "missing", "dedup"]


def test_reorder_up_swaps_with_previous_step():
    at = _app()
    sid = _ids(at)["missing"]
    _by_key(at.button, f"missing_{sid}_up").click().run()
    assert not at.exception
    assert _tools(at) == ["text_clean", "missing", "format_standardize", "dedup"]


def test_first_up_and_last_down_buttons_are_disabled():
    at = _app()
    ids = _ids(at)
    first_up = _by_key(at.button, f"text_clean_{ids['text_clean']}_up")
    last_down = _by_key(at.button, f"dedup_{ids['dedup']}_down")
    assert first_up.disabled is True
    assert last_down.disabled is True
    # interior steps are freely movable
    mid_up = _by_key(at.button, f"missing_{ids['missing']}_up")
    assert mid_up.disabled is False


def test_disabled_step_stays_disabled_after_reorder():
    at = _app()
    sid = _ids(at)["text_clean"]
    at.toggle[0].set_value(False).run()
    assert at.session_state["pipeline_steps"][0]["enabled"] is False
    # move the now-disabled first step down one slot
    _by_key(at.button, f"text_clean_{sid}_down").click().run()
    assert not at.exception
    steps = at.session_state["pipeline_steps"]
    moved = [s for s in steps if s["tool"] == "text_clean"][0]
    assert steps.index(moved) == 1  # it moved
    assert moved["enabled"] is False  # ...and stayed disabled


# ---------------------------------------------------------------------------
# Restore recommended steps
# ---------------------------------------------------------------------------
def test_restore_recommended_steps_button():
    at = _app()
    # Diverge from the recommended default by removing a step.
    _button(at, "✕").click().run()
    assert _tools(at) == ["format_standardize", "missing", "dedup"]
    restore = _buttons(at, "Restore recommended steps", exact=False)
    assert len(restore) == 1
    restore[0].click().run()
    assert not at.exception
    assert _tools(at) == _RECOMMENDED_TOOLS


def test_restore_button_absent_when_steps_match_default():
    at = _app()
    # Untouched recommended steps → no restore prompt.
    assert not _buttons(at, "Restore recommended steps", exact=False)


# ---------------------------------------------------------------------------
# Advanced JSON export / import
# ---------------------------------------------------------------------------
def test_advanced_json_export_reflects_current_steps():
    at = _app()
    exported = json.loads(at.code[0].value)
    assert [s["tool"] for s in exported["steps"]] == _RECOMMENDED_TOOLS
    # Remove a step and confirm the exported JSON drops it too.
    _button(at, "✕").click().run()
    exported = json.loads(at.code[0].value)
    assert [s["tool"] for s in exported["steps"]] == \
        ["format_standardize", "missing", "dedup"]


def test_load_pasted_json_replaces_the_step_list():
    at = _app()
    one_step = json.dumps(
        {"steps": [{"tool": "dedup", "options": {}, "enabled": True}]}
    )
    _by_key(at.text_area, "pipeline_json_paste").set_value(one_step).run()
    _button(at, "Load pasted JSON").click().run()
    assert not at.exception
    assert _tools(at) == ["dedup"]


# ---------------------------------------------------------------------------
# Config renderers emit the right options
# ---------------------------------------------------------------------------
def test_format_standardize_config_emits_column_types():
    at = _app()
    fid = _ids(at)["format_standardize"]
    _by_key(at.selectbox, f"format_standardize_{fid}_fmt__phone") \
        .set_value("Phone number").run()
    _button(at, "Run Pipeline").click().run()
    assert not at.exception
    step = [s for s in at.session_state["pipeline_steps"]
            if s["tool"] == "format_standardize"][0]
    assert step["options"]["column_types"].get("phone") == "phone"


def test_missing_config_drop_radio_emits_drop_row_strategy():
    at = _app()
    mid = _ids(at)["missing"]
    _by_key(at.radio, f"missing_{mid}_strategy") \
        .set_value("Drop rows that have any blank").run()
    _button(at, "Run Pipeline").click().run()
    assert not at.exception
    step = [s for s in at.session_state["pipeline_steps"]
            if s["tool"] == "missing"][0]
    assert step["options"]["strategy"] == "drop_row"


def test_dedup_config_multiselect_builds_strategies():
    at = _app()
    did = _ids(at)["dedup"]
    _by_key(at.multiselect, f"dedup_{did}_matchcols").set_value(["email"]).run()
    _button(at, "Run Pipeline").click().run()
    assert not at.exception
    step = [s for s in at.session_state["pipeline_steps"]
            if s["tool"] == "dedup"][0]
    strategies = step["options"]["strategies"]
    cols = [c["column"] for c in strategies[0]["columns"]]
    assert cols == ["email"]
    assert strategies[0]["columns"][0]["algorithm"] == "exact"