"""Demo pipelines must keep showing value (accounting personas). Each persona's preloaded dataset + saved pipeline is the marketing surface driven by ``src/gui/app_demo.py``. These tests pin that every demo loads, runs clean, and produces its headline value (duplicate rows removed, clean parse, disguised nulls caught) — so a stale dataset or an engine change can't silently gut the sales demo. The read path mirrors ``app_demo._load_demo`` exactly (``dtype=str, keep_default_na=False`` so every disguised null survives to the pipeline). """ from __future__ import annotations from pathlib import Path import pandas as pd import pytest from src.core.pipeline import Pipeline, run_pipeline _REPO = Path(__file__).resolve().parent.parent _DEMO = _REPO / "samples" / "demo" # (data_file, pipeline_file, min_duplicates_removed) — one per accounting # persona in app_demo.PERSONAS. The dup floors are the validated demo numbers. _DEMOS = [ ("bank_reconciliation.csv", "bank_reconciliation_pipeline.json", 6), ("vendor_1099.csv", "vendor_1099_pipeline.json", 8), ("ar_open_invoices.csv", "ar_open_invoices_pipeline.json", 5), ] @pytest.mark.parametrize("data_file,pipeline_file,min_dupes", _DEMOS) def test_demo_runs_clean_and_shows_value(data_file, pipeline_file, min_dupes): df = pd.read_csv(_DEMO / data_file, dtype=str, keep_default_na=False) pipe = Pipeline.from_file(_DEMO / pipeline_file) res = run_pipeline(df, pipe, stop_on_error=True) # 1. Nothing errored — the demo never shows a visitor a red banner. assert all(sr.error is None for sr in res.step_results), [ (sr.step.tool, sr.error) for sr in res.step_results ] # 2. Dedup removed the designed duplicate rows (the headline value). assert res.final_rows < res.initial_rows dedup = next(sr for sr in res.step_results if sr.step.tool == "dedup") assert dedup.summary["duplicates_removed"] >= min_dupes # 3. Standardization parsed every typed cell — a demo with unparseable # cells reads as "the tool choked," which kills the pitch. 
fmt = next(sr for sr in res.step_results if sr.step.tool == "format_standardize") assert fmt.summary["cells_unparseable"] == 0 assert fmt.summary["cells_changed"] > 0 # 4. The disguised nulls (—, (blank), TBD, …) were caught. miss = next(sr for sr in res.step_results if sr.step.tool == "missing") assert miss.summary["sentinels_standardized"] > 0 def test_app_demo_references_each_demo_file(): """Every data/pipeline file the demo app names must exist on disk. Guards against a rename in app_demo.py drifting away from samples/demo/ (or vice versa) without a test catching it. """ src = (_REPO / "src" / "gui" / "app_demo.py").read_text(encoding="utf-8") for data_file, pipeline_file, _ in _DEMOS: assert data_file in src, f"{data_file} not referenced in app_demo.py" assert pipeline_file in src, f"{pipeline_file} not referenced in app_demo.py" assert (_DEMO / data_file).exists(), f"missing {data_file}" assert (_DEMO / pipeline_file).exists(), f"missing {pipeline_file}" # The accounting persona keys served by the demo app — each must line up with # a landing page that embeds the matching demo. 
# Entries are (persona key, data-file stem).
_PERSONA_KEYS = [
    ("bookkeeper", "bank_reconciliation"),
    ("ap-1099", "vendor_1099"),
    ("ar-aging", "ar_open_invoices"),
]

_LANDING = _REPO / "landing"

# Source of the demo app; the tests below inspect it as plain text.
_APP_DEMO_PATH = _REPO / "src" / "gui" / "app_demo.py"


@pytest.mark.parametrize("key,stem", _PERSONA_KEYS)
def test_landing_page_embeds_the_matching_demo(key, stem):
    """Each landing page exists and its iframe + CTA point at this persona —
    so the sales surface (landing -> demo app -> dataset) stays coherent."""
    app_src = _APP_DEMO_PATH.read_text(encoding="utf-8")
    assert f'"{key}"' in app_src, f"persona key {key!r} not served by app_demo.py"
    # The dataset leg of the chain: the demo app must mention this persona's
    # data-file stem, otherwise the key/stem pairing above is unchecked.
    assert stem in app_src, f"dataset stem {stem!r} for {key} not referenced in app_demo.py"

    page = _LANDING / key / "index.html"
    assert page.exists(), f"missing landing page for {key}"
    html = page.read_text(encoding="utf-8")
    assert f"?p={key}" in html, f"{key} landing iframe doesn't load ?p={key}"
    assert f"from={key}" in html, f"{key} landing CTA isn't tagged from={key}"

    # The hub links to this persona's page.
    hub = (_LANDING / "index.html").read_text(encoding="utf-8")
    assert f'href="{key}/"' in hub, f"hub doesn't link to {key}/"


def test_landing_surface_has_no_stale_persona_refs():
    """No retired Shopify / RevOps persona language remains in landing HTML."""
    pages = sorted(_LANDING.rglob("*.html"))
    # Without this guard an empty or missing landing/ tree would let the scan
    # below pass without checking anything.
    assert pages, f"no landing HTML found under {_LANDING}"
    for html_file in pages:
        text = html_file.read_text(encoding="utf-8").lower()
        for stale in ("shopify", "revops", "klaviyo", "hubspot"):
            assert stale not in text, f"{stale!r} still in {html_file.relative_to(_REPO)}"


def test_demo_app_builds_a_single_watermark_row():
    """The demo download appends exactly one trailing watermark row (DEMO-PLAN
    §6: the AFTER preview must read as production-quality)."""
    src = _APP_DEMO_PATH.read_text(encoding="utf-8")
    assert "DataTools demo — buy at" in src
    # One trailing row concatenated onto the result frame. Checked as two
    # separate asserts so a failure names the piece that went missing.
    assert "watermark_row" in src
    assert "pd.concat([result.final_df, watermark_row]" in src