Adds a demo test suite on top of the data-value pins: - tests/gui/test_app_demo.py (new, AppTest): every accounting persona renders with its dataset, the default/unknown-persona fallback resolves to bookkeeper, clicking Run produces the AFTER value (rows reduced to the validated count) with the watermarked download + Gumroad CTA, and switching persona via the quick-switch dropdown clears the stale result. - tests/test_demo_pipelines.py (extended): cross-surface coherence — each persona key served by app_demo has a matching landing page whose iframe (?p=) and CTA (from=) point at it and that the hub links to; no retired Shopify/RevOps language remains in landing HTML; and the demo download still appends exactly one watermark row. Full suite: 2584 passed, 91 skipped. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
117 lines
5.1 KiB
Python
117 lines
5.1 KiB
Python
"""Demo pipelines must keep showing value (accounting personas).
|
|
|
|
Each persona's preloaded dataset + saved pipeline is the marketing surface
|
|
driven by ``src/gui/app_demo.py``. These tests pin that every demo loads,
|
|
runs clean, and produces its headline value (duplicate rows removed, clean
|
|
parse, disguised nulls caught) — so a stale dataset or an engine change can't
|
|
silently gut the sales demo. The read path mirrors ``app_demo._load_demo``
|
|
exactly (``dtype=str, keep_default_na=False`` so every disguised null survives
|
|
to the pipeline).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from src.core.pipeline import Pipeline, run_pipeline
|
|
|
|
# Repository root: two levels up from this test module's resolved path.
_REPO = Path(__file__).resolve().parent.parent
# Directory holding the preloaded demo datasets and their saved pipelines.
_DEMO = _REPO.joinpath("samples", "demo")

# One (data_file, pipeline_file, min_duplicates_removed) triple per accounting
# persona in app_demo.PERSONAS. Each duplicate floor is the validated demo
# number for that dataset.
_DEMOS = [
    ("bank_reconciliation.csv", "bank_reconciliation_pipeline.json", 6),
    ("vendor_1099.csv", "vendor_1099_pipeline.json", 8),
    ("ar_open_invoices.csv", "ar_open_invoices_pipeline.json", 5),
]
|
|
|
|
|
|
@pytest.mark.parametrize("data_file,pipeline_file,min_dupes", _DEMOS)
def test_demo_runs_clean_and_shows_value(data_file, pipeline_file, min_dupes):
    """Run one persona's demo pipeline end to end and pin its headline value.

    The CSV is read with ``dtype=str, keep_default_na=False`` so disguised
    nulls reach the pipeline as literal strings instead of being turned into
    NaN by pandas at load time.

    Args:
        data_file: CSV file name under ``samples/demo``.
        pipeline_file: Saved pipeline JSON file name under ``samples/demo``.
        min_dupes: Minimum number of duplicate rows the dedup step must remove.
    """
    df = pd.read_csv(_DEMO / data_file, dtype=str, keep_default_na=False)
    pipe = Pipeline.from_file(_DEMO / pipeline_file)
    res = run_pipeline(df, pipe, stop_on_error=True)

    def step_result(tool):
        """Return the first step result for *tool*, failing clearly if absent."""
        # A bare next() on the generator would raise a message-less
        # StopIteration when the pipeline lacks the step; name the gap instead.
        found = next((sr for sr in res.step_results if sr.step.tool == tool), None)
        assert found is not None, (
            f"{pipeline_file} has no {tool!r} step; "
            f"steps run: {[sr.step.tool for sr in res.step_results]}"
        )
        return found

    # 1. Nothing errored — the demo never shows a visitor a red banner.
    assert all(sr.error is None for sr in res.step_results), [
        (sr.step.tool, sr.error) for sr in res.step_results
    ]

    # 2. Dedup removed the designed duplicate rows (the headline value).
    assert res.final_rows < res.initial_rows, (
        f"{data_file}: rows not reduced ({res.initial_rows} -> {res.final_rows})"
    )
    dedup = step_result("dedup")
    assert dedup.summary["duplicates_removed"] >= min_dupes, (
        f"{data_file}: removed {dedup.summary['duplicates_removed']} duplicates, "
        f"expected at least {min_dupes}"
    )

    # 3. Standardization parsed every typed cell — a demo with unparseable
    #    cells reads as "the tool choked," which kills the pitch.
    fmt = step_result("format_standardize")
    assert fmt.summary["cells_unparseable"] == 0, (
        f"{data_file}: {fmt.summary['cells_unparseable']} unparseable cells"
    )
    assert fmt.summary["cells_changed"] > 0, f"{data_file}: no cells standardized"

    # 4. The disguised nulls (—, (blank), TBD, …) were caught.
    miss = step_result("missing")
    assert miss.summary["sentinels_standardized"] > 0, (
        f"{data_file}: no disguised nulls standardized"
    )
|
|
|
|
|
|
def test_app_demo_references_each_demo_file():
    """Check that app_demo.py and samples/demo/ agree on every demo file name.

    For each entry in ``_DEMOS`` the data and pipeline file names must appear
    in the text of ``src/gui/app_demo.py`` and must exist under
    ``samples/demo``, so a rename on either side cannot drift unnoticed.
    """
    app_source = _REPO.joinpath("src", "gui", "app_demo.py").read_text(
        encoding="utf-8"
    )
    for csv_name, json_name, _floor in _DEMOS:
        demo_names = (csv_name, json_name)
        # Source references first, then on-disk presence, for both files.
        for name in demo_names:
            assert name in app_source, f"{name} not referenced in app_demo.py"
        for name in demo_names:
            assert (_DEMO / name).exists(), f"missing {name}"
|
|
|
|
|
|
# (persona key, data-file stem) pairs for the accounting personas served by
# the demo app. Every key must have a landing page that embeds the matching
# demo.
_PERSONA_KEYS = [
    ("bookkeeper", "bank_reconciliation"),
    ("ap-1099", "vendor_1099"),
    ("ar-aging", "ar_open_invoices"),
]
# Root of the landing site: the hub index.html plus one sub-directory per persona.
_LANDING = _REPO.joinpath("landing")
|
|
|
|
|
|
@pytest.mark.parametrize("key,stem", _PERSONA_KEYS)
def test_landing_page_embeds_the_matching_demo(key, stem):
    """Keep the sales surface (landing -> demo app -> dataset) coherent.

    For one persona ``key``: the quoted key appears in ``app_demo.py``, the
    page ``landing/<key>/index.html`` exists and contains both ``?p=<key>``
    (iframe) and ``from=<key>`` (CTA), and the hub ``landing/index.html``
    links to ``<key>/``. ``stem`` comes from the parametrization and is not
    checked by this test.
    """
    demo_app_text = _REPO.joinpath("src", "gui", "app_demo.py").read_text(
        encoding="utf-8"
    )
    quoted_key = f'"{key}"'
    assert quoted_key in demo_app_text, (
        f"persona key {key!r} not served by app_demo.py"
    )

    landing_page = _LANDING.joinpath(key, "index.html")
    assert landing_page.exists(), f"missing landing page for {key}"

    markup = landing_page.read_text(encoding="utf-8")
    iframe_query = f"?p={key}"
    cta_tag = f"from={key}"
    assert iframe_query in markup, f"{key} landing iframe doesn't load ?p={key}"
    assert cta_tag in markup, f"{key} landing CTA isn't tagged from={key}"

    # The hub page must link to this persona's directory.
    hub_markup = _LANDING.joinpath("index.html").read_text(encoding="utf-8")
    hub_link = f'href="{key}/"'
    assert hub_link in hub_markup, f"hub doesn't link to {key}/"
|
|
|
|
|
|
def test_landing_surface_has_no_stale_persona_refs():
    """No retired Shopify / RevOps persona language remains in landing HTML.

    Every ``*.html`` file under ``landing/`` (recursively) is lower-cased and
    scanned for the retired persona terms. The test also fails when no HTML
    file is found at all, so a moved or renamed landing directory cannot make
    it pass without scanning anything.
    """
    stale_terms = ("shopify", "revops", "klaviyo", "hubspot")

    # Sorted so the first reported offender is the same on every run.
    html_files = sorted(_LANDING.rglob("*.html"))
    # An empty scan would otherwise pass vacuously.
    assert html_files, f"no landing HTML found under {_LANDING}"

    for html_file in html_files:
        text = html_file.read_text(encoding="utf-8").lower()
        for stale in stale_terms:
            assert stale not in text, f"{stale!r} still in {html_file.relative_to(_REPO)}"
|
|
|
|
|
|
def test_demo_app_builds_a_single_watermark_row():
    """The demo download appends exactly one trailing watermark row
    (DEMO-PLAN §6: the AFTER preview must read as production-quality).

    This is a source-text check: it looks for the watermark label and the
    single ``pd.concat`` of ``watermark_row`` onto the result frame in
    ``src/gui/app_demo.py``; it does not execute the app.
    """
    demo_app_source = _REPO.joinpath("src", "gui", "app_demo.py").read_text(
        encoding="utf-8"
    )
    expected_snippets = (
        # Watermark label text.
        "DataTools demo — buy at",
        # One trailing row concatenated onto the result frame.
        "watermark_row",
        "pd.concat([result.final_df, watermark_row]",
    )
    for snippet in expected_snippets:
        assert snippet in demo_app_source
|