# datatools-dev/tests/gui/test_app_demo.py

"""Public demo app (``src/gui/app_demo.py``) behavior — AppTest.

The demo app is the marketing surface: it preloads one accounting persona's
dataset, runs the saved pipeline, and shows BEFORE/AFTER + a buy CTA. These
tests pin that every persona renders, the run produces its headline value,
persona switching works, and the buy path is present — so a regression can't
silently ship a broken or empty demo to a prospect.

The dataset value numbers themselves are pinned separately in
``tests/test_demo_pipelines.py``; here we assert the *app* surfaces them.
"""

from __future__ import annotations

from pathlib import Path

import pandas as pd
import pytest
from streamlit.testing.v1 import AppTest

# Three directory levels above this test module; both paths below hang off it.
_REPO_ROOT = Path(__file__).resolve().parent.parent.parent

# Script path handed to ``AppTest.from_file`` (kept as a plain string).
_PAGE = str(_REPO_ROOT.joinpath("src", "gui", "app_demo.py"))
# Directory holding the per-persona demo CSV files.
_DEMO = _REPO_ROOT.joinpath("samples", "demo")

# One tuple per persona, in parametrize order:
#   (persona key, data file, rows before the run, rows after the run,
#    substring expected somewhere in the rendered markdown)
_PERSONAS = [
    ("bookkeeper", "bank_reconciliation.csv", 26, 20, "Bookkeeper"),
    ("ap-1099", "vendor_1099.csv", 24, 8, "payable"),
    ("ar-aging", "ar_open_invoices.csv", 26, 21, "receivable"),
]


def _app(persona: str | None = None) -> AppTest:
    """Build the demo page from ``_PAGE`` and return it after one run.

    Args:
        persona: Value to place in the ``p`` query parameter before the
            run. When ``None`` the query parameters are left untouched.

    Returns:
        Whatever ``AppTest.run()`` returns for the freshly built app.
    """
    app = AppTest.from_file(_PAGE, default_timeout=60)
    if persona is None:
        return app.run()
    app.query_params["p"] = persona
    return app.run()


def _md(at: AppTest) -> str:
    """Return every markdown element's ``value`` joined by single spaces.

    Flattening the page into one string lets tests use plain substring
    checks without caring which markdown element carried the text.
    """
    pieces = [element.value for element in at.markdown]
    return " ".join(pieces)


@pytest.mark.parametrize("key,data_file,before,after,label", _PERSONAS)
def test_persona_renders_with_its_dataset(key, data_file, before, after, label):
    """Each persona's first render shows its label, BEFORE size and steps.

    ``after`` is unused here; it is accepted only because the parametrize
    tuple is shared with the run test.
    """
    page = _app(key)
    assert not page.exception
    rendered = _md(page)
    assert label in rendered, f"persona label {label!r} not rendered"

    # The BEFORE header must match the real dataset on disk; comparing the
    # file's row count to ``before`` first guards the fixture against
    # silent drift.
    dataset = pd.read_csv(_DEMO / data_file, dtype=str, keep_default_na=False)
    assert len(dataset) == before
    assert f"BEFORE — {before} rows" in rendered

    # The saved pipeline is displayed (read-only) as the canonical steps.
    for step_name in ("text_clean", "dedup"):
        assert step_name in rendered

    button_labels = [button.label for button in page.button]
    assert any("Run pipeline" in text for text in button_labels)


def test_default_persona_is_bookkeeper():
    """Without a ``p`` query parameter the page renders "Bookkeeper"."""
    page = _app(None)
    assert not page.exception
    rendered = _md(page)
    assert "Bookkeeper" in rendered


def test_unknown_persona_falls_back_to_default():
    """An unrecognized ``p`` value still renders "Bookkeeper" without error."""
    page = _app("not-a-real-persona")
    assert not page.exception
    rendered = _md(page)
    assert "Bookkeeper" in rendered


@pytest.mark.parametrize("key,data_file,before,after,label", _PERSONAS)
def test_run_shows_after_value_and_buy_path(key, data_file, before, after, label):
    """Clicking "Run pipeline" surfaces the AFTER value and the buy path.

    ``data_file`` and ``label`` are unused here; they are accepted only
    because the parametrize tuple is shared with the render test.
    """
    at = _app(key)
    # Surface a broken first render as itself, not as a missing button.
    assert not at.exception, at.exception

    # Look the button up explicitly so its absence fails with a readable
    # message instead of an IndexError from ``[...][0]``.
    run_buttons = [b for b in at.button if "Run pipeline" in b.label]
    assert run_buttons, "no 'Run pipeline' button rendered"
    run_buttons[0].click().run()
    assert not at.exception, at.exception

    # A result is cached and the AFTER header reports the dedup win.
    assert "demo_result" in at.session_state
    result = at.session_state["demo_result"]
    assert len(result.final_df) == after
    assert result.final_rows < result.initial_rows
    md = _md(at)  # flattened once; reused for both substring checks below
    assert f"{before} → {after} rows" in md

    # The buy path is present after a run (download + Gumroad CTA). The
    # cleaned-CSV download is a download_button, not a plain button.
    downloads = at.get("download_button")
    assert any("Download cleaned CSV" in d.label for d in downloads)
    assert f"gumroad.com/l/datatools?from={key}" in md


def test_persona_switch_clears_stale_result():
    """Switching persona after a run drops the previous persona's result."""
    # Run the bookkeeper demo, then switch persona via the quick-switch
    # dropdown (driving the selectbox — a raw query-param change is
    # overridden by the dropdown's persisted value).
    at = _app("bookkeeper")
    run_buttons = [b for b in at.button if "Run pipeline" in b.label]
    # Explicit check: a missing button should not surface as an IndexError.
    assert run_buttons, "no 'Run pipeline' button rendered"
    run_buttons[0].click().run()
    # A crashed run should be reported as such, not as a missing result.
    assert not at.exception, at.exception
    assert "demo_result" in at.session_state

    switches = [s for s in at.selectbox if s.key == "persona_switch"]
    assert switches, "no 'persona_switch' selectbox rendered"
    switches[0].set_value("ap-1099").run()
    assert not at.exception, at.exception
    # The page drops the stale bookkeeper result when the persona changes,
    # so the visitor never sees the wrong dataset's AFTER block.
    assert "demo_result" not in at.session_state
    assert "payable" in _md(at)  # now showing the AP/1099 persona


def test_run_offers_a_watermarked_download():
    """After a run the visitor gets a download, labeled as watermarked
    (the free/paid boundary from DEMO-PLAN §6)."""
    at = _app("bookkeeper")
    run_buttons = [b for b in at.button if "Run pipeline" in b.label]
    # Explicit check: a missing button should not surface as an IndexError.
    assert run_buttons, "no 'Run pipeline' button rendered"
    run_buttons[0].click().run()
    # Without this, a crashed run would be misreported below as a missing
    # download rather than as the exception it actually is.
    assert not at.exception, at.exception

    dl = [d for d in at.get("download_button") if "Download cleaned CSV" in d.label]
    assert dl, "no cleaned-CSV download after a run"
    assert "watermark" in dl[0].label.lower()