datatools-dev/tests/gui/test_workflows.py

"""Happy-path workflow tests for each Ready tool page.

These drive the GUI like a user would: pre-stash an upload + a passed
gate, render the page, click the primary action, assert the result
landed in session state. They catch wiring bugs that smoke tests
can't see — e.g., a primary button mis-keyed, a result not stashed in
session state, a page reading the wrong key.

Slow-ish (~0.5–2s per workflow). Sits behind the ``gui`` marker so
``pytest -m 'not gui'`` skips them.
"""

from __future__ import annotations

import pandas as pd
import pytest

from .conftest import collected_text, stash_upload


# ---------------------------------------------------------------------------
# Find Duplicates
# ---------------------------------------------------------------------------

class TestDeduplicatorWorkflow:
    """Upload → click Find Duplicates → result lands in session_state."""

    def _setup(self, app_factory, small_csv_bytes):
        """Build the Deduplicator page app with ``messy.csv`` pre-stashed.

        The app is returned un-run so each test decides when the first
        render happens.
        """
        app = app_factory("1_Deduplicator")
        stash_upload(app, name="messy.csv", data=small_csv_bytes)
        return app

    def test_upload_renders_preview(self, app_factory, small_csv_bytes):
        """The stashed upload's preview header appears in the rendered text."""
        app = self._setup(app_factory, small_csv_bytes)
        app.run()
        text = collected_text(app)
        assert "Preview: messy.csv" in text, (
            f"upload preview header missing; got:\n{text[:500]}"
        )

    def test_find_duplicates_button_present(self, app_factory, small_csv_bytes):
        """At least one rendered button carries the Find Duplicates label."""
        app = self._setup(app_factory, small_csv_bytes)
        app.run()
        labels = [b.label for b in app.button]
        assert any("Find Duplicates" in lbl for lbl in labels), (
            f"primary action missing; got: {labels}"
        )

    def test_clicking_find_duplicates_stashes_result(
        self, app_factory, small_csv_bytes,
    ):
        """Clicking the primary action stores a result with a match group."""
        app = self._setup(app_factory, small_csv_bytes)
        app.run()
        # Find the Find-Duplicates button and click it. AppTest's
        # button-by-key access is via ``.button(key=...)`` — we don't
        # have the key here, so locate it by label.
        labels = [b.label for b in app.button]
        # A default of None turns "button missing" into a readable
        # assertion failure instead of a bare StopIteration.
        target = next(
            (b for b in app.button if "Find Duplicates" in b.label), None,
        )
        assert target is not None, f"primary action missing; got: {labels}"
        target.click().run()
        # Surface a page failure directly rather than as a confusing
        # missing-key error on the lookup below.
        assert not app.exception, (
            f"page raised after clicking Find Duplicates: {app.exception!r}"
        )
        # The page stores the result under ``result`` in session state.
        # Check for the key first: indexing a missing key raises KeyError,
        # which would hide the intended failure message.
        assert "result" in app.session_state, (
            "Find Duplicates didn't stash a result"
        )
        result = app.session_state["result"]
        assert result is not None, "Find Duplicates didn't stash a result"
        # The sample has Alice twice → one match group.
        assert len(result.match_groups) >= 1, (
            f"expected at least one match group; got {len(result.match_groups)}"
        )


# ---------------------------------------------------------------------------
# Clean Text
# ---------------------------------------------------------------------------

class TestTextCleanerWorkflow:
    """Render checks for the Text Cleaner page with an upload stashed."""

    def _setup(self, app_factory, small_csv_bytes):
        """Return an un-run Text Cleaner app that already holds ``messy.csv``."""
        page = app_factory("2_Text_Cleaner")
        stash_upload(page, name="messy.csv", data=small_csv_bytes)
        return page

    def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
        """The page runs without an exception and mentions ``Clean Text``."""
        page = self._setup(app_factory, small_csv_bytes)
        page.run()
        assert not page.exception
        assert "Clean Text" in collected_text(page)

    def test_preview_or_clean_button_present(self, app_factory, small_csv_bytes):
        """Some non-gate button is rendered once an upload is in place.

        The primary action's label varies by version, so this only
        requires that at least one button other than the gate redirect
        exists.
        """
        page = self._setup(app_factory, small_csv_bytes)
        page.run()
        rendered_labels = [btn.label for btn in page.button]
        # Labels of the gate-redirect button; it should only show up if
        # the gate fired, which the setup is meant to prevent.
        gate_labels = {"Go to Review & Normalize", "Ir a Revisar y Normalizar"}
        primary_candidates = [
            lbl for lbl in rendered_labels if lbl not in gate_labels
        ]
        assert primary_candidates, (
            f"no primary buttons rendered; got: {rendered_labels}"
        )


# ---------------------------------------------------------------------------
# Standardize Formats
# ---------------------------------------------------------------------------

class TestFormatStandardizerWorkflow:
    """Render check for the Format Standardizer page."""

    def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
        """With ``messy.csv`` stashed, the page runs cleanly and mentions
        ``Standardize Formats``."""
        page = app_factory("3_Format_Standardizer")
        stash_upload(page, name="messy.csv", data=small_csv_bytes)
        page.run()
        assert not page.exception
        assert "Standardize Formats" in collected_text(page)


# ---------------------------------------------------------------------------
# Fix Missing Values
# ---------------------------------------------------------------------------

class TestMissingValuesWorkflow:
    """Render check for the Missing Values page."""

    def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
        """With ``messy.csv`` stashed, the page runs cleanly and mentions
        ``Missing``."""
        page = app_factory("4_Missing_Values")
        stash_upload(page, name="messy.csv", data=small_csv_bytes)
        page.run()
        assert not page.exception
        assert "Missing" in collected_text(page)


# ---------------------------------------------------------------------------
# Map Columns
# ---------------------------------------------------------------------------

class TestColumnMapperWorkflow:
    """Render check for the Column Mapper page."""

    def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
        """With ``messy.csv`` stashed, the page runs cleanly and mentions
        ``Column``."""
        page = app_factory("5_Column_Mapper")
        stash_upload(page, name="messy.csv", data=small_csv_bytes)
        page.run()
        assert not page.exception
        assert "Column" in collected_text(page)


# ---------------------------------------------------------------------------
# Automated Workflows
# ---------------------------------------------------------------------------

class TestPipelineRunnerWorkflow:
    """Render check for the Pipeline Runner page."""

    def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
        """With ``messy.csv`` stashed, the page runs cleanly and mentions
        ``Automated Workflows``."""
        page = app_factory("9_Pipeline_Runner")
        stash_upload(page, name="messy.csv", data=small_csv_bytes)
        page.run()
        assert not page.exception
        assert "Automated Workflows" in collected_text(page)


# ---------------------------------------------------------------------------
# Review page — special: doesn't gate on upload, has its own analyzer flow
# ---------------------------------------------------------------------------

class TestReviewWorkflow:
    """The Review page is the gate-fixer. Without an upload it shows a
    'go back to home' message. With an upload it runs the analyzer and
    shows findings."""

    def test_no_upload_shows_back_to_home(self, app_factory):
        """With nothing stashed, the rendered text mentions an upload."""
        app = app_factory("0_Review")
        app.run()
        text = collected_text(app)
        # Page shows ``No file uploaded`` + ``Back to home``. The check is
        # deliberately loose: a case-insensitive match on "uploaded"
        # already covers the literal "No file uploaded", so a separate
        # exact-phrase test would add nothing.
        assert "uploaded" in text.lower(), (
            f"no-upload notice missing; got:\n{text[:400]}"
        )

    def test_with_upload_shows_review_content(
        self, app_factory, small_csv_bytes,
    ):
        """With upload bytes stashed, analyzer output shows up on the page."""
        app = app_factory("0_Review")
        # Review page only needs the upload bytes, not a pre-passed gate.
        app.session_state["home_uploaded_bytes"] = small_csv_bytes
        app.session_state["home_uploaded_name"] = "messy.csv"
        app.session_state["home_uploaded_size"] = len(small_csv_bytes)
        app.run()
        assert not app.exception
        text = collected_text(app)
        # Page ran the analyzer — either we get findings or the
        # "already clean" success message. Either way confirms the
        # analyzer pipeline ran end-to-end with the stashed bytes.
        clean_msg = "No findings to review" in text
        encoding_section = "File encoding" in text
        assert clean_msg or encoding_section, (
            f"Review page didn't surface analyzer output; got:\n{text[:400]}"
        )


# ---------------------------------------------------------------------------
# Coming-Soon pages still render (just a stub) — pinned so we know if a
# Coming-Soon goes from "stub renders" to "import error".
# ---------------------------------------------------------------------------

@pytest.mark.parametrize("slug,name", [
    ("6_Outlier_Detector", "Unusual Values"),
    ("7_Multi_File_Merger", "Combine Files"),
    ("8_Validator_Reporter", "Quality Check"),
])
class TestComingSoonStubs:
    """Each Coming-Soon page still renders and shows its display name."""

    def test_stub_renders(self, app_factory, slug, name):
        """Run the stub page for ``slug`` and expect ``name`` in its text."""
        stub_page = app_factory(slug)
        stub_page.run()
        assert not stub_page.exception
        assert name in collected_text(stub_page)