Files
datatools-dev/tests/gui/test_workflows.py
Michael 93e43fc0d9 feat(gui): sidebar sections + non-technical tool labels
Sidebar nav now groups tools under Data Review / Data Cleaners /
Transformations / Automations via st.navigation, replacing the flat
auto-discovered list. Tool display names switch to action-first
phrasing (Find Duplicates, Fix Missing Values, Find Unusual Values,
Standardize Formats, Clean Text, Quality Check, Map Columns, Combine
Files, Automated Workflows) in EN + ES packs and on each page's H1.

The Data Cleaners section follows the requested order: Missing
Values → Outliers → Text Cleaner → Format Standardizer → Deduplicator
→ Quality Check. (Text Cleaner kept inside cleaners since the request
didn't list it but the tool still ships.) Registry now carries a
section field; helpers added: tools_in_section(), section_label().

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 19:36:01 +00:00

208 lines
8.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Happy-path workflow tests for each Ready tool page.
These drive the GUI like a user would: pre-stash an upload + a passed
gate, render the page, click the primary action, assert the result
landed in session state. They catch wiring bugs that smoke tests
can't see — e.g., a primary button mis-keyed, a result not stashed in
session state, a page reading the wrong key.
Slow-ish (~0.52s per workflow). Sits behind the ``gui`` marker so
``pytest -m 'not gui'`` skips them.
"""
from __future__ import annotations
import pandas as pd
import pytest
from .conftest import collected_text, stash_upload
# ---------------------------------------------------------------------------
# Deduplicator
# ---------------------------------------------------------------------------
class TestDeduplicatorWorkflow:
"""Upload → click Find Duplicates → result lands in session_state."""
def _setup(self, app_factory, small_csv_bytes):
app = app_factory("1_Deduplicator")
stash_upload(app, name="messy.csv", data=small_csv_bytes)
return app
def test_upload_renders_preview(self, app_factory, small_csv_bytes):
app = self._setup(app_factory, small_csv_bytes)
app.run()
text = collected_text(app)
assert "Preview: messy.csv" in text, (
f"upload preview header missing; got:\n{text[:500]}"
)
def test_find_duplicates_button_present(self, app_factory, small_csv_bytes):
app = self._setup(app_factory, small_csv_bytes)
app.run()
labels = [b.label for b in app.button]
assert any("Find Duplicates" in lbl for lbl in labels), (
f"primary action missing; got: {labels}"
)
def test_clicking_find_duplicates_stashes_result(
self, app_factory, small_csv_bytes,
):
app = self._setup(app_factory, small_csv_bytes)
app.run()
# Find the Find-Duplicates button and click it. AppTest's
# button-by-key access is via ``.button(key=...)`` — we don't
# have the key here, so locate it by label.
target = next(b for b in app.button if "Find Duplicates" in b.label)
target.click().run()
# The page stores the result under ``result`` in session state.
result = app.session_state["result"]
assert result is not None, "Find Duplicates didn't stash a result"
# The sample has Alice twice → one match group.
assert len(result.match_groups) >= 1
# ---------------------------------------------------------------------------
# Text Cleaner
# ---------------------------------------------------------------------------
class TestTextCleanerWorkflow:
def _setup(self, app_factory, small_csv_bytes):
app = app_factory("2_Text_Cleaner")
stash_upload(app, name="messy.csv", data=small_csv_bytes)
return app
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
app = self._setup(app_factory, small_csv_bytes)
app.run()
assert not app.exception
text = collected_text(app)
assert "Clean Text" in text
def test_preview_or_clean_button_present(self, app_factory, small_csv_bytes):
"""The text cleaner ships a primary action (label varies by
version). We just assert at least one primary-looking button
exists past the upload."""
app = self._setup(app_factory, small_csv_bytes)
app.run()
# Filter out the gate-redirect button (which would only be
# present if the gate fired, which our setup prevents).
gate_buttons = {"Go to Review & Normalize", "Ir a Revisar y Normalizar"}
non_gate = [b for b in app.button if b.label not in gate_buttons]
assert non_gate, (
f"no primary buttons rendered; got: {[b.label for b in app.button]}"
)
# ---------------------------------------------------------------------------
# Format Standardizer
# ---------------------------------------------------------------------------
class TestFormatStandardizerWorkflow:
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
app = app_factory("3_Format_Standardizer")
stash_upload(app, name="messy.csv", data=small_csv_bytes)
app.run()
assert not app.exception
text = collected_text(app)
assert "Standardize Formats" in text
# ---------------------------------------------------------------------------
# Missing Value Handler
# ---------------------------------------------------------------------------
class TestMissingValuesWorkflow:
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
app = app_factory("4_Missing_Values")
stash_upload(app, name="messy.csv", data=small_csv_bytes)
app.run()
assert not app.exception
text = collected_text(app)
assert "Missing" in text
# ---------------------------------------------------------------------------
# Column Mapper
# ---------------------------------------------------------------------------
class TestColumnMapperWorkflow:
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
app = app_factory("5_Column_Mapper")
stash_upload(app, name="messy.csv", data=small_csv_bytes)
app.run()
assert not app.exception
text = collected_text(app)
assert "Column" in text
# ---------------------------------------------------------------------------
# Pipeline Runner
# ---------------------------------------------------------------------------
class TestPipelineRunnerWorkflow:
def test_page_renders_with_upload(self, app_factory, small_csv_bytes):
app = app_factory("9_Pipeline_Runner")
stash_upload(app, name="messy.csv", data=small_csv_bytes)
app.run()
assert not app.exception
text = collected_text(app)
assert "Automated Workflows" in text
# ---------------------------------------------------------------------------
# Review page — special: doesn't gate on upload, has its own analyzer flow
# ---------------------------------------------------------------------------
class TestReviewWorkflow:
"""The Review page is the gate-fixer. Without an upload it shows a
'go back to home' message. With an upload it runs the analyzer and
shows findings."""
def test_no_upload_shows_back_to_home(self, app_factory):
app = app_factory("0_Review")
app.run()
text = collected_text(app)
# Page shows ``No file uploaded`` + ``Back to home``.
assert "No file uploaded" in text or "uploaded" in text.lower()
def test_with_upload_shows_review_content(
self, app_factory, small_csv_bytes,
):
app = app_factory("0_Review")
# Review page only needs the upload bytes, not a pre-passed gate.
app.session_state["home_uploaded_bytes"] = small_csv_bytes
app.session_state["home_uploaded_name"] = "messy.csv"
app.session_state["home_uploaded_size"] = len(small_csv_bytes)
app.run()
assert not app.exception
text = collected_text(app)
# Page ran the analyzer — either we get findings or the
# "already clean" success message. Either way confirms the
# analyzer pipeline ran end-to-end with the stashed bytes.
clean_msg = "No findings to review" in text
encoding_section = "File encoding" in text
assert clean_msg or encoding_section, (
f"Review page didn't surface analyzer output; got:\n{text[:400]}"
)
# ---------------------------------------------------------------------------
# Coming-Soon pages still render (just a stub) — pinned so we know if a
# Coming-Soon goes from "stub renders" to "import error".
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("slug,name", [
("6_Outlier_Detector", "Unusual Values"),
("7_Multi_File_Merger", "Combine Files"),
("8_Validator_Reporter", "Quality Check"),
])
class TestComingSoonStubs:
def test_stub_renders(self, app_factory, slug, name):
app = app_factory(slug)
app.run()
assert not app.exception
text = collected_text(app)
assert name in text