"""Stress-test every tool page against the junk corpus via AppTest. For each Ready tool page, walk a representative subset of pathological files, stash them as the session upload, render the page, click the primary action, and assert that nothing raises and that a clean error is surfaced when the file is genuinely unprocessable. The home-page analyzer is covered separately by ``tests/test_junk_corpus.py``. This file pins the *tool page* contract: once a junk file makes it past the upload, the page's own read + operate + render loop must also stay stable. Subset rationale: running 35 files × 9 pages × full render is too slow for CI. The subset hits the high-value shapes that historically broke something (empty, NUL-only, mojibake, corrupt zip, mismatched columns) plus a clean one to make sure the page still works under happy-path inputs. """ from __future__ import annotations from pathlib import Path import pytest from streamlit.testing.v1 import AppTest _PAGES = Path(__file__).resolve().parent.parent / "src" / "gui" / "pages" _CORPUS = ( Path(__file__).resolve().parent.parent / "test-cases" / "junk-corpus" / "test_data" ) # Representative shapes. Each was selected because it has historically # tripped (or could trip) a different layer: file IO, encoding, pandas # parse, downstream operation, or result rendering. _SUBSET = [ "empty.csv", # zero bytes — IO short-circuit "only_nul.csv", # all-NUL — repair_bytes strips to nothing "mojibake.csv", # Latin-1-as-UTF-8 — original NameError repro "invalid_utf8.csv", # raw invalid UTF-8 sequences "utf16_le_no_bom.csv", # wide encoding without BOM "mismatched_columns.csv", # pandas ParserWarning territory "all_nulls.csv", # cleans/standardizes a no-op "corrupt_xlsx.xlsx", # bad zip — pandas/openpyxl error "single_column.csv", # only one column — many tools assume >=2 ] # Ready tool pages with a deterministic primary-action button label. 
# The Coming-Soon stubs (6_Outlier_Detector, 7_Multi_File_Merger,
# 8_Validator_Reporter) don't process the file so they're covered by
# the "renders without exception" half of the contract only.
# Each entry is (page filename, primary-action button label); a label of
# None makes the primary-action test skip that page.
_TOOL_PAGES = [
    ("1_Deduplicator.py", None),  # dedup's primary action depends on UI state
    ("2_Text_Cleaner.py", "Clean Text"),
    ("3_Format_Standardizer.py", None),  # button label varies with detected types
    ("4_Missing_Values.py", "Handle Missing Values"),
    ("5_Column_Mapper.py", "Apply Column Mapping"),
    ("6_Outlier_Detector.py", None),  # stub
    ("7_Multi_File_Merger.py", None),  # stub
    ("8_Validator_Reporter.py", None),  # stub
    ("9_Pipeline_Runner.py", None),  # button enabled only when pipeline valid
]

# Timeout, in seconds, passed to every AppTest run in this module.
_RUN_TIMEOUT = 20


def _stash_junk_upload(at: AppTest, path: Path) -> None:
    """Pre-populate the session upload stash with junk bytes.

    Reads ``path`` as raw bytes and stores the payload, the file name and
    the byte count under the ``home_uploaded_*`` session-state keys.
    """
    payload = path.read_bytes()
    stash = {
        "home_uploaded_bytes": payload,
        "home_uploaded_name": path.name,
        "home_uploaded_size": len(payload),
    }
    for key, value in stash.items():
        at.session_state[key] = value


def _render_with_junk(page: str, junk_file: str) -> AppTest:
    """Render ``page`` once with ``junk_file`` stashed as the upload.

    Skips the calling test when the corpus file is absent. Returns the
    AppTest instance after its first run so the caller can inspect it or
    keep interacting with it.
    """
    junk_path = _CORPUS / junk_file
    if not junk_path.exists():
        pytest.skip(f"junk file {junk_file} missing from corpus")

    at = AppTest.from_file(str(_PAGES / page))
    _stash_junk_upload(at, junk_path)
    at.run(timeout=_RUN_TIMEOUT)
    return at


# pytest invokes an ``ids`` callable once per individual value (the page
# name, then the label), never with the whole tuple, so ``str`` is all
# that is needed; ids look like "2_Text_Cleaner.py-Clean Text".
@pytest.mark.parametrize("junk_file", _SUBSET)
@pytest.mark.parametrize("page,primary_label", _TOOL_PAGES, ids=str)
class TestToolPagesSurvive:
    """Every Ready/Coming-Soon tool page must render against every junk
    file without raising.

    Errors surface as ``st.error`` banners inside the page body, not as
    Python tracebacks bubbled up to the Streamlit chrome.
    """

    def test_initial_render_no_exception(self, page, primary_label, junk_file):
        """The first render of ``page`` with ``junk_file`` must not raise."""
        at = _render_with_junk(page, junk_file)

        # ``at.exception`` is a list-like; empty means no traceback.
        assert not at.exception, (
            f"{page} crashed on first render with {junk_file}: {at.exception}"
        )

    def test_primary_action_no_exception(self, page, primary_label, junk_file):
        """Clicking the primary action button must not raise.

        Skipped for pages without a deterministic label and for renders
        where the button is not present.
        """
        if primary_label is None:
            pytest.skip(
                f"{page} has no deterministic primary-action label; "
                f"covered by initial-render half of the contract"
            )

        at = _render_with_junk(page, junk_file)

        # First render may already have failed cleanly via st.error +
        # st.stop — in which case the primary action button isn't
        # rendered. That's the correct behaviour; skip.
        button = next((b for b in at.button if b.label == primary_label), None)
        if button is None:
            pytest.skip(
                f"{page} did not render {primary_label!r} for "
                f"{junk_file} (likely caught the bad file at read "
                f"time and showed an error banner)"
            )

        button.click().run(timeout=_RUN_TIMEOUT)
        assert not at.exception, (
            f"{page} crashed after clicking {primary_label!r} with "
            f"{junk_file}: {at.exception}"
        )