"""Stress-test the upload analyzer against a corpus of pathological files. Every file under ``test-cases/junk-corpus/test_data/`` is fed through ``_run_analysis_on_upload`` — the same path the GUI takes when a user drops a file on the home page. The contract under test is: * The call never raises. Errors must surface as a synthetic ``Finding`` with severity ``"error"``, not a Python traceback that the page chrome bubbles up to the user. * The return is always a list of :class:`Finding` (possibly empty for files the analyzer judges clean). * Specific high-risk files (empty bytes, corrupt zip, etc.) MUST produce at least one error-level Finding so the UI shows a red banner rather than silently rendering "no issues found". To add a new pathological shape: 1. Edit ``test-cases/junk-corpus/make_junk_corpus.py`` to write the new file under ``test_data/``. 2. Re-run that script to materialize the file on disk. 3. (Optional) Add the filename to ``_MUST_BE_ERROR`` below if the file represents a state where "no findings" would be a silent failure. """ from __future__ import annotations from pathlib import Path import pytest from src.core.analyze import Finding from src.gui.components._legacy import _run_analysis_on_upload _CORPUS = Path(__file__).resolve().parent.parent / "test-cases" / "junk-corpus" / "test_data" class _FakeUpload: """Duck-type the Streamlit ``UploadedFile`` interface from a path.""" def __init__(self, path: Path) -> None: self.name = path.name self._bytes = path.read_bytes() def getvalue(self) -> bytes: return self._bytes def _corpus_files() -> list[Path]: files = sorted(p for p in _CORPUS.iterdir() if p.is_file()) if not files: raise RuntimeError( f"Junk corpus is empty. Run " f"`python test-cases/junk-corpus/make_junk_corpus.py` " f"to generate {_CORPUS}." ) return files # Files where "zero findings" would be a silent failure — these are # structurally broken enough that the analyzer MUST flag them. 
# The error-level Finding is what shows the user a red banner instead of
# the misleading "no issues found" success path.
_MUST_BE_ERROR = {
    "empty.csv",
    "only_bom.csv",
    "only_nul.csv",
    "corrupt_xlsx.xlsx",
}


@pytest.mark.parametrize(
    "path",
    _corpus_files(),
    ids=lambda p: p.name,
)
class TestJunkCorpus:
    """Run each corpus file through the analyzer and check the contract.

    Every test here is parametrized over all corpus files. The usefulness
    of error descriptions is covered separately by
    :meth:`TestJunkCorpus.test_error_findings_have_a_description`.
    """

    def test_no_exception_propagates(self, path: Path) -> None:
        """The analyzer call returns a list instead of raising."""
        # This call is the test. Failures inside analyze() / pandas /
        # repair_bytes / openpyxl are expected to be converted into an
        # error Finding by ``_run_analysis_on_upload``; an exception
        # escaping here means the home page would crash on this file
        # in production.
        findings = _run_analysis_on_upload(_FakeUpload(path))
        assert isinstance(findings, list), (
            f"{path.name}: expected list[Finding], got {type(findings).__name__}"
        )

    def test_findings_are_well_formed(self, path: Path) -> None:
        """Each returned item is a Finding with id, severity and description."""
        allowed_severities = ("info", "warn", "error")
        for finding in _run_analysis_on_upload(_FakeUpload(path)):
            assert isinstance(finding, Finding), (
                f"{path.name}: non-Finding in result list: {finding!r}"
            )
            assert isinstance(finding.id, str) and finding.id, (
                f"{path.name}: Finding has empty id"
            )
            assert finding.severity in allowed_severities, (
                f"{path.name}: Finding has bad severity {finding.severity!r}"
            )
            assert isinstance(finding.description, str) and finding.description, (
                f"{path.name}: Finding has empty description"
            )

    def test_must_be_error_files_actually_flag(self, path: Path) -> None:
        """Files named in ``_MUST_BE_ERROR`` yield an error-level Finding."""
        if path.name not in _MUST_BE_ERROR:
            pytest.skip(f"{path.name} is allowed to pass clean")
        findings = _run_analysis_on_upload(_FakeUpload(path))
        error_findings = [
            item for item in findings if item.severity == "error"
        ]
        assert error_findings, (
            f"{path.name} should surface at least one error-level "
            f"Finding so the UI shows a red banner; got {len(findings)} "
            f"findings (none of severity 'error')."
        )

    def test_error_findings_have_a_description(self, path: Path) -> None:
        """Error findings must carry a description the user can act on.

        For an empty / corrupt file the description is the ONLY thing
        the user sees — it has to name the file or include enough
        context that they can fix the underlying problem. The check
        itself is a minimum length of 20 characters.
        """
        for finding in _run_analysis_on_upload(_FakeUpload(path)):
            if finding.severity == "error":
                # The synthetic error Findings always interpolate the
                # file name; analyzer-generated errors include the
                # column or a description that mentions what was wrong.
                assert len(finding.description) >= 20, (
                    f"{path.name}: error Finding description is too short "
                    f"to be useful: {finding.description!r}"
                )


def test_corpus_contains_expected_shapes() -> None:
    """Check that every ``_MUST_BE_ERROR`` name exists in the corpus.

    If somebody renames a file in ``make_junk_corpus.py`` without
    updating ``_MUST_BE_ERROR``, the per-file parametrization would skip
    the must-be-error assertion for it without any failure. This test
    catches the mismatch first.
    """
    missing = _MUST_BE_ERROR.difference(p.name for p in _corpus_files())
    assert not missing, (
        f"_MUST_BE_ERROR references files that don't exist in the "
        f"corpus: {sorted(missing)}. Regenerate the corpus or update "
        f"_MUST_BE_ERROR."
    )