"""Install / dependency / entry-point sanity tests. These tests answer the question: "after running ``pip install -r requirements.txt`` on a fresh machine, can the user actually use this project?" They run on every supported platform — the asserts touch only public APIs and CLI ``--help`` exits, never any platform-specific paths. If a future dependency upgrade or refactor breaks an import that's used by the CLI or the GUI, these tests catch it before the rest of the suite even gets a chance to run. """ from __future__ import annotations import importlib import subprocess import sys import pytest pytestmark = pytest.mark.install # --------------------------------------------------------------------------- # Required dependencies # --------------------------------------------------------------------------- # Top-level packages that must import cleanly. If any of these fails, the # user's install is broken — fail loudly with the offender's name. _REQUIRED_DEPS = [ "pandas", "numpy", "openpyxl", "rapidfuzz", "charset_normalizer", "loguru", "tqdm", "typer", "phonenumbers", "streamlit", ] @pytest.mark.parametrize("module", _REQUIRED_DEPS) def test_required_dependency_imports(module: str) -> None: importlib.import_module(module) # --------------------------------------------------------------------------- # Project package imports # --------------------------------------------------------------------------- _PROJECT_MODULES = [ "src", "src.core", "src.core.io", "src.core.text_clean", "src.core.dedup", "src.core.normalizers", "src.core.analyze", "src.core.config", "src.cli", "src.cli_text_clean", "src.cli_analyze", "src.gui.components", ] @pytest.mark.parametrize("module", _PROJECT_MODULES) def test_project_module_imports(module: str) -> None: importlib.import_module(module) # --------------------------------------------------------------------------- # Public API surface # --------------------------------------------------------------------------- def test_core_public_api_present() -> None: """Spot-check the symbols re-exported via ``src.core``. Catches an accidental rename or drop in ``src/core/__init__.py``. """ import src.core as core expected = [ # I/O "read_file", "write_file", "list_sheets", "detect_encoding", "detect_delimiter", "detect_header_row", "read_csv_repaired", "repair_bytes", "RepairAction", "RepairResult", # Analyzer "Finding", "analyze", "findings_by_tool", "to_dict", # Text cleaner "CleanOptions", "CleanResult", "clean_dataframe", "clean_value", "smart_title_case", "sentence_case", "apply_case", # Dedup "deduplicate", "build_default_strategies", "Algorithm", "SurvivorRule", "MatchStrategy", "MatchResult", "DeduplicationResult", # Normalizers "normalize_email", "normalize_phone", "normalize_name", "normalize_address", "normalize_string", "get_normalizer", "NormalizerType", ] missing = [name for name in expected if not hasattr(core, name)] assert not missing, f"src.core is missing public symbols: {missing}" # --------------------------------------------------------------------------- # CLI entry points # --------------------------------------------------------------------------- def _cli_help(module: str) -> subprocess.CompletedProcess: """Run ``python -m --help`` and return the CompletedProcess. Captures both stdout and stderr so tests can inspect either; uses a short timeout so a hung CLI fails fast on CI. """ return subprocess.run( [sys.executable, "-m", module, "--help"], capture_output=True, text=True, timeout=30, ) @pytest.mark.parametrize("cli_module", [ "src.cli", "src.cli_text_clean", "src.cli_analyze", ]) def test_cli_help_exits_zero(cli_module: str) -> None: proc = _cli_help(cli_module) assert proc.returncode == 0, ( f"{cli_module} --help exited {proc.returncode}.\n" f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}" ) # Help output must mention the command name or at least include "Usage:". combined = (proc.stdout + proc.stderr).lower() assert "usage" in combined, ( f"{cli_module} --help did not produce a Usage line" ) # --------------------------------------------------------------------------- # Streamlit GUI entry point # --------------------------------------------------------------------------- def test_streamlit_app_module_compiles() -> None: """Ensure ``src/gui/app.py`` is at least syntactically valid Python. A full Streamlit launch is too heavy for the install layer; that's covered by the e2e suite. """ import ast from pathlib import Path app_path = Path(__file__).resolve().parent.parent / "src" / "gui" / "app.py" assert app_path.exists(), f"missing {app_path}" ast.parse(app_path.read_text(encoding="utf-8")) # --------------------------------------------------------------------------- # Test runner sanity # --------------------------------------------------------------------------- def test_run_tests_help_works() -> None: """``python run_tests.py --help`` should describe the available flags.""" proc = subprocess.run( [sys.executable, "run_tests.py", "--help"], capture_output=True, text=True, timeout=30, ) assert proc.returncode == 0 assert "--tool" in proc.stdout assert "--fixtures" in proc.stdout