#!/usr/bin/env python3
"""DataTools test runner — single entry point with category flags.

Examples
--------

Run everything (default)::

    python run_tests.py

Run a single tool's tests::

    python run_tests.py --tool dedup
    python run_tests.py --tool text_clean
    python run_tests.py --tool analyze
    python run_tests.py --tool io
    python run_tests.py --tool cli

Categories::

    python run_tests.py --unit          # unit tests only (no e2e, no install)
    python run_tests.py --e2e           # end-to-end smoke tests
    python run_tests.py --install       # install / dependency sanity
    python run_tests.py --fixtures      # corpus + dropped-file sweep
    python run_tests.py --coverage      # add a coverage report
    python run_tests.py --quick         # skip @pytest.mark.slow
    python run_tests.py -v / --verbose  # verbose pytest output

Multiple flags compose. ``--tool X --quick`` runs that tool's quick tests.

Dropping a new fixture into ``test-cases/`` is automatic: the fixture sweep
test (``tests/test_fixtures_sweep.py``) parametrizes over every CSV/XLSX in
that directory (excluding ``text-cleaner-corpus/`` which has its own suite).
"""

from __future__ import annotations

import argparse
import importlib.util
import shlex
import shutil
import subprocess
import sys
from pathlib import Path

PROJECT_ROOT = Path(__file__).resolve().parent

# Tool name -> matching pytest -k expression. Keep aligned with test_*.py
# filenames; run_tests.py --tool foo translates to ``-k foo``.
_TOOL_MAP: dict[str, str] = {
    "dedup": "test_dedup or test_cli.py",
    "text_clean": "test_text_clean or test_cli_text_clean or test_corpus",
    "analyze": "test_analyze or test_cli_analyze",
    "io": "test_io",
    "cli": "test_cli or test_cli_text_clean or test_cli_analyze",
    "config": "test_config",
    "normalizers": "test_normalizers",
    "normalize": "test_normalize",
    "encodings": "test_encodings_corpus or test_io",
    "gate": "test_normalize",
}

# Category flag name -> pytest path arguments selected by that flag.
# Insertion order is the order in which paths are handed to pytest.
_CATEGORY_PATHS: dict[str, list[str]] = {
    "unit": ["tests/"],  # all tests are unit unless marked otherwise
    "e2e": ["tests/test_e2e.py"],
    "install": ["tests/test_install.py"],
    "fixtures": [
        "tests/test_corpus.py",
        "tests/test_fixtures_sweep.py",
        "tests/test_encodings_corpus.py",
    ],
}


def _select_paths(args: argparse.Namespace) -> list[str]:
    """Return the path arguments to hand to pytest.

    Precedence, highest first:

    1. Explicit positional paths (``args.path``). The CLI help promises that
       these override the category flags, so they win outright.
    2. The union of the paths of every selected category flag, in
       ``_CATEGORY_PATHS`` order.
    3. ``tests/`` when nothing was selected.
    """
    explicit = list(getattr(args, "path", None) or [])
    if explicit:
        return explicit

    selected: list[str] = []
    for category, category_paths in _CATEGORY_PATHS.items():
        if getattr(args, category, False):
            selected.extend(category_paths)
    return selected or ["tests/"]


def _build_pytest_args(args: argparse.Namespace) -> list[str]:
    """Translate parsed runner flags into a ``python -m pytest`` command.

    Args:
        args: Namespace carrying ``verbose``, ``coverage``, ``quick``,
            ``tool``, the category booleans (``unit``, ``e2e``, ``install``,
            ``fixtures``) and the positional ``path`` list.

    Returns:
        The full argv list, starting with ``sys.executable``.

    Raises:
        SystemExit: with code 2 when ``args.tool`` is not a key of
            ``_TOOL_MAP`` (a message listing the valid tools goes to stderr).
    """
    cmd: list[str] = [sys.executable, "-m", "pytest"]

    # Verbosity
    cmd.append("-vv" if args.verbose else "-q")

    # Coverage
    if args.coverage:
        cmd.extend(["--cov=src", "--cov-report=term-missing"])

    # Quick: skip anything marked slow.
    if args.quick:
        cmd.extend(["-m", "not slow"])

    # Tool filter via -k expression.
    if args.tool:
        expression = _TOOL_MAP.get(args.tool)
        if expression is None:
            print(
                f"unknown --tool '{args.tool}'. "
                f"available: {', '.join(sorted(_TOOL_MAP))}",
                file=sys.stderr,
            )
            sys.exit(2)
        cmd.extend(["-k", expression])

    # Path selection: explicit paths, else categories, else everything.
    cmd.extend(_select_paths(args))
    return cmd


def main(argv: list[str] | None = None) -> int:
    """Parse the command line, run pytest from the project root, return its code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The pytest subprocess return code, or 2 when pytest cannot be
        imported by the current interpreter.
    """
    parser = argparse.ArgumentParser(
        prog="run_tests.py",
        description=(
            "DataTools test runner. With no flags runs every test. Use "
            "--tool to scope to one tool, --unit/--e2e/--install/--fixtures "
            "to scope by category. Combine flags freely."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Available tools: " + ", ".join(sorted(_TOOL_MAP)) + "\n\n"
            "To add a new fixture-driven test: drop a CSV or XLSX into "
            "test-cases/ and re-run. tests/test_fixtures_sweep.py picks up "
            "new files automatically — no test code changes required."
        ),
    )
    parser.add_argument("--tool", help="Limit tests to one tool.")
    parser.add_argument("--unit", action="store_true",
                        help="Unit tests only (default scope).")
    parser.add_argument("--e2e", action="store_true",
                        help="End-to-end CLI/integration smoke tests.")
    parser.add_argument("--install", action="store_true",
                        help="Install / import / entry-point sanity tests.")
    parser.add_argument("--fixtures", action="store_true",
                        help="Run the corpus + dropped-fixture sweep.")
    parser.add_argument("--coverage", action="store_true",
                        help="Emit a coverage report (term-missing).")
    parser.add_argument("--quick", action="store_true",
                        help="Skip tests marked @pytest.mark.slow.")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Verbose pytest output.")
    parser.add_argument("path", nargs="*",
                        help="Optional explicit test paths (override category).")
    args = parser.parse_args(argv)

    # Ensure we run from the project root so relative imports / paths work.
    cwd_target = PROJECT_ROOT
    if Path.cwd() != cwd_target:
        print(f"running from {cwd_target}")

    # The command below is ``sys.executable -m pytest``, so what matters is
    # that *this* interpreter can import pytest. A ``pytest`` script on PATH
    # may belong to a different environment and is only used as a hint.
    if not _python_has_pytest():
        print(
            "pytest is not installed. Install dev deps:\n"
            " pip install -r requirements-dev.txt",
            file=sys.stderr,
        )
        stray = shutil.which("pytest")
        if stray is not None:
            print(
                f"note: found {stray} on PATH, but pytest is not importable "
                f"by {sys.executable}",
                file=sys.stderr,
            )
        return 2

    cmd = _build_pytest_args(args)
    if args.verbose:
        # Quote each argument so the echoed command can be pasted into a
        # shell (the -k / -m expressions contain spaces).
        print("→", " ".join(shlex.quote(part) for part in cmd))

    proc = subprocess.run(cmd, cwd=cwd_target)
    return proc.returncode


def _python_has_pytest() -> bool:
    """Return True when the running interpreter can locate the pytest package.

    Uses ``importlib.util.find_spec`` so availability is probed without
    executing pytest's import-time code in the runner process.
    """
    try:
        return importlib.util.find_spec("pytest") is not None
    except (ImportError, ValueError):
        # find_spec raises ValueError when an already-imported module has no
        # usable __spec__; treat any lookup failure as "not available".
        return False


if __name__ == "__main__":
    sys.exit(main())