datatools-dev/run_tests.py

#!/usr/bin/env python3
"""DataTools test runner — single entry point with category flags.

Examples
--------
Run everything (default)::

    python run_tests.py

Run a single tool's tests::

    python run_tests.py --tool dedup
    python run_tests.py --tool text_clean
    python run_tests.py --tool analyze
    python run_tests.py --tool io
    python run_tests.py --tool cli

Categories::

    python run_tests.py --unit          # unit tests only (no e2e, no install)
    python run_tests.py --e2e           # end-to-end smoke tests
    python run_tests.py --install       # install / dependency sanity
    python run_tests.py --fixtures      # corpus + dropped-file sweep
    python run_tests.py --coverage      # add a coverage report
    python run_tests.py --quick         # skip @pytest.mark.slow
    python run_tests.py -v / --verbose  # verbose pytest output

Multiple flags compose. ``--tool X --quick`` runs that tool's quick tests.

Dropping a new fixture into ``test-cases/`` is automatic: the fixture sweep
test (``tests/test_fixtures_sweep.py``) parametrizes over every CSV/XLSX in
that directory (excluding ``text-cleaner-corpus/`` which has its own suite).
"""

from __future__ import annotations

import argparse
import shutil
import subprocess
import sys
from pathlib import Path

PROJECT_ROOT = Path(__file__).resolve().parent

# Tool name -> pytest ``-k`` expression. ``run_tests.py --tool foo`` passes
# ``-k _TOOL_MAP["foo"]`` to pytest (the mapped expression, not the bare tool
# name), so keep each value aligned with the test_*.py filenames it is meant
# to select. The sorted keys are also what the CLI lists as available tools.
# Note that "gate" and "normalize" currently map to the same expression.
_TOOL_MAP: dict[str, str] = {
    "dedup": "test_dedup or test_cli.py",
    "text_clean": "test_text_clean or test_cli_text_clean or test_corpus",
    "analyze": "test_analyze or test_cli_analyze",
    "io": "test_io",
    "cli": "test_cli or test_cli_text_clean or test_cli_analyze",
    "config": "test_config",
    "normalizers": "test_normalizers",
    "normalize": "test_normalize",
    "encodings": "test_encodings_corpus or test_io",
    "gate": "test_normalize",
}

# Category flag name -> pytest path arguments appended to the command line.
# Keys must match the boolean flags (--unit/--e2e/--install/--fixtures) that
# _build_pytest_args looks up by name.
# NOTE(review): "unit" is the whole tests/ directory, which also contains the
# e2e and install test files listed below, while the module docstring
# describes --unit as "no e2e, no install" — confirm which one is intended.
_CATEGORY_PATHS: dict[str, list[str]] = {
    "unit": ["tests/"],          # all tests are unit unless marked otherwise
    "e2e": ["tests/test_e2e.py"],
    "install": ["tests/test_install.py"],
    "fixtures": [
        "tests/test_corpus.py",
        "tests/test_fixtures_sweep.py",
        "tests/test_encodings_corpus.py",
    ],
}


def _build_pytest_args(args: argparse.Namespace) -> list[str]:
    """Translate parsed runner flags into a ``python -m pytest`` command line.

    Args:
        args: Namespace produced by the runner's argument parser. Reads
            ``verbose``, ``coverage``, ``quick``, ``tool``, ``path`` and the
            category booleans ``unit``/``e2e``/``install``/``fixtures``.

    Returns:
        The complete argv list, starting with ``sys.executable -m pytest``
        and ending with the test paths to collect.

    Raises:
        SystemExit: With status 2 (after printing the available tool names
            to stderr) when ``args.tool`` is not a key of ``_TOOL_MAP``.
    """
    cmd: list[str] = [sys.executable, "-m", "pytest"]

    # Verbosity: very verbose on request, otherwise quiet.
    cmd.append("-vv" if args.verbose else "-q")

    # Coverage
    if args.coverage:
        cmd.extend(["--cov=src", "--cov-report=term-missing"])

    # Quick: skip anything marked slow.
    if args.quick:
        cmd.extend(["-m", "not slow"])

    # Tool filter via -k expression.
    if args.tool:
        if args.tool not in _TOOL_MAP:
            print(
                f"unknown --tool '{args.tool}'. "
                f"available: {', '.join(sorted(_TOOL_MAP))}",
                file=sys.stderr,
            )
            sys.exit(2)
        cmd.extend(["-k", _TOOL_MAP[args.tool]])

    # Target selection. Explicit positional paths take precedence over the
    # category flags, as the ``path`` help text promises ("override
    # category"); previously a category flag silently discarded them.
    paths: list[str] = []
    if args.path:
        paths.extend(args.path)
    else:
        # Several categories are OR'd together by passing all their paths.
        selected_categories = [
            c for c in ("unit", "e2e", "install", "fixtures")
            if getattr(args, c)
        ]
        for cat in selected_categories:
            paths.extend(_CATEGORY_PATHS[cat])
        if not selected_categories:
            # No category and no explicit path: run the whole suite.
            paths.append("tests/")

    cmd.extend(paths)
    return cmd


def main(argv: list[str] | None = None) -> int:
    """Parse runner flags, launch pytest from the project root, return its status.

    Args:
        argv: Argument list to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        The pytest child process's exit code, or 2 when pytest cannot be
        imported by the interpreter running this script.
    """
    parser = argparse.ArgumentParser(
        prog="run_tests.py",
        description=(
            "DataTools test runner. With no flags runs every test. Use "
            "--tool to scope to one tool, --unit/--e2e/--install/--fixtures "
            "to scope by category. Combine flags freely."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "Available tools: " + ", ".join(sorted(_TOOL_MAP)) + "\n\n"
            "To add a new fixture-driven test: drop a CSV or XLSX into "
            "test-cases/ and re-run. tests/test_fixtures_sweep.py picks up "
            "new files automatically — no test code changes required."
        ),
    )
    parser.add_argument("--tool", help="Limit tests to one tool.")
    parser.add_argument("--unit", action="store_true",
                        help="Unit tests only (default scope).")
    parser.add_argument("--e2e", action="store_true",
                        help="End-to-end CLI/integration smoke tests.")
    parser.add_argument("--install", action="store_true",
                        help="Install / import / entry-point sanity tests.")
    parser.add_argument("--fixtures", action="store_true",
                        help="Run the corpus + dropped-fixture sweep.")
    parser.add_argument("--coverage", action="store_true",
                        help="Emit a coverage report (term-missing).")
    parser.add_argument("--quick", action="store_true",
                        help="Skip tests marked @pytest.mark.slow.")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Verbose pytest output.")
    parser.add_argument("path", nargs="*",
                        help="Optional explicit test paths (override category).")

    args = parser.parse_args(argv)

    # pytest is started with cwd=PROJECT_ROOT (this process does not chdir),
    # so the relative test paths resolve wherever the script is invoked from.
    cwd_target = PROJECT_ROOT
    if Path.cwd() != cwd_target:
        print(f"running from {cwd_target}")

    # The command is always ``sys.executable -m pytest``, so the only thing
    # that matters is whether *this* interpreter can import pytest. A
    # ``pytest`` script on PATH may belong to a different environment and
    # must not suppress the install hint.
    if not _python_has_pytest():
        print(
            "pytest is not installed. Install dev deps:\n"
            "  pip install -r requirements-dev.txt",
            file=sys.stderr,
        )
        return 2

    cmd = _build_pytest_args(args)
    if args.verbose:
        # Echo the exact command being run.
        print("→", " ".join(cmd))
    proc = subprocess.run(cmd, cwd=cwd_target)
    return proc.returncode


def _python_has_pytest() -> bool:
    """Return True when ``pytest`` can be imported by the running interpreter."""
    try:
        # Import by name so the module has no hard dependency on pytest.
        __import__("pytest")
    except ImportError:
        return False
    return True


if __name__ == "__main__":
    # Propagate the runner's return value as the process exit status.
    raise SystemExit(main())