feat(cli): src.cli_analyze — Typer CLI for the analyzer

    python -m src.cli_analyze input.csv            # rich table per tool
    python -m src.cli_analyze input.csv --json     # array of finding dicts
    python -m src.cli_analyze input.csv --strict   # exit 1 on warn/error
    python -m src.cli_analyze input.csv -n 50000   # cap rows scanned

Findings are grouped by destination tool so the user can see at a glance which tool to open next. Read-only; exit code is 0 unless --strict is set (a missing input file exits 2).

The CLI keeps its own tool-id -> display-name map so it doesn't depend on the GUI module.

7 tests cover: clean-file passthrough, dirty-file table, --json round-trip, missing-file (exit 2), --strict exit code (1 on warnings, 0 on a clean file), --sample-rows cap.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 15:53:11 +00:00
parent edf6ccf90b
commit 5c62fb6117
2 changed files with 255 additions and 0 deletions
--- a/tests/test_cli_analyze.py
+++ b/tests/test_cli_analyze.py
@@ -0,0 +1,97 @@
+"""Tests for src.cli_analyze — Typer CLI."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from typer.testing import CliRunner
+
+from src.cli_analyze import app
+
+
+# Module-level CliRunner shared by every test in this file; each test calls
+# ``runner.invoke(app, [...])`` and inspects the returned result.
+runner = CliRunner()
+
+
+def _make_dirty(tmp_path: Path) -> Path:
+    """Create ``dirty.csv`` inside *tmp_path* and return its path.
+
+    The file is written as raw bytes: a UTF-8 byte-order mark, a header
+    whose first column is padded with spaces, and three data rows holding a
+    mixed-case email, a space-padded name and an ``N/A`` placeholder.
+    """
+    payload = b"".join(
+        (
+            b"\xef\xbb\xbf",  # UTF-8 byte-order mark
+            b"  id  ,Name,Email\n",  # header with space padding
+            b"1,Alice,Alice@Example.COM\n",
+            b"2,  Bob  ,bob@example.com\n",
+            b"3,N/A,carol@example.com\n",
+        )
+    )
+    target = tmp_path / "dirty.csv"
+    target.write_bytes(payload)
+    return target
+
+
+class TestAnalyzeCli:
+    """End-to-end tests that drive the Typer ``app`` through ``CliRunner``."""
+
+    def test_clean_file_says_so(self, tmp_path):
+        """A CSV with nothing to flag exits 0 and prints the all-clear line."""
+        f = tmp_path / "clean.csv"
+        f.write_text("id,name\n1,Alice\n2,Bob\n", encoding="utf-8")
+        result = runner.invoke(app, [str(f)])
+        assert result.exit_code == 0
+        assert "No issues detected" in result.stdout
+
+    def test_dirty_file_lists_findings(self, tmp_path):
+        """The default (table) output names the tools and shows a severity."""
+        f = _make_dirty(tmp_path)
+        result = runner.invoke(app, [str(f)])
+        assert result.exit_code == 0
+        # The Rich table breaks lines; assert on stable substrings instead of
+        # full finding ids.
+        assert "Text Cleaner" in result.stdout
+        assert "Missing Value" in result.stdout
+        # Severity column is rendered.
+        assert "warn" in result.stdout
+
+    def test_json_output_round_trips(self, tmp_path):
+        """``--json`` prints a non-empty JSON array of finding dicts."""
+        f = _make_dirty(tmp_path)
+        result = runner.invoke(app, [str(f), "--json"])
+        assert result.exit_code == 0
+        data = json.loads(result.stdout)
+        assert isinstance(data, list)
+        assert len(data) > 0
+        ids = {item["id"] for item in data}
+        assert "dirty_column_headers" in ids or "whitespace_padding" in ids
+        # Each finding has the documented shape. A dedicated loop name keeps
+        # ``f`` bound to the CSV path instead of silently rebinding it.
+        required = {"id", "severity", "tool", "count", "description", "samples"}
+        for finding in data:
+            assert required <= set(finding)
+
+    def test_missing_file_exits_2(self, tmp_path):
+        """A path that does not exist exits 2 with a "not found" message."""
+        result = runner.invoke(app, [str(tmp_path / "nope.csv")])
+        assert result.exit_code == 2
+        # ``result.output`` is checked rather than ``result.stderr``: on Click
+        # releases where the runner mixes stderr into stdout, touching
+        # ``result.stderr`` raises ValueError instead of failing the assert.
+        # Lower-casing the whole output keeps the match case-insensitive
+        # regardless of which stream carried the message.
+        assert "not found" in result.output.lower()
+
+    def test_strict_exits_1_on_warnings(self, tmp_path):
+        """``--strict`` turns warn/error findings into exit code 1."""
+        f = _make_dirty(tmp_path)
+        result = runner.invoke(app, [str(f), "--strict", "--json"])
+        # JSON output is still printed, but exit code is 1 because warns exist.
+        assert result.exit_code == 1
+        data = json.loads(result.stdout)
+        assert any(item["severity"] in ("warn", "error") for item in data)
+
+    def test_strict_exits_0_on_clean(self, tmp_path):
+        """``--strict`` still exits 0 when the file has no findings."""
+        f = tmp_path / "clean.csv"
+        f.write_text("id,name\n1,Alice\n2,Bob\n", encoding="utf-8")
+        result = runner.invoke(app, [str(f), "--strict"])
+        assert result.exit_code == 0
+
+    def test_sample_rows_caps_scan(self, tmp_path):
+        """``--sample-rows`` limits how many rows the analyzer looks at."""
+        # Build a file where ONLY rows past 100 have NBSP padding; with
+        # --sample-rows 50 we should miss it.
+        rows = ["id,name"]
+        rows.extend(f"{i},Alice" for i in range(1, 101))
+        # The no-break space is written as an escape so it stays visible in
+        # the source and cannot be stripped or normalised by an editor.
+        rows.extend(f"{i},Alice\u00a0" for i in range(101, 200))
+        f = tmp_path / "big.csv"
+        f.write_text("\n".join(rows) + "\n", encoding="utf-8")
+
+        capped = runner.invoke(app, [str(f), "--sample-rows", "50", "--json"])
+        full = runner.invoke(app, [str(f), "--sample-rows", "200", "--json"])
+        # Fail with the CLI's own output rather than an opaque JSON decode
+        # error if either invocation crashed.
+        assert capped.exit_code == 0, capped.output
+        assert full.exit_code == 0, full.output
+        capped_ids = {x["id"] for x in json.loads(capped.stdout)}
+        full_ids = {x["id"] for x in json.loads(full.stdout)}
+        assert "nbsp_or_unicode_whitespace" not in capped_ids
+        assert "nbsp_or_unicode_whitespace" in full_ids