feat(cli): src.cli_analyze — Typer CLI for the analyzer

python -m src.cli_analyze input.csv             # rich table per tool
python -m src.cli_analyze input.csv --json      # array of finding dicts
python -m src.cli_analyze input.csv --strict    # exit 1 on warn/error
python -m src.cli_analyze input.csv -n 50000    # cap rows scanned

Findings are grouped by destination tool so the user can see at a glance
which tool to open next. Read-only; exit code is 0 unless --strict finds a
warn/error (exit 1) or the input file is missing (exit 2).
The CLI keeps its own tool-id -> display-name map so it doesn't depend on
the GUI module.

7 tests cover: clean-file passthrough, dirty-file table, --json round-trip,
missing-file (exit 2), --strict exit code (dirty and clean inputs),
--sample-rows cap.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-29 15:53:11 +00:00
parent edf6ccf90b
commit 5c62fb6117
2 changed files with 255 additions and 0 deletions

97
tests/test_cli_analyze.py Normal file
View File

@@ -0,0 +1,97 @@
"""Tests for src.cli_analyze — Typer CLI."""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from typer.testing import CliRunner
from src.cli_analyze import app
# Shared CLI test runner: every test below calls runner.invoke(app, [...]) to
# run the command in-process and inspect its exit code and captured output.
runner = CliRunner()
def _make_dirty(tmp_path: Path) -> Path:
    """Create ``dirty.csv`` inside *tmp_path* and return its path.

    The file is written as raw bytes so the leading byte-order mark and the
    stray spaces reach the analyzer exactly as spelled here.
    """
    payload = b"".join(
        (
            b"\xef\xbb\xbf",  # UTF-8 byte-order mark at the very start
            b" id ,Name,Email\n",  # first header cell has surrounding spaces
            b"1,Alice,Alice@Example.COM\n",  # mixed-case email address
            b"2, Bob ,bob@example.com\n",  # name cell has surrounding spaces
            b"3,N/A,carol@example.com\n",  # "N/A" placeholder in the name cell
        )
    )
    target = tmp_path / "dirty.csv"
    target.write_bytes(payload)
    return target
class TestAnalyzeCli:
    """End-to-end tests that drive the analyzer CLI in-process via ``runner``.

    Each test builds its input under pytest's ``tmp_path`` fixture, invokes
    ``app`` with command-line arguments, and checks the exit code and the
    captured output.
    """

    @staticmethod
    def _make_clean(tmp_path):
        """Write a minimal CSV that the analyzer is expected to report as clean."""
        path = tmp_path / "clean.csv"
        # Explicit encoding keeps the fixture independent of the platform default.
        path.write_text("id,name\n1,Alice\n2,Bob\n", encoding="utf-8")
        return path

    def test_clean_file_says_so(self, tmp_path):
        """A clean file exits 0 and prints the "No issues detected" message."""
        f = self._make_clean(tmp_path)
        result = runner.invoke(app, [str(f)])
        assert result.exit_code == 0
        assert "No issues detected" in result.stdout

    def test_dirty_file_lists_findings(self, tmp_path):
        """A dirty file exits 0 (no --strict) and renders a findings table."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f)])
        assert result.exit_code == 0
        # The Rich table breaks lines; assert on stable substrings instead of
        # full finding ids.
        assert "Text Cleaner" in result.stdout
        assert "Missing Value" in result.stdout
        # Severity column is rendered.
        assert "warn" in result.stdout

    def test_json_output_round_trips(self, tmp_path):
        """--json prints a non-empty JSON array of finding dicts."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f), "--json"])
        assert result.exit_code == 0
        data = json.loads(result.stdout)
        assert isinstance(data, list)
        assert len(data) > 0
        ids = {item["id"] for item in data}
        assert "dirty_column_headers" in ids or "whitespace_padding" in ids
        # Each finding has the documented shape.
        required = {"id", "severity", "tool", "count", "description", "samples"}
        for finding in data:
            assert required <= set(finding)

    def test_missing_file_exits_2(self, tmp_path):
        """A nonexistent input path exits 2 with a "not found" message."""
        result = runner.invoke(app, [str(tmp_path / "nope.csv")])
        assert result.exit_code == 2
        # result.output is always available and includes stderr text whether
        # the runner mixes the streams (older Click) or captures them
        # separately (Click 8.2+). Reading result.stderr directly raises
        # ValueError on older Click when stderr was not captured separately.
        assert "not found" in result.output.lower()

    def test_strict_exits_1_on_warnings(self, tmp_path):
        """--strict turns warn/error findings into exit code 1."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f), "--strict", "--json"])
        # JSON output is still printed, but exit code is 1 because warns exist.
        assert result.exit_code == 1
        data = json.loads(result.stdout)
        assert any(item["severity"] in ("warn", "error") for item in data)

    def test_strict_exits_0_on_clean(self, tmp_path):
        """--strict still exits 0 when the file is clean."""
        f = self._make_clean(tmp_path)
        result = runner.invoke(app, [str(f), "--strict"])
        assert result.exit_code == 0

    def test_sample_rows_caps_scan(self, tmp_path):
        """--sample-rows limits how many rows are scanned for findings."""
        # Build a file where ONLY rows past 100 have NBSP padding; with
        # --sample-rows 50 we should miss it.
        rows = ["id,name"]
        rows += [f"{i},Alice" for i in range(1, 101)]
        # Spell the no-break space as an escape: a literal U+00A0 is invisible
        # and is easily normalised to a plain space by editors or formatters,
        # which would silently defeat this test.
        rows += [f"{i},Alice\u00a0" for i in range(101, 200)]
        f = tmp_path / "big.csv"
        f.write_text("\n".join(rows) + "\n", encoding="utf-8")

        capped = runner.invoke(app, [str(f), "--sample-rows", "50", "--json"])
        full = runner.invoke(app, [str(f), "--sample-rows", "200", "--json"])
        # Fail with the CLI's own output rather than a JSONDecodeError if
        # either run did not complete normally.
        assert capped.exit_code == 0, capped.output
        assert full.exit_code == 0, full.output

        capped_ids = {x["id"] for x in json.loads(capped.stdout)}
        full_ids = {x["id"] for x in json.loads(full.stdout)}
        assert "nbsp_or_unicode_whitespace" not in capped_ids
        assert "nbsp_or_unicode_whitespace" in full_ids