"""Tests for src.cli_analyze — Typer CLI.""" from __future__ import annotations import json from pathlib import Path import pytest from typer.testing import CliRunner from src.cli_analyze import app runner = CliRunner() def _make_dirty(tmp_path: Path) -> Path: """Write a small CSV with a mix of detectable issues.""" f = tmp_path / "dirty.csv" f.write_bytes( b"\xef\xbb\xbf" # BOM b" id ,Name,Email\n" # padded header b"1,Alice,Alice@Example.COM\n" b"2, Bob ,bob@example.com\n" b"3,N/A,carol@example.com\n" ) return f class TestAnalyzeCli: def test_clean_file_says_so(self, tmp_path): f = tmp_path / "clean.csv" f.write_text("id,name\n1,Alice\n2,Bob\n") result = runner.invoke(app, [str(f)]) assert result.exit_code == 0 assert "No issues detected" in result.stdout def test_dirty_file_lists_findings(self, tmp_path): f = _make_dirty(tmp_path) result = runner.invoke(app, [str(f)]) assert result.exit_code == 0 # The Rich table breaks lines; assert on stable substrings instead of # full finding ids. assert "Text Cleaner" in result.stdout assert "Missing Value" in result.stdout # Severity column is rendered. assert "warn" in result.stdout def test_json_output_round_trips(self, tmp_path): f = _make_dirty(tmp_path) result = runner.invoke(app, [str(f), "--json"]) assert result.exit_code == 0 data = json.loads(result.stdout) assert isinstance(data, list) assert len(data) > 0 ids = {item["id"] for item in data} assert "dirty_column_headers" in ids or "whitespace_padding" in ids # Each finding has the documented shape. for f in data: assert {"id", "severity", "tool", "count", "description", "samples"} <= set(f) def test_missing_file_exits_2(self, tmp_path): result = runner.invoke(app, [str(tmp_path / "nope.csv")]) assert result.exit_code == 2 assert "not found" in result.stdout.lower() or "not found" in (result.stderr or "") def test_strict_exits_1_on_warnings(self, tmp_path): f = _make_dirty(tmp_path) result = runner.invoke(app, [str(f), "--strict", "--json"]) # JSON output is still printed, but exit code is 1 because warns exist. assert result.exit_code == 1 data = json.loads(result.stdout) assert any(item["severity"] in ("warn", "error") for item in data) def test_strict_exits_0_on_clean(self, tmp_path): f = tmp_path / "clean.csv" f.write_text("id,name\n1,Alice\n2,Bob\n") result = runner.invoke(app, [str(f), "--strict"]) assert result.exit_code == 0 def test_sample_rows_caps_scan(self, tmp_path): # Build a file where ONLY rows past 100 have NBSP padding; with # --sample-rows 50 we should miss it. rows = ["id,name"] for i in range(1, 101): rows.append(f"{i},Alice") for i in range(101, 200): rows.append(f"{i},Alice ") # NBSP padding f = tmp_path / "big.csv" f.write_text("\n".join(rows) + "\n", encoding="utf-8") capped = runner.invoke(app, [str(f), "--sample-rows", "50", "--json"]) full = runner.invoke(app, [str(f), "--sample-rows", "200", "--json"]) capped_ids = {x["id"] for x in json.loads(capped.stdout)} full_ids = {x["id"] for x in json.loads(full.stdout)} assert "nbsp_or_unicode_whitespace" not in capped_ids assert "nbsp_or_unicode_whitespace" in full_ids