Files
datatools-dev/tests/test_cli_analyze.py
Michael db5ec084da docs+code: rename tool labels everywhere
Sweep follow-up to 93e43fc. Display labels now consistent across docs,
landing pages, CLI output, code comments, docstrings, and test prose.
Five parallel surfaces touched:

- docs (EN + ES): README, USER-GUIDE, CLI-REFERENCE, and 11 internal
  design/planning docs
- landing pages: index + bookkeeper/revops/shopify-pet
- src: CLI module docstrings, _TOOL_DISPLAY dicts in cli_analyze.py
  and gui/components/_legacy.py, core module headers, every tool
  page's module docstring
- tests: class/method/module docstrings and section-header comments
- test-cases READMEs

Page slugs (1_Deduplicator etc.), tool_id strings (01_deduplicator
etc.), Python class names (TestDeduplicatorWorkflow, FeatureFlag.*),
URL paths, anchor IDs, CSS classes, and asset filenames were left
intact since they're code identifiers / structural references.

All 2033 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 19:50:09 +00:00

98 lines
3.6 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for src.cli_analyze — Typer CLI."""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from typer.testing import CliRunner
from src.cli_analyze import app
# Module-level runner shared by every test in this file; each test drives
# the CLI through ``runner.invoke(app, [...])``.
runner = CliRunner()
def _make_dirty(tmp_path: Path) -> Path:
    """Create ``dirty.csv`` inside *tmp_path* and return its path.

    The fixture is written as raw bytes so the leading BOM reaches disk
    exactly as spelled here. It packs several detectable problems into
    a three-row CSV.
    """
    payload = b"".join(
        (
            b"\xef\xbb\xbf",  # UTF-8 byte-order mark
            b" id ,Name,Email\n",  # header cell padded with spaces
            b"1,Alice,Alice@Example.COM\n",  # mixed-case email
            b"2, Bob ,bob@example.com\n",  # value padded with spaces
            b"3,N/A,carol@example.com\n",  # literal "N/A" in the Name column
        )
    )
    target = tmp_path / "dirty.csv"
    target.write_bytes(payload)
    return target
class TestAnalyzeCli:
    """Exercise the analyze CLI through ``runner.invoke(app, [...])``.

    Each test writes a small CSV under pytest's ``tmp_path`` and checks
    the exit code and/or the printed output (table or ``--json``).
    """

    def test_clean_file_says_so(self, tmp_path):
        """A file with no issues exits 0 and prints the all-clear message."""
        f = tmp_path / "clean.csv"
        f.write_text("id,name\n1,Alice\n2,Bob\n", encoding="utf-8")
        result = runner.invoke(app, [str(f)])
        assert result.exit_code == 0
        assert "No issues detected" in result.stdout

    def test_dirty_file_lists_findings(self, tmp_path):
        """A dirty file still exits 0 (no ``--strict``) and lists findings."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f)])
        assert result.exit_code == 0
        # The Rich table breaks lines; assert on stable substrings instead of
        # full finding ids.
        assert "Clean Text" in result.stdout
        assert "Fix Missing Values" in result.stdout
        # Severity column is rendered.
        assert "warn" in result.stdout

    def test_json_output_round_trips(self, tmp_path):
        """``--json`` prints a non-empty JSON list of well-formed findings."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f), "--json"])
        assert result.exit_code == 0
        data = json.loads(result.stdout)
        assert isinstance(data, list)
        assert len(data) > 0
        ids = {item["id"] for item in data}
        assert "dirty_column_headers" in ids or "whitespace_padding" in ids
        # Each finding has the documented shape. The loop variable is kept
        # distinct from ``f`` so the file path above is not shadowed.
        required_keys = {"id", "severity", "tool", "count", "description", "samples"}
        for finding in data:
            assert required_keys <= set(finding)

    def test_missing_file_exits_2(self, tmp_path):
        """A path that does not exist exits 2 with a "not found" message."""
        result = runner.invoke(app, [str(tmp_path / "nope.csv")])
        assert result.exit_code == 2
        # Check ``result.output`` rather than ``result.stderr``: depending on
        # the Click version, ``stderr`` is either captured separately or
        # raises ValueError when the streams are mixed, whereas ``output``
        # carries the message for the default runner either way. The match
        # is case-insensitive regardless of which stream the text came from.
        assert "not found" in result.output.lower()

    def test_strict_exits_1_on_warnings(self, tmp_path):
        """``--strict`` turns warn/error findings into exit code 1."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f), "--strict", "--json"])
        # JSON output is still printed, but exit code is 1 because warns exist.
        assert result.exit_code == 1
        data = json.loads(result.stdout)
        assert any(item["severity"] in ("warn", "error") for item in data)

    def test_strict_exits_0_on_clean(self, tmp_path):
        """``--strict`` on a clean file still exits 0."""
        f = tmp_path / "clean.csv"
        f.write_text("id,name\n1,Alice\n2,Bob\n", encoding="utf-8")
        result = runner.invoke(app, [str(f), "--strict"])
        assert result.exit_code == 0

    def test_sample_rows_caps_scan(self, tmp_path):
        """``--sample-rows`` limits how many rows are scanned for issues."""
        # Build a file where ONLY rows past 100 have NBSP padding; with
        # --sample-rows 50 we should miss it.
        rows = ["id,name"]
        rows.extend(f"{i},Alice" for i in range(1, 101))
        # The NBSP is written as an explicit escape: a literal U+00A0 is
        # invisible in the source and is easily replaced by a plain space,
        # which would silently defeat this test.
        rows.extend(f"{i},Alice\u00a0" for i in range(101, 200))
        f = tmp_path / "big.csv"
        f.write_text("\n".join(rows) + "\n", encoding="utf-8")
        capped = runner.invoke(app, [str(f), "--sample-rows", "50", "--json"])
        full = runner.invoke(app, [str(f), "--sample-rows", "200", "--json"])
        capped_ids = {x["id"] for x in json.loads(capped.stdout)}
        full_ids = {x["id"] for x in json.loads(full.stdout)}
        assert "nbsp_or_unicode_whitespace" not in capped_ids
        assert "nbsp_or_unicode_whitespace" in full_ids