Files
datatools-dev/tests/test_cli_analyze.py
Michael db5ec084da docs+code: rename tool labels everywhere
Sweep follow-up to 93e43fc. Display labels now consistent across docs,
landing pages, CLI output, code comments, docstrings, and test prose.
Five parallel surfaces touched:

- docs (EN + ES): README, USER-GUIDE, CLI-REFERENCE, and 11 internal
  design/planning docs
- landing pages: index + bookkeeper/revops/shopify-pet
- src: CLI module docstrings, _TOOL_DISPLAY dicts in cli_analyze.py
  and gui/components/_legacy.py, core module headers, every tool
  page's module docstring
- tests: class/method/module docstrings and section-header comments
- test-cases READMEs

Page slugs (1_Deduplicator etc.), tool_id strings (01_deduplicator
etc.), Python class names (TestDeduplicatorWorkflow, FeatureFlag.*),
URL paths, anchor IDs, CSS classes, and asset filenames were left
intact since they're code identifiers / structural references.

All 2033 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 19:50:09 +00:00

98 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for src.cli_analyze — Typer CLI."""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from typer.testing import CliRunner
from src.cli_analyze import app
# Module-level runner shared by every test below; each test drives `app`
# through runner.invoke() and inspects the returned result object.
runner = CliRunner()
def _make_dirty(tmp_path: Path) -> Path:
"""Write a small CSV with a mix of detectable issues."""
f = tmp_path / "dirty.csv"
f.write_bytes(
b"\xef\xbb\xbf" # BOM
b" id ,Name,Email\n" # padded header
b"1,Alice,Alice@Example.COM\n"
b"2, Bob ,bob@example.com\n"
b"3,N/A,carol@example.com\n"
)
return f
class TestAnalyzeCli:
    """Tests that drive the ``app`` CLI through ``runner.invoke``."""

    def test_clean_file_says_so(self, tmp_path):
        """A CSV without detectable issues exits 0 and says so."""
        f = tmp_path / "clean.csv"
        f.write_text("id,name\n1,Alice\n2,Bob\n", encoding="utf-8")
        result = runner.invoke(app, [str(f)])
        assert result.exit_code == 0
        assert "No issues detected" in result.stdout

    def test_dirty_file_lists_findings(self, tmp_path):
        """A dirty CSV still exits 0 (no --strict) and lists its findings."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f)])
        assert result.exit_code == 0
        # The Rich table breaks lines; assert on stable substrings instead of
        # full finding ids.
        assert "Clean Text" in result.stdout
        assert "Fix Missing Values" in result.stdout
        # Severity column is rendered.
        assert "warn" in result.stdout

    def test_json_output_round_trips(self, tmp_path):
        """``--json`` prints a non-empty JSON list of well-formed findings."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f), "--json"])
        assert result.exit_code == 0
        data = json.loads(result.stdout)
        assert isinstance(data, list)
        assert len(data) > 0
        ids = {item["id"] for item in data}
        assert "dirty_column_headers" in ids or "whitespace_padding" in ids
        # Each finding has the documented shape.  A dedicated loop name is
        # used so the CSV path ``f`` above is not rebound.
        required_keys = {"id", "severity", "tool", "count", "description", "samples"}
        for finding in data:
            assert required_keys <= set(finding)

    def test_missing_file_exits_2(self, tmp_path):
        """A non-existent input path exits 2 with a "not found" message."""
        result = runner.invoke(app, [str(tmp_path / "nope.csv")])
        assert result.exit_code == 2
        # ``result.output`` is used instead of ``result.stderr``: on Click
        # versions where stderr is not captured separately, touching
        # ``result.stderr`` raises ValueError rather than failing the assert.
        # Lower-casing makes the match case-insensitive for either stream.
        assert "not found" in result.output.lower()

    def test_strict_exits_1_on_warnings(self, tmp_path):
        """``--strict`` turns warn/error findings into exit code 1."""
        f = _make_dirty(tmp_path)
        result = runner.invoke(app, [str(f), "--strict", "--json"])
        # JSON output is still printed, but exit code is 1 because warns exist.
        assert result.exit_code == 1
        data = json.loads(result.stdout)
        assert any(item["severity"] in ("warn", "error") for item in data)

    def test_strict_exits_0_on_clean(self, tmp_path):
        """``--strict`` on a clean file still exits 0."""
        f = tmp_path / "clean.csv"
        f.write_text("id,name\n1,Alice\n2,Bob\n", encoding="utf-8")
        result = runner.invoke(app, [str(f), "--strict"])
        assert result.exit_code == 0

    def test_sample_rows_caps_scan(self, tmp_path):
        """``--sample-rows`` limits how many rows are scanned."""
        # Build a file where ONLY rows past 100 have NBSP padding; with
        # --sample-rows 50 we should miss it.
        rows = ["id,name"]
        rows += [f"{i},Alice" for i in range(1, 101)]
        # The NBSP is spelled as an escape: a literal U+00A0 is invisible in
        # source and is easily normalised to a plain space by editors or
        # tooling, which would silently defeat this test.
        rows += [f"{i},Alice\u00a0" for i in range(101, 200)]
        f = tmp_path / "big.csv"
        f.write_text("\n".join(rows) + "\n", encoding="utf-8")

        capped = runner.invoke(app, [str(f), "--sample-rows", "50", "--json"])
        full = runner.invoke(app, [str(f), "--sample-rows", "200", "--json"])
        # Fail on the exit code first so a CLI error is not reported as an
        # opaque JSON decode failure below.
        assert capped.exit_code == 0
        assert full.exit_code == 0

        capped_ids = {x["id"] for x in json.loads(capped.stdout)}
        full_ids = {x["id"] for x in json.loads(full.stdout)}
        assert "nbsp_or_unicode_whitespace" not in capped_ids
        assert "nbsp_or_unicode_whitespace" in full_ids