"""Integration tests for the CLI via Typer's CliRunner.""" import pytest from pathlib import Path from typer.testing import CliRunner from src.cli import app runner = CliRunner() class TestCliPreview: def test_preview_default(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv)]) assert result.exit_code == 0 assert "preview" in result.output.lower() or "Rows in" in result.output def test_preview_shows_row_counts(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv)]) assert result.exit_code == 0 assert "Rows in" in result.output assert "Rows out" in result.output def test_file_not_found(self): result = runner.invoke(app, ["/tmp/nonexistent_xyz_abc.csv"]) assert result.exit_code != 0 assert "not found" in result.output.lower() class TestCliApply: def test_apply_writes_output(self, tmp_csv, tmp_path): out = tmp_path / "output.csv" result = runner.invoke(app, [str(tmp_csv), "--apply", "-o", str(out)]) assert result.exit_code == 0 assert out.exists() def test_apply_default_output_name(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv), "--apply"]) assert result.exit_code == 0 expected = tmp_csv.parent / f"{tmp_csv.stem}_deduplicated.csv" assert expected.exists() def test_apply_creates_removed_file(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv), "--apply"]) assert result.exit_code == 0 removed = tmp_csv.parent / f"{tmp_csv.stem}_removed.csv" # May or may not exist depending on whether duplicates were found # with default auto-detect on simple_df class TestCliFuzzy: def test_fuzzy_flag(self, tmp_csv): result = runner.invoke(app, [ str(tmp_csv), "--fuzzy", "name", "--threshold", "80", ]) assert result.exit_code == 0 def test_subset_flag(self, tmp_csv): result = runner.invoke(app, [ str(tmp_csv), "--subset", "email", ]) assert result.exit_code == 0 def test_bad_column_error(self, tmp_csv): result = runner.invoke(app, [ str(tmp_csv), "--subset", "nonexistent_column", ]) assert result.exit_code != 0 assert "not found" in result.output.lower() class TestCliConfig: def test_save_and_load_config(self, tmp_csv, tmp_path): cfg_path = tmp_path / "my_config.json" # Save result = runner.invoke(app, [ str(tmp_csv), "--subset", "email", "--save-config", str(cfg_path), ]) assert result.exit_code == 0 assert cfg_path.exists() # Load and apply result = runner.invoke(app, [ str(tmp_csv), "--config", str(cfg_path), "--apply", ]) assert result.exit_code == 0 class TestCliSurvivor: def test_survivor_last(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv), "--survivor", "last"]) assert result.exit_code == 0 def test_survivor_most_complete(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv), "--survivor", "most-complete"]) assert result.exit_code == 0 def test_invalid_survivor(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv), "--survivor", "bogus"]) assert result.exit_code != 0 class TestCliMerge: def test_merge_flag(self, tmp_csv): result = runner.invoke(app, [str(tmp_csv), "--merge", "--apply"]) assert result.exit_code == 0 class TestCliSampleData: def test_sample_preview(self, sample_csv_path): result = runner.invoke(app, [str(sample_csv_path)]) assert result.exit_code == 0 assert "Rows in: 50" in result.output # Should find duplicates assert "Removed:" in result.output def test_sample_apply(self, sample_csv_path, tmp_path): out = tmp_path / "deduped.csv" result = runner.invoke(app, [ str(sample_csv_path), "--apply", "-o", str(out), ]) assert result.exit_code == 0 assert out.exists() import pandas as pd df = pd.read_csv(out, encoding="utf-8-sig") # Should have fewer than 50 rows assert len(df) < 50 def test_sample_fuzzy_with_merge(self, sample_csv_path, tmp_path): out = tmp_path / "fuzzy_merged.csv" result = runner.invoke(app, [ str(sample_csv_path), "--fuzzy", "customer_name", "--threshold", "80", "--merge", "--apply", "-o", str(out), ]) assert result.exit_code == 0 assert out.exists() class TestCliHelp: def test_help(self): result = runner.invoke(app, ["--help"]) assert result.exit_code == 0 assert "--apply" in result.output