- Rewrite README.md with project overview, quick-start, and CLI summary - Add docs/CLI-REFERENCE.md with full flag reference and 8 recipe sections - Add docs/DEVELOPER.md with architecture, data flow, and extension guides - Rewrite src/core/__init__.py with public API exports and module docstring - Add Streamlit GUI (src/gui/) with file upload, advanced options, interactive match group review with side-by-side diff, and download buttons - Add .gitignore, requirements.txt, all source code, tests, and sample data - Add streamlit to requirements.txt Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
"""Shared test fixtures."""
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
# Sample data directory: "samples" inside the parent of the directory that
# contains this file.
SAMPLES_DIR = Path(__file__).parent.parent.joinpath("samples")
|
|
|
|
|
|
@pytest.fixture
def sample_csv_path():
    """Return the path to ``messy_sales.csv`` inside ``SAMPLES_DIR``."""
    return SAMPLES_DIR.joinpath("messy_sales.csv")
|
|
|
|
|
|
@pytest.fixture
def sample_df(sample_csv_path):
    """Load the sample CSV into a DataFrame with all columns read as ``str``.

    Default NaN detection is switched off (``keep_default_na=False``), so the
    cell text from the file is kept as strings instead of being turned into
    missing values.
    """
    read_options = {"dtype": str, "keep_default_na": False}
    return pd.read_csv(sample_csv_path, **read_options)
|
|
|
|
|
|
@pytest.fixture
def simple_df():
    """Return a five-row DataFrame containing easy-to-spot duplicates.

    Rows 0, 1 and 4 share the same email and phone and differ only in the
    letter case of ``name`` ("Alice", "alice", "ALICE"); the "Bob" and
    "Charlie" rows are unique.
    """
    names = ["Alice", "alice", "Bob", "Charlie", "ALICE"]
    emails = [
        "alice@test.com",
        "alice@test.com",
        "bob@test.com",
        "charlie@test.com",
        "alice@test.com",
    ]
    phones = ["555-1234", "555-1234", "555-5678", "555-9012", "555-1234"]

    columns = {"name": names, "email": emails, "phone": phones}
    return pd.DataFrame(columns)
|
|
|
|
|
|
@pytest.fixture
def merge_df():
    """Return a three-row DataFrame whose duplicate rows are each incomplete.

    The two "John Doe" rows share an email: the first has a phone but an
    empty address, the second has an address but an empty phone. The
    "Jane Smith" row has every field filled in.
    """
    column_names = ["name", "email", "phone", "address"]
    rows = [
        ("John Doe", "john@test.com", "555-1111", ""),
        ("John Doe", "john@test.com", "", "123 Main St"),
        ("Jane Smith", "jane@test.com", "555-3333", "456 Oak Ave"),
    ]
    return pd.DataFrame(rows, columns=column_names)
|
|
|
|
|
|
@pytest.fixture
def tmp_csv(tmp_path, simple_df):
    """Save ``simple_df`` as ``test_input.csv`` under ``tmp_path``.

    The CSV is written without the index column; the path of the written
    file is returned.
    """
    csv_file = tmp_path.joinpath("test_input.csv")
    simple_df.to_csv(csv_file, index=False)
    return csv_file
|