feat: add documentation, Streamlit GUI, and full source tree
- Rewrite README.md with project overview, quick-start, and CLI summary
- Add docs/CLI-REFERENCE.md with full flag reference and 8 recipe sections
- Add docs/DEVELOPER.md with architecture, data flow, and extension guides
- Rewrite src/core/__init__.py with public API exports and module docstring
- Add Streamlit GUI (src/gui/) with file upload, advanced options, interactive match group review with side-by-side diff, and download buttons
- Add .gitignore, requirements.txt, all source code, tests, and sample data
- Add streamlit to requirements.txt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
47
tests/conftest.py
Normal file
47
tests/conftest.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Shared test fixtures."""
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
|
||||
# Sample input files live in the "samples" directory that sits beside the
# directory containing this module.
SAMPLES_DIR = Path(__file__).parent.parent.joinpath("samples")
|
||||
|
||||
|
||||
@pytest.fixture
def sample_csv_path():
    """Return the path of ``messy_sales.csv`` inside ``SAMPLES_DIR``."""
    return SAMPLES_DIR.joinpath("messy_sales.csv")
|
||||
|
||||
|
||||
@pytest.fixture
def sample_df(sample_csv_path):
    """Load the sample CSV into a DataFrame of raw strings.

    Every column is read with ``dtype=str`` and pandas' default NA markers
    are switched off (``keep_default_na=False``), so values are kept as the
    text found in the file.
    """
    read_options = {"dtype": str, "keep_default_na": False}
    return pd.read_csv(sample_csv_path, **read_options)
|
||||
|
||||
|
||||
@pytest.fixture
def simple_df():
    """Build a five-row DataFrame containing obvious duplicates.

    Three of the rows are the same person ("Alice") spelled with different
    letter casing but sharing one email and phone number; the remaining two
    rows are distinct people.
    """
    columns = ("name", "email", "phone")
    records = [
        ("Alice", "alice@test.com", "555-1234"),
        ("alice", "alice@test.com", "555-1234"),
        ("Bob", "bob@test.com", "555-5678"),
        ("Charlie", "charlie@test.com", "555-9012"),
        ("ALICE", "alice@test.com", "555-1234"),
    ]
    # Transpose the row records into the column -> list-of-values mapping
    # that the DataFrame constructor receives.
    by_column = {
        column: list(values)
        for column, values in zip(columns, zip(*records))
    }
    return pd.DataFrame(by_column)
|
||||
|
||||
|
||||
@pytest.fixture
def merge_df():
    """Build a DataFrame of partial records that benefit from a merge.

    The two "John Doe" rows share name and email, but one has only a phone
    number and the other only an address (the missing cell is an empty
    string). The "Jane Smith" row is fully populated.
    """
    columns = ("name", "email", "phone", "address")
    records = [
        ("John Doe", "john@test.com", "555-1111", ""),
        ("John Doe", "john@test.com", "", "123 Main St"),
        ("Jane Smith", "jane@test.com", "555-3333", "456 Oak Ave"),
    ]
    # Transpose the row records into the column -> list-of-values mapping
    # that the DataFrame constructor receives.
    by_column = {
        column: list(values)
        for column, values in zip(columns, zip(*records))
    }
    return pd.DataFrame(by_column)
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_csv(tmp_path, simple_df):
    """Save ``simple_df`` as ``test_input.csv`` under ``tmp_path``.

    The CSV is written without the index column, and the path of the
    written file is returned.
    """
    csv_file = tmp_path.joinpath("test_input.csv")
    simple_df.to_csv(csv_file, index=False)
    return csv_file
|
||||
Reference in New Issue
Block a user