"""Shared test fixtures.""" import pandas as pd import pytest from pathlib import Path SAMPLES_DIR = Path(__file__).parent.parent / "samples" @pytest.fixture def sample_csv_path(): return SAMPLES_DIR / "messy_sales.csv" @pytest.fixture def sample_df(sample_csv_path): return pd.read_csv(sample_csv_path, dtype=str, keep_default_na=False) @pytest.fixture def simple_df(): """Small DataFrame with obvious duplicates for unit testing.""" return pd.DataFrame({ "name": ["Alice", "alice", "Bob", "Charlie", "ALICE"], "email": ["alice@test.com", "alice@test.com", "bob@test.com", "charlie@test.com", "alice@test.com"], "phone": ["555-1234", "555-1234", "555-5678", "555-9012", "555-1234"], }) @pytest.fixture def merge_df(): """DataFrame with partial records that benefit from merge.""" return pd.DataFrame({ "name": ["John Doe", "John Doe", "Jane Smith"], "email": ["john@test.com", "john@test.com", "jane@test.com"], "phone": ["555-1111", "", "555-3333"], "address": ["", "123 Main St", "456 Oak Ave"], }) @pytest.fixture def tmp_csv(tmp_path, simple_df): """Write simple_df to a temp CSV and return the path.""" path = tmp_path / "test_input.csv" simple_df.to_csv(path, index=False) return path