Files
datatools-dev/tests/test_config.py
Michael b871ab24fc feat: add documentation, Streamlit GUI, and full source tree
- Rewrite README.md with project overview, quick-start, and CLI summary
- Add docs/CLI-REFERENCE.md with full flag reference and 8 recipe sections
- Add docs/DEVELOPER.md with architecture, data flow, and extension guides
- Rewrite src/core/__init__.py with public API exports and module docstring
- Add Streamlit GUI (src/gui/) with file upload, advanced options, interactive
  match group review with side-by-side diff, and download buttons
- Add .gitignore, requirements.txt, all source code, tests, and sample data
- Add streamlit to requirements.txt

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-28 23:06:39 +00:00

103 lines
3.5 KiB
Python

"""Tests for src.core.config — save/load configuration profiles."""
import json
import pytest
from pathlib import Path
from src.core.config import (
DeduplicationConfig,
StrategyConfig,
ColumnStrategyConfig,
)
from src.core.dedup import Algorithm, SurvivorRule
from src.core.normalizers import NormalizerType
class TestDeduplicationConfig:
    """Tests for ``DeduplicationConfig``.

    Covers the default profile, dict and file round-trips, conversion of
    the string-based settings into strategy / survivor-rule objects, and
    the shape of the JSON written to disk.
    """

    def test_default(self):
        """The default profile keeps the first record, no merge, no strategies."""
        cfg = DeduplicationConfig.default()

        assert cfg.survivor_rule == "first"
        assert cfg.merge is False
        assert cfg.strategies == []

    def test_to_dict_roundtrip(self):
        """``to_dict`` followed by ``from_dict`` preserves the checked fields."""
        cfg = DeduplicationConfig(
            strategies=[
                StrategyConfig(columns=[
                    ColumnStrategyConfig(
                        column="email",
                        algorithm="exact",
                        threshold=100.0,
                        normalizer="email",
                    ),
                ]),
            ],
            survivor_rule="most_complete",
            merge=True,
        )

        d = cfg.to_dict()
        cfg2 = DeduplicationConfig.from_dict(d)

        assert cfg2.survivor_rule == "most_complete"
        assert cfg2.merge is True
        assert len(cfg2.strategies) == 1
        assert cfg2.strategies[0].columns[0].column == "email"

    def test_to_file_from_file(self, tmp_path):
        """A config written with ``to_file`` is read back by ``from_file``."""
        cfg = DeduplicationConfig(
            strategies=[
                StrategyConfig(columns=[
                    ColumnStrategyConfig(
                        column="name",
                        algorithm="jaro_winkler",
                        threshold=85.0,
                        normalizer="name",
                    ),
                ]),
            ],
            survivor_rule="last",
        )
        # tmp_path is pytest's per-test temporary directory fixture.
        path = tmp_path / "test_config.json"

        cfg.to_file(path)
        loaded = DeduplicationConfig.from_file(path)

        assert loaded.survivor_rule == "last"
        assert len(loaded.strategies) == 1
        assert loaded.strategies[0].columns[0].algorithm == "jaro_winkler"

    def test_to_strategies(self):
        """``to_strategies`` yields one strategy with enum-typed column settings."""
        cfg = DeduplicationConfig(
            strategies=[
                StrategyConfig(columns=[
                    ColumnStrategyConfig(
                        column="email",
                        algorithm="exact",
                        threshold=100.0,
                        normalizer="email",
                    ),
                    ColumnStrategyConfig(
                        column="phone",
                        algorithm="exact",
                        threshold=100.0,
                        normalizer="phone",
                    ),
                ]),
            ],
        )

        strats = cfg.to_strategies()

        assert strats is not None
        assert len(strats) == 1
        assert len(strats[0].column_strategies) == 2
        # The string names given above must come back as enum members.
        assert strats[0].column_strategies[0].algorithm == Algorithm.EXACT
        assert strats[0].column_strategies[0].normalizer == NormalizerType.EMAIL

    def test_to_strategies_empty(self):
        """With no strategies configured, ``to_strategies`` returns ``None``."""
        cfg = DeduplicationConfig.default()

        assert cfg.to_strategies() is None

    def test_to_survivor_rule(self):
        """The ``"most_complete"`` name maps to ``SurvivorRule.KEEP_MOST_COMPLETE``."""
        cfg = DeduplicationConfig(survivor_rule="most_complete")

        assert cfg.to_survivor_rule() == SurvivorRule.KEEP_MOST_COMPLETE

    def test_json_is_valid(self, tmp_path):
        """The file written by ``to_file`` parses as JSON with the expected keys."""
        cfg = DeduplicationConfig(
            strategies=[
                StrategyConfig(columns=[
                    ColumnStrategyConfig(column="x", algorithm="exact"),
                ]),
            ],
            normalize_map={"email": "email"},
        )
        path = tmp_path / "valid.json"

        cfg.to_file(path)
        # Explicit encoding keeps the read independent of the platform locale.
        data = json.loads(path.read_text(encoding="utf-8"))

        assert "strategies" in data
        assert "normalize_map" in data