"""Tests for src.core.config — save/load configuration profiles.""" import json import pytest from pathlib import Path from src.core.config import ( DeduplicationConfig, StrategyConfig, ColumnStrategyConfig, ) from src.core.dedup import Algorithm, SurvivorRule from src.core.normalizers import NormalizerType class TestDeduplicationConfig: def test_default(self): cfg = DeduplicationConfig.default() assert cfg.survivor_rule == "first" assert cfg.merge is False assert cfg.strategies == [] def test_to_dict_roundtrip(self): cfg = DeduplicationConfig( strategies=[ StrategyConfig(columns=[ ColumnStrategyConfig( column="email", algorithm="exact", threshold=100.0, normalizer="email", ), ]), ], survivor_rule="most_complete", merge=True, ) d = cfg.to_dict() cfg2 = DeduplicationConfig.from_dict(d) assert cfg2.survivor_rule == "most_complete" assert cfg2.merge is True assert len(cfg2.strategies) == 1 assert cfg2.strategies[0].columns[0].column == "email" def test_to_file_from_file(self, tmp_path): cfg = DeduplicationConfig( strategies=[ StrategyConfig(columns=[ ColumnStrategyConfig(column="name", algorithm="jaro_winkler", threshold=85.0, normalizer="name"), ]), ], survivor_rule="last", ) path = tmp_path / "test_config.json" cfg.to_file(path) loaded = DeduplicationConfig.from_file(path) assert loaded.survivor_rule == "last" assert len(loaded.strategies) == 1 assert loaded.strategies[0].columns[0].algorithm == "jaro_winkler" def test_to_strategies(self): cfg = DeduplicationConfig( strategies=[ StrategyConfig(columns=[ ColumnStrategyConfig(column="email", algorithm="exact", threshold=100.0, normalizer="email"), ColumnStrategyConfig(column="phone", algorithm="exact", threshold=100.0, normalizer="phone"), ]), ], ) strats = cfg.to_strategies() assert strats is not None assert len(strats) == 1 assert len(strats[0].column_strategies) == 2 assert strats[0].column_strategies[0].algorithm == Algorithm.EXACT assert strats[0].column_strategies[0].normalizer == NormalizerType.EMAIL def test_to_strategies_empty(self): cfg = DeduplicationConfig.default() assert cfg.to_strategies() is None def test_to_survivor_rule(self): cfg = DeduplicationConfig(survivor_rule="most_complete") assert cfg.to_survivor_rule() == SurvivorRule.KEEP_MOST_COMPLETE def test_json_is_valid(self, tmp_path): cfg = DeduplicationConfig( strategies=[ StrategyConfig(columns=[ ColumnStrategyConfig(column="x", algorithm="exact"), ]), ], normalize_map={"email": "email"}, ) path = tmp_path / "valid.json" cfg.to_file(path) data = json.loads(path.read_text()) assert "strategies" in data assert "normalize_map" in data