"""DataTools deduplication engine. Public API ---------- Core: deduplicate(df, ...) -> DeduplicationResult build_default_strategies(df) -> list[MatchStrategy] Types: Algorithm, SurvivorRule, ColumnMatchStrategy, MatchStrategy MatchResult, DeduplicationResult Normalizers: get_normalizer(type) -> Callable NormalizerType normalize_email, normalize_phone, normalize_name, normalize_address, normalize_string I/O: read_file(path, ...) -> DataFrame write_file(df, path, ...) list_sheets(path) -> list[str] detect_encoding, detect_delimiter, detect_header_row Configuration: DeduplicationConfig.from_file(path) -> DeduplicationConfig DeduplicationConfig.to_file(path) """ from .dedup import ( Algorithm, ColumnMatchStrategy, DeduplicationResult, MatchResult, MatchStrategy, SurvivorRule, build_default_strategies, deduplicate, ) from .normalizers import ( NormalizerType, get_normalizer, normalize_address, normalize_email, normalize_name, normalize_phone, normalize_string, ) from .io import ( detect_delimiter, detect_encoding, detect_header_row, list_sheets, read_file, write_file, ) from .config import ( ColumnStrategyConfig, DeduplicationConfig, StrategyConfig, ) __all__ = [ # Core "deduplicate", "build_default_strategies", # Types "Algorithm", "SurvivorRule", "ColumnMatchStrategy", "MatchStrategy", "MatchResult", "DeduplicationResult", # Normalizers "NormalizerType", "get_normalizer", "normalize_email", "normalize_phone", "normalize_name", "normalize_address", "normalize_string", # I/O "read_file", "write_file", "list_sheets", "detect_encoding", "detect_delimiter", "detect_header_row", # Config "DeduplicationConfig", "StrategyConfig", "ColumnStrategyConfig", ]