"""Tests for the structured error-handling infrastructure. Covers: - DataToolsError base class formatting (path, column, operation, suggestion). - Specialized subclasses inherit from the right stdlib bases so existing ``except OSError`` / ``except ValueError`` handlers still catch them. - ensure_dataframe / ensure_choice raise the right structured errors. - format_for_user produces readable output for both DataTools and unrecognized exceptions. - Per-module integration: bad config / bad file / bad input each surface a helpful error rather than a deep library traceback. """ from __future__ import annotations import json from pathlib import Path import pandas as pd import pytest from src.core.errors import ( ConfigError, DataToolsError, FileAccessError, FileFormatError, InputValidationError, ensure_choice, ensure_dataframe, format_for_user, wrap_file_read, wrap_file_write, ) # --------------------------------------------------------------------------- # Base class # --------------------------------------------------------------------------- class TestDataToolsError: def test_message_only(self): err = DataToolsError("something failed") assert "something failed" in str(err) def test_full_context(self): err = DataToolsError( "could not parse", path="/tmp/foo.csv", column="email", operation="read_file", suggestion="check encoding", cause=ValueError("inner"), ) text = str(err) assert "could not parse" in text assert "read_file" in text assert "/tmp/foo.csv" in text assert "'email'" in text assert "ValueError" in text assert "check encoding" in text def test_inheritance_for_oserror_handlers(self): # FileAccessError must be catchable as OSError so callers using # the stdlib hierarchy continue to work. 
with pytest.raises(OSError): raise FileAccessError("nope", path="/tmp/x") def test_inheritance_for_valueerror_handlers(self): for cls in (InputValidationError, ConfigError, FileFormatError): with pytest.raises(ValueError): raise cls("nope") # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- class TestEnsureDataframe: def test_passes_real_df(self): ensure_dataframe(pd.DataFrame({"a": [1]}), function="x") def test_rejects_dict(self): with pytest.raises(InputValidationError, match="DataFrame"): ensure_dataframe({"a": 1}, function="my_func") def test_includes_function_name(self): try: ensure_dataframe(None, function="my_func") except InputValidationError as e: assert "my_func" in str(e) else: # pragma: no cover pytest.fail("should have raised") def test_includes_actual_type(self): try: ensure_dataframe([1, 2, 3], function="x") except InputValidationError as e: assert "list" in str(e) class TestEnsureChoice: def test_passes_valid(self): ensure_choice("a", name="mode", choices=["a", "b"]) def test_rejects_invalid(self): with pytest.raises(InputValidationError, match="Invalid mode"): ensure_choice("c", name="mode", choices=["a", "b"]) def test_lists_choices_in_message(self): try: ensure_choice("c", name="mode", choices=["a", "b"]) except InputValidationError as e: assert "'a'" in str(e) and "'b'" in str(e) class TestWrapFileHelpers: def test_wrap_read_keeps_cause(self): inner = OSError("disk error") wrapped = wrap_file_read("/tmp/x", "read_file", inner) assert wrapped.cause is inner assert "/tmp/x" in str(wrapped) def test_wrap_write_permission_hint(self): inner = PermissionError("no perm") wrapped = wrap_file_write("/tmp/x", "save", inner) # Permission failures get a Windows-aware suggestion assert "Windows" in str(wrapped) or "permission" in str(wrapped).lower() # --------------------------------------------------------------------------- # format_for_user 
# ---------------------------------------------------------------------------


class TestFormatForUser:
    """Rendering of structured and plain exceptions by format_for_user."""

    def test_datatools_error(self):
        """Both the message and the suggestion appear in the output."""
        rendered = format_for_user(
            InputValidationError("bad date_order", suggestion="use MDY or DMY")
        )
        assert "bad date_order" in rendered
        assert "use MDY or DMY" in rendered

    def test_with_context_prefix(self):
        """The ``context`` text leads the output, followed by the error type."""
        rendered = format_for_user(
            ValueError("inner"), context="Failed to read upload"
        )
        assert rendered.startswith("Failed to read upload")
        assert "ValueError" in rendered

    def test_unrecognized_exception(self):
        """A plain exception is rendered with its type name and message."""
        rendered = format_for_user(RuntimeError("oops"))
        assert "RuntimeError" in rendered
        assert "oops" in rendered


# ---------------------------------------------------------------------------
# Integration — every public entry point surfaces structured errors
# ---------------------------------------------------------------------------


class TestIntegration:
    """End-to-end checks that public entry points raise structured errors."""

    def test_io_read_missing_file_is_structured(self, tmp_path):
        """Reading a missing file raises FileAccessError naming the location."""
        from src.core.io import read_file

        target = tmp_path / "missing.csv"
        with pytest.raises(FileAccessError) as caught:
            read_file(target)

        text = str(caught.value)
        assert "Input file not found" in text
        assert str(tmp_path) in text
        assert any(hint in text for hint in ("exists", "does NOT exist"))

    def test_io_write_to_missing_dir(self, tmp_path):
        """Writing into a missing directory raises a wrapped FileAccessError."""
        from src.core.io import write_file

        # A non-existent target directory must surface as FileAccessError,
        # not a raw FileNotFoundError, so the user sees the path and a
        # recovery hint.
        frame = pd.DataFrame({"a": [1]})
        destination = tmp_path / "no_such_dir" / "out.csv"
        with pytest.raises(FileAccessError) as caught:
            write_file(frame, destination)

        text = str(caught.value)
        assert "Could not write" in text
        assert "no_such_dir" in text

    def test_config_bad_json(self, tmp_path):
        """Malformed JSON config raises ConfigError mentioning a line."""
        from src.core.config import DeduplicationConfig

        config_file = tmp_path / "bad.json"
        config_file.write_text("{not json")

        with pytest.raises(ConfigError) as caught:
            DeduplicationConfig.from_file(config_file)

        text = str(caught.value)
        assert "Invalid JSON" in text
        assert "line" in text

    def test_config_bad_algorithm_includes_strategy_index(self, tmp_path):
        """An unknown algorithm is reported with its column and strategy index."""
        from src.core.config import DeduplicationConfig

        column_spec = {
            "column": "name",
            "algorithm": "not_a_real_algo",
            "threshold": 90.0,
        }
        payload = {"strategies": [{"columns": [column_spec]}]}
        config_file = tmp_path / "cfg.json"
        config_file.write_text(json.dumps(payload))

        # Loading succeeds; the bad algorithm is only rejected on conversion.
        config = DeduplicationConfig.from_file(config_file)
        with pytest.raises(ConfigError) as caught:
            config.to_strategies()

        text = str(caught.value)
        assert "not_a_real_algo" in text
        assert "name" in text  # column name
        assert "strategy[0]" in text  # strategy index

    def test_standardize_options_bad_field_type_includes_column(self):
        """An unknown field type is reported together with its column name."""
        from src.core.format_standardize import StandardizeOptions

        raw_options = {"column_types": {"my_col": "made_up"}}
        with pytest.raises(ConfigError) as caught:
            StandardizeOptions.from_dict(raw_options)

        text = str(caught.value)
        assert "my_col" in text
        assert "made_up" in text

    def test_standardize_dataframe_unknown_column(self):
        """A column missing from the frame is reported with the real columns."""
        from src.core.format_standardize import (
            FieldType,
            StandardizeOptions,
            standardize_dataframe,
        )

        frame = pd.DataFrame({"name": ["a"]})
        options = StandardizeOptions(column_types={"missing": FieldType.DATE})

        with pytest.raises(InputValidationError) as caught:
            standardize_dataframe(frame, options)

        text = str(caught.value)
        assert "missing" in text
        assert "['name']" in text