"""Tests for src.core.reconcile — two-source matching engine."""

import pandas as pd
import pytest

from src.core.reconcile import (
    ReconcileOptions,
    ReconcileResult,
    reconcile,
)


def _bank(rows):
    """Build a left-side frame with columns ``date``, ``amount``, ``desc``."""
    return pd.DataFrame(rows, columns=["date", "amount", "desc"])


def _ledger(rows):
    """Build a right-side frame with columns ``posted``, ``amt``, ``memo``."""
    return pd.DataFrame(rows, columns=["posted", "amt", "memo"])


class TestExactMatch:
    """Pairing behaviour when no tolerance options are set."""

    def test_one_to_one_exact(self):
        left = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-06", 250.00, "WIDGET CO"),
        ])
        right = _ledger([
            ("2026-01-05", 100.00, "Acme Inc"),
            ("2026-01-06", 250.00, "Widget"),
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 2
        assert result.stats["unmatched_left"] == 0
        assert result.stats["unmatched_right"] == 0
        assert (result.matched["match_pass"] == "exact").all()

    def test_unmatched_left_and_right(self):
        left = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-07", 99.99, "ONLY ON LEFT"),
        ])
        right = _ledger([
            ("2026-01-05", 100.00, "Acme"),
            ("2026-01-08", 500.00, "Only on right"),
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 1
        assert result.stats["unmatched_left"] == 1
        assert result.stats["unmatched_right"] == 1
        # The unmatched rows preserve their original columns.
        assert "ONLY ON LEFT" in result.unmatched_left["desc"].tolist()
        assert "Only on right" in result.unmatched_right["memo"].tolist()

    def test_amount_only_no_date(self):
        # No date columns set — match purely on amount. Distinct
        # amounts pair off one-to-one.
        left = _bank([
            ("2026-01-01", 42.50, "A"),
            ("2026-02-15", 99.00, "B"),
        ])
        right = _ledger([
            ("2099-12-31", 42.50, "X"),
            ("1970-01-01", 99.00, "Y"),
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
        ))
        assert result.stats["matched"] == 2

    def test_identical_amounts_with_no_date_are_ambiguous(self):
        # Without a date column to disambiguate, two left rows with
        # the same amount and two right rows with the same amount
        # are genuinely undecidable — route to review.
        left = _bank([
            ("2026-01-01", 42.50, "A"),
            ("2026-02-15", 42.50, "B"),
        ])
        right = _ledger([
            ("2099-12-31", 42.50, "X"),
            ("1970-01-01", 42.50, "Y"),
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
        ))
        assert result.stats["matched"] == 0
        assert result.stats["review"] >= 2


class TestAmountTolerance:
    """Matching when ``amount_tolerance`` is set."""

    def test_amount_within_tolerance(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.02, "X")])
        # Exact pass misses (100.00 != 100.02). Tolerance pass catches it.
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            amount_tolerance=0.05,
        ))
        assert result.stats["matched"] == 1
        assert result.matched.iloc[0]["match_pass"] == "tolerance"
        assert abs(result.matched.iloc[0]["amount_diff"] - -0.02) < 1e-9

    def test_outside_tolerance_unmatched(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.50, "X")])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            amount_tolerance=0.05,
        ))
        assert result.stats["matched"] == 0
        assert result.stats["unmatched_left"] == 1
        assert result.stats["unmatched_right"] == 1


class TestDateWindow:
    """Matching when ``date_tolerance_days`` is set."""

    def test_date_within_window(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-07", 100.00, "X")])  # 2 days later
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            date_tolerance_days=3,
        ))
        assert result.stats["matched"] == 1
        assert result.matched.iloc[0]["date_diff_days"] == -2

    def test_date_outside_window(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-20", 100.00, "X")])  # 15 days later
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            date_tolerance_days=5,
        ))
        assert result.stats["matched"] == 0


class TestSignInversion:
    """Effect of ``invert_right_sign`` on opposite-signed amounts."""

    def test_invert_right_sign(self):
        # Bank: deposit = +100 ; Ledger: deposit recorded as -100.
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", -100.00, "X")])
        # Without inversion: no match.
        r1 = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert r1.stats["matched"] == 0
        # With inversion: match.
        r2 = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            invert_right_sign=True,
        ))
        assert r2.stats["matched"] == 1


class TestAmbiguity:
    """Tied candidates are expected in review; unique pairs are expected to match."""

    def test_two_equal_candidates_go_to_review(self):
        # One left row, two identical right rows → ambiguous.
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([
            ("2026-01-05", 100.00, "X"),
            ("2026-01-05", 100.00, "Y"),
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 0
        assert result.stats["review"] == 2  # both candidate pairs flagged
        # Left was consumed by the ambiguity, both rights too.
        assert result.stats["unmatched_left"] == 0
        assert result.stats["unmatched_right"] == 0

    def test_uniquely_better_match_wins(self):
        # Two left rows, two right rows; one pair is a closer match.
        left = _bank([
            ("2026-01-05", 100.00, "A"),
            ("2026-01-05", 100.05, "B"),
        ])
        right = _ledger([
            ("2026-01-05", 100.00, "X"),  # closer to A
            ("2026-01-05", 100.05, "Y"),  # closer to B
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            amount_tolerance=0.10,
        ))
        # Both should pair uniquely on the exact pass (penalty inside
        # exact pass breaks the symmetric near-ties).
        assert result.stats["matched"] == 2


class TestKeyMatch:
    """Matching on ``left_keys`` / ``right_keys`` reference columns."""

    def test_reference_number_authoritative(self):
        # Same check number, same amount, different posting dates.
        # Key match should pair them even though dates differ.
        left = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        right = pd.DataFrame([
            {"posted": "2026-01-12", "amt": 100.00, "ref": "1042"},
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            left_keys=["check_no"], right_keys=["ref"],
            date_tolerance_days=0,  # exact-pass would miss
        ))
        assert result.stats["matched"] == 1
        assert result.matched.iloc[0]["match_pass"] == "key"

    def test_key_requires_amount_to_tie(self):
        # Same ref but mismatched amounts → not a key match.
        left = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        right = pd.DataFrame([
            {"posted": "2026-01-05", "amt": 200.00, "ref": "1042"},
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            left_keys=["check_no"], right_keys=["ref"],
        ))
        assert result.stats["matched"] == 0


class TestInputValidation:
    """Invalid option combinations are expected to raise ``ValueError``."""

    def test_missing_amount_columns(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="left_amount"):
            reconcile(left, right, ReconcileOptions(
                right_amount="amt",
            ))

    def test_left_date_without_right_date(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="both be set or both be None"):
            reconcile(left, right, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                left_date="date",  # right_date missing
            ))

    def test_mismatched_key_lengths(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="same length"):
            reconcile(left, right, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                left_keys=["a", "b"], right_keys=["x"],
            ))

    def test_negative_tolerance_rejected(self):
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="amount_tolerance"):
            reconcile(left, right, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                amount_tolerance=-0.01,
            ))


class TestUnparseableInputs:
    """Rows whose amount cannot be parsed as a number."""

    def test_non_numeric_amount_falls_through(self):
        # Left row with garbage amount should land in unmatched_left
        # (it can't participate in matching but must be visible).
        left = pd.DataFrame([
            {"date": "2026-01-05", "amount": "not a number", "desc": "BAD"},
            {"date": "2026-01-05", "amount": 100.00, "desc": "OK"},
        ])
        right = _ledger([("2026-01-05", 100.00, "X")])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 1
        # The garbage row appears in unmatched_left.
        assert "BAD" in result.unmatched_left["desc"].tolist()


class TestResultShape:
    """Shape and contents of the returned result frames and stats."""

    def test_matched_carries_both_sides(self):
        left = _bank([("2026-01-05", 100.00, "ACME")])
        right = _ledger([("2026-01-05", 100.00, "Acme Inc")])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        row = result.matched.iloc[0]
        assert row["left_desc"] == "ACME"
        assert row["right_memo"] == "Acme Inc"
        assert row["left_amount"] == 100.00
        assert row["right_amt"] == 100.00

    def test_empty_inputs_return_empty_result(self):
        left = _bank([])
        right = _ledger([])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 0
        assert result.matched.empty
        assert result.unmatched_left.empty
        assert result.unmatched_right.empty

    def test_one_side_empty_keeps_other_unmatched(self):
        # A reconcile against an empty ledger should surface every
        # left row as unmatched, not crash. Mirror case for the
        # other side.
        left = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-06", 250.00, "WIDGET"),
        ])
        right = _ledger([])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 0
        assert result.stats["unmatched_left"] == 2
        assert result.stats["unmatched_right"] == 0

    def test_match_pass_tagged_for_key_pass(self):
        # Pass name on each matched row tells the user *why* the engine
        # accepted the pair — verify the "key" label propagates.
        left = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        right = pd.DataFrame([
            {"posted": "2099-12-31", "amt": 100.00, "ref": "1042"},
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            left_keys=["check_no"], right_keys=["ref"],
        ))
        assert result.stats["matched"] == 1
        assert result.matched.iloc[0]["match_pass"] == "key"


class TestAdditionalValidation:
    """Boundary cases for ``_validate_options`` not pinned elsewhere."""

    def test_unknown_left_amount_column_raises(self):
        left = pd.DataFrame([{"date": "2026-01-05", "amount": 1.0}])
        right = pd.DataFrame([{"posted": "2026-01-05", "amt": 1.0}])
        with pytest.raises(ValueError, match="not in left DataFrame"):
            reconcile(left, right, ReconcileOptions(
                left_amount="NOT_A_COLUMN", right_amount="amt",
            ))

    def test_unknown_right_amount_column_raises(self):
        left = pd.DataFrame([{"date": "2026-01-05", "amount": 1.0}])
        right = pd.DataFrame([{"posted": "2026-01-05", "amt": 1.0}])
        with pytest.raises(ValueError, match="not in right DataFrame"):
            reconcile(left, right, ReconcileOptions(
                left_amount="amount", right_amount="NOT_A_COLUMN",
            ))

    def test_unknown_left_key_column_raises(self):
        left = pd.DataFrame([{"date": "2026-01-05", "amount": 1.0}])
        right = pd.DataFrame([{"posted": "2026-01-05", "amt": 1.0}])
        with pytest.raises(ValueError, match="left key column"):
            reconcile(left, right, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                left_keys=["nope"], right_keys=["nope"],
            ))

    def test_negative_date_tolerance_rejected(self):
        left = pd.DataFrame([{"date": "2026-01-05", "amount": 1.0}])
        right = pd.DataFrame([{"posted": "2026-01-05", "amt": 1.0}])
        with pytest.raises(ValueError, match="date_tolerance_days"):
            reconcile(left, right, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                left_date="date", right_date="posted",
                date_tolerance_days=-1,
            ))

    def test_desc_min_score_out_of_range_rejected(self):
        left = pd.DataFrame([{"date": "2026-01-05", "amount": 1.0}])
        right = pd.DataFrame([{"posted": "2026-01-05", "amt": 1.0}])
        with pytest.raises(ValueError, match="desc_min_score"):
            reconcile(left, right, ReconcileOptions(
                left_amount="amount", right_amount="amt",
                desc_min_score=150,
            ))


class TestImmutability:
    """The engine must NOT mutate the caller's DataFrames — callers rely
    on holding onto their input frames after the call (the GUI Reconciler
    page re-renders previews from them)."""

    def test_left_df_columns_unchanged(self):
        left = _bank([("2026-01-05", 100.00, "ACME")])
        right = _ledger([("2026-01-05", 100.00, "Acme Inc")])
        before_cols = list(left.columns)
        # Deep snapshot taken before the call. Comparing id(left) before
        # and after could never fail (a callee cannot rebind the caller's
        # local name), so in-place mutation is detected by comparing the
        # frame's contents instead.
        snapshot = left.copy(deep=True)
        reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert list(left.columns) == before_cols
        # Values, dtypes, index and column labels must all be untouched.
        pd.testing.assert_frame_equal(left, snapshot)

    def test_amounts_preserved_when_invert_right_sign_set(self):
        # Even with invert_right_sign=True, the amounts on the caller's
        # right-hand frame must be left unchanged after the call.
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", -100.00, "X")])
        original_right_amts = right["amt"].tolist()
        reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            invert_right_sign=True,
        ))
        assert right["amt"].tolist() == original_right_amts