Files
datatools-dev/tests/test_reconcile.py
Michael e44af3a45e feat(reconcile): two-source reconciliation tool
Bank-feed-vs-ledger style matcher: 4-pass greedy assignment (key →
exact → tolerance → fuzzy) with ambiguous candidates routed to a
review bucket instead of arbitrary picks. CLI mirrors the
cli_text_clean preview/--apply pattern; Streamlit page registered
in the automations section.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 22:33:14 +00:00

318 lines
12 KiB
Python

"""Tests for src.core.reconcile — two-source matching engine."""
import pandas as pd
import pytest
from src.core.reconcile import (
ReconcileOptions,
ReconcileResult,
reconcile,
)
def _bank(rows: list) -> pd.DataFrame:
    """Build a bank-side ("left") fixture frame.

    Args:
        rows: Sequence of ``(date, amount, desc)`` tuples. An empty
            sequence yields an empty frame that still carries the three
            column labels.

    Returns:
        A DataFrame with columns ``date``, ``amount`` and ``desc``.
    """
    return pd.DataFrame(rows, columns=["date", "amount", "desc"])
def _ledger(rows: list) -> pd.DataFrame:
    """Build a ledger-side ("right") fixture frame.

    Args:
        rows: Sequence of ``(posted, amt, memo)`` tuples. An empty
            sequence yields an empty frame that still carries the three
            column labels.

    Returns:
        A DataFrame with columns ``posted``, ``amt`` and ``memo``.
    """
    return pd.DataFrame(rows, columns=["posted", "amt", "memo"])
class TestExactMatch:
    """Cases where both sides carry identical amounts (and dates, if set)."""

    def test_one_to_one_exact(self):
        """Two identical date/amount rows per side all pair on the exact pass."""
        bank_df = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-06", 250.00, "WIDGET CO"),
        ])
        ledger_df = _ledger([
            ("2026-01-05", 100.00, "Acme Inc"),
            ("2026-01-06", 250.00, "Widget"),
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 2
        assert outcome.stats["unmatched_left"] == 0
        assert outcome.stats["unmatched_right"] == 0
        assert (outcome.matched["match_pass"] == "exact").all()

    def test_unmatched_left_and_right(self):
        """Rows without a partner are reported per side, columns intact."""
        bank_df = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-07", 99.99, "ONLY ON LEFT"),
        ])
        ledger_df = _ledger([
            ("2026-01-05", 100.00, "Acme"),
            ("2026-01-08", 500.00, "Only on right"),
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 1
        assert outcome.stats["unmatched_left"] == 1
        assert outcome.stats["unmatched_right"] == 1
        # Leftover rows keep the column names of the frame they came from.
        assert "ONLY ON LEFT" in outcome.unmatched_left["desc"].tolist()
        assert "Only on right" in outcome.unmatched_right["memo"].tolist()

    def test_amount_only_no_date(self):
        """With no date columns configured, distinct amounts pair one-to-one."""
        # The dates are deliberately far apart: they must be ignored
        # because neither left_date nor right_date is configured.
        bank_df = _bank([
            ("2026-01-01", 42.50, "A"),
            ("2026-02-15", 99.00, "B"),
        ])
        ledger_df = _ledger([
            ("2099-12-31", 42.50, "X"),
            ("1970-01-01", 99.00, "Y"),
        ])
        opts = ReconcileOptions(left_amount="amount", right_amount="amt")

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 2

    def test_identical_amounts_with_no_date_are_ambiguous(self):
        """Duplicate amounts with nothing to disambiguate go to review."""
        # Two left rows and two right rows all share one amount and no
        # date column is configured, so no pairing is preferable to
        # another — expect zero matches and the candidates in review.
        bank_df = _bank([
            ("2026-01-01", 42.50, "A"),
            ("2026-02-15", 42.50, "B"),
        ])
        ledger_df = _ledger([
            ("2099-12-31", 42.50, "X"),
            ("1970-01-01", 42.50, "Y"),
        ])
        opts = ReconcileOptions(left_amount="amount", right_amount="amt")

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 0
        assert outcome.stats["review"] >= 2
class TestAmountTolerance:
    """Behaviour of the ``amount_tolerance`` option."""

    def test_amount_within_tolerance(self):
        """A 0.02 difference under a 0.05 tolerance matches on the tolerance pass."""
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.02, "X")])
        # Exact pass misses (100.00 != 100.02). Tolerance pass catches it.
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            amount_tolerance=0.05,
        ))
        assert result.stats["matched"] == 1
        first = result.matched.iloc[0]
        assert first["match_pass"] == "tolerance"
        # 100.00 - 100.02 is not exactly -0.02 in binary floating point,
        # so compare with an absolute tolerance. pytest.approx reports
        # both values on failure, unlike a hand-rolled abs() expression.
        assert first["amount_diff"] == pytest.approx(-0.02, abs=1e-9)

    def test_outside_tolerance_unmatched(self):
        """A 0.50 difference under a 0.05 tolerance leaves both rows unmatched."""
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", 100.50, "X")])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            amount_tolerance=0.05,
        ))
        assert result.stats["matched"] == 0
        assert result.stats["unmatched_left"] == 1
        assert result.stats["unmatched_right"] == 1
class TestDateWindow:
    """Behaviour of the ``date_tolerance_days`` option."""

    def test_date_within_window(self):
        """A ledger row posted 2 days later matches under a 3-day window."""
        bank_df = _bank([("2026-01-05", 100.00, "A")])
        ledger_df = _ledger([("2026-01-07", 100.00, "X")])  # posted 2 days after
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            date_tolerance_days=3,
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 1
        # The expected value is negative when the right-hand date is later.
        assert outcome.matched.iloc[0]["date_diff_days"] == -2

    def test_date_outside_window(self):
        """A ledger row posted 15 days later does not match under a 5-day window."""
        bank_df = _bank([("2026-01-05", 100.00, "A")])
        ledger_df = _ledger([("2026-01-20", 100.00, "X")])  # posted 15 days after
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            date_tolerance_days=5,
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 0
class TestSignInversion:
    """Behaviour of the ``invert_right_sign`` option."""

    def test_invert_right_sign(self):
        """Opposite-signed amounts match only when the right sign is inverted."""
        # The bank shows the deposit as +100; the ledger books it as -100.
        bank_df = _bank([("2026-01-05", 100.00, "A")])
        ledger_df = _ledger([("2026-01-05", -100.00, "X")])
        column_map = {
            "left_amount": "amount",
            "right_amount": "amt",
            "left_date": "date",
            "right_date": "posted",
        }

        # Default settings: +100 and -100 are different amounts.
        as_is = reconcile(bank_df, ledger_df, ReconcileOptions(**column_map))
        assert as_is.stats["matched"] == 0

        # Inversion enabled: the same two rows now pair.
        flipped = reconcile(
            bank_df,
            ledger_df,
            ReconcileOptions(**column_map, invert_right_sign=True),
        )
        assert flipped.stats["matched"] == 1
class TestAmbiguity:
    """Tie handling: undecidable candidates go to review, clear winners match."""

    def test_two_equal_candidates_go_to_review(self):
        """One left row with two identical right rows is flagged, not matched."""
        # One left row, two identical right rows → ambiguous.
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([
            ("2026-01-05", 100.00, "X"),
            ("2026-01-05", 100.00, "Y"),
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 0
        assert result.stats["review"] == 2  # both candidate pairs flagged
        # Left was consumed by the ambiguity, both rights too.
        assert result.stats["unmatched_left"] == 0
        assert result.stats["unmatched_right"] == 0

    def test_uniquely_better_match_wins(self):
        """Each left row pairs with its equal-amount partner, not the near one."""
        # Two left rows, two right rows; one pair is a closer match.
        left = _bank([
            ("2026-01-05", 100.00, "A"),
            ("2026-01-05", 100.05, "B"),
        ])
        right = _ledger([
            ("2026-01-05", 100.00, "X"),  # closer to A
            ("2026-01-05", 100.05, "Y"),  # closer to B
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            amount_tolerance=0.10,
        ))
        # Both should pair uniquely on the exact pass (penalty inside
        # exact pass breaks the symmetric near-ties).
        assert result.stats["matched"] == 2
        # The count alone would also accept the crossed pairing
        # (A–Y, B–X): those differ by 0.05, inside the 0.10 tolerance.
        # Pin the actual partners so the test checks what its name says.
        # Relies on the matched frame exposing source columns with
        # ``left_`` / ``right_`` prefixes, as the result-shape test in
        # this module does.
        pairs = set(zip(
            result.matched["left_desc"],
            result.matched["right_memo"],
        ))
        assert pairs == {("A", "X"), ("B", "Y")}
class TestKeyMatch:
    """Behaviour of the ``left_keys`` / ``right_keys`` reference columns."""

    def test_reference_number_authoritative(self):
        """Equal key and amount pair on the key pass even when dates differ."""
        # Same check number and amount, posted a week apart. With a
        # zero-day date window the exact pass could not pair these, so
        # a match here has to come from the key pass.
        bank_df = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        ledger_df = pd.DataFrame([
            {"posted": "2026-01-12", "amt": 100.00, "ref": "1042"},
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            left_keys=["check_no"],
            right_keys=["ref"],
            date_tolerance_days=0,
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 1
        assert outcome.matched.iloc[0]["match_pass"] == "key"

    def test_key_requires_amount_to_tie(self):
        """An equal key with different amounts must not produce a match."""
        bank_df = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        ledger_df = pd.DataFrame([
            {"posted": "2026-01-05", "amt": 200.00, "ref": "1042"},
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            left_keys=["check_no"],
            right_keys=["ref"],
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 0
class TestInputValidation:
    """Invalid option combinations must raise ``ValueError``."""

    # In every test the options object is created inside the
    # ``pytest.raises`` block, so the error is caught whether it is
    # raised while building the options or while calling ``reconcile``.

    def test_missing_amount_columns(self):
        """Omitting ``left_amount`` is rejected with a message naming it."""
        bank_df = _bank([("2026-01-05", 100.00, "A")])
        ledger_df = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="left_amount"):
            opts = ReconcileOptions(right_amount="amt")
            reconcile(bank_df, ledger_df, opts)

    def test_left_date_without_right_date(self):
        """Setting only one of the two date columns is rejected."""
        bank_df = _bank([("2026-01-05", 100.00, "A")])
        ledger_df = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="both be set or both be None"):
            # right_date is intentionally left out.
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                left_date="date",
            )
            reconcile(bank_df, ledger_df, opts)

    def test_mismatched_key_lengths(self):
        """Key lists of different lengths are rejected."""
        bank_df = _bank([("2026-01-05", 100.00, "A")])
        ledger_df = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="same length"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                left_keys=["a", "b"],
                right_keys=["x"],
            )
            reconcile(bank_df, ledger_df, opts)

    def test_negative_tolerance_rejected(self):
        """A negative ``amount_tolerance`` is rejected."""
        bank_df = _bank([("2026-01-05", 100.00, "A")])
        ledger_df = _ledger([("2026-01-05", 100.00, "X")])
        with pytest.raises(ValueError, match="amount_tolerance"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                amount_tolerance=-0.01,
            )
            reconcile(bank_df, ledger_df, opts)
class TestUnparseableInputs:
    """Rows whose values cannot be parsed must still be reported."""

    def test_non_numeric_amount_falls_through(self):
        """A non-numeric left amount ends up in ``unmatched_left``."""
        # The "BAD" row cannot take part in matching, but it must not
        # disappear from the output; the "OK" row should still pair.
        bank_rows = [
            {"date": "2026-01-05", "amount": "not a number", "desc": "BAD"},
            {"date": "2026-01-05", "amount": 100.00, "desc": "OK"},
        ]
        bank_df = pd.DataFrame(bank_rows)
        ledger_df = _ledger([("2026-01-05", 100.00, "X")])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 1
        # The unparseable row is visible among the left leftovers.
        assert "BAD" in outcome.unmatched_left["desc"].tolist()
class TestResultShape:
    """Shape of the frames exposed on the reconcile result."""

    def test_matched_carries_both_sides(self):
        """A matched row exposes both sides' columns with side prefixes."""
        bank_df = _bank([("2026-01-05", 100.00, "ACME")])
        ledger_df = _ledger([("2026-01-05", 100.00, "Acme Inc")])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank_df, ledger_df, opts)
        pair = outcome.matched.iloc[0]

        # Source columns appear as left_<name> and right_<name>.
        assert pair["left_desc"] == "ACME"
        assert pair["right_memo"] == "Acme Inc"
        assert pair["left_amount"] == 100.00
        assert pair["right_amt"] == 100.00

    def test_empty_inputs_return_empty_result(self):
        """Two empty frames give zero matches and empty output frames."""
        bank_df = _bank([])
        ledger_df = _ledger([])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank_df, ledger_df, opts)

        assert outcome.stats["matched"] == 0
        assert outcome.matched.empty
        assert outcome.unmatched_left.empty
        assert outcome.unmatched_right.empty