GUI/lang-pack tests were asserting against pre-v3 strings ("Data
Cleaning Mastery", "Maestría en limpieza…") that the brand refresh
replaced with "UNALOGIX DataTools" + "Clean. Normalize. Transform."
Updated assertions to the current copy and switched the findings
panel tests to the redesigned flat-list layout (per-finding "Open
Tool →" buttons instead of per-tool expanders).
New coverage:
- tests/test_cli_reconcile.py (13) — preview/apply, tolerance flags,
sign inversion, key flags, error paths, Excel input.
- tests/test_tools_registry.py (27) — unique tool_ids, page_slug →
real file, valid sections/tiers, localized accessor fallbacks,
explicit pins for PDF Extractor + Reconciler entries.
- tests/test_reconcile.py — one-side-empty, key-pass tagging,
additional validation cases, input-DataFrame immutability.
- tests/gui/test_smoke.py — PAGE_SLUGS now includes 10_PDF_Extractor
and 11_Reconciler in both en/es.
- tests/gui/test_workflows.py — TestPdfExtractorWorkflow and
TestReconcilerWorkflow render checks.
Net: 2317 passed → 2418 passed, 0 failures.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
432 lines
17 KiB
Python
432 lines
17 KiB
Python
"""Tests for src.core.reconcile — two-source matching engine."""
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from src.core.reconcile import (
|
|
ReconcileOptions,
|
|
ReconcileResult,
|
|
reconcile,
|
|
)
|
|
|
|
|
|
def _bank(rows):
|
|
return pd.DataFrame(rows, columns=["date", "amount", "desc"])
|
|
|
|
|
|
def _ledger(rows):
|
|
return pd.DataFrame(rows, columns=["posted", "amt", "memo"])
|
|
|
|
|
|
class TestExactMatch:
    """Rows with identical amounts, with and without date columns set."""

    def test_one_to_one_exact(self):
        """Two rows per side with equal dates and amounts all pair up."""
        bank = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-06", 250.00, "WIDGET CO"),
        ])
        books = _ledger([
            ("2026-01-05", 100.00, "Acme Inc"),
            ("2026-01-06", 250.00, "Widget"),
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 2
        assert outcome.stats["unmatched_left"] == 0
        assert outcome.stats["unmatched_right"] == 0
        # Every matched row must carry the "exact" pass label.
        assert outcome.matched["match_pass"].eq("exact").all()

    def test_unmatched_left_and_right(self):
        """One shared row pairs; the side-specific rows stay unmatched."""
        bank = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-07", 99.99, "ONLY ON LEFT"),
        ])
        books = _ledger([
            ("2026-01-05", 100.00, "Acme"),
            ("2026-01-08", 500.00, "Only on right"),
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 1
        assert outcome.stats["unmatched_left"] == 1
        assert outcome.stats["unmatched_right"] == 1
        # Unmatched rows keep their original column names and values.
        leftover_descs = outcome.unmatched_left["desc"].tolist()
        leftover_memos = outcome.unmatched_right["memo"].tolist()
        assert "ONLY ON LEFT" in leftover_descs
        assert "Only on right" in leftover_memos

    def test_amount_only_no_date(self):
        """With no date columns configured, distinct amounts pair 1:1."""
        # The dates below are deliberately far apart: they must be ignored
        # because neither left_date nor right_date is set.
        bank = _bank([
            ("2026-01-01", 42.50, "A"),
            ("2026-02-15", 99.00, "B"),
        ])
        books = _ledger([
            ("2099-12-31", 42.50, "X"),
            ("1970-01-01", 99.00, "Y"),
        ])
        opts = ReconcileOptions(left_amount="amount", right_amount="amt")

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 2

    def test_identical_amounts_with_no_date_are_ambiguous(self):
        """Duplicate amounts on both sides with no dates go to review."""
        # Two left rows and two right rows share one amount and there is
        # no date column to disambiguate, so no pairing is decidable.
        bank = _bank([
            ("2026-01-01", 42.50, "A"),
            ("2026-02-15", 42.50, "B"),
        ])
        books = _ledger([
            ("2099-12-31", 42.50, "X"),
            ("1970-01-01", 42.50, "Y"),
        ])
        opts = ReconcileOptions(left_amount="amount", right_amount="amt")

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 0
        assert outcome.stats["review"] >= 2
|
|
|
|
|
|
class TestAmountTolerance:
    """Behaviour of ``amount_tolerance`` on near-equal amounts."""

    def test_amount_within_tolerance(self):
        """A 0.02 gap is accepted under a 0.05 tolerance."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-05", 100.02, "X")])
        # 100.00 != 100.02, so the exact pass cannot pair these; the
        # match has to come from the tolerance pass.
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            amount_tolerance=0.05,
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 1
        assert outcome.matched.iloc[0]["match_pass"] == "tolerance"
        # The expected difference is -0.02 (compared with a float epsilon).
        observed_diff = outcome.matched.iloc[0]["amount_diff"]
        assert abs(observed_diff + 0.02) < 1e-9

    def test_outside_tolerance_unmatched(self):
        """A 0.50 gap exceeds a 0.05 tolerance; both rows stay unmatched."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-05", 100.50, "X")])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            amount_tolerance=0.05,
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 0
        assert outcome.stats["unmatched_left"] == 1
        assert outcome.stats["unmatched_right"] == 1
|
|
|
|
|
|
class TestDateWindow:
    """Behaviour of ``date_tolerance_days`` when amounts are equal."""

    def test_date_within_window(self):
        """A right row posted 2 days later matches under a 3-day window."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-07", 100.00, "X")])  # 2 days later
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            date_tolerance_days=3,
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 1
        first_pair = outcome.matched.iloc[0]
        assert first_pair["date_diff_days"] == -2

    def test_date_outside_window(self):
        """A right row posted 15 days later is rejected by a 5-day window."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-20", 100.00, "X")])  # 15 days later
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            date_tolerance_days=5,
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 0
|
|
|
|
|
|
class TestSignInversion:
    """Behaviour of the ``invert_right_sign`` option."""

    def test_invert_right_sign(self):
        """Opposite-signed amounts only match once the right sign is flipped."""
        # Bank: deposit = +100 ; Ledger: deposit recorded as -100.
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-05", -100.00, "X")])
        shared = {
            "left_amount": "amount",
            "right_amount": "amt",
            "left_date": "date",
            "right_date": "posted",
        }

        # Without inversion: no match.
        as_recorded = reconcile(bank, books, ReconcileOptions(**shared))
        assert as_recorded.stats["matched"] == 0

        # With inversion: match.
        sign_flipped = reconcile(
            bank,
            books,
            ReconcileOptions(invert_right_sign=True, **shared),
        )
        assert sign_flipped.stats["matched"] == 1
|
|
|
|
|
|
class TestAmbiguity:
    """Cases where a row has more than one candidate on the other side."""

    def test_two_equal_candidates_go_to_review(self):
        """One left row against two identical right rows is ambiguous."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([
            ("2026-01-05", 100.00, "X"),
            ("2026-01-05", 100.00, "Y"),
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 0
        # Both candidate pairs are flagged for review.
        assert outcome.stats["review"] == 2
        # The ambiguity consumes the left row and both right rows, so
        # none of them is reported as unmatched.
        assert outcome.stats["unmatched_left"] == 0
        assert outcome.stats["unmatched_right"] == 0

    def test_uniquely_better_match_wins(self):
        """Two left and two right rows, each with one closer partner."""
        bank = _bank([
            ("2026-01-05", 100.00, "A"),
            ("2026-01-05", 100.05, "B"),
        ])
        books = _ledger([
            ("2026-01-05", 100.00, "X"),  # closer to A
            ("2026-01-05", 100.05, "Y"),  # closer to B
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            amount_tolerance=0.10,
        )

        outcome = reconcile(bank, books, opts)

        # Both should pair uniquely on the exact pass (the penalty inside
        # the exact pass breaks the symmetric near-ties).
        assert outcome.stats["matched"] == 2
|
|
|
|
|
|
class TestKeyMatch:
    """Matching driven by ``left_keys`` / ``right_keys`` reference columns."""

    def test_reference_number_authoritative(self):
        """Same check number and amount pair up despite differing dates."""
        bank = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        books = pd.DataFrame([
            {"posted": "2026-01-12", "amt": 100.00, "ref": "1042"},
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            left_keys=["check_no"],
            right_keys=["ref"],
            date_tolerance_days=0,  # exact-pass would miss
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 1
        first_pair = outcome.matched.iloc[0]
        assert first_pair["match_pass"] == "key"

    def test_key_requires_amount_to_tie(self):
        """Same reference but mismatched amounts is not a key match."""
        bank = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        books = pd.DataFrame([
            {"posted": "2026-01-05", "amt": 200.00, "ref": "1042"},
        ])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
            left_keys=["check_no"],
            right_keys=["ref"],
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 0
|
|
|
|
|
|
class TestInputValidation:
    """Invalid option combinations must raise ``ValueError``."""

    # NOTE: options are built inside each ``pytest.raises`` block so the
    # expected error is caught whether construction or the call raises it.

    def test_missing_amount_columns(self):
        """Omitting ``left_amount`` is rejected with a message naming it."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-05", 100.00, "X")])

        with pytest.raises(ValueError, match="left_amount"):
            opts = ReconcileOptions(right_amount="amt")
            reconcile(bank, books, opts)

    def test_left_date_without_right_date(self):
        """Setting only one of the two date columns is rejected."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-05", 100.00, "X")])

        with pytest.raises(ValueError, match="both be set or both be None"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                left_date="date",  # right_date missing
            )
            reconcile(bank, books, opts)

    def test_mismatched_key_lengths(self):
        """Key lists of different lengths are rejected."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-05", 100.00, "X")])

        with pytest.raises(ValueError, match="same length"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                left_keys=["a", "b"],
                right_keys=["x"],
            )
            reconcile(bank, books, opts)

    def test_negative_tolerance_rejected(self):
        """A negative ``amount_tolerance`` is rejected."""
        bank = _bank([("2026-01-05", 100.00, "A")])
        books = _ledger([("2026-01-05", 100.00, "X")])

        with pytest.raises(ValueError, match="amount_tolerance"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                amount_tolerance=-0.01,
            )
            reconcile(bank, books, opts)
|
|
|
|
|
|
class TestUnparseableInputs:
    """Rows whose values cannot be parsed must stay visible to the user."""

    def test_non_numeric_amount_falls_through(self):
        """A left row with a garbage amount lands in ``unmatched_left``."""
        # The "BAD" row cannot take part in matching, but it must not be
        # dropped silently; the "OK" row still pairs with the ledger row.
        bank = pd.DataFrame([
            {"date": "2026-01-05", "amount": "not a number", "desc": "BAD"},
            {"date": "2026-01-05", "amount": 100.00, "desc": "OK"},
        ])
        books = _ledger([("2026-01-05", 100.00, "X")])
        opts = ReconcileOptions(
            left_amount="amount",
            right_amount="amt",
            left_date="date",
            right_date="posted",
        )

        outcome = reconcile(bank, books, opts)

        assert outcome.stats["matched"] == 1
        # The garbage row appears in unmatched_left.
        leftover_descs = outcome.unmatched_left["desc"].tolist()
        assert "BAD" in leftover_descs
|
|
|
|
|
|
class TestResultShape:
    """Shape and labelling of the frames returned by ``reconcile``."""

    def test_matched_carries_both_sides(self):
        """A matched row exposes both sides under ``left_`` / ``right_``."""
        left = _bank([("2026-01-05", 100.00, "ACME")])
        right = _ledger([("2026-01-05", 100.00, "Acme Inc")])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        row = result.matched.iloc[0]
        assert row["left_desc"] == "ACME"
        assert row["right_memo"] == "Acme Inc"
        assert row["left_amount"] == 100.00
        assert row["right_amt"] == 100.00

    def test_empty_inputs_return_empty_result(self):
        """Two empty inputs produce empty result frames and zero matches."""
        left = _bank([])
        right = _ledger([])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 0
        assert result.matched.empty
        assert result.unmatched_left.empty
        assert result.unmatched_right.empty

    def test_one_side_empty_keeps_other_unmatched(self):
        """Reconciling against an empty side surfaces every other-side row.

        Both directions are exercised: populated left vs. empty right,
        and empty left vs. populated right. Neither may crash.
        """
        # Populated bank side, empty ledger.
        left = _bank([
            ("2026-01-05", 100.00, "ACME"),
            ("2026-01-06", 250.00, "WIDGET"),
        ])
        right = _ledger([])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert result.stats["matched"] == 0
        assert result.stats["unmatched_left"] == 2
        assert result.stats["unmatched_right"] == 0

        # Mirror case: empty bank side, populated ledger. Fresh options
        # are built so the second run does not depend on the first.
        mirror_left = _bank([])
        mirror_right = _ledger([
            ("2026-01-05", 100.00, "Acme"),
            ("2026-01-06", 250.00, "Widget"),
        ])
        mirrored = reconcile(mirror_left, mirror_right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert mirrored.stats["matched"] == 0
        assert mirrored.stats["unmatched_left"] == 0
        assert mirrored.stats["unmatched_right"] == 2

    def test_match_pass_tagged_for_key_pass(self):
        """The ``match_pass`` label is ``"key"`` for a key-based match."""
        # The pass name on each matched row tells the user *why* the
        # engine accepted the pair — verify the "key" label propagates.
        left = pd.DataFrame([
            {"date": "2026-01-05", "amount": 100.00, "check_no": "1042"},
        ])
        right = pd.DataFrame([
            {"posted": "2099-12-31", "amt": 100.00, "ref": "1042"},
        ])
        result = reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            left_keys=["check_no"], right_keys=["ref"],
        ))
        assert result.stats["matched"] == 1
        assert result.matched.iloc[0]["match_pass"] == "key"
|
|
|
|
|
|
class TestAdditionalValidation:
    """Boundary cases for ``_validate_options`` not pinned elsewhere."""

    @staticmethod
    def _single_row_frames():
        """Return a fresh one-row (left, right) pair shared by every case."""
        bank = pd.DataFrame([{"date": "2026-01-05", "amount": 1.0}])
        books = pd.DataFrame([{"posted": "2026-01-05", "amt": 1.0}])
        return bank, books

    def test_unknown_left_amount_column_raises(self):
        """A ``left_amount`` naming a missing column is rejected."""
        bank, books = self._single_row_frames()

        with pytest.raises(ValueError, match="not in left DataFrame"):
            opts = ReconcileOptions(
                left_amount="NOT_A_COLUMN",
                right_amount="amt",
            )
            reconcile(bank, books, opts)

    def test_unknown_right_amount_column_raises(self):
        """A ``right_amount`` naming a missing column is rejected."""
        bank, books = self._single_row_frames()

        with pytest.raises(ValueError, match="not in right DataFrame"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="NOT_A_COLUMN",
            )
            reconcile(bank, books, opts)

    def test_unknown_left_key_column_raises(self):
        """A left key naming a missing column is rejected."""
        bank, books = self._single_row_frames()

        with pytest.raises(ValueError, match="left key column"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                left_keys=["nope"],
                right_keys=["nope"],
            )
            reconcile(bank, books, opts)

    def test_negative_date_tolerance_rejected(self):
        """A negative ``date_tolerance_days`` is rejected."""
        bank, books = self._single_row_frames()

        with pytest.raises(ValueError, match="date_tolerance_days"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                left_date="date",
                right_date="posted",
                date_tolerance_days=-1,
            )
            reconcile(bank, books, opts)

    def test_desc_min_score_out_of_range_rejected(self):
        """A ``desc_min_score`` of 150 is rejected as out of range."""
        bank, books = self._single_row_frames()

        with pytest.raises(ValueError, match="desc_min_score"):
            opts = ReconcileOptions(
                left_amount="amount",
                right_amount="amt",
                desc_min_score=150,
            )
            reconcile(bank, books, opts)
|
|
|
|
|
|
class TestImmutability:
    """The engine must NOT mutate the caller's DataFrames — callers
    rely on holding onto their input frames after the call (the GUI
    Reconciler page re-renders previews from them)."""

    def test_left_df_columns_unchanged(self):
        """Input frames are identical to their pre-call snapshots."""
        left = _bank([("2026-01-05", 100.00, "ACME")])
        right = _ledger([("2026-01-05", 100.00, "Acme Inc")])
        # Deep copies taken before the call are the reference. Comparing
        # ``id(left)`` before and after would prove nothing: the callee
        # cannot rebind the caller's local name, so that check is always
        # true even if the frame was modified in place.
        left_snapshot = left.copy(deep=True)
        right_snapshot = right.copy(deep=True)
        reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
        ))
        assert list(left.columns) == list(left_snapshot.columns)
        # Values, dtypes and index must be untouched as well, not just
        # the column labels.
        pd.testing.assert_frame_equal(left, left_snapshot)
        pd.testing.assert_frame_equal(right, right_snapshot)

    def test_amounts_preserved_when_invert_right_sign_set(self):
        """``invert_right_sign=True`` must not flip the caller's amounts."""
        # Even with sign inversion enabled, the caller's right-hand frame
        # must keep its original amounts after the call.
        left = _bank([("2026-01-05", 100.00, "A")])
        right = _ledger([("2026-01-05", -100.00, "X")])
        original_right_amts = right["amt"].tolist()
        right_snapshot = right.copy(deep=True)
        reconcile(left, right, ReconcileOptions(
            left_amount="amount", right_amount="amt",
            left_date="date", right_date="posted",
            invert_right_sign=True,
        ))
        assert right["amt"].tolist() == original_right_amts
        # Also guard against added helper columns or dtype changes.
        pd.testing.assert_frame_equal(right, right_snapshot)
|