feat(reconcile): two-source reconciliation tool

Bank-feed-vs-ledger style matcher: 4-pass greedy assignment (key → exact → tolerance → fuzzy) with ambiguous candidates routed to a review bucket instead of arbitrary picks. CLI mirrors the cli_text_clean preview/--apply pattern; Streamlit page registered in the automations section. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 22:33:14 +00:00
parent 450d4fc9a8
commit e44af3a45e
5 changed files with 1449 additions and 0 deletions
--- a/src/cli_reconcile.py
+++ b/src/cli_reconcile.py
@@ -0,0 +1,198 @@
+"""CLI for the DataTools reconciliation tool.
+
+Usage:
+    python -m src.cli_reconcile bank.csv ledger.csv \\
+        --left-amount amount --right-amount amt \\
+        --left-date date --right-date posted             # dry-run preview
+    python -m src.cli_reconcile bank.csv ledger.csv \\
+        --left-amount amount --right-amount amt \\
+        --left-date date --right-date posted --apply     # write matched/unmatched CSVs
+    python -m src.cli_reconcile --help                   # full help
+
+Outputs (with --apply) sit beside the LEFT input file:
+    {stem}_matched.csv           one row per accepted pair
+    {stem}_unmatched_left.csv    left rows with no counterpart
+    {stem}_unmatched_right.csv   right rows with no counterpart
+    {stem}_review.csv            ambiguous pairs flagged for review
+"""
+
+from __future__ import annotations
+
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+import typer
+from loguru import logger
+
+app = typer.Typer(  # the `run` function below is attached to this app via @app.command()
+    name="reconcile",
+    help=(
+        "Reconcile two data sources (e.g. bank feed vs. ledger export).\n\n"
+        "By default, runs in preview mode — shows the match stats without "
+        "writing anything. Add --apply to write the four output CSVs.\n\n"
+        "Examples:\n\n"
+        "  # Bank feed vs ledger, exact match\n"
+        "  python -m src.cli_reconcile bank.csv ledger.csv \\\n"
+        "      --left-amount amount --right-amount amt \\\n"
+        "      --left-date date --right-date posted\n\n"
+        "  # Allow 2-day posting drift and a cent of rounding tolerance\n"
+        "  python -m src.cli_reconcile bank.csv ledger.csv \\\n"
+        "      --left-amount amount --right-amount amt \\\n"
+        "      --left-date date --right-date posted \\\n"
+        "      --date-tolerance 2 --amount-tolerance 0.01 --apply\n\n"
+        "  # Bank shows debits as positive; ledger as negative\n"
+        "  python -m src.cli_reconcile bank.csv ledger.csv \\\n"
+        "      --left-amount amount --right-amount amt --invert-right-sign --apply\n"
+    ),
+    add_completion=False,  # keep the shell-completion install options out of the CLI
+    no_args_is_help=True,  # a bare invocation prints the help text
+)
+
+
+def _setup_logging(log_dir: Path) -> Path:
+    """Route loguru to stderr (WARNING+) and a timestamped DEBUG file; return that file's path."""
+    log_dir.mkdir(parents=True, exist_ok=True)
+    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
+    target = log_dir / ("reconcile_" + stamp + ".log")
+    file_format = "{time:YYYY-MM-DD HH:mm:ss} | {level:<8} | {message}"
+    # Clear the sinks registered so far; only the two added below stay active.
+    logger.remove()
+    # Console sees warnings and above as bare messages; the file gets everything.
+    logger.add(sys.stderr, level="WARNING", format="{message}")
+    logger.add(str(target), level="DEBUG", format=file_format)
+    return target
+
+
+def _split_csv_arg(raw: Optional[str]) -> list[str]:
+    """Split a comma-separated option value into stripped, non-empty names (``None`` -> ``[]``)."""
+    pieces = (piece.strip() for piece in ("" if raw is None else raw).split(","))
+    return [piece for piece in pieces if piece]
+
+
+@app.command()
+def run(
+    left_file: str = typer.Argument(..., help="Path to the LEFT input (e.g. bank feed)."),
+    right_file: str = typer.Argument(..., help="Path to the RIGHT input (e.g. ledger)."),
+    left_amount: str = typer.Option(..., "--left-amount", help="Amount column on the LEFT."),
+    right_amount: str = typer.Option(..., "--right-amount", help="Amount column on the RIGHT."),
+    left_date: Optional[str] = typer.Option(None, "--left-date", help="Date column on the LEFT."),
+    right_date: Optional[str] = typer.Option(None, "--right-date", help="Date column on the RIGHT."),
+    left_keys: Optional[str] = typer.Option(
+        None, "--left-keys",
+        help="Comma-separated reference/key columns on the LEFT (paired with --right-keys).",
+    ),
+    right_keys: Optional[str] = typer.Option(
+        None, "--right-keys",
+        help="Comma-separated reference/key columns on the RIGHT (paired with --left-keys).",
+    ),
+    left_desc: Optional[str] = typer.Option(None, "--left-desc", help="Description column on the LEFT (fuzzy)."),
+    right_desc: Optional[str] = typer.Option(None, "--right-desc", help="Description column on the RIGHT (fuzzy)."),
+    desc_min_score: int = typer.Option(
+        0, "--desc-min-score",
+        help="Min description similarity (0-100) to accept a fuzzy match. 0 disables.",
+    ),
+    amount_tolerance: float = typer.Option(
+        0.0, "--amount-tolerance",
+        help="Absolute amount tolerance (e.g. 0.01 to absorb cent-rounding).",
+    ),
+    date_tolerance: int = typer.Option(
+        0, "--date-tolerance",
+        help="Date tolerance in calendar days (± N).",
+    ),
+    invert_right_sign: bool = typer.Option(
+        False, "--invert-right-sign",
+        help="Negate the RIGHT amount before matching (use when sign conventions differ).",
+    ),
+    apply: bool = typer.Option(
+        False, "--apply",
+        help="Write the four output CSV files. Without this flag, only stats are shown.",
+    ),
+):
+    """Reconcile two CSV/Excel files."""
+    from src.core.io import read_file, write_file
+    from src.core.reconcile import ReconcileOptions, reconcile
+
+    left_path, right_path = Path(left_file), Path(right_file)
+    for candidate in (left_path, right_path):
+        if not candidate.exists():
+            typer.echo(f"Error: File not found: {candidate}", err=True)
+            raise typer.Exit(1)
+
+    log_path = _setup_logging(Path("logs"))
+
+    def _load(path: Path):
+        """Read one input and echo its shape; any read error exits with status 1."""
+        typer.echo(f"Reading {path.name}...")
+        try:
+            frame = read_file(path)
+        except Exception as e:
+            typer.echo(f"Error reading {path.name}: {e}", err=True)
+            raise typer.Exit(1)
+        typer.echo(f"  {len(frame)} rows, {len(frame.columns)} columns")
+        return frame
+
+    left_df, right_df = _load(left_path), _load(right_path)
+
+    options = ReconcileOptions(
+        left_amount=left_amount,
+        right_amount=right_amount,
+        left_date=left_date,
+        right_date=right_date,
+        left_keys=_split_csv_arg(left_keys),
+        right_keys=_split_csv_arg(right_keys),
+        left_desc=left_desc,
+        right_desc=right_desc,
+        desc_min_score=desc_min_score,
+        amount_tolerance=amount_tolerance,
+        date_tolerance_days=date_tolerance,
+        invert_right_sign=invert_right_sign,
+    )
+
+    typer.echo("Reconciling...")
+    try:
+        result = reconcile(left_df, right_df, options)
+    except ValueError as e:
+        # Only ValueError becomes a clean error line; anything else propagates.
+        typer.echo(f"Error: {e}", err=True)
+        raise typer.Exit(1)
+
+    _print_stats(result.stats)
+    if not apply:
+        typer.echo("\nThis was a preview. Add --apply to write the output files.")
+    else:
+        stem, out_dir = left_path.stem, left_path.parent
+        # Insertion order doubles as the write order and the listing order.
+        outputs = {
+            "matched": result.matched,
+            "unmatched_left": result.unmatched_left,
+            "unmatched_right": result.unmatched_right,
+            "review": result.review,
+        }
+        for suffix, frame in outputs.items():
+            write_file(frame, out_dir / f"{stem}_{suffix}.csv")
+        typer.echo(f"\nWrote 4 files to {out_dir}:")
+        for suffix in outputs:
+            typer.echo(f"  {stem}_{suffix}.csv")
+
+    typer.echo(f"Log: {log_path}")
+
+
+def _print_stats(stats: dict) -> None:
+    """Echo the six reconcile counters framed by two 50-character rules."""
+    rows = (("Left rows:", "left_rows"), ("Right rows:", "right_rows"),
+            ("Matched:", "matched"), ("Review (ambiguous):", "review"),
+            ("Unmatched left:", "unmatched_left"), ("Unmatched right:", "unmatched_right"))
+    typer.echo("\n" + "─" * 50)
+    for label, key in rows:
+        typer.echo(f"  {label:<21}{stats[key]}")
+    typer.echo("─" * 50)
+
+
+def main():  # entry point: delegates straight to the Typer app
+    app()
+
+
+if __name__ == "__main__":  # run as a script, e.g. `python -m src.cli_reconcile`
+    main()
--- a/src/core/reconcile.py
+++ b/src/core/reconcile.py
@@ -0,0 +1,598 @@
+"""Two-source data reconciliation.
+
+Given two DataFrames (typically a bank/credit-card feed and a ledger
+export), find which rows on the left correspond to rows on the right
+based on amount, date, and optional reference/description fields.
+
+Output buckets:
+    matched          — one row per accepted pair, with both originals.
+    unmatched_left   — left rows with no acceptable right counterpart.
+    unmatched_right  — right rows with no acceptable left counterpart.
+    review           — ambiguous cases (a left row had >1 equally good
+                       right candidates, or vice versa) surfaced for the
+                       user to disambiguate manually.
+
+Matching strategy is a multi-pass greedy one-to-one assignment:
+    Pass 1: exact key match (when both ``left_keys`` and ``right_keys`` are set)
+    Pass 2: exact (amount, date) match
+    Pass 3: amount within tolerance AND date within window
+    Pass 4: + optional description fuzzy similarity boost
+
+Within each pass, candidate pairs are scored and assigned greedily by
+descending score; ties for the same left row that span multiple right
+rows (or vice versa) are sent to ``review`` instead of being matched
+arbitrarily.
+
+The module is pure: no I/O, no Streamlit, no logging side effects beyond
+loguru. Caller drives file reading and result rendering.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Optional
+
+import pandas as pd
+from loguru import logger
+
+try:
+    from rapidfuzz import fuzz as _rf_fuzz
+    _HAS_RAPIDFUZZ = True
+except ImportError:  # pragma: no cover — rapidfuzz is in requirements.txt
+    _HAS_RAPIDFUZZ = False
+
+
+# ---------------------------------------------------------------------------
+# Options & result
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class ReconcileOptions:
+    """Configuration for :func:`reconcile`.
+
+    ``left_amount`` / ``right_amount`` are required: every match needs
+    an amount to anchor on. Everything else is optional.
+    """
+
+    # Amount columns (required; the "" defaults are rejected by validation).
+    # Values go through pd.to_numeric; rows that fail end up unmatched.
+    left_amount: str = ""
+    right_amount: str = ""
+
+    # Date columns. Must be set together (validation enforces this); when
+    # unset, date is ignored entirely.
+    left_date: Optional[str] = None
+    right_date: Optional[str] = None
+
+    # Optional reference / key columns for the key pass. The two lists
+    # must be the same length so the i-th left key pairs with the
+    # i-th right key (e.g. ``["check_no"]`` ↔ ``["ref"]``).
+    left_keys: list[str] = field(default_factory=list)
+    right_keys: list[str] = field(default_factory=list)
+
+    # Description columns for the fuzzy pass (optional). Only used when
+    # both are set, ``desc_min_score`` > 0 AND rapidfuzz is installed.
+    left_desc: Optional[str] = None
+    right_desc: Optional[str] = None
+    desc_min_score: int = 0  # 0–100; 0 disables fuzzy.
+
+    # Tolerances. Defaults are exact match.
+    amount_tolerance: float = 0.0   # absolute (e.g. 0.01 for cent rounding)
+    date_tolerance_days: int = 0    # ± N calendar days
+
+    # Sign convention switch: when True, the right side's amount is
+    # negated before matching (for feeds where debits are positive on
+    # one side and negative on the other).
+    invert_right_sign: bool = False
+
+
+@dataclass
+class ReconcileResult:
+    """Outcome of a reconcile run.
+
+    ``matched`` and ``review`` carry the bookkeeping columns (``match_pass``,
+    ``score``, ``amount_diff``, ``date_diff_days``, ``desc_score``) followed by
+    every original column prefixed with ``left_`` / ``right_``. The two
+    unmatched frames are plain copies of the leftover input rows.
+    """
+
+    matched: pd.DataFrame
+    unmatched_left: pd.DataFrame
+    unmatched_right: pd.DataFrame
+    review: pd.DataFrame
+    stats: dict[str, int] = field(default_factory=dict)
+
+
+# ---------------------------------------------------------------------------
+# Public entry point
+# ---------------------------------------------------------------------------
+
+
+def reconcile(
+    left: pd.DataFrame,
+    right: pd.DataFrame,
+    options: ReconcileOptions,
+) -> ReconcileResult:
+    """Match rows of *left* to rows of *right* according to *options*.
+
+    Up to four passes run in order (key, exact, tolerance, fuzzy); each
+    sees only the rows no earlier pass has claimed. Neither input is
+    mutated: all work happens on copies, and the result frames are
+    assembled from the originals by index label.
+
+    Returns:
+        ReconcileResult with the four frames and a ``stats`` counter dict.
+
+    Raises:
+        ValueError: when *options* fails validation against the frames.
+    """
+    _validate_options(left, right, options)
+
+    # Parse amounts/dates once into ``_amt`` / ``_date`` work columns so
+    # the passes never re-parse. These live on copies of the inputs.
+    L = _prep_side(left, options, side="left")
+    R = _prep_side(right, options, side="right")
+
+    # The open sets are seeded from the ORIGINAL indices, not the
+    # prepped ones. Rows dropped during prep are invisible to the
+    # candidate generators (which only see L / R), so they are never
+    # claimed and end up in the unmatched output, where the user can
+    # see them.
+    left_open: set = set(left.index)
+    right_open: set = set(right.index)
+
+    matched_pairs: list[dict] = []
+    review_pairs: list[dict] = []
+
+    # Decide up front which passes are enabled.
+    use_key = bool(options.left_keys and options.right_keys)
+    use_tolerance = (
+        options.amount_tolerance > 0 or options.date_tolerance_days > 0
+    )
+    use_fuzzy = bool(
+        options.desc_min_score > 0
+        and options.left_desc
+        and options.right_desc
+        and _HAS_RAPIDFUZZ
+    )
+
+    # (pass name, candidate generator, enabled) in execution order:
+    # reference keys first, then exact amount/date, then the tolerance
+    # window, and finally description similarity over what is left.
+    schedule = (
+        ("key", _candidates_by_key, use_key),
+        ("exact", _candidates_exact, True),
+        ("tolerance", _candidates_tolerance, use_tolerance),
+        ("fuzzy", _candidates_fuzzy, use_fuzzy),
+    )
+    for pass_name, candidate_fn, enabled in schedule:
+        if not enabled:
+            continue
+        _run_pass(
+            L, R, left_open, right_open, matched_pairs, review_pairs,
+            options=options, pass_name=pass_name,
+            candidate_fn=candidate_fn,
+        )
+
+    # Pair frames come from the recorded pairs; the unmatched frames are
+    # whatever is still open, in sorted index order.
+    matched_df = _build_matched(left, right, matched_pairs, options)
+    review_df = _build_matched(left, right, review_pairs, options, review=True)
+    unmatched_left_df = left.loc[sorted(left_open)].copy()
+    unmatched_right_df = right.loc[sorted(right_open)].copy()
+
+    stats = dict(
+        left_rows=len(left),
+        right_rows=len(right),
+        matched=len(matched_pairs),
+        review=len(review_pairs),
+        unmatched_left=len(unmatched_left_df),
+        unmatched_right=len(unmatched_right_df),
+    )
+    logger.debug("reconcile stats: {}", stats)
+
+    return ReconcileResult(
+        matched=matched_df,
+        unmatched_left=unmatched_left_df,
+        unmatched_right=unmatched_right_df,
+        review=review_df,
+        stats=stats,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Input validation & prep
+# ---------------------------------------------------------------------------
+
+
+def _validate_options(
+    left: pd.DataFrame, right: pd.DataFrame, options: ReconcileOptions
+) -> None:
+    """Raise ``ValueError`` if *options* cannot be applied to *left* / *right*."""
+    if not options.left_amount or not options.right_amount:
+        raise ValueError("Reconcile requires both left_amount and right_amount columns.")
+    if options.left_amount not in left.columns:
+        raise ValueError(f"left_amount column {options.left_amount!r} not in left DataFrame.")
+    if options.right_amount not in right.columns:
+        raise ValueError(f"right_amount column {options.right_amount!r} not in right DataFrame.")
+    if bool(options.left_date) != bool(options.right_date):
+        raise ValueError("left_date and right_date must both be set or both be None.")
+    if options.left_date and options.left_date not in left.columns:
+        raise ValueError(f"left_date column {options.left_date!r} not in left.")
+    if options.right_date and options.right_date not in right.columns:
+        raise ValueError(f"right_date column {options.right_date!r} not in right.")
+    if len(options.left_keys) != len(options.right_keys):
+        raise ValueError(
+            "left_keys and right_keys must be the same length "
+            f"(got {len(options.left_keys)} vs {len(options.right_keys)})."
+        )
+    sides = (("left", left), ("right", right))
+    for side, frame in sides:
+        for c in getattr(options, f"{side}_keys"):
+            if c not in frame.columns:
+                raise ValueError(f"{side} key column {c!r} not in {side} DataFrame.")
+    for name in ("amount_tolerance", "date_tolerance_days"):
+        if getattr(options, name) < 0:
+            raise ValueError(f"{name} must be >= 0.")
+    if not (0 <= options.desc_min_score <= 100):
+        raise ValueError("desc_min_score must be between 0 and 100.")
+    # The fuzzy pass indexes these columns directly; catch typos here, not as a KeyError.
+    desc_cols = {"left": options.left_desc, "right": options.right_desc}
+    if options.desc_min_score > 0 and all(desc_cols.values()):
+        for side, frame in sides:
+            if desc_cols[side] not in frame.columns:
+                raise ValueError(f"{side}_desc column {desc_cols[side]!r} not in {side} DataFrame.")
+    for side, frame in sides:  # rows are tracked by index label, so labels must be unique
+        if not frame.index.is_unique:
+            raise ValueError(f"{side} DataFrame index has duplicate labels; reset the index first.")
+
+
+def _prep_side(
+    df: pd.DataFrame, options: ReconcileOptions, side: str
+) -> pd.DataFrame:
+    """Copy *df*, attach ``_amt`` / ``_date`` work columns, keep usable rows.
+
+    A row is usable when its amount parses as a number and, if a date
+    column is configured for this side, its date parses too. Unusable
+    rows are removed from the returned copy only; *df* is left intact,
+    so the caller can still report them as unmatched.
+    """
+    is_left = side == "left"
+    amt_col = options.left_amount if is_left else options.right_amount
+    date_col = options.left_date if is_left else options.right_date
+
+    work = df.copy()
+    amounts = pd.to_numeric(work[amt_col], errors="coerce")
+    # Sign inversion only ever applies to the right-hand side.
+    if side == "right" and options.invert_right_sign:
+        amounts = -amounts
+    work["_amt"] = amounts
+
+    # NaT everywhere when no date column is configured for this side.
+    work["_date"] = (
+        pd.to_datetime(work[date_col], errors="coerce") if date_col else pd.NaT
+    )
+
+    unusable = work["_amt"].isna()
+    if date_col:
+        unusable = unusable | work["_date"].isna()
+    dropped = int(unusable.sum())
+    if dropped:
+        logger.debug(
+            "{} side: dropping {} row(s) with unparseable amount/date", side, dropped
+        )
+    return work.loc[~unusable].copy()
+
+
+# ---------------------------------------------------------------------------
+# Per-pass orchestration
+# ---------------------------------------------------------------------------
+
+
+def _run_pass(
+    L: pd.DataFrame,
+    R: pd.DataFrame,
+    left_open: set,
+    right_open: set,
+    matched_pairs: list[dict],
+    review_pairs: list[dict],
+    *,
+    options: ReconcileOptions,
+    pass_name: str,
+    candidate_fn,
+) -> None:
+    """Run one matching pass over the still-open indices.
+
+    ``candidate_fn(L_open, R_open, options)`` supplies candidate dicts
+    (``left_idx``, ``right_idx``, ``score``, ...). Left rows are visited
+    best-score-first; a unique, uncontested top candidate is matched,
+    while ties on either side go to ``review_pairs``. All four
+    collections passed in are mutated in place; nothing is returned.
+    """
+    L_open = L.loc[L.index.intersection(left_open)]
+    R_open = R.loc[R.index.intersection(right_open)]
+    if L_open.empty or R_open.empty:
+        return
+
+    candidates = candidate_fn(L_open, R_open, options)
+    if not candidates:
+        return
+
+    # Group candidates by left index; each left row is judged on its own
+    # candidate list further down.
+    by_left: dict = {}
+    for cand in candidates:
+        by_left.setdefault(cand["left_idx"], []).append(cand)
+
+    # Index every left row's top-scoring candidates by right row. The
+    # mirror check below uses this to spot a right row that is the top
+    # choice of more than one left row. (Sorts each list in place.)
+    by_right_top: dict = {}
+    for li, cands in by_left.items():
+        cands.sort(key=lambda c: c["score"], reverse=True)
+        top = cands[0]["score"]
+        leaders = [c for c in cands if c["score"] == top]
+        for c in leaders:
+            by_right_top.setdefault(c["right_idx"], []).append(c)
+
+    # Visit left rows by descending top score so high-confidence rows
+    # claim their right counterpart first. NOTE(review): a row whose top
+    # candidate is already taken is skipped here, not given its runner-up.
+    left_order = sorted(
+        by_left.keys(),
+        key=lambda li: -by_left[li][0]["score"],
+    )
+
+    for li in left_order:
+        if li not in left_open:  # already parked in review by an earlier mirror check
+            continue
+        cands = by_left[li]
+        top_score = cands[0]["score"]
+        leaders = [c for c in cands if c["score"] == top_score]
+
+        # Keep only the leaders whose right row is still unclaimed.
+        leaders = [c for c in leaders if c["right_idx"] in right_open]
+        if not leaders:
+            continue
+
+        if len(leaders) > 1:
+            # Left row is ambiguous on its own side — multiple equally
+            # good right candidates remain. Park them all in review.
+            for c in leaders:
+                review_pairs.append({**c, "pass": pass_name})
+            left_open.discard(li)
+            for c in leaders:
+                right_open.discard(c["right_idx"])
+            continue
+
+        pick = leaders[0]
+        ri = pick["right_idx"]
+
+        # Mirror check: is the right row contested by another left at
+        # the same top score? If so, every contending left goes to review
+        # and the right row is consumed.
+        contenders = [
+            c for c in by_right_top.get(ri, [])
+            if c["left_idx"] in left_open and c["score"] == pick["score"]
+        ]
+        if len(contenders) > 1:
+            for c in contenders:
+                review_pairs.append({**c, "pass": pass_name})
+                left_open.discard(c["left_idx"])
+            right_open.discard(ri)
+            continue
+
+        matched_pairs.append({**pick, "pass": pass_name})  # record gains a "pass" key
+        left_open.discard(li)
+        right_open.discard(ri)
+
+
+# ---------------------------------------------------------------------------
+# Candidate generators (one per pass)
+# ---------------------------------------------------------------------------
+
+
+def _candidates_by_key(
+    L: pd.DataFrame, R: pd.DataFrame, options: ReconcileOptions
+) -> list[dict]:
+    """Pair rows whose key columns all agree and whose amounts are within tolerance.
+
+    Missing key values (NaN/None/NaT) count as blank and an all-blank key never
+    matches. Date is ignored: a shared reference is trusted across posting drift.
+    """
+    if not options.left_keys:
+        return []
+    def _keys(frame: pd.DataFrame, cols: list[str]):
+        # Plain astype(str) turns missing cells into "nan", which would pair
+        # unrelated reference-less rows; tuples also avoid "|"-join collisions.
+        text = frame[cols].astype(str).where(frame[cols].notna(), "")
+        return zip(frame.index, map(tuple, text.to_numpy()))
+
+    R_by_key: dict = {}
+    for ri, k in _keys(R, options.right_keys):
+        R_by_key.setdefault(k, []).append(ri)
+    tol = options.amount_tolerance + 1e-9  # absorbs float noise (100.01 - 100.00 > 0.01)
+    out: list[dict] = []
+    for li, k in _keys(L, options.left_keys):
+        if not any(k):  # entirely blank key: nothing to match on
+            continue
+        for ri in R_by_key.get(k, []):
+            if abs(L.at[li, "_amt"] - R.at[ri, "_amt"]) <= tol:
+                out.append(_score_pair(L, R, li, ri, base_score=1000))
+    return out
+
+
+def _candidates_exact(
+    L: pd.DataFrame, R: pd.DataFrame, options: ReconcileOptions
+) -> list[dict]:
+    """Exact match on amount (and on date, when date columns are configured)."""
+    out: list[dict] = []
+    # Truthiness, as in validation/prep: "" means "no date" (NaT != NaT would reject all).
+    has_date = bool(options.left_date and options.right_date)
+    # Bucket right side by amount for cheap lookup.
+    R_by_amt: dict = {}
+    for ri, amt in R["_amt"].items():
+        R_by_amt.setdefault(amt, []).append(ri)
+    for li, amt in L["_amt"].items():
+        for ri in R_by_amt.get(amt, []):
+            if has_date and L.at[li, "_date"] != R.at[ri, "_date"]:
+                continue
+            out.append(_score_pair(L, R, li, ri, base_score=900))
+    return out
+
+
+def _candidates_tolerance(
+    L: pd.DataFrame, R: pd.DataFrame, options: ReconcileOptions
+) -> list[dict]:
+    """Amount within tolerance and (if configured) date within window.
+
+    Quadratic in the open set size. For typical reconciliation sizes
+    (a month of statements: low thousands of rows) this is fine; if a
+    user hands us 100k×100k we'll need a smarter blocking strategy.
+    """
+    out: list[dict] = []
+    # Truthiness (not ``is not None``) matches how validation and prep
+    # decide whether dates are in play, so "" also means "no date".
+    has_date = bool(options.left_date and options.right_date)
+    # Pad the tolerance: binary float arithmetic can push a difference that
+    # sits exactly on the boundary (e.g. one cent at 0.01) just past it.
+    tol = options.amount_tolerance + 1e-9
+    win = (
+        pd.Timedelta(days=options.date_tolerance_days).to_timedelta64()
+        if has_date else None
+    )
+
+    R_amts = R["_amt"].to_numpy()
+    R_dates = R["_date"].to_numpy() if has_date else None
+    R_index = R.index.to_numpy()
+
+    for li in L.index:
+        mask = abs(R_amts - L.at[li, "_amt"]) <= tol
+        if has_date:
+            date_diff = R_dates - L.at[li, "_date"].to_datetime64()
+            mask &= (date_diff >= -win) & (date_diff <= win)
+        for ri in R_index[mask]:
+            out.append(_score_pair(L, R, li, ri, base_score=500))
+    return out
+
+
+def _candidates_fuzzy(
+    L: pd.DataFrame, R: pd.DataFrame, options: ReconcileOptions
+) -> list[dict]:
+    """Tolerance-pass candidates re-scored by description similarity.
+
+    A pair is kept only when the amount is within tolerance, the date (if
+    configured) is within the window, and the description similarity reaches
+    ``desc_min_score``. That similarity is added to the base score.
+    """
+    if not (_HAS_RAPIDFUZZ and options.left_desc and options.right_desc):
+        return []
+    out: list[dict] = []
+    has_date = bool(options.left_date and options.right_date)  # "" == no date
+    # Padded so an on-the-boundary difference survives float noise (100.01 - 100.00 > 0.01).
+    tol = options.amount_tolerance + 1e-9
+    win = pd.Timedelta(days=options.date_tolerance_days) if has_date else None
+    min_score = options.desc_min_score
+
+    # Missing descriptions become "" instead of the literal text "nan", so
+    # two rows without a description are not compared as identical words.
+    L_desc = L[options.left_desc].astype(str).where(L[options.left_desc].notna(), "")
+    R_desc = R[options.right_desc].astype(str).where(R[options.right_desc].notna(), "")
+
+    for li in L.index:
+        l_amt = L.at[li, "_amt"]
+        l_date = L.at[li, "_date"] if has_date else None
+        l_text = L_desc.at[li]
+        for ri in R.index:
+            if abs(R.at[ri, "_amt"] - l_amt) > tol:
+                continue
+            if has_date and abs(R.at[ri, "_date"] - l_date) > win:
+                continue
+            score = int(_rf_fuzz.token_set_ratio(l_text, R_desc.at[ri]))
+            if score < min_score:
+                continue
+            # Base 300 keeps fuzzy below exact/tolerance passes; the
+            # 0–100 description score breaks ties within the pass.
+            out.append(
+                _score_pair(L, R, li, ri, base_score=300 + score, desc_score=score)
+            )
+    return out
+
+
+# ---------------------------------------------------------------------------
+# Scoring & output assembly
+# ---------------------------------------------------------------------------
+
+
+def _score_pair(
+    L: pd.DataFrame,
+    R: pd.DataFrame,
+    li,
+    ri,
+    *,
+    base_score: int,
+    desc_score: int = 0,
+) -> dict:
+    """Assemble the candidate dict for the (li, ri) pair.
+
+    The score is *base_score* minus a closeness penalty: ten points per
+    unit of absolute amount difference (at most 50) plus one point per
+    day of date difference (at most 50, and only when both dates exist).
+    """
+    amt_diff = float(L.at[li, "_amt"] - R.at[ri, "_amt"])
+    left_when, right_when = L.at[li, "_date"], R.at[ri, "_date"]
+    both_dated = pd.notna(left_when) and pd.notna(right_when)
+    date_diff_days = int((left_when - right_when).days) if both_dated else None
+
+    amount_penalty = min(abs(amt_diff) * 10, 50)
+    date_penalty = 0 if date_diff_days is None else min(abs(date_diff_days), 50)
+    return {
+        "left_idx": li,
+        "right_idx": ri,
+        "score": base_score - (amount_penalty + date_penalty),
+        "amount_diff": amt_diff,
+        "date_diff_days": date_diff_days,
+        "desc_score": desc_score,
+    }
+
+
+def _build_matched(
+    left: pd.DataFrame,
+    right: pd.DataFrame,
+    pairs: list[dict],
+    options: ReconcileOptions,
+    *,
+    review: bool = False,
+) -> pd.DataFrame:
+    """Assemble a matched/review frame: bookkeeping cols + originals.
+
+    Matched rows are ordered by score, strongest first; review rows by the left
+    row's position in *left* (then score), keeping each row's alternatives adjacent.
+    """
+    cols = ["match_pass", "score", "amount_diff", "date_diff_days", "desc_score"]
+    cols += [f"left_{c}" for c in left.columns]
+    cols += [f"right_{c}" for c in right.columns]
+    if not pairs:
+        return pd.DataFrame(columns=cols)
+    if review:
+        # sorted() is stable, so pairs with equal keys keep their arrival order.
+        position = {label: n for n, label in enumerate(left.index)}
+        pairs = sorted(pairs, key=lambda p: (position[p["left_idx"]], -p["score"]))
+    rows = []
+    for p in pairs:
+        li, ri = p["left_idx"], p["right_idx"]
+        row = {
+            "match_pass": p["pass"],
+            "score": p["score"],
+            "amount_diff": p["amount_diff"],
+            "date_diff_days": p["date_diff_days"],
+            "desc_score": p["desc_score"],
+        }
+        row.update({f"left_{c}": left.at[li, c] for c in left.columns})
+        row.update({f"right_{c}": right.at[ri, c] for c in right.columns})
+        rows.append(row)
+    out = pd.DataFrame(rows)
+    if not review:
+        out = out.sort_values("score", ascending=False, kind="stable")
+    return out.reset_index(drop=True)
--- a/src/gui/pages/11_Reconciler.py
+++ b/src/gui/pages/11_Reconciler.py
@@ -0,0 +1,324 @@
+"""DataTools Reconcile — Streamlit page.
+
+Two-source reconciliation (e.g. bank feed vs. ledger): upload both
+files, pick the amount/date columns on each side, choose tolerance
+settings, then download four output CSVs (matched, unmatched-left,
+unmatched-right, review).
+"""
+
+from __future__ import annotations
+
+import io
+import sys
+from pathlib import Path
+
+import pandas as pd
+import streamlit as st
+
+_project_root = Path(__file__).resolve().parent.parent.parent.parent
+if str(_project_root) not in sys.path:
+    sys.path.insert(0, str(_project_root))
+
+from src.audit import log_event, log_page_open
+from src.gui.components import (
+    back_to_home_link,
+    hide_streamlit_chrome,
+    html_download_button,
+    render_sticky_footer,
+)
+from src.core.reconcile import ReconcileOptions, reconcile
+
+# Shared page helpers imported from src.gui.components, followed by a
+# page-open record (src.audit) keyed on this page's slug.
+hide_streamlit_chrome()
+render_sticky_footer()
+back_to_home_link()
+log_page_open("11_Reconciler")
+
+
+# ---------------------------------------------------------------------------
+# Header
+# ---------------------------------------------------------------------------
+
+# Page title plus a one-line summary of the four output buckets.
+st.title("Reconcile Two Files")
+st.caption(
+    "Match transactions between two sources (e.g. bank feed vs. ledger). "
+    "Outputs four buckets: matched, unmatched-left, unmatched-right, and "
+    "ambiguous-for-review."
+)
+
+
+# ---------------------------------------------------------------------------
+# File readers
+# ---------------------------------------------------------------------------
+
+
+@st.cache_data(show_spinner=False)
+def _read_uploaded(name: str, data: bytes) -> pd.DataFrame:
+    """Read uploaded bytes into an all-string DataFrame.
+
+    Every cell is kept as ``str`` and empty cells stay empty strings
+    (``dtype=str``, ``keep_default_na=False``), so nothing is coerced at
+    load time.
+
+    Args:
+        name: Original upload filename. Only its suffix is used: to choose
+            the Excel reader for ``.xlsx``/``.xls`` and a tab delimiter for
+            ``.tsv`` (comma otherwise).
+        data: Raw file contents.
+
+    Returns:
+        The parsed frame.
+    """
+    suffix = Path(name).suffix.lower()
+    if suffix in (".xlsx", ".xls"):
+        return pd.read_excel(io.BytesIO(data), dtype=str, keep_default_na=False)
+
+    # Shared by both decoding attempts so the fallback parses with the same
+    # delimiter and bad-line policy as the first try.
+    csv_kwargs = {
+        "dtype": str,
+        "keep_default_na": False,
+        "sep": "\t" if suffix == ".tsv" else ",",
+        "on_bad_lines": "warn",
+    }
+    try:
+        # utf-8-sig decodes plain UTF-8 as well and drops a leading BOM, so
+        # a separate plain "utf-8" attempt adds nothing.
+        return pd.read_csv(io.BytesIO(data), encoding="utf-8-sig", **csv_kwargs)
+    except UnicodeDecodeError:
+        # latin-1 maps every byte to a character, so decoding cannot fail
+        # here; no further fallback is needed.
+        return pd.read_csv(io.BytesIO(data), encoding="latin-1", **csv_kwargs)
+
+
+def _side_panel(side_label: str, key_prefix: str):
+    """Draw the uploader and preview for one side of the reconciliation.
+
+    Returns a ``(DataFrame, filename)`` pair, or ``(None, None)`` when no
+    file has been uploaded yet or the upload could not be read (the read
+    error is shown on the page).
+    """
+    label_lc = side_label.lower()
+    st.markdown(f"**{side_label}**")
+    uploaded = st.file_uploader(
+        f"Upload {label_lc} file (CSV / Excel)",
+        type=["csv", "tsv", "xlsx", "xls"],
+        key=f"{key_prefix}_upload",
+        label_visibility="collapsed",
+    )
+    if uploaded is None:
+        st.caption(f"_No {label_lc} file yet._")
+        return None, None
+
+    try:
+        frame = _read_uploaded(uploaded.name, uploaded.getvalue())
+    except Exception as exc:
+        # UI boundary: any reader failure becomes an on-page error.
+        st.error(f"Could not read `{uploaded.name}`: {exc}")
+        return None, None
+
+    n_rows, n_cols = frame.shape
+    st.caption(f"`{uploaded.name}` — {n_rows} rows, {n_cols} columns")
+    with st.expander(f"Preview {label_lc}", expanded=False):
+        st.dataframe(frame.head(10), width="stretch")
+    return frame, uploaded.name
+
+
+# ---------------------------------------------------------------------------
+# Side-by-side upload
+# ---------------------------------------------------------------------------
+
+# Two columns so the left and right uploaders render side by side.
+col_left, col_right = st.columns(2)
+with col_left:
+    left_df, left_name = _side_panel("Left (e.g. bank feed)", "left")
+with col_right:
+    right_df, right_name = _side_panel("Right (e.g. ledger)", "right")
+
+# Everything below needs both frames; st.stop() ends this script run here
+# until both uploads are present and readable.
+if left_df is None or right_df is None:
+    st.info("Upload both files to continue.")
+    st.stop()
+
+
+# ---------------------------------------------------------------------------
+# Column mapping
+# ---------------------------------------------------------------------------
+
+st.divider()
+st.subheader("Match settings")
+
+# Left-side column pickers go in map_left, right-side pickers in map_right.
+map_left, map_right = st.columns(2)
+
+
+def _col_pick(label: str, df: pd.DataFrame, key: str, *, allow_none: bool):
+    """Let the user choose one column of *df* from a selectbox.
+
+    With ``allow_none`` a leading "(none)" entry is offered; choosing it
+    yields ``None`` instead of a column name.
+    """
+    choices = ["(none)", *df.columns] if allow_none else list(df.columns)
+    selected = st.selectbox(label, choices, key=key)
+    if selected == "(none)":
+        return None
+    return selected
+
+
+def _mapping_controls(heading: str, df: pd.DataFrame, prefix: str, keys_label: str):
+    """Render one side's column pickers and return its four selections.
+
+    Returns ``(amount, date, desc, keys)``: the amount column name, the
+    optional date and description column names (``None`` when "(none)" is
+    chosen), and the list of reference columns.
+    """
+    st.markdown(heading)
+    amount = _col_pick("Amount column", df, f"{prefix}_amount_col", allow_none=False)
+    date = _col_pick("Date column (optional)", df, f"{prefix}_date_col", allow_none=True)
+    desc = _col_pick("Description column (optional)", df, f"{prefix}_desc_col", allow_none=True)
+    keys = st.multiselect(keys_label, list(df.columns), key=f"{prefix}_keys_col")
+    return amount, date, desc, keys
+
+
+with map_left:
+    left_amount, left_date, left_desc, left_keys = _mapping_controls(
+        "**Left columns**",
+        left_df,
+        "left",
+        "Reference columns (optional, e.g. check / invoice no.)",
+    )
+
+with map_right:
+    right_amount, right_date, right_desc, right_keys = _mapping_controls(
+        "**Right columns**",
+        right_df,
+        "right",
+        "Reference columns (must match left count)",
+    )
+
+# ---------------------------------------------------------------------------
+# Tolerances & options
+# ---------------------------------------------------------------------------
+
+# Tolerance widgets; their values are passed to ReconcileOptions when the
+# user clicks Reconcile further down the page.
+with st.expander("Tolerances & options", expanded=True):
+    tol_a, tol_b, tol_c = st.columns(3)
+    with tol_a:
+        # Non-negative float, defaulting to 0.0 and displayed to four decimals.
+        amount_tolerance = st.number_input(
+            "Amount tolerance",
+            min_value=0.0, value=0.0, step=0.01, format="%.4f",
+            help="Absolute tolerance on amount (e.g. 0.01 to absorb cent rounding).",
+        )
+    with tol_b:
+        # Non-negative whole number of days, defaulting to 0.
+        date_tolerance = st.number_input(
+            "Date tolerance (days)",
+            min_value=0, value=0, step=1,
+            help="Allow N calendar days of drift between posting dates.",
+        )
+    with tol_c:
+        invert_right_sign = st.checkbox(
+            "Invert right amount sign",
+            value=False,
+            help="Use when one side records debits as positive and the other as negative.",
+        )
+    # Sits inside the expander, beneath the three-column row. Range 0-100 in
+    # steps of 5, defaulting to 0.
+    desc_min_score = st.slider(
+        "Description similarity boost (0 disables)",
+        min_value=0, max_value=100, value=0, step=5,
+        help=(
+            "When both sides have a description column set, accept matches with "
+            "this minimum fuzzy similarity even if amount/date are merely within "
+            "tolerance. Lower = more permissive."
+        ),
+    )
+
+# ---------------------------------------------------------------------------
+# Run
+# ---------------------------------------------------------------------------
+
+st.divider()
+
+# The block below runs only on the script run in which the button reports
+# a click.
+if st.button("Reconcile", type="primary", width="stretch"):
+    # Checked up front so the message can show both counts; st.stop() ends
+    # the run before any options are built.
+    if len(left_keys) != len(right_keys):
+        st.error(
+            "Reference columns must match in count: "
+            f"left has {len(left_keys)}, right has {len(right_keys)}."
+        )
+        st.stop()
+    # Widget values are cast explicitly to the plain Python types passed on.
+    options = ReconcileOptions(
+        left_amount=left_amount,
+        right_amount=right_amount,
+        left_date=left_date,
+        right_date=right_date,
+        left_keys=list(left_keys),
+        right_keys=list(right_keys),
+        left_desc=left_desc,
+        right_desc=right_desc,
+        desc_min_score=int(desc_min_score),
+        amount_tolerance=float(amount_tolerance),
+        date_tolerance_days=int(date_tolerance),
+        invert_right_sign=bool(invert_right_sign),
+    )
+    with st.spinner("Reconciling..."):
+        try:
+            result = reconcile(left_df, right_df, options)
+        except ValueError as e:
+            # Show the ValueError message on the page and end this run.
+            st.error(str(e))
+            st.stop()
+    # Keep the result, and the left filename used to name the downloads, in
+    # session state so later script runs can still render them.
+    st.session_state["reconcile_result"] = result
+    st.session_state["reconcile_left_name"] = left_name
+    log_event("tool_run", "Reconcile run", page="11_Reconciler")
+
+# Always render from session state; with nothing stored yet there is
+# nothing to show below, so the run ends here.
+result = st.session_state.get("reconcile_result")
+if result is None:
+    st.stop()
+
+# ---------------------------------------------------------------------------
+# Results
+# ---------------------------------------------------------------------------
+
+st.subheader("Results")
+
+stats = result.stats
+
+# (display label, key in result.stats) for the four output buckets, in the
+# order used by both the metric row and the tab strip.
+_buckets = (
+    ("Matched", "matched"),
+    ("Review", "review"),
+    ("Unmatched left", "unmatched_left"),
+    ("Unmatched right", "unmatched_right"),
+)
+for _slot, (_label, _key) in zip(st.columns(4), _buckets):
+    _slot.metric(_label, stats[_key])
+
+# Coverage caption: matched pairs as a share of the larger input. The
+# `or 1` keeps the division defined when both inputs have zero rows.
+denom = max(stats["left_rows"], stats["right_rows"]) or 1
+pct = stats["matched"] / denom * 100
+st.caption(f"Coverage: {pct:.1f}% of the larger side")
+
+tab_matched, tab_review, tab_left, tab_right = st.tabs(
+    [f"{_label} ({stats[_key]})" for _label, _key in _buckets]
+)
+
+with tab_matched:
+    if result.matched.empty:
+        st.info("No matches.")
+    else:
+        st.dataframe(result.matched, width="stretch", hide_index=True)
+
+with tab_review:
+    if result.review.empty:
+        st.info("Nothing to review — no ambiguous candidates.")
+    else:
+        # The table below is rendered with hide_index=True, so the caption
+        # points at the columns the user can see rather than at row indices.
+        st.caption(
+            "Pairs flagged because the algorithm couldn't pick a single "
+            "best match (e.g. multiple equally-good candidates). Compare "
+            "the left-side and right-side columns of each candidate pair "
+            "to disambiguate manually."
+        )
+        st.dataframe(result.review, width="stretch", hide_index=True)
+
+with tab_left:
+    if result.unmatched_left.empty:
+        st.info("Every left row was matched.")
+    else:
+        st.dataframe(result.unmatched_left, width="stretch", hide_index=True)
+
+with tab_right:
+    if result.unmatched_right.empty:
+        st.info("Every right row was matched.")
+    else:
+        st.dataframe(result.unmatched_right, width="stretch", hide_index=True)
+
+# ---------------------------------------------------------------------------
+# Downloads
+# ---------------------------------------------------------------------------
+
+st.divider()
+stem = Path(st.session_state.get("reconcile_left_name", "reconcile")).stem
+
+# (button label, filename suffix, frame) for each output bucket, in the
+# left-to-right order of the four download columns.
+_downloads = (
+    ("Matched CSV", "matched", result.matched),
+    ("Review CSV", "review", result.review),
+    ("Unmatched left", "unmatched_left", result.unmatched_left),
+    ("Unmatched right", "unmatched_right", result.unmatched_right),
+)
+for _slot, (_label, _suffix, _frame) in zip(st.columns(4), _downloads):
+    with _slot:
+        # An empty bucket still gets a button, but disabled.
+        html_download_button(
+            _label,
+            _frame.to_csv(index=False).encode("utf-8-sig"),
+            file_name=f"{stem}_{_suffix}.csv",
+            mime="text/csv",
+            disabled=_frame.empty,
+        )
--- a/src/gui/tools_registry.py
+++ b/src/gui/tools_registry.py
@@ -157,6 +157,18 @@ TOOLS: list[Tool] = [
        status="Ready",
        section="transformations",
    ),
+    Tool(
+        tool_id="11_reconciler",
+        icon=":material/compare_arrows:",
+        name="Reconcile Two Files",
+        description=(
+            "Match transactions between two sources (e.g. bank feed vs. "
+            "ledger) with amount and date tolerance."
+        ),
+        page_slug="11_Reconciler",
+        status="Ready",
+        section="automations",
+    ),
 ]


--- a/tests/test_reconcile.py
+++ b/tests/test_reconcile.py
@@ -0,0 +1,317 @@
+"""Tests for src.core.reconcile — two-source matching engine."""
+
+import pandas as pd
+import pytest
+
+from src.core.reconcile import (
+    ReconcileOptions,
+    ReconcileResult,
+    reconcile,
+)
+
+
+def _bank(rows):
+    """Build a left-side fixture frame with date / amount / desc columns."""
+    bank_columns = ["date", "amount", "desc"]
+    return pd.DataFrame(data=rows, columns=bank_columns)
+
+
+def _ledger(rows):
+    """Build a right-side fixture frame with posted / amt / memo columns."""
+    ledger_columns = ["posted", "amt", "memo"]
+    return pd.DataFrame(data=rows, columns=ledger_columns)
+
+
+class TestExactMatch:
+    """Rows whose amount (and date, when configured) agree exactly."""
+
+    def test_one_to_one_exact(self):
+        """Identical date/amount rows all pair off, labelled as exact."""
+        bank = _bank([
+            ("2026-01-05", 100.00, "ACME"),
+            ("2026-01-06", 250.00, "WIDGET CO"),
+        ])
+        ledger = _ledger([
+            ("2026-01-05", 100.00, "Acme Inc"),
+            ("2026-01-06", 250.00, "Widget"),
+        ])
+        opts = ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+        )
+
+        outcome = reconcile(bank, ledger, opts)
+
+        counts = outcome.stats
+        assert counts["matched"] == 2
+        assert counts["unmatched_left"] == 0
+        assert counts["unmatched_right"] == 0
+        assert outcome.matched["match_pass"].eq("exact").all()
+
+    def test_unmatched_left_and_right(self):
+        """Rows without a counterpart land in the unmatched frames intact."""
+        bank = _bank([
+            ("2026-01-05", 100.00, "ACME"),
+            ("2026-01-07", 99.99, "ONLY ON LEFT"),
+        ])
+        ledger = _ledger([
+            ("2026-01-05", 100.00, "Acme"),
+            ("2026-01-08", 500.00, "Only on right"),
+        ])
+        opts = ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+        )
+
+        outcome = reconcile(bank, ledger, opts)
+
+        counts = outcome.stats
+        assert counts["matched"] == 1
+        assert counts["unmatched_left"] == 1
+        assert counts["unmatched_right"] == 1
+        # Original column names (desc / memo) survive on the unmatched rows.
+        leftover_descs = outcome.unmatched_left["desc"].tolist()
+        leftover_memos = outcome.unmatched_right["memo"].tolist()
+        assert "ONLY ON LEFT" in leftover_descs
+        assert "Only on right" in leftover_memos
+
+    def test_amount_only_no_date(self):
+        """With no date columns configured, distinct amounts pair one-to-one."""
+        bank = _bank([
+            ("2026-01-01", 42.50, "A"),
+            ("2026-02-15", 99.00, "B"),
+        ])
+        # Dates deliberately far from the left side: they must be ignored.
+        ledger = _ledger([
+            ("2099-12-31", 42.50, "X"),
+            ("1970-01-01", 99.00, "Y"),
+        ])
+        opts = ReconcileOptions(left_amount="amount", right_amount="amt")
+
+        outcome = reconcile(bank, ledger, opts)
+
+        assert outcome.stats["matched"] == 2
+
+    def test_identical_amounts_with_no_date_are_ambiguous(self):
+        """Equal amounts on both sides and no date to split them go to review."""
+        bank = _bank([
+            ("2026-01-01", 42.50, "A"),
+            ("2026-02-15", 42.50, "B"),
+        ])
+        ledger = _ledger([
+            ("2099-12-31", 42.50, "X"),
+            ("1970-01-01", 42.50, "Y"),
+        ])
+        opts = ReconcileOptions(left_amount="amount", right_amount="amt")
+
+        outcome = reconcile(bank, ledger, opts)
+
+        # Nothing may be auto-accepted; at least two candidate pairs flagged.
+        assert outcome.stats["matched"] == 0
+        assert outcome.stats["review"] >= 2
+
+
+class TestAmountTolerance:
+    """amount_tolerance decides whether near-equal amounts may pair."""
+
+    @staticmethod
+    def _reconcile_against(right_amount):
+        """Reconcile a single 100.00 left row with one right row, tolerance 0.05."""
+        bank = _bank([("2026-01-05", 100.00, "A")])
+        ledger = _ledger([("2026-01-05", right_amount, "X")])
+        opts = ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+            amount_tolerance=0.05,
+        )
+        return reconcile(bank, ledger, opts)
+
+    def test_amount_within_tolerance(self):
+        """A 0.02 gap is not an exact hit but is accepted as a tolerance match."""
+        outcome = self._reconcile_against(100.02)
+
+        assert outcome.stats["matched"] == 1
+        first = outcome.matched.iloc[0]
+        assert first["match_pass"] == "tolerance"
+        # amount_diff is expected to be -0.02 here (100.00 vs 100.02).
+        assert abs(first["amount_diff"] + 0.02) < 1e-9
+
+    def test_outside_tolerance_unmatched(self):
+        """A 0.50 gap exceeds the tolerance, leaving both rows unmatched."""
+        outcome = self._reconcile_against(100.50)
+
+        counts = outcome.stats
+        assert counts["matched"] == 0
+        assert counts["unmatched_left"] == 1
+        assert counts["unmatched_right"] == 1
+
+
+class TestDateWindow:
+    """date_tolerance_days bounds how far apart the two dates may be."""
+
+    @staticmethod
+    def _reconcile_with(right_date, window_days):
+        """Reconcile one 2026-01-05 left row with a right row dated *right_date*."""
+        bank = _bank([("2026-01-05", 100.00, "A")])
+        ledger = _ledger([(right_date, 100.00, "X")])
+        opts = ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+            date_tolerance_days=window_days,
+        )
+        return reconcile(bank, ledger, opts)
+
+    def test_date_within_window(self):
+        """A right row two days later matches inside a three-day window."""
+        outcome = self._reconcile_with("2026-01-07", 3)
+
+        assert outcome.stats["matched"] == 1
+        assert outcome.matched.iloc[0]["date_diff_days"] == -2
+
+    def test_date_outside_window(self):
+        """A right row fifteen days later is rejected by a five-day window."""
+        outcome = self._reconcile_with("2026-01-20", 5)
+
+        assert outcome.stats["matched"] == 0
+
+
+class TestSignInversion:
+    """Opposite-sign amounts only match when invert_right_sign is set."""
+
+    def test_invert_right_sign(self):
+        """+100 on the left and -100 on the right pair only with inversion on."""
+        bank = _bank([("2026-01-05", 100.00, "A")])
+        ledger = _ledger([("2026-01-05", -100.00, "X")])
+        columns = dict(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+        )
+
+        # Default options: the signs disagree, so nothing matches.
+        as_recorded = reconcile(bank, ledger, ReconcileOptions(**columns))
+        assert as_recorded.stats["matched"] == 0
+
+        # Same data with the right-hand sign inverted: one match.
+        inverted = reconcile(
+            bank, ledger, ReconcileOptions(**columns, invert_right_sign=True),
+        )
+        assert inverted.stats["matched"] == 1
+
+
+class TestAmbiguity:
+    """Ambiguous candidates go to review; unambiguous ones still pair."""
+
+    def test_two_equal_candidates_go_to_review(self):
+        """One left row facing two identical right rows is not auto-matched."""
+        left = _bank([("2026-01-05", 100.00, "A")])
+        right = _ledger([
+            ("2026-01-05", 100.00, "X"),
+            ("2026-01-05", 100.00, "Y"),
+        ])
+        result = reconcile(left, right, ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+        ))
+        assert result.stats["matched"] == 0
+        assert result.stats["review"] == 2  # both candidate pairs flagged
+        # The left row and both right rows are consumed by the review
+        # bucket, so none of them is also reported as unmatched.
+        assert result.stats["unmatched_left"] == 0
+        assert result.stats["unmatched_right"] == 0
+
+    def test_uniquely_better_match_wins(self):
+        """Rows within tolerance of each other still pair with their exact twin.
+
+        All four cross-pairings fall inside the 0.10 tolerance, but each
+        left amount has exactly one equal right amount, so the accepted
+        pairs must be A-X and B-Y, not merely "two matches".
+        """
+        left = _bank([
+            ("2026-01-05", 100.00, "A"),
+            ("2026-01-05", 100.05, "B"),
+        ])
+        right = _ledger([
+            ("2026-01-05", 100.00, "X"),  # equal to A, 0.05 away from B
+            ("2026-01-05", 100.05, "Y"),  # equal to B, 0.05 away from A
+        ])
+        result = reconcile(left, right, ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+            amount_tolerance=0.10,
+        ))
+        assert result.stats["matched"] == 2
+        # The count alone would also pass for the crossed pairing
+        # (A-Y, B-X), so pin the actual pairs.
+        paired = set(zip(result.matched["left_desc"], result.matched["right_memo"]))
+        assert paired == {("A", "X"), ("B", "Y")}
+
+
+class TestKeyMatch:
+    """Reference-number (key) matching between check_no and ref columns."""
+
+    def test_reference_number_authoritative(self):
+        """Equal reference and amount pair up even when the dates are a week apart."""
+        bank = pd.DataFrame({
+            "date": ["2026-01-05"],
+            "amount": [100.00],
+            "check_no": ["1042"],
+        })
+        ledger = pd.DataFrame({
+            "posted": ["2026-01-12"],
+            "amt": [100.00],
+            "ref": ["1042"],
+        })
+        # Zero-day date tolerance: the dates alone would not line up.
+        opts = ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+            left_keys=["check_no"], right_keys=["ref"],
+            date_tolerance_days=0,
+        )
+
+        outcome = reconcile(bank, ledger, opts)
+
+        assert outcome.stats["matched"] == 1
+        assert outcome.matched.iloc[0]["match_pass"] == "key"
+
+    def test_key_requires_amount_to_tie(self):
+        """An equal reference with different amounts is not accepted as a match."""
+        bank = pd.DataFrame({
+            "date": ["2026-01-05"],
+            "amount": [100.00],
+            "check_no": ["1042"],
+        })
+        ledger = pd.DataFrame({
+            "posted": ["2026-01-05"],
+            "amt": [200.00],
+            "ref": ["1042"],
+        })
+        opts = ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+            left_keys=["check_no"], right_keys=["ref"],
+        )
+
+        outcome = reconcile(bank, ledger, opts)
+
+        assert outcome.stats["matched"] == 0
+
+
+class TestInputValidation:
+    """Invalid option combinations are rejected with a ValueError."""
+
+    @staticmethod
+    def _assert_rejected(message_pattern, **option_kwargs):
+        """Expect ValueError matching *message_pattern* for the given options.
+
+        The options object is built inside the ``raises`` block, so a
+        ValueError from either construction or ``reconcile`` counts.
+        """
+        bank = _bank([("2026-01-05", 100.00, "A")])
+        ledger = _ledger([("2026-01-05", 100.00, "X")])
+        with pytest.raises(ValueError, match=message_pattern):
+            reconcile(bank, ledger, ReconcileOptions(**option_kwargs))
+
+    def test_missing_amount_columns(self):
+        """Omitting left_amount is reported by name."""
+        self._assert_rejected("left_amount", right_amount="amt")
+
+    def test_left_date_without_right_date(self):
+        """A date column on only one side is rejected."""
+        self._assert_rejected(
+            "both be set or both be None",
+            left_amount="amount", right_amount="amt",
+            left_date="date",
+        )
+
+    def test_mismatched_key_lengths(self):
+        """Key lists of different lengths are rejected."""
+        self._assert_rejected(
+            "same length",
+            left_amount="amount", right_amount="amt",
+            left_keys=["a", "b"], right_keys=["x"],
+        )
+
+    def test_negative_tolerance_rejected(self):
+        """A negative amount_tolerance is reported by name."""
+        self._assert_rejected(
+            "amount_tolerance",
+            left_amount="amount", right_amount="amt",
+            amount_tolerance=-0.01,
+        )
+
+
+class TestUnparseableInputs:
+    """Rows whose amount cannot be parsed must stay visible in the output."""
+
+    def test_non_numeric_amount_falls_through(self):
+        """A garbage amount ends up in unmatched_left; the valid row still matches."""
+        bank = pd.DataFrame({
+            "date": ["2026-01-05", "2026-01-05"],
+            "amount": ["not a number", 100.00],
+            "desc": ["BAD", "OK"],
+        })
+        ledger = _ledger([("2026-01-05", 100.00, "X")])
+        opts = ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+        )
+
+        outcome = reconcile(bank, ledger, opts)
+
+        assert outcome.stats["matched"] == 1
+        leftover_descs = outcome.unmatched_left["desc"].tolist()
+        assert "BAD" in leftover_descs
+
+
+class TestResultShape:
+    """Column layout of the matched frame and behaviour on empty inputs."""
+
+    @staticmethod
+    def _dated_options():
+        """Options mapping the amount and date columns of the two fixtures."""
+        return ReconcileOptions(
+            left_amount="amount", right_amount="amt",
+            left_date="date", right_date="posted",
+        )
+
+    def test_matched_carries_both_sides(self):
+        """A matched row exposes both sides' columns under left_/right_ prefixes."""
+        bank = _bank([("2026-01-05", 100.00, "ACME")])
+        ledger = _ledger([("2026-01-05", 100.00, "Acme Inc")])
+
+        outcome = reconcile(bank, ledger, self._dated_options())
+
+        first = outcome.matched.iloc[0]
+        assert first["left_desc"] == "ACME"
+        assert first["right_memo"] == "Acme Inc"
+        assert first["left_amount"] == 100.00
+        assert first["right_amt"] == 100.00
+
+    def test_empty_inputs_return_empty_result(self):
+        """Two empty frames give zero matches and empty output frames."""
+        bank = _bank([])
+        ledger = _ledger([])
+
+        outcome = reconcile(bank, ledger, self._dated_options())
+
+        assert outcome.stats["matched"] == 0
+        assert outcome.matched.empty
+        assert outcome.unmatched_left.empty
+        assert outcome.unmatched_right.empty