feat(reconcile): auto-detect role columns, preview result tabs

Match-settings selectors now reorder per side to match the file's column order, using name heuristics (amount / date / desc) so a typical bank CSV reads Date → Description → Amount → Reference without manual fiddling. Detected columns also pre-fill as the default selection. Result tabs render at most 25 rows with a "preview of N of M" caption; full data is still available via the existing download buttons.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 22:39:47 +00:00
parent e44af3a45e
commit d090f8cb5e
1 changed files with 152 additions and 39 deletions
--- a/src/gui/pages/11_Reconciler.py
+++ b/src/gui/pages/11_Reconciler.py
@@ -122,34 +122,119 @@ st.subheader("Match settings")
 map_left, map_right = st.columns(2)


-def _col_pick(label: str, df: pd.DataFrame, key: str, *, allow_none: bool):
-    """Selectbox for picking a column. Optional 'None' slot for date/desc."""
+# Name-pattern heuristics for auto-detecting which file column plays
+# which semantic role. Patterns are tried in listed order; for each one
+# the columns are scanned in on-disk order and the first lowercased name
+# containing it wins, so an earlier pattern outranks an earlier column.
+_AMOUNT_PATTERNS = ("amount", "amt", "value", "total", "debit", "credit", "sum")
+_DATE_PATTERNS = ("date", "posted", "posting", "transaction date", "txn date", "when")
+_DESC_PATTERNS = ("description", "desc", "memo", "narrative", "payee", "vendor", "name", "details")
+
+
+def _detect_role(columns: list[str], patterns: tuple[str, ...]) -> str | None:
+    """Return the column matched by the earliest of *patterns* (file order breaks ties), else None."""
+    lowered = [(c, str(c).lower()) for c in columns]
+    for pat in patterns:
+        for original, low in lowered:
+            if pat in low:
+                return original
+    return None
+
+
+def _col_pick(
+    label: str,
+    df: pd.DataFrame,
+    key: str,
+    *,
+    allow_none: bool,
+    default: str | None = None,
+):
+    """Selectbox for picking a column. Pre-selects *default* if present."""
    cols = list(df.columns)
-    if allow_none:
-        cols = ["(none)"] + cols
-    pick = st.selectbox(label, cols, key=key)
+    options = ["(none)"] + cols if allow_none else cols
+    if default is not None and default in cols:
+        index = options.index(default)
+    else:
+        index = 0
+    pick = st.selectbox(label, options, key=key, index=index)
    return None if pick == "(none)" else pick


+def _render_side_pickers(
+    side_label: str,
+    df: pd.DataFrame,
+    key_prefix: str,
+    *,
+    keys_help: str,
+):
+    """Render the amount/date/description selectors in the order their
+    detected columns appear in the file, then the reference-keys
+    multiselect. Undetected roles fall to the bottom in a stable default
+    order so the layout stays predictable with unusual column names."""
+    st.markdown(f"**{side_label}**")
+    cols = list(df.columns)
+    detected = {
+        "amount": _detect_role(cols, _AMOUNT_PATTERNS),
+        "date":   _detect_role(cols, _DATE_PATTERNS),
+        "desc":   _detect_role(cols, _DESC_PATTERNS),
+    }
+    # Position within the file's column order. Undetected roles get a
+    # large sentinel so they sort to the end without disturbing the
+    # detected ones' relative order.
+    def _position(role: str) -> int:
+        col = detected.get(role)
+        return cols.index(col) if col in cols else 10**9
+
+    # Stable default among undetected: amount → date → desc (the order
+    # a brand-new user is most likely to think about). The multiselect
+    # for reference keys always renders last because it's the least
+    # frequently used.
+    role_order = sorted(["amount", "date", "desc"], key=lambda r: (_position(r), ["amount","date","desc"].index(r)))
+
+    picks: dict[str, str | None] = {}
+    for role in role_order:
+        if role == "amount":
+            picks["amount"] = _col_pick(
+                "Amount column", df, f"{key_prefix}_amount_col",
+                allow_none=False, default=detected["amount"],
+            )
+        elif role == "date":
+            picks["date"] = _col_pick(
+                "Date column (optional)", df, f"{key_prefix}_date_col",
+                allow_none=True, default=detected["date"],
+            )
+        elif role == "desc":
+            picks["desc"] = _col_pick(
+                "Description column (optional)", df, f"{key_prefix}_desc_col",
+                allow_none=True, default=detected["desc"],
+            )
+
+    picks["keys"] = st.multiselect(
+        f"Reference columns ({keys_help})",
+        cols, key=f"{key_prefix}_keys_col",
+    )
+    return picks
+
+
 with map_left:
-    st.markdown("**Left columns**")
-    left_amount = _col_pick("Amount column", left_df, "left_amount_col", allow_none=False)
-    left_date = _col_pick("Date column (optional)", left_df, "left_date_col", allow_none=True)
-    left_desc = _col_pick("Description column (optional)", left_df, "left_desc_col", allow_none=True)
-    left_keys = st.multiselect(
-        "Reference columns (optional, e.g. check / invoice no.)",
-        list(left_df.columns), key="left_keys_col",
+    left_picks = _render_side_pickers(
+        "Left columns", left_df, "left",
+        keys_help="optional, e.g. check / invoice no.",
+    )
+with map_right:
+    right_picks = _render_side_pickers(
+        "Right columns", right_df, "right",
+        keys_help="must match left count",
    )

-with map_right:
-    st.markdown("**Right columns**")
-    right_amount = _col_pick("Amount column", right_df, "right_amount_col", allow_none=False)
-    right_date = _col_pick("Date column (optional)", right_df, "right_date_col", allow_none=True)
-    right_desc = _col_pick("Description column (optional)", right_df, "right_desc_col", allow_none=True)
-    right_keys = st.multiselect(
-        "Reference columns (must match left count)",
-        list(right_df.columns), key="right_keys_col",
-    )
+left_amount = left_picks["amount"]
+left_date = left_picks["date"]
+left_desc = left_picks["desc"]
+left_keys = left_picks["keys"]
+right_amount = right_picks["amount"]
+right_date = right_picks["date"]
+right_desc = right_picks["desc"]
+right_keys = right_picks["keys"]

 # ---------------------------------------------------------------------------
 # Tolerances & options
@@ -253,34 +338,62 @@ tab_matched, tab_review, tab_left, tab_right = st.tabs(
    ]
 )

-with tab_matched:
-    if result.matched.empty:
-        st.info("No matches.")
+# Default preview cap. The full data is still available via the download
+# buttons below — capping the on-screen render keeps a 50k-row recon
+# from freezing the Streamlit page on first paint.
+_PREVIEW_ROWS = 25
+
+
+def _render_preview(
+    df: pd.DataFrame,
+    *,
+    empty_msg: str,
+    caption: str | None = None,
+):
+    """Show a row-limited preview of *df* with a header caption."""
+    if df.empty:
+        st.info(empty_msg)
+        return
+    if caption:
+        st.caption(caption)
+    if len(df) > _PREVIEW_ROWS:
+        st.caption(
+            f"Preview of first {_PREVIEW_ROWS} of {len(df)} rows — "
+            f"download the CSV below for the full set."
+        )
    else:
-        st.dataframe(result.matched, width="stretch", hide_index=True)
+        st.caption(f"All {len(df)} rows shown.")
+    st.dataframe(df.head(_PREVIEW_ROWS), width="stretch", hide_index=True)
+
+
+with tab_matched:
+    _render_preview(
+        result.matched,
+        empty_msg="No matches.",
+    )

 with tab_review:
-    if result.review.empty:
-        st.info("Nothing to review — no ambiguous candidates.")
-    else:
-        st.caption(
+    _render_preview(
+        result.review,
+        empty_msg="Nothing to review — no ambiguous candidates.",
+        caption=(
            "Pairs flagged because the algorithm couldn't pick a single "
            "best match (e.g. multiple equally-good candidates). Use the "
            "left/right indices to disambiguate manually."
-        )
-        st.dataframe(result.review, width="stretch", hide_index=True)
+        ),
+    )

 with tab_left:
-    if result.unmatched_left.empty:
-        st.info("Every left row was matched.")
-    else:
-        st.dataframe(result.unmatched_left, width="stretch", hide_index=True)
+    _render_preview(
+        result.unmatched_left,
+        empty_msg="Every left row was matched.",
+    )

 with tab_right:
-    if result.unmatched_right.empty:
-        st.info("Every right row was matched.")
-    else:
-        st.dataframe(result.unmatched_right, width="stretch", hide_index=True)
+    _render_preview(
+        result.unmatched_right,
+        empty_msg="Every right row was matched.",
+    )

 # ---------------------------------------------------------------------------
 # Downloads