From d090f8cb5eaa6db718d6e2e5981f0f0a6f287eee Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 20 May 2026 22:39:47 +0000 Subject: [PATCH] feat(reconcile): auto-detect role columns, preview result tabs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match-settings selectors now reorder per side to match the file's column order, using name heuristics (amount / date / desc) so a typical bank CSV reads Date → Description → Amount → Reference without manual fiddling. Detected columns also pre-fill as the default selection. Result tabs render at most 25 rows with a "preview of N of M" caption; full data is still available via the existing download buttons. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/gui/pages/11_Reconciler.py | 191 ++++++++++++++++++++++++++------- 1 file changed, 152 insertions(+), 39 deletions(-) diff --git a/src/gui/pages/11_Reconciler.py b/src/gui/pages/11_Reconciler.py index f9e8054..25aa534 100644 --- a/src/gui/pages/11_Reconciler.py +++ b/src/gui/pages/11_Reconciler.py @@ -122,34 +122,119 @@ st.subheader("Match settings") map_left, map_right = st.columns(2) -def _col_pick(label: str, df: pd.DataFrame, key: str, *, allow_none: bool): - """Selectbox for picking a column. Optional 'None' slot for date/desc.""" +# Name-pattern heuristics for auto-detecting which file column plays +# which semantic role. First-match-wins per role, scanning the file's +# columns in their on-disk order so a Date selector lands where the +# user expects to see it. +_AMOUNT_PATTERNS = ("amount", "amt", "value", "total", "debit", "credit", "sum") +_DATE_PATTERNS = ("date", "posted", "posting", "transaction date", "txn date", "when") +_DESC_PATTERNS = ("description", "desc", "memo", "narrative", "payee", "vendor", "name", "details") + + +def _detect_role(columns: list[str], patterns: tuple[str, ...]) -> str | None: + """Return the first column whose name contains any of *patterns*.""" + lowered = [(c, str(c).lower()) for c in columns] + for pat in patterns: + for original, low in lowered: + if pat in low: + return original + return None + + +def _col_pick( + label: str, + df: pd.DataFrame, + key: str, + *, + allow_none: bool, + default: str | None = None, +): + """Selectbox for picking a column. Pre-selects *default* if present.""" cols = list(df.columns) - if allow_none: - cols = ["(none)"] + cols - pick = st.selectbox(label, cols, key=key) + options = ["(none)"] + cols if allow_none else cols + if default is not None and default in cols: + index = options.index(default) + else: + index = 0 + pick = st.selectbox(label, options, key=key, index=index) return None if pick == "(none)" else pick +def _render_side_pickers( + side_label: str, + df: pd.DataFrame, + key_prefix: str, + *, + keys_help: str, +): + """Render the four column selectors in the order their detected + columns appear in the file. Roles that go undetected fall to the + bottom in a stable default order so the layout is predictable + when the file uses unusual column names.""" + st.markdown(f"**{side_label}**") + cols = list(df.columns) + detected = { + "amount": _detect_role(cols, _AMOUNT_PATTERNS), + "date": _detect_role(cols, _DATE_PATTERNS), + "desc": _detect_role(cols, _DESC_PATTERNS), + } + # Position within the file's column order. Undetected roles get a + # large sentinel so they sort to the end without disturbing the + # detected ones' relative order. + def _position(role: str) -> int: + col = detected.get(role) + return cols.index(col) if col in cols else 10**9 + + # Stable default among undetected: amount → date → desc (the order + # a brand-new user is most likely to think about). The multiselect + # for reference keys always renders last because it's the least + # frequently used. + role_order = sorted(["amount", "date", "desc"], key=lambda r: (_position(r), ["amount","date","desc"].index(r))) + + picks: dict[str, str | None] = {} + for role in role_order: + if role == "amount": + picks["amount"] = _col_pick( + "Amount column", df, f"{key_prefix}_amount_col", + allow_none=False, default=detected["amount"], + ) + elif role == "date": + picks["date"] = _col_pick( + "Date column (optional)", df, f"{key_prefix}_date_col", + allow_none=True, default=detected["date"], + ) + elif role == "desc": + picks["desc"] = _col_pick( + "Description column (optional)", df, f"{key_prefix}_desc_col", + allow_none=True, default=detected["desc"], + ) + + picks["keys"] = st.multiselect( + f"Reference columns ({keys_help})", + cols, key=f"{key_prefix}_keys_col", + ) + return picks + + with map_left: - st.markdown("**Left columns**") - left_amount = _col_pick("Amount column", left_df, "left_amount_col", allow_none=False) - left_date = _col_pick("Date column (optional)", left_df, "left_date_col", allow_none=True) - left_desc = _col_pick("Description column (optional)", left_df, "left_desc_col", allow_none=True) - left_keys = st.multiselect( - "Reference columns (optional, e.g. check / invoice no.)", - list(left_df.columns), key="left_keys_col", + left_picks = _render_side_pickers( + "Left columns", left_df, "left", + keys_help="optional, e.g. check / invoice no.", + ) +with map_right: + right_picks = _render_side_pickers( + "Right columns", right_df, "right", + keys_help="must match left count", ) -with map_right: - st.markdown("**Right columns**") - right_amount = _col_pick("Amount column", right_df, "right_amount_col", allow_none=False) - right_date = _col_pick("Date column (optional)", right_df, "right_date_col", allow_none=True) - right_desc = _col_pick("Description column (optional)", right_df, "right_desc_col", allow_none=True) - right_keys = st.multiselect( - "Reference columns (must match left count)", - list(right_df.columns), key="right_keys_col", - ) +left_amount = left_picks["amount"] +left_date = left_picks["date"] +left_desc = left_picks["desc"] +left_keys = left_picks["keys"] +right_amount = right_picks["amount"] +right_date = right_picks["date"] +right_desc = right_picks["desc"] +right_keys = right_picks["keys"] # --------------------------------------------------------------------------- # Tolerances & options @@ -253,34 +338,62 @@ tab_matched, tab_review, tab_left, tab_right = st.tabs( ] ) -with tab_matched: - if result.matched.empty: - st.info("No matches.") +# Default preview cap. The full data is still available via the download +# buttons below — capping the on-screen render keeps a 50k-row recon +# from freezing the Streamlit page on first paint. +_PREVIEW_ROWS = 25 + + +def _render_preview( + df: pd.DataFrame, + *, + empty_msg: str, + caption: str | None = None, +): + """Show a row-limited preview of *df* with a header caption.""" + if df.empty: + st.info(empty_msg) + return + if caption: + st.caption(caption) + if len(df) > _PREVIEW_ROWS: + st.caption( + f"Preview of first {_PREVIEW_ROWS} of {len(df)} rows — " + f"download the CSV below for the full set." + ) else: - st.dataframe(result.matched, width="stretch", hide_index=True) + st.caption(f"All {len(df)} rows shown.") + st.dataframe(df.head(_PREVIEW_ROWS), width="stretch", hide_index=True) + + +with tab_matched: + _render_preview( + result.matched, + empty_msg="No matches.", + ) with tab_review: - if result.review.empty: - st.info("Nothing to review — no ambiguous candidates.") - else: - st.caption( + _render_preview( + result.review, + empty_msg="Nothing to review — no ambiguous candidates.", + caption=( "Pairs flagged because the algorithm couldn't pick a single " "best match (e.g. multiple equally-good candidates). Use the " "left/right indices to disambiguate manually." - ) - st.dataframe(result.review, width="stretch", hide_index=True) + ), + ) with tab_left: - if result.unmatched_left.empty: - st.info("Every left row was matched.") - else: - st.dataframe(result.unmatched_left, width="stretch", hide_index=True) + _render_preview( + result.unmatched_left, + empty_msg="Every left row was matched.", + ) with tab_right: - if result.unmatched_right.empty: - st.info("Every right row was matched.") - else: - st.dataframe(result.unmatched_right, width="stretch", hide_index=True) + _render_preview( + result.unmatched_right, + empty_msg="Every right row was matched.", + ) # --------------------------------------------------------------------------- # Downloads