feat(reconcile): auto-detect role columns, preview result tabs
Match-settings selectors now reorder per side to match the file's column order, using name heuristics (amount / date / desc) so a typical bank CSV reads Date → Description → Amount → Reference without manual fiddling. Detected columns also pre-fill as the default selection. Result tabs render at most 25 rows with a "preview of N of M" caption; full data is still available via the existing download buttons. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -122,34 +122,119 @@ st.subheader("Match settings")
|
|||||||
map_left, map_right = st.columns(2)
|
map_left, map_right = st.columns(2)
|
||||||
|
|
||||||
|
|
||||||
def _col_pick(label: str, df: pd.DataFrame, key: str, *, allow_none: bool):
|
# Name-pattern heuristics for auto-detecting which file column plays
|
||||||
"""Selectbox for picking a column. Optional 'None' slot for date/desc."""
|
# which semantic role. Patterns are tried in priority order; for each
|
||||||
|
# pattern the columns are scanned in file order and the first name that
|
||||||
|
# contains it wins. Selectors are later ordered by the detected column's position.
|
||||||
|
_AMOUNT_PATTERNS = ("amount", "amt", "value", "total", "debit", "credit", "sum")
|
||||||
|
_DATE_PATTERNS = ("date", "posted", "posting", "transaction date", "txn date", "when")
|
||||||
|
_DESC_PATTERNS = ("description", "desc", "memo", "narrative", "payee", "vendor", "name", "details")
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_role(columns: list[str], patterns: tuple[str, ...]) -> str | None:
|
||||||
|
"""Return the column matching the earliest entry of *patterns* (case-insensitive substring; file order breaks ties within a pattern), or None if nothing matches."""
|
||||||
|
lowered = [(c, str(c).lower()) for c in columns]
|
||||||
|
for pat in patterns:
|
||||||
|
for original, low in lowered:
|
||||||
|
if pat in low:
|
||||||
|
return original
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _col_pick(
|
||||||
|
label: str,
|
||||||
|
df: pd.DataFrame,
|
||||||
|
key: str,
|
||||||
|
*,
|
||||||
|
allow_none: bool,
|
||||||
|
default: str | None = None,
|
||||||
|
):
|
||||||
|
"""Selectbox for picking a column. Pre-selects *default* if present."""
|
||||||
cols = list(df.columns)
|
cols = list(df.columns)
|
||||||
if allow_none:
|
options = ["(none)"] + cols if allow_none else cols
|
||||||
cols = ["(none)"] + cols
|
if default is not None and default in cols:
|
||||||
pick = st.selectbox(label, cols, key=key)
|
index = options.index(default)
|
||||||
|
else:
|
||||||
|
index = 0
|
||||||
|
pick = st.selectbox(label, options, key=key, index=index)
|
||||||
return None if pick == "(none)" else pick
|
return None if pick == "(none)" else pick
|
||||||
|
|
||||||
|
|
||||||
|
def _render_side_pickers(
|
||||||
|
side_label: str,
|
||||||
|
df: pd.DataFrame,
|
||||||
|
key_prefix: str,
|
||||||
|
*,
|
||||||
|
keys_help: str,
|
||||||
|
):
|
||||||
|
"""Render the four column selectors in the order their detected
|
||||||
|
columns appear in the file. Roles that go undetected fall to the
|
||||||
|
bottom in a stable default order so the layout is predictable
|
||||||
|
when the file uses unusual column names."""
|
||||||
|
st.markdown(f"**{side_label}**")
|
||||||
|
cols = list(df.columns)
|
||||||
|
detected = {
|
||||||
|
"amount": _detect_role(cols, _AMOUNT_PATTERNS),
|
||||||
|
"date": _detect_role(cols, _DATE_PATTERNS),
|
||||||
|
"desc": _detect_role(cols, _DESC_PATTERNS),
|
||||||
|
}
|
||||||
|
# Position within the file's column order. Undetected roles get a
|
||||||
|
# large sentinel so they sort to the end without disturbing the
|
||||||
|
# detected ones' relative order.
|
||||||
|
def _position(role: str) -> int:
|
||||||
|
col = detected.get(role)
|
||||||
|
return cols.index(col) if col in cols else 10**9
|
||||||
|
|
||||||
|
# Stable default among undetected roles (and roles tied on the same column): amount → date → desc (the order
|
||||||
|
# a brand-new user is most likely to think about). The multiselect
|
||||||
|
# for reference keys always renders last because it's the least
|
||||||
|
# frequently used.
|
||||||
|
role_order = sorted(["amount", "date", "desc"], key=lambda r: (_position(r), ["amount","date","desc"].index(r)))
|
||||||
|
|
||||||
|
picks: dict[str, str | None] = {}
|
||||||
|
for role in role_order:
|
||||||
|
if role == "amount":
|
||||||
|
picks["amount"] = _col_pick(
|
||||||
|
"Amount column", df, f"{key_prefix}_amount_col",
|
||||||
|
allow_none=False, default=detected["amount"],
|
||||||
|
)
|
||||||
|
elif role == "date":
|
||||||
|
picks["date"] = _col_pick(
|
||||||
|
"Date column (optional)", df, f"{key_prefix}_date_col",
|
||||||
|
allow_none=True, default=detected["date"],
|
||||||
|
)
|
||||||
|
elif role == "desc":
|
||||||
|
picks["desc"] = _col_pick(
|
||||||
|
"Description column (optional)", df, f"{key_prefix}_desc_col",
|
||||||
|
allow_none=True, default=detected["desc"],
|
||||||
|
)
|
||||||
|
|
||||||
|
picks["keys"] = st.multiselect(
|
||||||
|
f"Reference columns ({keys_help})",
|
||||||
|
cols, key=f"{key_prefix}_keys_col",
|
||||||
|
)
|
||||||
|
return picks
|
||||||
|
|
||||||
|
|
||||||
with map_left:
|
with map_left:
|
||||||
st.markdown("**Left columns**")
|
left_picks = _render_side_pickers(
|
||||||
left_amount = _col_pick("Amount column", left_df, "left_amount_col", allow_none=False)
|
"Left columns", left_df, "left",
|
||||||
left_date = _col_pick("Date column (optional)", left_df, "left_date_col", allow_none=True)
|
keys_help="optional, e.g. check / invoice no.",
|
||||||
left_desc = _col_pick("Description column (optional)", left_df, "left_desc_col", allow_none=True)
|
)
|
||||||
left_keys = st.multiselect(
|
with map_right:
|
||||||
"Reference columns (optional, e.g. check / invoice no.)",
|
right_picks = _render_side_pickers(
|
||||||
list(left_df.columns), key="left_keys_col",
|
"Right columns", right_df, "right",
|
||||||
|
keys_help="must match left count",
|
||||||
)
|
)
|
||||||
|
|
||||||
with map_right:
|
left_amount = left_picks["amount"]
|
||||||
st.markdown("**Right columns**")
|
left_date = left_picks["date"]
|
||||||
right_amount = _col_pick("Amount column", right_df, "right_amount_col", allow_none=False)
|
left_desc = left_picks["desc"]
|
||||||
right_date = _col_pick("Date column (optional)", right_df, "right_date_col", allow_none=True)
|
left_keys = left_picks["keys"]
|
||||||
right_desc = _col_pick("Description column (optional)", right_df, "right_desc_col", allow_none=True)
|
right_amount = right_picks["amount"]
|
||||||
right_keys = st.multiselect(
|
right_date = right_picks["date"]
|
||||||
"Reference columns (must match left count)",
|
right_desc = right_picks["desc"]
|
||||||
list(right_df.columns), key="right_keys_col",
|
right_keys = right_picks["keys"]
|
||||||
)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Tolerances & options
|
# Tolerances & options
|
||||||
@@ -253,34 +338,62 @@ tab_matched, tab_review, tab_left, tab_right = st.tabs(
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
with tab_matched:
|
# Default preview cap. The full data is still available via the download
|
||||||
if result.matched.empty:
|
# buttons below — capping the on-screen render keeps a 50k-row recon
|
||||||
st.info("No matches.")
|
# from freezing the Streamlit page on first paint.
|
||||||
|
_PREVIEW_ROWS = 25
|
||||||
|
|
||||||
|
|
||||||
|
def _render_preview(
|
||||||
|
df: pd.DataFrame,
|
||||||
|
*,
|
||||||
|
empty_msg: str,
|
||||||
|
caption: str | None = None,
|
||||||
|
):
|
||||||
|
"""Show a row-limited preview of *df* with a header caption."""
|
||||||
|
if df.empty:
|
||||||
|
st.info(empty_msg)
|
||||||
|
return
|
||||||
|
if caption:
|
||||||
|
st.caption(caption)
|
||||||
|
if len(df) > _PREVIEW_ROWS:
|
||||||
|
st.caption(
|
||||||
|
f"Preview of first {_PREVIEW_ROWS} of {len(df)} rows — "
|
||||||
|
f"download the CSV below for the full set."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
st.dataframe(result.matched, width="stretch", hide_index=True)
|
st.caption(f"All {len(df)} rows shown.")
|
||||||
|
st.dataframe(df.head(_PREVIEW_ROWS), width="stretch", hide_index=True)
|
||||||
|
|
||||||
|
|
||||||
|
with tab_matched:
|
||||||
|
_render_preview(
|
||||||
|
result.matched,
|
||||||
|
empty_msg="No matches.",
|
||||||
|
)
|
||||||
|
|
||||||
with tab_review:
|
with tab_review:
|
||||||
if result.review.empty:
|
_render_preview(
|
||||||
st.info("Nothing to review — no ambiguous candidates.")
|
result.review,
|
||||||
else:
|
empty_msg="Nothing to review — no ambiguous candidates.",
|
||||||
st.caption(
|
caption=(
|
||||||
"Pairs flagged because the algorithm couldn't pick a single "
|
"Pairs flagged because the algorithm couldn't pick a single "
|
||||||
"best match (e.g. multiple equally-good candidates). Use the "
|
"best match (e.g. multiple equally-good candidates). Use the "
|
||||||
"left/right indices to disambiguate manually."
|
"left/right indices to disambiguate manually."
|
||||||
)
|
),
|
||||||
st.dataframe(result.review, width="stretch", hide_index=True)
|
)
|
||||||
|
|
||||||
with tab_left:
|
with tab_left:
|
||||||
if result.unmatched_left.empty:
|
_render_preview(
|
||||||
st.info("Every left row was matched.")
|
result.unmatched_left,
|
||||||
else:
|
empty_msg="Every left row was matched.",
|
||||||
st.dataframe(result.unmatched_left, width="stretch", hide_index=True)
|
)
|
||||||
|
|
||||||
with tab_right:
|
with tab_right:
|
||||||
if result.unmatched_right.empty:
|
_render_preview(
|
||||||
st.info("Every right row was matched.")
|
result.unmatched_right,
|
||||||
else:
|
empty_msg="Every right row was matched.",
|
||||||
st.dataframe(result.unmatched_right, width="stretch", hide_index=True)
|
)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Downloads
|
# Downloads
|
||||||
|
|||||||
Reference in New Issue
Block a user