feat: inline checkboxes and column dropdowns in match group editor

Replace the separate checkbox row and "Customize columns" toggle with a
unified st.data_editor grid: Keep checkboxes sit at the start of each row,
and differing columns render as inline selectbox dropdowns.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-29 00:10:16 +00:00
parent 863fe89f2c
commit d368cad89d
2 changed files with 121 additions and 98 deletions

View File

@@ -215,6 +215,9 @@ if uploaded is not None:
def _clear_all(): def _clear_all():
st.session_state["review_decisions"] = {} st.session_state["review_decisions"] = {}
for k in list(st.session_state):
if k.startswith("editor_"):
del st.session_state[k]
action_left, action_mid, action_right = st.columns(3) action_left, action_mid, action_right = st.columns(3)
with action_left: with action_left:

View File

@@ -318,37 +318,36 @@ def match_group_card(
differing_cols = _find_differing_cols(group, df, display_cols) differing_cols = _find_differing_cols(group, df, display_cols)
with st.expander(label, expanded=expanded): with st.expander(label, expanded=expanded):
# Build comparison DataFrame
rows_data = []
for idx in group.row_indices:
row = {"_row": idx + 1}
for col in display_cols:
row[col] = df.iloc[idx].get(col, "")
rows_data.append(row)
compare_df = pd.DataFrame(rows_data)
compare_df = compare_df.set_index("_row")
# Highlight differences
def _highlight_diffs(s: pd.Series) -> list[str]:
"""Highlight cells that differ from the first row."""
styles = []
first_val = str(s.iloc[0]).strip() if len(s) > 0 else ""
for val in s:
val_str = str(val).strip()
if val_str != first_val and val_str and first_val:
styles.append("background-color: rgba(245, 166, 35, 0.2)")
elif not val_str and first_val:
styles.append("background-color: rgba(240, 82, 82, 0.1)")
else:
styles.append("")
return styles
styled = compare_df.style.apply(_highlight_diffs, axis=0)
st.dataframe(styled, use_container_width=True)
if has_decision: if has_decision:
# --- Decided state: show summary + undo --- # --- Decided state: read-only table with diff highlighting ---
rows_data = []
for idx in group.row_indices:
row = {"Row": idx + 1}
for col in display_cols:
row[col] = df.iloc[idx].get(col, "")
rows_data.append(row)
compare_df = pd.DataFrame(rows_data).set_index("Row")
def _highlight_diffs(s: pd.Series) -> list[str]:
styles = []
first_val = str(s.iloc[0]).strip() if len(s) > 0 else ""
for val in s:
val_str = str(val).strip()
if val_str != first_val and val_str and first_val:
styles.append(
"background-color: rgba(245, 166, 35, 0.2)"
)
elif not val_str and first_val:
styles.append(
"background-color: rgba(240, 82, 82, 0.1)"
)
else:
styles.append("")
return styles
styled = compare_df.style.apply(_highlight_diffs, axis=0)
st.dataframe(styled, use_container_width=True)
if len(keep_indices) == n_rows: if len(keep_indices) == n_rows:
st.info("Decision: Kept All") st.info("Decision: Kept All")
elif len(keep_indices) == 1: elif len(keep_indices) == 1:
@@ -363,97 +362,118 @@ def match_group_card(
f"(removing {n_rows - len(keep_indices)})" f"(removing {n_rows - len(keep_indices)})"
) )
def _undo(g=gid, indices=group.row_indices, diff=differing_cols): def _undo(g=gid):
st.session_state["review_decisions"].pop(g, None) st.session_state["review_decisions"].pop(g, None)
st.session_state.pop(f"customize_{g}", None) st.session_state.pop(f"editor_{g}", None)
for idx in indices:
st.session_state.pop(f"keep_{g}_{idx}", None)
for c in diff:
st.session_state.pop(f"col_{g}_{c}", None)
st.button("Undo", key=f"undo_{gid}", on_click=_undo) st.button("Undo", key=f"undo_{gid}", on_click=_undo)
else:
# --- Row selection checkboxes ---
st.caption("Select rows to keep:")
chk_cols = st.columns(n_rows)
for i, idx in enumerate(group.row_indices):
with chk_cols[i]:
st.checkbox(
f"Row {idx + 1}",
value=True,
key=f"keep_{gid}_{idx}",
)
# Read current checkbox state else:
checked = [ # --- Undecided: interactive editor with inline checkboxes & dropdowns ---
idx for idx in group.row_indices editor_rows = []
if st.session_state.get(f"keep_{gid}_{idx}", True) for idx in group.row_indices:
row_data = {"Keep": True, "Row": idx + 1}
for col in display_cols:
row_data[col] = str(df.iloc[idx].get(col, ""))
editor_rows.append(row_data)
editor_df = pd.DataFrame(editor_rows)
col_config = {
"Keep": st.column_config.CheckboxColumn(
"Keep", default=True, width="small",
),
"Row": st.column_config.NumberColumn("Row", width="small"),
}
for col in differing_cols:
vals = []
for idx in group.row_indices:
v = str(df.iloc[idx].get(col, "")).strip()
if v not in vals:
vals.append(v)
if "" not in vals:
vals.append("")
col_config[col] = st.column_config.SelectboxColumn(
col, options=vals, required=False,
)
disabled_cols = ["Row"] + [
c for c in display_cols if c not in differing_cols
] ]
# --- Customize columns (only when exactly 1 row kept) --- edited = st.data_editor(
if len(checked) == 1 and differing_cols: editor_df,
customize = st.checkbox( column_config=col_config,
f"Customize columns ({len(differing_cols)} differ)", disabled=disabled_cols,
key=f"customize_{gid}", use_container_width=True,
value=False, hide_index=True,
key=f"editor_{gid}",
)
# Read which rows are checked
checked = [
idx
for i, idx in enumerate(group.row_indices)
if edited.iloc[i]["Keep"]
]
if differing_cols:
st.caption(
f"Columns with differences (editable): "
f"{', '.join(differing_cols)}"
) )
if customize:
survivor_idx = checked[0]
base_pos = group.row_indices.index(survivor_idx)
st.caption("Pick which row's value to use for each column:")
for col in differing_cols:
def _fmt(idx: int, c: str = col) -> str:
val = str(
st.session_state["df"].iloc[idx].get(c, "")
).strip()
return f"Row {idx + 1}: {val or '(empty)'}"
st.selectbox( # Status
col,
options=group.row_indices,
index=base_pos,
format_func=_fmt,
key=f"col_{gid}_{col}",
)
# --- Status caption ---
if len(checked) == 0: if len(checked) == 0:
st.warning("Select at least one row to keep.") st.warning("Select at least one row to keep.")
elif len(checked) == n_rows: elif len(checked) == n_rows:
st.caption("Keeping all rows (no duplicates removed from this group)") st.caption("Keeping all rows (no duplicates removed)")
elif len(checked) == 1: elif len(checked) == 1:
st.caption(f"Will merge into Row {checked[0] + 1}, " st.caption(
f"removing {n_rows - 1} row(s)") f"Merging into Row {checked[0] + 1}, "
f"removing {n_rows - 1} row(s)"
)
else: else:
removed = n_rows - len(checked) st.caption(
st.caption(f"Will keep {len(checked)} rows, " f"Keeping {len(checked)} rows, "
f"removing {removed}") f"removing {n_rows - len(checked)}"
)
# --- Confirm button --- # Confirm
def _on_confirm( def _on_confirm(
g=gid, indices=group.row_indices, diff=differing_cols, g=gid, indices=list(group.row_indices),
diff=differing_cols,
): ):
keep = [ editor_state = st.session_state.get(f"editor_{g}", {})
idx for idx in indices ed_rows = editor_state.get("edited_rows", {})
if st.session_state.get(f"keep_{g}_{idx}", True)
] # Determine which rows to keep
# Safety: never remove all rows keep = []
for i, idx in enumerate(indices):
changes = ed_rows.get(i, {})
if changes.get("Keep", True):
keep.append(idx)
if not keep: if not keep:
keep = list(indices) keep = list(indices)
# Column overrides (single-survivor merge only)
ovr: dict[str, str] = {} ovr: dict[str, str] = {}
# Column overrides only apply for single-survivor merge
if len(keep) == 1: if len(keep) == 1:
surv_idx = keep[0]
surv_pos = indices.index(surv_idx)
surv_changes = ed_rows.get(surv_pos, {})
the_df = st.session_state["df"] the_df = st.session_state["df"]
base_idx = keep[0]
for c in diff: for c in diff:
col_key = f"col_{g}_{c}" if c in surv_changes:
if col_key in st.session_state: new_val = (
source_idx = st.session_state[col_key] str(surv_changes[c])
if source_idx != base_idx: if surv_changes[c] is not None
ovr[c] = str( else ""
the_df.iloc[source_idx].get(c, "") )
) orig = str(
the_df.iloc[surv_idx].get(c, "")
).strip()
if new_val.strip() != orig:
ovr[c] = new_val
st.session_state["review_decisions"][g] = { st.session_state["review_decisions"][g] = {
"keep_indices": keep, "keep_indices": keep,