feat: inline checkboxes and column dropdowns in match group editor

Replace the separate checkbox row and the "Customize columns" toggle with a
unified st.data_editor grid: a "Keep" checkbox sits at the start of each row,
and columns whose values differ render as inline selectbox dropdowns.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-29 00:10:16 +00:00
parent 863fe89f2c
commit d368cad89d
2 changed files with 121 additions and 98 deletions

View File

@@ -318,37 +318,36 @@ def match_group_card(
differing_cols = _find_differing_cols(group, df, display_cols)
with st.expander(label, expanded=expanded):
# Build comparison DataFrame
rows_data = []
for idx in group.row_indices:
row = {"_row": idx + 1}
for col in display_cols:
row[col] = df.iloc[idx].get(col, "")
rows_data.append(row)
compare_df = pd.DataFrame(rows_data)
compare_df = compare_df.set_index("_row")
# Highlight differences
def _highlight_diffs(s: pd.Series) -> list[str]:
"""Highlight cells that differ from the first row."""
styles = []
first_val = str(s.iloc[0]).strip() if len(s) > 0 else ""
for val in s:
val_str = str(val).strip()
if val_str != first_val and val_str and first_val:
styles.append("background-color: rgba(245, 166, 35, 0.2)")
elif not val_str and first_val:
styles.append("background-color: rgba(240, 82, 82, 0.1)")
else:
styles.append("")
return styles
styled = compare_df.style.apply(_highlight_diffs, axis=0)
st.dataframe(styled, use_container_width=True)
if has_decision:
# --- Decided state: show summary + undo ---
# --- Decided state: read-only table with diff highlighting ---
rows_data = []
for idx in group.row_indices:
row = {"Row": idx + 1}
for col in display_cols:
row[col] = df.iloc[idx].get(col, "")
rows_data.append(row)
compare_df = pd.DataFrame(rows_data).set_index("Row")
def _highlight_diffs(s: pd.Series) -> list[str]:
styles = []
first_val = str(s.iloc[0]).strip() if len(s) > 0 else ""
for val in s:
val_str = str(val).strip()
if val_str != first_val and val_str and first_val:
styles.append(
"background-color: rgba(245, 166, 35, 0.2)"
)
elif not val_str and first_val:
styles.append(
"background-color: rgba(240, 82, 82, 0.1)"
)
else:
styles.append("")
return styles
styled = compare_df.style.apply(_highlight_diffs, axis=0)
st.dataframe(styled, use_container_width=True)
if len(keep_indices) == n_rows:
st.info("Decision: Kept All")
elif len(keep_indices) == 1:
@@ -363,97 +362,118 @@ def match_group_card(
f"(removing {n_rows - len(keep_indices)})"
)
def _undo(g=gid, indices=group.row_indices, diff=differing_cols):
def _undo(g=gid):
st.session_state["review_decisions"].pop(g, None)
st.session_state.pop(f"customize_{g}", None)
for idx in indices:
st.session_state.pop(f"keep_{g}_{idx}", None)
for c in diff:
st.session_state.pop(f"col_{g}_{c}", None)
st.session_state.pop(f"editor_{g}", None)
st.button("Undo", key=f"undo_{gid}", on_click=_undo)
else:
# --- Row selection checkboxes ---
st.caption("Select rows to keep:")
chk_cols = st.columns(n_rows)
for i, idx in enumerate(group.row_indices):
with chk_cols[i]:
st.checkbox(
f"Row {idx + 1}",
value=True,
key=f"keep_{gid}_{idx}",
)
# Read current checkbox state
checked = [
idx for idx in group.row_indices
if st.session_state.get(f"keep_{gid}_{idx}", True)
else:
# --- Undecided: interactive editor with inline checkboxes & dropdowns ---
editor_rows = []
for idx in group.row_indices:
row_data = {"Keep": True, "Row": idx + 1}
for col in display_cols:
row_data[col] = str(df.iloc[idx].get(col, ""))
editor_rows.append(row_data)
editor_df = pd.DataFrame(editor_rows)
col_config = {
"Keep": st.column_config.CheckboxColumn(
"Keep", default=True, width="small",
),
"Row": st.column_config.NumberColumn("Row", width="small"),
}
for col in differing_cols:
vals = []
for idx in group.row_indices:
v = str(df.iloc[idx].get(col, "")).strip()
if v not in vals:
vals.append(v)
if "" not in vals:
vals.append("")
col_config[col] = st.column_config.SelectboxColumn(
col, options=vals, required=False,
)
disabled_cols = ["Row"] + [
c for c in display_cols if c not in differing_cols
]
# --- Customize columns (only when exactly 1 row kept) ---
if len(checked) == 1 and differing_cols:
customize = st.checkbox(
f"Customize columns ({len(differing_cols)} differ)",
key=f"customize_{gid}",
value=False,
edited = st.data_editor(
editor_df,
column_config=col_config,
disabled=disabled_cols,
use_container_width=True,
hide_index=True,
key=f"editor_{gid}",
)
# Read which rows are checked
checked = [
idx
for i, idx in enumerate(group.row_indices)
if edited.iloc[i]["Keep"]
]
if differing_cols:
st.caption(
f"Columns with differences (editable): "
f"{', '.join(differing_cols)}"
)
if customize:
survivor_idx = checked[0]
base_pos = group.row_indices.index(survivor_idx)
st.caption("Pick which row's value to use for each column:")
for col in differing_cols:
def _fmt(idx: int, c: str = col) -> str:
val = str(
st.session_state["df"].iloc[idx].get(c, "")
).strip()
return f"Row {idx + 1}: {val or '(empty)'}"
st.selectbox(
col,
options=group.row_indices,
index=base_pos,
format_func=_fmt,
key=f"col_{gid}_{col}",
)
# --- Status caption ---
# Status
if len(checked) == 0:
st.warning("Select at least one row to keep.")
elif len(checked) == n_rows:
st.caption("Keeping all rows (no duplicates removed from this group)")
st.caption("Keeping all rows (no duplicates removed)")
elif len(checked) == 1:
st.caption(f"Will merge into Row {checked[0] + 1}, "
f"removing {n_rows - 1} row(s)")
st.caption(
f"Merging into Row {checked[0] + 1}, "
f"removing {n_rows - 1} row(s)"
)
else:
removed = n_rows - len(checked)
st.caption(f"Will keep {len(checked)} rows, "
f"removing {removed}")
st.caption(
f"Keeping {len(checked)} rows, "
f"removing {n_rows - len(checked)}"
)
# --- Confirm button ---
# Confirm
def _on_confirm(
g=gid, indices=group.row_indices, diff=differing_cols,
g=gid, indices=list(group.row_indices),
diff=differing_cols,
):
keep = [
idx for idx in indices
if st.session_state.get(f"keep_{g}_{idx}", True)
]
# Safety: never remove all rows
editor_state = st.session_state.get(f"editor_{g}", {})
ed_rows = editor_state.get("edited_rows", {})
# Determine which rows to keep
keep = []
for i, idx in enumerate(indices):
changes = ed_rows.get(i, {})
if changes.get("Keep", True):
keep.append(idx)
if not keep:
keep = list(indices)
# Column overrides (single-survivor merge only)
ovr: dict[str, str] = {}
# Column overrides only apply for single-survivor merge
if len(keep) == 1:
surv_idx = keep[0]
surv_pos = indices.index(surv_idx)
surv_changes = ed_rows.get(surv_pos, {})
the_df = st.session_state["df"]
base_idx = keep[0]
for c in diff:
col_key = f"col_{g}_{c}"
if col_key in st.session_state:
source_idx = st.session_state[col_key]
if source_idx != base_idx:
ovr[c] = str(
the_df.iloc[source_idx].get(c, "")
)
if c in surv_changes:
new_val = (
str(surv_changes[c])
if surv_changes[c] is not None
else ""
)
orig = str(
the_df.iloc[surv_idx].get(c, "")
).strip()
if new_val.strip() != orig:
ovr[c] = new_val
st.session_state["review_decisions"][g] = {
"keep_indices": keep,