feat: inline checkboxes and column dropdowns in match group editor

Replace the separate checkbox row and the "Customize columns" toggle with a unified st.data_editor grid: "Keep" checkboxes sit at the start of each row, and columns whose values differ render as inline selectbox dropdowns. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-29 00:10:16 +00:00
parent 863fe89f2c
commit d368cad89d
2 changed files with 121 additions and 98 deletions
--- a/src/gui/app.py
+++ b/src/gui/app.py
@@ -215,6 +215,9 @@ if uploaded is not None:
                def _clear_all():
                    """Reset all review decisions and drop per-group editor state."""
                    state = st.session_state
                    state["review_decisions"] = {}
                    # Collect the keys first so the mapping is not mutated
                    # while it is being iterated.
                    stale_keys = [k for k in state if k.startswith("editor_")]
                    for stale in stale_keys:
                        del state[stale]
                action_left, action_mid, action_right = st.columns(3)
                with action_left:
--- a/src/gui/components.py
+++ b/src/gui/components.py
@@ -318,37 +318,36 @@ def match_group_card(
    differing_cols = _find_differing_cols(group, df, display_cols)
    with st.expander(label, expanded=expanded):
        # Build comparison DataFrame
        rows_data = []
        for idx in group.row_indices:
            row = {"_row": idx + 1}
            for col in display_cols:
                row[col] = df.iloc[idx].get(col, "")
            rows_data.append(row)
        compare_df = pd.DataFrame(rows_data)
        compare_df = compare_df.set_index("_row")
        # Highlight differences
        def _highlight_diffs(s: pd.Series) -> list[str]:
            """Highlight cells that differ from the first row."""
            styles = []
            first_val = str(s.iloc[0]).strip() if len(s) > 0 else ""
            for val in s:
                val_str = str(val).strip()
                if val_str != first_val and val_str and first_val:
                    styles.append("background-color: rgba(245, 166, 35, 0.2)")
                elif not val_str and first_val:
                    styles.append("background-color: rgba(240, 82, 82, 0.1)")
                else:
                    styles.append("")
            return styles
        styled = compare_df.style.apply(_highlight_diffs, axis=0)
        st.dataframe(styled, use_container_width=True)
        if has_decision:
-            # --- Decided state: show summary + undo ---
+            # --- Decided state: read-only table with diff highlighting ---
            rows_data = []
            for idx in group.row_indices:
                row = {"Row": idx + 1}
                for col in display_cols:
                    row[col] = df.iloc[idx].get(col, "")
                rows_data.append(row)
            compare_df = pd.DataFrame(rows_data).set_index("Row")
            def _highlight_diffs(s: pd.Series) -> list[str]:
                styles = []
                first_val = str(s.iloc[0]).strip() if len(s) > 0 else ""
                for val in s:
                    val_str = str(val).strip()
                    if val_str != first_val and val_str and first_val:
                        styles.append(
                            "background-color: rgba(245, 166, 35, 0.2)"
                        )
                    elif not val_str and first_val:
                        styles.append(
                            "background-color: rgba(240, 82, 82, 0.1)"
                        )
                    else:
                        styles.append("")
                return styles
            styled = compare_df.style.apply(_highlight_diffs, axis=0)
            st.dataframe(styled, use_container_width=True)
            if len(keep_indices) == n_rows:
                st.info("Decision: Kept All")
            elif len(keep_indices) == 1:
@@ -363,97 +362,118 @@ def match_group_card(
                    f"(removing {n_rows - len(keep_indices)})"
                )
-            def _undo(g=gid, indices=group.row_indices, diff=differing_cols):
+            def _undo(g=gid):
                st.session_state["review_decisions"].pop(g, None)
-                st.session_state.pop(f"customize_{g}", None)
+                st.session_state.pop(f"editor_{g}", None)
                for idx in indices:
                    st.session_state.pop(f"keep_{g}_{idx}", None)
                for c in diff:
                    st.session_state.pop(f"col_{g}_{c}", None)
            st.button("Undo", key=f"undo_{gid}", on_click=_undo)
        else:
            # --- Row selection checkboxes ---
            st.caption("Select rows to keep:")
            chk_cols = st.columns(n_rows)
            for i, idx in enumerate(group.row_indices):
                with chk_cols[i]:
                    st.checkbox(
                        f"Row {idx + 1}",
                        value=True,
                        key=f"keep_{gid}_{idx}",
                    )
-            # Read current checkbox state
+        else:
-            checked = [
+            # --- Undecided: interactive editor with inline checkboxes & dropdowns ---
-                idx for idx in group.row_indices
+            editor_rows = []
-                if st.session_state.get(f"keep_{gid}_{idx}", True)
+            for idx in group.row_indices:
                row_data = {"Keep": True, "Row": idx + 1}
                for col in display_cols:
                    row_data[col] = str(df.iloc[idx].get(col, ""))
                editor_rows.append(row_data)
            editor_df = pd.DataFrame(editor_rows)
            col_config = {
                "Keep": st.column_config.CheckboxColumn(
                    "Keep", default=True, width="small",
                ),
                "Row": st.column_config.NumberColumn("Row", width="small"),
            }
            for col in differing_cols:
                vals = []
                for idx in group.row_indices:
                    v = str(df.iloc[idx].get(col, "")).strip()
                    if v not in vals:
                        vals.append(v)
                if "" not in vals:
                    vals.append("")
                col_config[col] = st.column_config.SelectboxColumn(
                    col, options=vals, required=False,
                )
            disabled_cols = ["Row"] + [
                c for c in display_cols if c not in differing_cols
            ]
-            # --- Customize columns (only when exactly 1 row kept) ---
+            edited = st.data_editor(
-            if len(checked) == 1 and differing_cols:
+                editor_df,
-                customize = st.checkbox(
+                column_config=col_config,
-                    f"Customize columns ({len(differing_cols)} differ)",
+                disabled=disabled_cols,
-                    key=f"customize_{gid}",
+                use_container_width=True,
-                    value=False,
+                hide_index=True,
                key=f"editor_{gid}",
            )
            # Read which rows are checked
            checked = [
                idx
                for i, idx in enumerate(group.row_indices)
                if edited.iloc[i]["Keep"]
            ]
            if differing_cols:
                st.caption(
                    f"Columns with differences (editable): "
                    f"{', '.join(differing_cols)}"
                )
                if customize:
                    survivor_idx = checked[0]
                    base_pos = group.row_indices.index(survivor_idx)
                    st.caption("Pick which row's value to use for each column:")
                    for col in differing_cols:
                        def _fmt(idx: int, c: str = col) -> str:
                            """Label an option as ``Row N: value`` for column ``c``.

                            ``c`` is bound as a default argument so each
                            column's formatter keeps its own column name.
                            """
                            cell = st.session_state["df"].iloc[idx].get(c, "")
                            shown = str(cell).strip() or "(empty)"
                            return f"Row {idx + 1}: {shown}"
-                        st.selectbox(
+            # Status
                            col,
                            options=group.row_indices,
                            index=base_pos,
                            format_func=_fmt,
                            key=f"col_{gid}_{col}",
                        )
            # --- Status caption ---
            if len(checked) == 0:
                st.warning("Select at least one row to keep.")
            elif len(checked) == n_rows:
-                st.caption("Keeping all rows (no duplicates removed from this group)")
+                st.caption("Keeping all rows (no duplicates removed)")
            elif len(checked) == 1:
-                st.caption(f"Will merge into Row {checked[0] + 1}, "
+                st.caption(
-                           f"removing {n_rows - 1} row(s)")
+                    f"Merging into Row {checked[0] + 1}, "
                    f"removing {n_rows - 1} row(s)"
                )
            else:
-                removed = n_rows - len(checked)
+                st.caption(
-                st.caption(f"Will keep {len(checked)} rows, "
+                    f"Keeping {len(checked)} rows, "
-                           f"removing {removed}")
+                    f"removing {n_rows - len(checked)}"
                )
-            # --- Confirm button ---
+            # Confirm
            def _on_confirm(
-                g=gid, indices=group.row_indices, diff=differing_cols,
+                g=gid, indices=list(group.row_indices),
                diff=differing_cols,
            ):
-                keep = [
+                editor_state = st.session_state.get(f"editor_{g}", {})
-                    idx for idx in indices
+                ed_rows = editor_state.get("edited_rows", {})
-                    if st.session_state.get(f"keep_{g}_{idx}", True)
+
-                ]
+                # Determine which rows to keep
-                # Safety: never remove all rows
+                keep = []
                for i, idx in enumerate(indices):
                    changes = ed_rows.get(i, {})
                    if changes.get("Keep", True):
                        keep.append(idx)
                if not keep:
                    keep = list(indices)
                # Column overrides (single-survivor merge only)
                ovr: dict[str, str] = {}
                # Column overrides only apply for single-survivor merge
                if len(keep) == 1:
                    surv_idx = keep[0]
                    surv_pos = indices.index(surv_idx)
                    surv_changes = ed_rows.get(surv_pos, {})
                    the_df = st.session_state["df"]
                    base_idx = keep[0]
                    for c in diff:
-                        col_key = f"col_{g}_{c}"
+                        if c in surv_changes:
-                        if col_key in st.session_state:
+                            new_val = (
-                            source_idx = st.session_state[col_key]
+                                str(surv_changes[c])
-                            if source_idx != base_idx:
+                                if surv_changes[c] is not None
-                                ovr[c] = str(
+                                else ""
-                                    the_df.iloc[source_idx].get(c, "")
+                            )
-                                )
+                            orig = str(
                                the_df.iloc[surv_idx].get(c, "")
                            ).strip()
                            if new_val.strip() != orig:
                                ovr[c] = new_val
                st.session_state["review_decisions"][g] = {
                    "keep_indices": keep,