diff --git a/src/gui/app.py b/src/gui/app.py index b936b0e..9855b89 100644 --- a/src/gui/app.py +++ b/src/gui/app.py @@ -215,6 +215,9 @@ if uploaded is not None: def _clear_all(): st.session_state["review_decisions"] = {} + for k in list(st.session_state): + if k.startswith("editor_"): + del st.session_state[k] action_left, action_mid, action_right = st.columns(3) with action_left: diff --git a/src/gui/components.py b/src/gui/components.py index d319282..df83466 100644 --- a/src/gui/components.py +++ b/src/gui/components.py @@ -318,37 +318,36 @@ def match_group_card( differing_cols = _find_differing_cols(group, df, display_cols) with st.expander(label, expanded=expanded): - # Build comparison DataFrame - rows_data = [] - for idx in group.row_indices: - row = {"_row": idx + 1} - for col in display_cols: - row[col] = df.iloc[idx].get(col, "") - rows_data.append(row) - - compare_df = pd.DataFrame(rows_data) - compare_df = compare_df.set_index("_row") - - # Highlight differences - def _highlight_diffs(s: pd.Series) -> list[str]: - """Highlight cells that differ from the first row.""" - styles = [] - first_val = str(s.iloc[0]).strip() if len(s) > 0 else "" - for val in s: - val_str = str(val).strip() - if val_str != first_val and val_str and first_val: - styles.append("background-color: rgba(245, 166, 35, 0.2)") - elif not val_str and first_val: - styles.append("background-color: rgba(240, 82, 82, 0.1)") - else: - styles.append("") - return styles - - styled = compare_df.style.apply(_highlight_diffs, axis=0) - st.dataframe(styled, use_container_width=True) - if has_decision: - # --- Decided state: show summary + undo --- + # --- Decided state: read-only table with diff highlighting --- + rows_data = [] + for idx in group.row_indices: + row = {"Row": idx + 1} + for col in display_cols: + row[col] = df.iloc[idx].get(col, "") + rows_data.append(row) + compare_df = pd.DataFrame(rows_data).set_index("Row") + + def _highlight_diffs(s: pd.Series) -> list[str]: + styles = [] + first_val = str(s.iloc[0]).strip() if len(s) > 0 else "" + for val in s: + val_str = str(val).strip() + if val_str != first_val and val_str and first_val: + styles.append( + "background-color: rgba(245, 166, 35, 0.2)" + ) + elif not val_str and first_val: + styles.append( + "background-color: rgba(240, 82, 82, 0.1)" + ) + else: + styles.append("") + return styles + + styled = compare_df.style.apply(_highlight_diffs, axis=0) + st.dataframe(styled, use_container_width=True) + if len(keep_indices) == n_rows: st.info("Decision: Kept All") elif len(keep_indices) == 1: @@ -363,97 +362,118 @@ def match_group_card( f"(removing {n_rows - len(keep_indices)})" ) - def _undo(g=gid, indices=group.row_indices, diff=differing_cols): + def _undo(g=gid): st.session_state["review_decisions"].pop(g, None) - st.session_state.pop(f"customize_{g}", None) - for idx in indices: - st.session_state.pop(f"keep_{g}_{idx}", None) - for c in diff: - st.session_state.pop(f"col_{g}_{c}", None) + st.session_state.pop(f"editor_{g}", None) st.button("Undo", key=f"undo_{gid}", on_click=_undo) - else: - # --- Row selection checkboxes --- - st.caption("Select rows to keep:") - chk_cols = st.columns(n_rows) - for i, idx in enumerate(group.row_indices): - with chk_cols[i]: - st.checkbox( - f"Row {idx + 1}", - value=True, - key=f"keep_{gid}_{idx}", - ) - # Read current checkbox state - checked = [ - idx for idx in group.row_indices - if st.session_state.get(f"keep_{gid}_{idx}", True) + else: + # --- Undecided: interactive editor with inline checkboxes & dropdowns --- + editor_rows = [] + for idx in group.row_indices: + row_data = {"Keep": True, "Row": idx + 1} + for col in display_cols: + row_data[col] = str(df.iloc[idx].get(col, "")) + editor_rows.append(row_data) + editor_df = pd.DataFrame(editor_rows) + + col_config = { + "Keep": st.column_config.CheckboxColumn( + "Keep", default=True, width="small", + ), + "Row": st.column_config.NumberColumn("Row", width="small"), + } + for col in differing_cols: + vals = [] + for idx in group.row_indices: + v = str(df.iloc[idx].get(col, "")).strip() + if v not in vals: + vals.append(v) + if "" not in vals: + vals.append("") + col_config[col] = st.column_config.SelectboxColumn( + col, options=vals, required=False, + ) + + disabled_cols = ["Row"] + [ + c for c in display_cols if c not in differing_cols ] - # --- Customize columns (only when exactly 1 row kept) --- - if len(checked) == 1 and differing_cols: - customize = st.checkbox( - f"Customize columns ({len(differing_cols)} differ)", - key=f"customize_{gid}", - value=False, + edited = st.data_editor( + editor_df, + column_config=col_config, + disabled=disabled_cols, + use_container_width=True, + hide_index=True, + key=f"editor_{gid}", + ) + + # Read which rows are checked + checked = [ + idx + for i, idx in enumerate(group.row_indices) + if edited.iloc[i]["Keep"] + ] + + if differing_cols: + st.caption( + f"Columns with differences (editable): " + f"{', '.join(differing_cols)}" ) - if customize: - survivor_idx = checked[0] - base_pos = group.row_indices.index(survivor_idx) - st.caption("Pick which row's value to use for each column:") - for col in differing_cols: - def _fmt(idx: int, c: str = col) -> str: - val = str( - st.session_state["df"].iloc[idx].get(c, "") - ).strip() - return f"Row {idx + 1}: {val or '(empty)'}" - st.selectbox( - col, - options=group.row_indices, - index=base_pos, - format_func=_fmt, - key=f"col_{gid}_{col}", - ) - - # --- Status caption --- + # Status if len(checked) == 0: st.warning("Select at least one row to keep.") elif len(checked) == n_rows: - st.caption("Keeping all rows (no duplicates removed from this group)") + st.caption("Keeping all rows (no duplicates removed)") elif len(checked) == 1: - st.caption(f"Will merge into Row {checked[0] + 1}, " - f"removing {n_rows - 1} row(s)") + st.caption( + f"Merging into Row {checked[0] + 1}, " + f"removing {n_rows - 1} row(s)" + ) else: - removed = n_rows - len(checked) - st.caption(f"Will keep {len(checked)} rows, " - f"removing {removed}") + st.caption( + f"Keeping {len(checked)} rows, " + f"removing {n_rows - len(checked)}" + ) - # --- Confirm button --- + # Confirm def _on_confirm( - g=gid, indices=group.row_indices, diff=differing_cols, + g=gid, indices=list(group.row_indices), + diff=differing_cols, ): - keep = [ - idx for idx in indices - if st.session_state.get(f"keep_{g}_{idx}", True) - ] - # Safety: never remove all rows + editor_state = st.session_state.get(f"editor_{g}", {}) + ed_rows = editor_state.get("edited_rows", {}) + + # Determine which rows to keep + keep = [] + for i, idx in enumerate(indices): + changes = ed_rows.get(i, {}) + if changes.get("Keep", True): + keep.append(idx) if not keep: keep = list(indices) + # Column overrides (single-survivor merge only) ovr: dict[str, str] = {} - # Column overrides only apply for single-survivor merge if len(keep) == 1: + surv_idx = keep[0] + surv_pos = indices.index(surv_idx) + surv_changes = ed_rows.get(surv_pos, {}) the_df = st.session_state["df"] - base_idx = keep[0] for c in diff: - col_key = f"col_{g}_{c}" - if col_key in st.session_state: - source_idx = st.session_state[col_key] - if source_idx != base_idx: - ovr[c] = str( - the_df.iloc[source_idx].get(c, "") - ) + if c in surv_changes: + new_val = ( + str(surv_changes[c]) + if surv_changes[c] is not None + else "" + ) + orig = str( + the_df.iloc[surv_idx].get(c, "") + ).strip() + if new_val.strip() != orig: + ovr[c] = new_val st.session_state["review_decisions"][g] = { "keep_indices": keep,