feat: per-group survivor selection and column cherry-picking in GUI

Each match group card now has:
- Radio button to pick which row to keep as the base survivor
- "Customize columns" toggle showing only columns that differ
- Per-column selectbox to pick values from any row in the group
- Decisions stored as {action, survivor_idx, overrides} dicts

Added apply_review_decisions(), which builds the final DataFrame by
applying each group's survivor selection and column overrides directly,
without re-running the dedup engine. The batch actions (Accept All,
Reject All) also store their decisions in the new dict format.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-28 23:47:25 +00:00
parent 39e139d777
commit debb0cb516
2 changed files with 247 additions and 55 deletions

View File

@@ -21,7 +21,12 @@ if str(_project_root) not in sys.path:
from src.core.dedup import deduplicate, build_default_strategies, DeduplicationResult
from src.core.io import read_file, list_sheets
from src.core.config import DeduplicationConfig
from src.gui.components import config_panel, match_group_card, results_summary
from src.gui.components import (
apply_review_decisions,
config_panel,
match_group_card,
results_summary,
)
# ---------------------------------------------------------------------------
@@ -194,21 +199,32 @@ if uploaded is not None:
st.subheader("Match Groups")
# Batch actions
def _accept_all():
    """Store an accept decision for every match group.

    Each group gets a decision dict with ``action`` set to True, the
    group's own ``survivor_index`` as ``survivor_idx``, and no column
    overrides. Any decision already stored for a group is replaced.
    """
    for group in result.match_groups:
        decision = {
            "action": True,
            "survivor_idx": group.survivor_index,
            "overrides": {},
        }
        # Look the decisions dict up per group so that nothing is touched
        # when there are no match groups.
        st.session_state["review_decisions"][group.group_id] = decision
def _reject_all():
    """Store a reject decision for every match group.

    Each group gets a decision dict with ``action`` set to False, the
    group's own ``survivor_index`` as ``survivor_idx``, and no column
    overrides. Any decision already stored for a group is replaced.
    """
    for group in result.match_groups:
        decision = {
            "action": False,
            "survivor_idx": group.survivor_index,
            "overrides": {},
        }
        # Look the decisions dict up per group so that nothing is touched
        # when there are no match groups.
        st.session_state["review_decisions"][group.group_id] = decision
def _clear_all():
    """Discard all stored review decisions."""
    # Rebind the entry to a fresh dict instead of clearing the old one in place.
    st.session_state["review_decisions"] = dict()
action_left, action_mid, action_right = st.columns(3)
with action_left:
if st.button("Accept All"):
for g in result.match_groups:
st.session_state["review_decisions"][g.group_id] = True
st.rerun()
st.button("Accept All", on_click=_accept_all)
with action_mid:
if st.button("Reject All"):
for g in result.match_groups:
st.session_state["review_decisions"][g.group_id] = False
st.rerun()
st.button("Reject All", on_click=_reject_all)
with action_right:
if st.button("Clear Decisions"):
st.session_state["review_decisions"] = {}
st.rerun()
st.button("Clear Decisions", on_click=_clear_all)
# Individual group cards
decisions = st.session_state["review_decisions"]
@@ -218,40 +234,39 @@ if uploaded is not None:
# Show decision summary
if decisions:
st.divider()
accepted = sum(1 for v in decisions.values() if v is True)
rejected = sum(1 for v in decisions.values() if v is False)
pending = len(result.match_groups) - len(decisions)
st.caption(
f"Decisions: {accepted} merged, {rejected} kept both, "
f"{pending} pending"
accepted = sum(
1 for v in decisions.values()
if isinstance(v, dict) and v.get("action") is True
)
customized = sum(
1 for v in decisions.values()
if isinstance(v, dict) and v.get("action") is True
and v.get("overrides")
)
rejected = sum(
1 for v in decisions.values()
if isinstance(v, dict) and v.get("action") is False
)
pending = len(result.match_groups) - len(decisions)
# Re-run dedup with review decisions applied
summary_parts = [f"{accepted} merged"]
if customized:
summary_parts.append(f"{customized} customized")
summary_parts.append(f"{rejected} kept both")
summary_parts.append(f"{pending} pending")
st.caption("Decisions: " + ", ".join(summary_parts))
# Apply decisions and offer download
if st.button(
"Apply Review Decisions & Download",
type="primary",
use_container_width=True,
):
def _review_callback(group, _df):
    """Return the stored review decision for ``group``, accepting by default.

    The decision is looked up by ``group.group_id`` in the enclosing
    ``decisions`` mapping; a group with no stored decision yields True.
    ``_df`` is accepted but not used.
    """
    return decisions.get(group.group_id, True)
reviewed_result = deduplicate(
df,
strategies=settings["strategies"],
survivor_rule=settings["survivor_rule"],
date_column=settings["date_column"],
merge=settings["merge"],
preview=False,
review_callback=_review_callback,
reviewed_df, reviewed_removed = apply_review_decisions(
df, result.match_groups, decisions,
)
# Update result and show downloads
st.session_state["result"] = reviewed_result
csv_bytes = reviewed_result.deduplicated_df.to_csv(
csv_bytes = reviewed_df.to_csv(
index=False
).encode("utf-8-sig")
st.download_button(
@@ -261,6 +276,17 @@ if uploaded is not None:
mime="text/csv",
key="reviewed_download",
)
if not reviewed_removed.empty:
removed_bytes = reviewed_removed.to_csv(
index=False
).encode("utf-8-sig")
st.download_button(
"Download Reviewed Removed Rows",
data=removed_bytes,
file_name="removed_reviewed.csv",
mime="text/csv",
key="reviewed_removed_download",
)
# Log entries
if result.log_entries: