feat: per-group survivor selection and column cherry-picking in GUI
Each match group card now has:
- Radio button to pick which row to keep as the base survivor
- "Customize columns" toggle showing only columns that differ
- Per-column selectbox to pick values from any row in the group
- Decisions stored per group as {action, survivor_idx, overrides} dicts
  (action: True = merge the group, False = keep both rows)
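Added apply_review_decisions(), which builds the final DataFrame (and the
set of removed rows) by applying each group's survivor selection and column
overrides directly, without re-running the dedup engine. The batch actions
(Accept All / Reject All) also write decisions in the new dict format.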
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
100
src/gui/app.py
100
src/gui/app.py
@@ -21,7 +21,12 @@ if str(_project_root) not in sys.path:
|
||||
from src.core.dedup import deduplicate, build_default_strategies, DeduplicationResult
|
||||
from src.core.io import read_file, list_sheets
|
||||
from src.core.config import DeduplicationConfig
|
||||
from src.gui.components import config_panel, match_group_card, results_summary
|
||||
from src.gui.components import (
|
||||
apply_review_decisions,
|
||||
config_panel,
|
||||
match_group_card,
|
||||
results_summary,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -194,21 +199,32 @@ if uploaded is not None:
|
||||
st.subheader("Match Groups")
|
||||
|
||||
# Batch actions
|
||||
def _accept_all():
|
||||
for g in result.match_groups:
|
||||
st.session_state["review_decisions"][g.group_id] = {
|
||||
"action": True,
|
||||
"survivor_idx": g.survivor_index,
|
||||
"overrides": {},
|
||||
}
|
||||
|
||||
def _reject_all():
|
||||
for g in result.match_groups:
|
||||
st.session_state["review_decisions"][g.group_id] = {
|
||||
"action": False,
|
||||
"survivor_idx": g.survivor_index,
|
||||
"overrides": {},
|
||||
}
|
||||
|
||||
def _clear_all():
|
||||
st.session_state["review_decisions"] = {}
|
||||
|
||||
action_left, action_mid, action_right = st.columns(3)
|
||||
with action_left:
|
||||
if st.button("Accept All"):
|
||||
for g in result.match_groups:
|
||||
st.session_state["review_decisions"][g.group_id] = True
|
||||
st.rerun()
|
||||
st.button("Accept All", on_click=_accept_all)
|
||||
with action_mid:
|
||||
if st.button("Reject All"):
|
||||
for g in result.match_groups:
|
||||
st.session_state["review_decisions"][g.group_id] = False
|
||||
st.rerun()
|
||||
st.button("Reject All", on_click=_reject_all)
|
||||
with action_right:
|
||||
if st.button("Clear Decisions"):
|
||||
st.session_state["review_decisions"] = {}
|
||||
st.rerun()
|
||||
st.button("Clear Decisions", on_click=_clear_all)
|
||||
|
||||
# Individual group cards
|
||||
decisions = st.session_state["review_decisions"]
|
||||
@@ -218,40 +234,39 @@ if uploaded is not None:
|
||||
# Show decision summary
|
||||
if decisions:
|
||||
st.divider()
|
||||
accepted = sum(1 for v in decisions.values() if v is True)
|
||||
rejected = sum(1 for v in decisions.values() if v is False)
|
||||
pending = len(result.match_groups) - len(decisions)
|
||||
st.caption(
|
||||
f"Decisions: {accepted} merged, {rejected} kept both, "
|
||||
f"{pending} pending"
|
||||
accepted = sum(
|
||||
1 for v in decisions.values()
|
||||
if isinstance(v, dict) and v.get("action") is True
|
||||
)
|
||||
customized = sum(
|
||||
1 for v in decisions.values()
|
||||
if isinstance(v, dict) and v.get("action") is True
|
||||
and v.get("overrides")
|
||||
)
|
||||
rejected = sum(
|
||||
1 for v in decisions.values()
|
||||
if isinstance(v, dict) and v.get("action") is False
|
||||
)
|
||||
pending = len(result.match_groups) - len(decisions)
|
||||
|
||||
# Re-run dedup with review decisions applied
|
||||
summary_parts = [f"{accepted} merged"]
|
||||
if customized:
|
||||
summary_parts.append(f"{customized} customized")
|
||||
summary_parts.append(f"{rejected} kept both")
|
||||
summary_parts.append(f"{pending} pending")
|
||||
st.caption("Decisions: " + ", ".join(summary_parts))
|
||||
|
||||
# Apply decisions and offer download
|
||||
if st.button(
|
||||
"Apply Review Decisions & Download",
|
||||
type="primary",
|
||||
use_container_width=True,
|
||||
):
|
||||
def _review_callback(group, _df):
|
||||
gid = group.group_id
|
||||
if gid in decisions:
|
||||
return decisions[gid]
|
||||
return True # default: accept
|
||||
|
||||
reviewed_result = deduplicate(
|
||||
df,
|
||||
strategies=settings["strategies"],
|
||||
survivor_rule=settings["survivor_rule"],
|
||||
date_column=settings["date_column"],
|
||||
merge=settings["merge"],
|
||||
preview=False,
|
||||
review_callback=_review_callback,
|
||||
reviewed_df, reviewed_removed = apply_review_decisions(
|
||||
df, result.match_groups, decisions,
|
||||
)
|
||||
|
||||
# Update result and show downloads
|
||||
st.session_state["result"] = reviewed_result
|
||||
|
||||
csv_bytes = reviewed_result.deduplicated_df.to_csv(
|
||||
csv_bytes = reviewed_df.to_csv(
|
||||
index=False
|
||||
).encode("utf-8-sig")
|
||||
st.download_button(
|
||||
@@ -261,6 +276,17 @@ if uploaded is not None:
|
||||
mime="text/csv",
|
||||
key="reviewed_download",
|
||||
)
|
||||
if not reviewed_removed.empty:
|
||||
removed_bytes = reviewed_removed.to_csv(
|
||||
index=False
|
||||
).encode("utf-8-sig")
|
||||
st.download_button(
|
||||
"Download Reviewed Removed Rows",
|
||||
data=removed_bytes,
|
||||
file_name="removed_reviewed.csv",
|
||||
mime="text/csv",
|
||||
key="reviewed_removed_download",
|
||||
)
|
||||
|
||||
# Log entries
|
||||
if result.log_entries:
|
||||
|
||||
Reference in New Issue
Block a user