Files
datatools-dev/layout-review/01_deduplicator.html
Michael b6c39d7a09 refactor(layout-review): move assets to repo root
Relocate assets/ (app.css, shell.js) from layout-review/ up to the repo
root and rewrite every page's link/script refs to ../assets/.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 15:31:53 +00:00

188 lines
9.3 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!doctype html>
<html lang="en">
<head>
<!-- The charset declaration comes first so the encoding is known before any other head content is parsed -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Layout review — Find Duplicates</title>
<!-- Shared stylesheet, loaded from the assets/ directory one level above this page -->
<link rel="stylesheet" href="../assets/app.css">
</head>
<body data-page="01_deduplicator">
<div class="dt-app">
<aside class="dt-sidebar" id="dt-sidebar"></aside>
<main class="dt-main">
<!-- Review banner: states that this page is a static preview and links to index.html -->
<div class="dt-review-banner">
<!-- The dt-mi span holds an icon ligature name rather than readable text, so it is hidden from assistive tech -->
<span class="dt-mi" aria-hidden="true">visibility</span>
<span>Static layout preview of <strong>Find Duplicates</strong>, shown with a file imported and a completed run (results + match-group review). <a href="index.html">All pages →</a></span>
</div>
<div class="dt-main-inner">
<!-- Tool header: page title and a Help button, followed by a one-line caption and a spacer -->
<div class="dt-tool-header">
<h1>Find Duplicates</h1>
<!-- type="button" marks this as an in-page action; the icon ligature is hidden so only "Help" is announced -->
<button class="dt-help-btn" type="button"><span class="dt-mi" aria-hidden="true">help_outline</span> Help</button>
</div>
<p class="dt-tool-caption">Find rows that repeat, then keep one and remove the extras.</p>
<div class="dt-spacer"></div>
<!-- Upload (file staged) -->
<!-- NOTE(review): this label has no associated control in the static markup; pair it via for/id once a real file input exists -->
<label class="dt-label">Import CSV or Excel file</label>
<div class="dt-uploader">
<div class="dt-uploader-text">
<!-- The icon ligature is decorative next to the visible hint text, so it is hidden from assistive tech -->
<span class="hint"><span class="dt-mi" style="vertical-align:-4px" aria-hidden="true">upload_file</span> Drag and drop file here</span>
<span class="sub">Up to 1.5 GB · CSV, TSV, XLSX, XLS · encoding &amp; delimiter auto-detected</span>
</div>
<!-- type="button" marks this as an in-page action rather than a form submit -->
<button class="dt-btn" type="button">Browse files</button>
</div>
<!-- Staged-file chip: file icon, file name, size, and a remove button -->
<div class="dt-file-chip">
<!-- The SVG is decorative next to the visible file name, so it is hidden from assistive tech -->
<span class="dt-file-icon-chip"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" aria-hidden="true"><path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/><path d="M14 2v6h6"/></svg></span>
<span class="name">customers_export.csv</span>
<span class="size">2.1 MB</span>
<!-- The button has no text content, so aria-label supplies its accessible name; title is kept for the hover tooltip -->
<button class="dt-btn dt-btn-tertiary" type="button" title="Remove" aria-label="Remove customers_export.csv"></button>
</div>
<!-- Delimiter selector (CSV) -->
<div class="dt-field" style="max-width:320px">
<label class="dt-label">Delimiter</label>
<!-- Static stand-in for a dropdown: a styled div showing the current value, not a native select element -->
<div class="dt-select">Comma (,)</div>
<div class="dt-help-text">Auto-detected on upload. Change if the preview looks wrong.</div>
</div>
<!-- Preview expander (collapsed after a result exists) -->
<details class="dt-expander">
<summary>Preview: customers_export.csv</summary>
<div class="dt-expander-body">
<!-- NOTE(review): the caption says 6 columns but the table below shows 5 data columns plus the index; confirm which is intended -->
<p class="dt-caption">18,442 rows, 6 columns</p>
<div class="dt-table-wrap">
<table class="dt-table">
<!-- scope="col" ties each header cell to its column; the blank index header gets an aria-label so it is not announced as empty -->
<thead><tr><th class="idx" scope="col" aria-label="Row index"></th><th scope="col">name</th><th scope="col">email</th><th scope="col">city</th><th scope="col">phone</th><th scope="col">signup_date</th></tr></thead>
<tbody>
<tr><td class="idx">0</td><td>Jane Doe</td><td>jane@acme.io</td><td>Austin</td><td>512-555-0190</td><td>2024-01-04</td></tr>
<tr><td class="idx">1</td><td>jane doe</td><td>JANE@ACME.IO</td><td>austin</td><td>(512) 555-0190</td><td>01/04/2024</td></tr>
<tr><td class="idx">2</td><td>Bob Smith</td><td>bob@globex.com</td><td>Denver</td><td>720-555-7781</td><td>2024-02-11</td></tr>
<tr><td class="idx">3</td><td>R. Smith</td><td>bob@globex.com</td><td>Denver</td><td>720-555-7781</td><td>2024-02-11</td></tr>
</tbody>
</table>
</div>
</div>
</details>
<!-- Options expander: contains a nested "Advanced Options" expander with the matching controls -->
<details class="dt-expander">
<summary>Options</summary>
<div class="dt-expander-body">
<details class="dt-expander" style="margin-top:0">
<summary>Advanced Options</summary>
<div class="dt-expander-body">
<div class="dt-cols-2">
<!-- First column: column pickers, drawn as static multiselect stand-ins (placeholder text or chips) -->
<div>
<div class="dt-field"><label class="dt-label">Match on columns</label>
<div class="dt-multiselect"><span class="dt-ms-placeholder">Leave empty to auto-detect</span></div></div>
<div class="dt-field"><label class="dt-label">Strong keys</label>
<div class="dt-multiselect"><span class="dt-ms-chip">email <span class="x"></span></span></div></div>
<div class="dt-field"><label class="dt-label">Fuzzy columns</label>
<div class="dt-multiselect"><span class="dt-ms-chip">name <span class="x"></span></span></div></div>
</div>
<!-- Second column: fuzzy algorithm, similarity threshold, and survivor rule -->
<div>
<div class="dt-field"><label class="dt-label">Fuzzy algorithm</label><div class="dt-select">jaro_winkler</div></div>
<!-- NOTE(review): fill and knob sit at 70% while the value reads 85; that is consistent with a 50 to 100 range, confirm against the real slider -->
<div class="dt-field"><label class="dt-label">Similarity threshold</label>
<div class="dt-slider"><div class="track"><div class="fill" style="width:70%"></div><div class="knob" style="left:70%"></div></div><div class="val">85</div></div></div>
<div class="dt-field"><label class="dt-label">Survivor rule</label><div class="dt-select">most-complete</div></div>
</div>
</div>
<!-- Static checkbox stand-in: the "on" class plus the check icon presumably render the checked state (TODO confirm against app.css) -->
<div class="dt-check on" style="margin-top:6px"><span class="box"><span class="dt-mi">check</span></span> Merge mode — fill missing fields in the surviving row</div>
</div>
</details>
</div>
</details>
<hr class="dt-divider">
<!-- Primary run button for the tool; type="button" marks it as an in-page action rather than a form submit -->
<button class="dt-btn dt-btn-primary dt-btn-block" type="button">Find Duplicates</button>
<hr class="dt-divider">
<!-- Results: summary metrics for the completed run, followed by the download buttons -->
<h2>Results</h2>
<div class="dt-metrics">
<div class="dt-metric"><div class="label">Original rows</div><div class="value">18,442</div></div>
<div class="dt-metric"><div class="label">Duplicate rows</div><div class="value">312</div><div class="delta down">312 removed</div></div>
<div class="dt-metric"><div class="label">Match groups</div><div class="value">147</div></div>
<div class="dt-metric"><div class="label">Rows kept</div><div class="value">18,130</div></div>
</div>
<div class="dt-btn-row" style="max-width:560px">
<!-- type="button" marks these as in-page actions rather than form submits -->
<button class="dt-btn dt-btn-primary" type="button">Download deduplicated CSV</button>
<button class="dt-btn" type="button">Download removed rows</button>
</div>
<hr class="dt-divider">
<!-- Match groups: section heading and the bulk decision buttons -->
<h2>Match Groups</h2>
<div class="dt-cols-3" style="max-width:520px">
<!-- type="button" marks these as in-page actions rather than form submits -->
<button class="dt-btn" type="button">Accept All</button>
<button class="dt-btn" type="button">Reject All</button>
<button class="dt-btn" type="button">Clear Decisions</button>
</div>
<!-- Match group card 1 -->
<div class="dt-match-card">
<div class="dt-match-head">
<span class="title">Group 1 · 2 rows</span>
<span class="conf"><span class="dt-count-pill success">98% match</span></span>
</div>
<div class="dt-match-body">
<div class="dt-table-wrap">
<table class="dt-table">
<!-- scope="col" ties each header cell to the column below it -->
<thead><tr><th scope="col">keep</th><th scope="col">name</th><th scope="col">email</th><th scope="col">city</th><th scope="col">phone</th><th scope="col">signup_date</th></tr></thead>
<tbody>
<!-- Kept row first, then the row marked for removal; dt-cell-flag marks the highlighted cells described in the caption below -->
<!-- NOTE(review): the phone cell differs in formatting but is not flagged, while the reformatted date is; confirm this is intended -->
<tr class="dt-keep-row"><td><span class="dt-keep-tag">keep</span></td><td>Jane Doe</td><td>jane@acme.io</td><td>Austin</td><td>512-555-0190</td><td>2024-01-04</td></tr>
<tr><td><span class="dt-caption">remove</span></td><td class="dt-cell-flag">jane doe</td><td class="dt-cell-flag">JANE@ACME.IO</td><td class="dt-cell-flag">austin</td><td>(512) 555-0190</td><td class="dt-cell-flag">01/04/2024</td></tr>
</tbody>
</table>
</div>
<p class="dt-caption">Differing columns highlighted. The survivor row is kept; uncheck rows to split the group.</p>
</div>
</div>
<!-- Match group card 2 -->
<div class="dt-match-card">
<div class="dt-match-head">
<span class="title">Group 2 · 2 rows</span>
<span class="conf"><span class="dt-count-pill warn">87% match</span></span>
</div>
<div class="dt-match-body">
<div class="dt-table-wrap">
<table class="dt-table">
<!-- scope="col" ties each header cell to the column below it -->
<thead><tr><th scope="col">keep</th><th scope="col">name</th><th scope="col">email</th><th scope="col">city</th><th scope="col">phone</th><th scope="col">signup_date</th></tr></thead>
<tbody>
<!-- Kept row first, then the row marked for removal; only the name cell carries dt-cell-flag here -->
<tr class="dt-keep-row"><td><span class="dt-keep-tag">keep</span></td><td>Bob Smith</td><td>bob@globex.com</td><td>Denver</td><td>720-555-7781</td><td>2024-02-11</td></tr>
<tr><td><span class="dt-caption">remove</span></td><td class="dt-cell-flag">R. Smith</td><td>bob@globex.com</td><td>Denver</td><td>720-555-7781</td><td>2024-02-11</td></tr>
</tbody>
</table>
</div>
</div>
</div>
<!-- Decision summary line and the final apply button; type="button" marks it as an in-page action rather than a form submit -->
<p class="dt-caption" style="margin-top:14px">Decisions: 1 merged, 1 pending</p>
<button class="dt-btn dt-btn-primary dt-btn-block" type="button" style="margin-top:8px">Apply Review Decisions &amp; Download</button>
<!-- Processing log -->
<details class="dt-expander" style="margin-top:18px">
<summary>Processing Log</summary>
<div class="dt-expander-body">
<!-- Log entries are separated by literal newlines inside the div, so keep them flush left; presumably app.css preserves this whitespace (TODO confirm) -->
<div class="dt-code">[00:00.01] Loaded 18,442 rows from customers_export.csv
[00:00.04] Strategy: exact(email) + fuzzy(name, jaro_winkler ≥ 85)
[00:00.91] Compared 18,442 rows → 147 match groups
[00:01.02] Survivor rule: most-complete · merge=on
[00:01.05] 312 rows flagged for removal</div>
</div>
</details>
</div>
</main>
</div>
<!-- Empty footer placeholder; presumably filled in by shell.js, as with the empty #dt-sidebar element (TODO confirm) -->
<footer class="dt-footer" id="dt-footer"></footer>
<!-- Shared shell script, loaded from the assets/ directory one level above this page; placed at the end of body so the placeholders above already exist when it runs -->
<script src="../assets/shell.js"></script>
</body>
</html>