Files
datatools-dev/layout-review/10_pdf_extractor.html
Michael b2fa8503e6 chore: add layout-review HTML mockups
Static layout mockups for each app tool (deduplicator, text cleaner,
format standardizer, missing handler, column mapper, outlier detector,
multi-file merger, validator/reporter, pipeline runner, PDF extractor,
reconciler) plus index/home shells and shared assets.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 15:28:23 +00:00

190 lines
10 KiB
HTML

<!doctype html>
<html lang="en">
<head>
  <!-- Character encoding is declared first, before any other head content -->
  <meta charset="utf-8">
  <!-- Responsive viewport; zooming is left enabled -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Layout review — PDF to CSV</title>
  <!-- Stylesheet loaded from the assets/ folder next to this page -->
  <link href="assets/app.css" rel="stylesheet">
</head>
<body data-page="10_pdf_extractor">
<div class="dt-app">
<!-- Sidebar container; it is empty in this markup.
     NOTE(review): presumably filled in at runtime by assets/shell.js. Confirm against that script. -->
<aside class="dt-sidebar" id="dt-sidebar"></aside>
<main class="dt-main">
<!-- Review banner: tells the reviewer which state of the tool this static page depicts -->
<div class="dt-review-banner">
  <!-- Decorative icon. NOTE(review): assumes .dt-mi turns the text into an icon ligature;
       aria-hidden stops assistive tech from reading the literal word "visibility". -->
  <span class="dt-mi" aria-hidden="true">visibility</span>
  <span>Static layout preview of <strong>PDF to CSV</strong>, shown with two bank-statement PDFs imported and a completed scan (candidate transactions in the editable preview table). <a href="index.html">All pages →</a></span>
</div>
<div class="dt-main-inner">
<!-- Tool header: page title plus the Help button, followed by the one-line tool caption -->
<div class="dt-tool-header">
  <h1>PDF to CSV</h1>
  <!-- type="button" keeps this an in-page action even if it is later wrapped in a form.
       The icon text is decorative next to the visible "Help" label, so it is hidden from assistive tech. -->
  <button class="dt-help-btn" type="button"><span class="dt-mi" aria-hidden="true">help_outline</span> Help</button>
</div>
<p class="dt-tool-caption">Pull transactions out of bank-statement PDFs into a clean CSV file.</p>
<div class="dt-spacer"></div>
<!-- Scan options expander (collapsed by default: the details element has no "open" attribute) -->
<details class="dt-expander">
  <summary>Scan options</summary>
  <div class="dt-expander-body">
    <!-- Two mock checkboxes, both drawn in the checked ("on") state -->
    <div class="dt-cols-2">
      <div class="dt-check on">
        <span class="box"><span class="dt-mi">check</span></span>
        Treat (4.50) as negative
      </div>
      <div class="dt-check on">
        <span class="box"><span class="dt-mi">check</span></span>
        Use OCR for scanned pages
      </div>
    </div>
    <p class="dt-help-text" style="margin:0 0 10px">OCR status: ready (bundled Tesseract). Most modern bank PDFs are text-based and don't need OCR — only enable for image-based scans.</p>
    <div class="dt-cols-2">
      <div class="dt-field">
        <label class="dt-label">Output date format</label>
        <div class="dt-select">YYYY-MM-DD (2026-01-13)</div>
      </div>
      <div class="dt-field">
        <!-- for/id ties the visible label to the input; aria-describedby attaches the hint below it -->
        <label class="dt-label" for="pdf-year-override">Override year for short dates (optional)</label>
        <input class="dt-input" id="pdf-year-override" type="text" placeholder="" value="" disabled aria-describedby="pdf-year-override-help">
        <div class="dt-help-text" id="pdf-year-override-help">Leave blank for automatic (statement period → filename year → this override).</div>
      </div>
    </div>
  </div>
</details>
<!-- Files section head: heading plus file count / total size -->
<div class="dt-files-section-head">
  <h2>Files</h2>
  <span class="dt-section-meta">2 files · 318.4 KB total</span>
</div>
<!-- Files card (Home-style bordered list + Add more files) -->
<div class="dt-card" style="padding-bottom:0">
  <div class="dt-file-row" style="padding:6px 0">
    <!-- The remove button has no text content, so aria-label supplies its name; title is kept as the hover tooltip -->
    <button class="dt-btn dt-btn-tertiary" type="button" title="Remove statement-jan-2026.pdf" aria-label="Remove statement-jan-2026.pdf"></button>
    <!-- File glyph is decorative; the file name follows as text -->
    <span class="dt-file-icon-chip"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" aria-hidden="true"><path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/><path d="M14 2v6h6"/></svg></span>
    <span class="dt-file-name">statement-jan-2026.pdf</span>
    <span class="dt-file-size" style="margin-left:auto">171.2 KB</span>
  </div>
  <div class="dt-file-row" style="padding:6px 0">
    <button class="dt-btn dt-btn-tertiary" type="button" title="Remove statement-feb-2026.pdf" aria-label="Remove statement-feb-2026.pdf"></button>
    <span class="dt-file-icon-chip"><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" aria-hidden="true"><path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/><path d="M14 2v6h6"/></svg></span>
    <span class="dt-file-name">statement-feb-2026.pdf</span>
    <span class="dt-file-size" style="margin-left:auto">147.2 KB</span>
  </div>
  <!-- The plus glyph is decorative; the visible text names the button -->
  <button class="dt-file-add" type="button">
    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" aria-hidden="true"><path d="M12 5v14M5 12h14"/></svg> Add more files
  </button>
</div>
<!-- Action buttons: primary "Scan" and secondary "Clear all files".
     type="button" is explicit because a bare button defaults to type="submit". -->
<div class="dt-btn-row" style="margin-top:16px;max-width:340px">
  <button class="dt-btn dt-btn-primary" type="button">Scan</button>
  <button class="dt-btn" type="button">Clear all files</button>
</div>
<hr class="dt-divider">
<!-- Warnings expander (collapsed); the summary carries the warning count -->
<details class="dt-expander">
  <summary>Warnings (1)</summary>
  <div class="dt-expander-body">
    <div class="dt-alert warn">
      <!-- Decorative icon. NOTE(review): assumes .dt-mi renders an icon ligature; hidden so the
           literal word "warning" is not read out before the message. -->
      <span class="dt-mi" aria-hidden="true">warning</span>
      <span>[statement-feb-2026.pdf] 2 lines matched a date but no amount — skipped (likely a wrapped description). Check the source if a transaction looks missing.</span>
    </div>
  </div>
</details>
<!-- Results -->
<!-- h3 rather than h4: the preceding heading on this page is the h2 "Files", so h4 would skip a level.
     NOTE(review): if assets/app.css styles h4 by tag name, mirror that rule for h3. Confirm before merging. -->
<h3>47 candidate transaction(s) from 2 file(s)</h3>
<p class="dt-caption">Uncheck rows to exclude. Edit any cell to fix a value the scanner got wrong. The <code>raw</code> column shows the original PDF text for that row.</p>
<!-- Preview table of candidate transactions: one row per parsed line, with an Include mock checkbox per row -->
<div class="dt-table-wrap">
  <!-- aria-label names the table without adding a visible caption to the layout -->
  <table class="dt-table" aria-label="Candidate transactions">
    <thead>
      <!-- scope="col" marks each header cell as the header for its column -->
      <tr>
        <th scope="col">Include</th>
        <th scope="col">date</th>
        <th scope="col">description</th>
        <th scope="col">amount_debit</th>
        <th scope="col">amount_credit</th>
        <th scope="col">account_number</th>
        <th scope="col">source_file</th>
        <th scope="col">page</th>
        <th scope="col">raw</th>
      </tr>
    </thead>
    <tbody>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-01-03</td><td>OPENING BALANCE</td><td></td><td></td><td>****4821</td><td>statement-jan-2026.pdf</td><td class="idx">1</td><td>01/03 OPENING BALANCE 2,140.55</td>
      </tr>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-01-05</td><td>POS PURCHASE WHOLE FOODS MKT</td><td>84.12</td><td></td><td>****4821</td><td>statement-jan-2026.pdf</td><td class="idx">1</td><td>01/05 POS PURCHASE WHOLE FOODS MKT (84.12)</td>
      </tr>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-01-08</td><td>ACH DEPOSIT PAYROLL ACME CORP</td><td></td><td>3,250.00</td><td>****4821</td><td>statement-jan-2026.pdf</td><td class="idx">1</td><td>01/08 ACH DEPOSIT PAYROLL ACME CORP 3,250.00</td>
      </tr>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-01-11</td><td>ONLINE TRANSFER TO SAVINGS</td><td>500.00</td><td></td><td>****4821</td><td>statement-jan-2026.pdf</td><td class="idx">2</td><td>01/11 ONLINE TRANSFER TO SAVINGS (500.00)</td>
      </tr>
      <!-- The one excluded row in this mockup: unchecked, with the date and description cells flagged -->
      <tr>
        <td><span class="dt-check" style="margin:0"><span class="box"></span></span></td>
        <td class="dt-cell-flag">2026-01-12</td><td class="dt-cell-flag">INTEREST RATE 0.50% APY DETAIL</td><td></td><td></td><td>****4821</td><td>statement-jan-2026.pdf</td><td class="idx">2</td><td>01/12 INTEREST RATE 0.50% APY 0.00</td>
      </tr>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-01-14</td><td>DEBIT CARD SHELL OIL #2287</td><td>52.40</td><td></td><td>****4821</td><td>statement-jan-2026.pdf</td><td class="idx">2</td><td>01/14 DEBIT CARD SHELL OIL #2287 (52.40)</td>
      </tr>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-02-02</td><td>POS PURCHASE TRADER JOES #511</td><td>61.88</td><td></td><td>****4821</td><td>statement-feb-2026.pdf</td><td class="idx">1</td><td>02/02 POS PURCHASE TRADER JOES #511 (61.88)</td>
      </tr>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-02-06</td><td>ACH DEPOSIT PAYROLL ACME CORP</td><td></td><td>3,250.00</td><td>****4821</td><td>statement-feb-2026.pdf</td><td class="idx">2</td><td>02/06 ACH DEPOSIT PAYROLL ACME CORP 3,250.00</td>
      </tr>
      <tr>
        <td><span class="dt-check on" style="margin:0"><span class="box"><span class="dt-mi">check</span></span></span></td>
        <td>2026-02-09</td><td>CHECK #1043</td><td>1,200.00</td><td></td><td>****4821</td><td>statement-feb-2026.pdf</td><td class="idx">2</td><td>02/09 CHECK #1043 (1,200.00)</td>
      </tr>
    </tbody>
  </table>
</div>
<!-- Download row: download button (left) + columns multiselect (right) -->
<div class="dt-row" style="margin-top:14px;align-items:flex-start">
  <div style="flex:2">
    <!-- type="button" is explicit because a bare button defaults to type="submit" -->
    <button class="dt-btn dt-btn-primary dt-btn-block" type="button">Download 46 rows as CSV</button>
    <p class="dt-caption" style="margin-top:8px">46 of 47 rows selected.</p>
  </div>
  <div style="flex:3">
    <div class="dt-field" style="margin:0">
      <!-- The multiselect is a div, not a form control, so the label cannot target it with "for";
           role="group" + aria-labelledby gives the chip list the label's text as its name. -->
      <label class="dt-label" id="pdf-csv-columns-label">Columns to include in CSV</label>
      <div class="dt-multiselect" role="group" aria-labelledby="pdf-csv-columns-label">
        <span class="dt-ms-chip">date <span class="x"></span></span>
        <span class="dt-ms-chip">description <span class="x"></span></span>
        <span class="dt-ms-chip">amount_debit <span class="x"></span></span>
        <span class="dt-ms-chip">amount_credit <span class="x"></span></span>
        <span class="dt-ms-chip">account_number <span class="x"></span></span>
        <span class="dt-ms-chip">source_file <span class="x"></span></span>
      </div>
      <div class="dt-help-text"><code>page</code> and <code>raw</code> are kept off by default; tick them if you want them in the file.</div>
    </div>
  </div>
</div>
</div>
</main>
</div>
<!-- Footer container; it is empty in this markup.
     NOTE(review): presumably filled in by the script below. Confirm in assets/shell.js. -->
<footer class="dt-footer" id="dt-footer"></footer>
<!-- Script loaded at the end of body, after the sidebar and footer containers have been parsed -->
<script src="assets/shell.js"></script>
</body>
</html>