feat(pdf): visual region picker on rendered sample page
Phase 5/6. Adds a "Visual picker" tab as the first stop in the template-build flow. The sample PDF page is rasterized with ``pypdfium2`` (capped at ~900px wide for sensible display), and ``streamlit-drawable-canvas`` overlays drawing tools on top.

UX:
- **Line mode** — drag short (roughly vertical) strokes where you want columns to split. Each stroke's x-midpoint becomes one boundary in PDF point coordinates.
- **Rect mode** — drag a rectangle around the transactions table; the bbox is preserved on the template as ``visual.table_bbox`` for round-trip and for future use as a hard crop region.
- **Transform mode** — move/resize already-drawn shapes after the fact.

Round-trip: re-entering Build mode with an existing template seeds the canvas with full-height vertical lines for every boundary already on the template, plus the saved bbox if any, so editing-after-save matches the user's mental model.

Coordinate translation: the canvas reports pixel positions; we divide by the renderer's pixels-per-PDF-point scale to get back to the PDF coordinates that ``apply_template`` already expects. No template-schema change is required — the boundaries the picker writes are the same list the text-input editor wrote in commit 3, just sourced visually.

New helper in the extraction module:
- ``render_page_image(pdf_bytes, page_no, *, target_width=900)`` — rasterize a single 1-indexed page to a PIL image; returns ``(image, scale)`` for coordinate translation.

The text-input boundary editor in the Columns tab remains as a fallback for power users / keyboard-only workflows and for copy-paste from spreadsheet-derived x-positions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -31,7 +31,7 @@ if str(_project_root) not in sys.path:
|
|||||||
|
|
||||||
from src.audit import log_event, log_page_open
|
from src.audit import log_event, log_page_open
|
||||||
from src.gui.components import hide_streamlit_chrome, render_sticky_footer
|
from src.gui.components import hide_streamlit_chrome, render_sticky_footer
|
||||||
from src.pdf_extract import apply_template, extract_pages_auto
|
from src.pdf_extract import apply_template, extract_pages_auto, render_page_image
|
||||||
from src.pdf_templates import (
|
from src.pdf_templates import (
|
||||||
SCHEMA_VERSION,
|
SCHEMA_VERSION,
|
||||||
VALID_TARGETS,
|
VALID_TARGETS,
|
||||||
@@ -393,9 +393,185 @@ def _render_columns_editor(tpl: dict) -> None:
|
|||||||
tpl["columns"] = new_columns
|
tpl["columns"] = new_columns
|
||||||
|
|
||||||
|
|
||||||
|
def _render_visual_picker(tpl: dict) -> None:
    """Drawable-canvas overlay on a rendered sample page.

    The user draws (mostly) vertical lines where columns should
    split. We harvest each line's x-midpoint and write that into
    ``tpl["table"]["column_boundaries"]`` (in PDF point space). An
    optional rectangle becomes ``tpl["visual"]["table_bbox"]`` (in
    PDF points), preserved for round-trip but not yet used by
    extraction — the header/end-marker pair is enough to slice
    the row band in practice.

    Mutates *tpl* in place: ``visual.sample_page``,
    ``visual.page_width`` / ``visual.page_height`` (rendered pixel
    size divided by the render scale), ``table.column_boundaries``
    and ``visual.table_bbox``. Returns ``None``; when no sample PDF
    is loaded or the page cannot be rendered, a message is shown and
    the template's boundaries/bbox are left untouched.

    NOTE(review): ``_harvest_canvas`` reports ``None`` for the bbox
    both when the canvas has no data and when no rectangle is on it,
    so deleting the rectangle does not clear a saved
    ``visual.table_bbox`` — confirm whether that is intended.
    """
    from streamlit_drawable_canvas import st_canvas

    pdf_bytes = st.session_state.get(K_SAMPLE_BYTES)
    pages = st.session_state.get(K_SAMPLE_PAGES) or []
    if not pdf_bytes or not pages:
        st.info("Upload a sample PDF above to use the visual picker.")
        return

    # Tolerate templates whose "visual"/"table" sections are missing
    # or stored as null instead of crashing the whole tab.
    visual = tpl.get("visual")
    if not isinstance(visual, dict):
        visual = tpl["visual"] = {}
    table = tpl.get("table")
    if not isinstance(table, dict):
        table = tpl["table"] = {}

    max_page = len(pages)
    try:
        saved_page = int(visual.get("sample_page", 1))
    except (TypeError, ValueError):
        saved_page = 1
    sample_page = st.number_input(
        "Sample page",
        min_value=1,
        max_value=max_page,
        # Clamp on BOTH sides: a default outside [min_value, max_value]
        # makes the widget raise instead of rendering.
        value=max(1, min(saved_page, max_page)),
        step=1,
        help="Pick a page that contains the transactions table.",
    )
    visual["sample_page"] = int(sample_page)

    try:
        pil_image, scale = render_page_image(pdf_bytes, int(sample_page))
    except Exception as e:
        # UI boundary: surface any rendering failure to the user
        # rather than taking the page down.
        st.error(f"Couldn't render page {sample_page}: {type(e).__name__}: {e}")
        return

    # Rendered pixels divided by pixels-per-point gives the page size
    # in PDF points.
    visual["page_width"] = pil_image.width / scale
    visual["page_height"] = pil_image.height / scale

    c_left, c_right = st.columns([2, 1])
    with c_right:
        st.markdown("**How to use**")
        st.caption(
            "• **Lines** mode: drag short vertical strokes where you "
            "want columns to split. Each stroke contributes one "
            "x-boundary.\n"
            "• **Rect** mode: drag a box around the transactions "
            "table to crop the working region.\n"
            "• Use the trash icon (top-right of the canvas) to "
            "remove the last shape, or the X to clear all."
        )
        drawing_mode = st.radio(
            "Draw",
            ["line", "rect", "transform"],
            horizontal=True,
            help=(
                "transform lets you move/resize already-drawn shapes."
            ),
        )

    # Seed the canvas with whatever the template already holds so the
    # saved state is visible when re-entering build mode.
    initial_objects = _boundaries_to_canvas_lines(
        table.get("column_boundaries") or [],
        scale=scale,
        image_height=pil_image.height,
    )
    bbox = visual.get("table_bbox")
    if bbox:
        initial_objects.append(_bbox_to_canvas_rect(bbox, scale))

    with c_left:
        canvas_state = st_canvas(
            fill_color="rgba(255, 165, 0, 0.15)",
            stroke_width=2,
            stroke_color="#d62728",
            background_image=pil_image,
            update_streamlit=True,
            height=pil_image.height,
            width=pil_image.width,
            drawing_mode=drawing_mode,
            initial_drawing={"version": "4.4.0", "objects": initial_objects},
            # Page number is part of the widget key, so each sample
            # page gets its own canvas widget.
            key=f"pdf_canvas_p{sample_page}",
        )

    # ``None`` from the harvester means "no change": keep what the
    # template already has.
    new_bounds, new_bbox = _harvest_canvas(canvas_state, scale)
    if new_bounds is not None:
        table["column_boundaries"] = new_bounds
    if new_bbox is not None:
        visual["table_bbox"] = new_bbox

    if table.get("column_boundaries"):
        st.caption(
            "Boundaries (PDF pts): "
            + ", ".join(
                f"{b:.0f}" for b in table["column_boundaries"]
            )
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _boundaries_to_canvas_lines(
|
||||||
|
boundaries: list[float],
|
||||||
|
*,
|
||||||
|
scale: float,
|
||||||
|
image_height: int,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Seed the canvas with full-height vertical lines for any
|
||||||
|
boundaries already on the template, so the user sees their
|
||||||
|
saved state when re-entering build mode."""
|
||||||
|
out: list[dict] = []
|
||||||
|
for b in boundaries:
|
||||||
|
x_px = float(b) * scale
|
||||||
|
out.append({
|
||||||
|
"type": "line",
|
||||||
|
"left": x_px,
|
||||||
|
"top": 0,
|
||||||
|
"width": 0,
|
||||||
|
"height": image_height,
|
||||||
|
"x1": 0, "y1": 0,
|
||||||
|
"x2": 0, "y2": image_height,
|
||||||
|
"stroke": "#1f77b4",
|
||||||
|
"strokeWidth": 2,
|
||||||
|
"fill": "#1f77b4",
|
||||||
|
"selectable": True,
|
||||||
|
})
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _bbox_to_canvas_rect(bbox: list[float], scale: float) -> dict:
|
||||||
|
x0, top, x1, bottom = bbox
|
||||||
|
return {
|
||||||
|
"type": "rect",
|
||||||
|
"left": x0 * scale,
|
||||||
|
"top": top * scale,
|
||||||
|
"width": (x1 - x0) * scale,
|
||||||
|
"height": (bottom - top) * scale,
|
||||||
|
"stroke": "#d62728",
|
||||||
|
"strokeWidth": 1,
|
||||||
|
"fill": "rgba(255, 165, 0, 0.10)",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _harvest_canvas(canvas_state, scale: float):
|
||||||
|
"""Pull boundaries + bbox out of a ``st_canvas`` return value.
|
||||||
|
|
||||||
|
Returns ``(boundaries_or_None, bbox_or_None)`` where ``None``
|
||||||
|
means "no change" (so the existing template values stay put)."""
|
||||||
|
if canvas_state is None or canvas_state.json_data is None:
|
||||||
|
return None, None
|
||||||
|
objects = canvas_state.json_data.get("objects") or []
|
||||||
|
|
||||||
|
bounds: list[float] = []
|
||||||
|
bbox: list[float] | None = None
|
||||||
|
for obj in objects:
|
||||||
|
kind = obj.get("type")
|
||||||
|
left = float(obj.get("left", 0))
|
||||||
|
width = float(obj.get("width", 0))
|
||||||
|
if kind == "line":
|
||||||
|
# Take the line's x-midpoint as the boundary x-position.
|
||||||
|
bounds.append((left + width / 2) / scale)
|
||||||
|
elif kind == "rect":
|
||||||
|
top = float(obj.get("top", 0))
|
||||||
|
height = float(obj.get("height", 0))
|
||||||
|
bbox = [
|
||||||
|
left / scale,
|
||||||
|
top / scale,
|
||||||
|
(left + width) / scale,
|
||||||
|
(top + height) / scale,
|
||||||
|
]
|
||||||
|
return sorted(bounds), bbox
|
||||||
|
|
||||||
|
|
||||||
def _render_build_form(tpl: dict) -> None:
|
def _render_build_form(tpl: dict) -> None:
|
||||||
"""Render every editable field on the template, in tabs."""
|
"""Render every editable field on the template, in tabs."""
|
||||||
t1, t2, t3, t4 = st.tabs(["Pages & table", "Columns", "Parsing", "Save"])
|
t0, t1, t2, t3, t4 = st.tabs(
|
||||||
|
["Visual picker", "Pages & table", "Columns", "Parsing", "Save"]
|
||||||
|
)
|
||||||
|
|
||||||
|
with t0:
|
||||||
|
_render_visual_picker(tpl)
|
||||||
|
|
||||||
with t1:
|
with t1:
|
||||||
c1, c2 = st.columns(2)
|
c1, c2 = st.columns(2)
|
||||||
|
|||||||
@@ -512,6 +512,39 @@ def ocr_available() -> tuple[bool, str]:
|
|||||||
return True, ""
|
return True, ""
|
||||||
|
|
||||||
|
|
||||||
|
def render_page_image(
    pdf_bytes: bytes,
    page_no: int,
    *,
    target_width: int = 900,
) -> tuple["Any", float]:
    """Rasterize one page of *pdf_bytes* (1-indexed) to a PIL image.

    Returns ``(pil_image, scale)`` where ``scale`` is the
    pixels-per-PDF-point factor the page was rendered at. Callers
    divide canvas pixel coordinates by ``scale`` to get PDF points.

    *page_no* is clamped into the document's page range rather than
    rejected. ``target_width`` is the desired on-screen width in
    pixels (~900px suits the visual picker; bank statements at 100%
    can be 800–1200 pts wide). The scale never exceeds 3.0, so a
    narrow page may render narrower than ``target_width``.
    """
    import pypdfium2 as pdfium

    document = pdfium.PdfDocument(pdf_bytes)
    try:
        # Clamp the 1-indexed request into [0, last_index].
        last_index = len(document) - 1
        page_index = min(page_no - 1, last_index)
        if page_index < 0:
            page_index = 0
        page = document[page_index]

        # Page width in PDF points → pixels-per-point factor.
        width_pts = page.get_width()
        if width_pts:
            px_per_pt = target_width / width_pts
        else:
            # Zero-width page: fall back to a fixed factor.
            px_per_pt = 2.0
        # Upper bound on the factor: it only bites for pages narrower
        # than target_width / 3 points, keeping them from being
        # upscaled excessively.
        px_per_pt = min(px_per_pt, 3.0)

        rendered = page.render(scale=px_per_pt)
        return rendered.to_pil(), px_per_pt
    finally:
        document.close()
|
||||||
|
|
||||||
|
|
||||||
def ocr_pdf_to_pages(pdf_bytes: bytes, dpi: int = 200) -> list[Page]:
|
def ocr_pdf_to_pages(pdf_bytes: bytes, dpi: int = 200) -> list[Page]:
|
||||||
"""Run Tesseract over each page of *pdf_bytes* and return a
|
"""Run Tesseract over each page of *pdf_bytes* and return a
|
||||||
word-position-rich ``Page`` list, parallel to ``extract_pages``.
|
word-position-rich ``Page`` list, parallel to ``extract_pages``.
|
||||||
|
|||||||
Reference in New Issue
Block a user