diff --git a/src/gui/pages/10_PDF_Extractor.py b/src/gui/pages/10_PDF_Extractor.py
index e268ddb..7033a60 100644
--- a/src/gui/pages/10_PDF_Extractor.py
+++ b/src/gui/pages/10_PDF_Extractor.py
@@ -23,6 +23,7 @@ from src.audit import log_event, log_page_open
 from src.gui.components import hide_streamlit_chrome, render_sticky_footer
 from src.pdf_extract import (
     PdfDependencyMissing,
+    diagnose_pdf_lines,
     ocr_available,
     scan_pdf_for_transactions,
 )
@@ -58,6 +59,7 @@ render_sticky_footer()
 K_ROWS = "pdf_scan_rows"
 K_WARNINGS = "pdf_scan_warnings"
 K_SOURCE_COUNT = "pdf_scan_source_count"
+K_DIAGNOSTIC = "pdf_scan_diagnostic"
 
 
 # ---------------------------------------------------------------------------
@@ -130,6 +132,9 @@ scan_clicked = st.button(
 if scan_clicked and uploads:
     all_rows: list[dict] = []
     all_warnings: list[str] = []
+    # Cache the raw bytes per file so the diagnostic expander can
+    # re-extract lines without asking the user to re-upload.
+    cached_bytes: list[tuple[str, bytes]] = []
     with st.status(
         f"Scanning {len(uploads)} file(s)…",
         expanded=True,
@@ -137,8 +142,10 @@ if scan_clicked and uploads:
         for i, up in enumerate(uploads, start=1):
             st.write(f"**{i}/{len(uploads)}** · {up.name}")
             try:
+                raw = up.read()
+                cached_bytes.append((up.name, raw))
                 rows, warns = scan_pdf_for_transactions(
-                    up.read(),
+                    raw,
                     negative_in_parens=negative_in_parens,
                     allow_ocr=use_ocr,
                 )
@@ -164,6 +171,7 @@ if scan_clicked and uploads:
     st.session_state[K_ROWS] = all_rows
     st.session_state[K_WARNINGS] = all_warnings
     st.session_state[K_SOURCE_COUNT] = len(uploads)
+    st.session_state[K_DIAGNOSTIC] = cached_bytes
 
     log_event(
         "tool_run",
@@ -197,10 +205,53 @@ if rows is None:
 elif not rows:
     st.info(
         "No transaction rows detected. The scanner looks for lines "
-        "containing a date and at least one amount. Check the "
-        "warnings expander above for clues — most often the PDF is "
-        "scanned (image-only) and OCR isn't available."
+        "containing a date and at least one amount. The diagnostic "
+        "below shows every line the PDF reader could see — use the "
+        "``has_date`` and ``has_amount`` columns to spot which "
+        "pieces are missing (usually one or the other)."
     )
+    cached_bytes = st.session_state.get(K_DIAGNOSTIC) or []
+    if cached_bytes:
+        with st.expander(
+            "Diagnostic: what the scanner saw",
+            expanded=True,
+        ):
+            for fname, raw in cached_bytes:
+                st.markdown(f"**{fname}**")
+                try:
+                    lines, dwarns = diagnose_pdf_lines(
+                        raw, allow_ocr=use_ocr, max_lines=200,
+                    )
+                except Exception as e:
+                    st.error(f"Diagnostic failed: {type(e).__name__}: {e}")
+                    continue
+                for w in dwarns:
+                    st.caption(w)
+                if not lines:
+                    st.warning(
+                        "Zero text lines extracted. This is almost "
+                        "certainly a scanned (image-based) PDF — "
+                        "enable OCR in Scan options if available."
+                    )
+                    continue
+                st.dataframe(
+                    pd.DataFrame(lines),
+                    hide_index=True,
+                    use_container_width=True,
+                    height=400,
+                )
+                date_hits = sum(1 for ln in lines if ln["has_date"])
+                amt_hits = sum(1 for ln in lines if ln["has_amount"])
+                both = sum(
+                    1 for ln in lines
+                    if ln["has_date"] and ln["has_amount"]
+                )
+                st.caption(
+                    f"{len(lines):,} lines · {date_hits:,} look like "
+                    f"they contain a date · {amt_hits:,} look like "
+                    f"they contain an amount · {both:,} have both "
+                    "(those are the rows the scanner would have kept)."
+                )
 
 else:
     df = pd.DataFrame(rows)
diff --git a/src/pdf_extract.py b/src/pdf_extract.py
index 4c375e4..b709d07 100644
--- a/src/pdf_extract.py
+++ b/src/pdf_extract.py
@@ -98,7 +98,7 @@ class Page:
 # ---------------------------------------------------------------------------
 
 
-_DATE_RES = [
+_DATE_RES_FULL = [
     re.compile(r"\b(\d{1,2}/\d{1,2}/\d{2,4})\b"),
     re.compile(r"\b(\d{1,2}-\d{1,2}-\d{2,4})\b"),
     re.compile(r"\b(\d{4}-\d{2}-\d{2})\b"),
@@ -106,6 +106,19 @@ _DATE_RES = [
     re.compile(r"\b(\d{1,2}\s+[A-Z][a-z]{2}\s+\d{2,4})\b"),
 ]
 
+# Short-date patterns (no year). Many bank statements show dates as
+# ``MM/DD`` or ``Jan 13`` because the year is implied by the
+# statement period. Tried only after the full-year patterns fail
+# so a string like "1/2 cup" in a memo can't claim to be a date
+# when a real dated transaction was already matched on the same row.
+_DATE_RES_SHORT = [
+    re.compile(r"\b(\d{1,2}/\d{1,2})(?!\d)"),
+    re.compile(r"\b(\d{1,2}-\d{1,2})(?!\d)"),
+    re.compile(r"\b([A-Z][a-z]{2}\s+\d{1,2})(?!\d)"),
+]
+
+_DATE_RES = _DATE_RES_FULL + _DATE_RES_SHORT
+
 _DATE_FORMATS_FALLBACK = [
     "%m/%d/%Y", "%m/%d/%y", "%Y-%m-%d", "%d/%m/%Y", "%d/%m/%y",
     "%b %d %Y", "%b %d, %Y", "%d %b %Y", "%d-%b-%Y",
@@ -427,21 +440,45 @@ def extract_pages_auto(
 
 def _find_dates_in_words(
     row_words: list[WordBox],
-) -> list[tuple[int, str]]:
-    """Return ``[(word_index, date_text)]`` for the first date-like
-    substring on this row, or ``[]`` if none. The index lets the
-    caller exclude the date words from the description text.
+) -> list[tuple[int, int, str]]:
+    """Return ``[(start_idx, end_idx, date_text)]`` for the first
+    date-like substring on this row, or ``[]`` if none.
 
-    Multi-word formats like ``Jan 15, 2026`` are handled by stitching
-    up to three adjacent words before matching.
+    Two-pass search:
+
+    - **Pass 1** — full-year patterns (``01/15/2026``,
+      ``Jan 13, 2026``). Tries the longest window first within
+      this pass so a multi-word ``Jan 15, 2026`` isn't truncated
+      to ``Jan 15``.
+    - **Pass 2** — short patterns (``01/13``, ``Jan 13``). Only
+      runs if pass 1 found nothing — otherwise a stray
+      ``Page 1/2`` on the same line could shadow the real dated
+      transaction.
+
+    ``start_idx``/``end_idx`` (exclusive) delimit the words the
+    matched text actually covers, not the search window, so the
+    caller can drop exactly ``range(start, end)`` from the
+    description even when the date is not the row's first word.
     """
-    for i, w in enumerate(row_words):
-        for window in (3, 2, 1):
-            chunk = " ".join(x.text for x in row_words[i : i + window])
-            for rx in _DATE_RES:
-                m = rx.search(chunk)
-                if m:
-                    return [(i, m.group(1))]
+    for patterns, window_order in (
+        (_DATE_RES_FULL, (3, 2, 1)),
+        (_DATE_RES_SHORT, (2, 1)),
+    ):
+        for i in range(len(row_words)):
+            for window in window_order:
+                end = i + window
+                if end > len(row_words):
+                    continue
+                chunk = " ".join(x.text for x in row_words[i:end])
+                for rx in patterns:
+                    m = rx.search(chunk)
+                    if m:
+                        # Map the match's character span back to word
+                        # indexes: the date may start after word ``i``
+                        # and may not reach the window's last word.
+                        start = i + chunk[: m.start(1)].count(" ")
+                        stop = i + chunk[: m.end(1)].count(" ") + 1
+                        return [(start, stop, m.group(1))]
     return []
 
 
@@ -469,18 +506,23 @@ def _find_amount_tokens(
 
 def _description_from_row(
     row_words: list[WordBox],
-    date_idx: int,
+    date_range: tuple[int, int],
     amount_idxs: set[int],
 ) -> str:
     """Stitch the description from the row's non-date, non-amount
-    tokens. Keeps tokens before the first amount and after the last
-    amount (trailing check numbers and memos); drops words between
+    tokens. ``date_range`` is ``(start, end)`` exclusive — every
+    word in that range is excluded so multi-word dates like
+    ``Jan 13`` don't leak the day token into the description.
+
+    Keeps tokens before the first amount and after the last
+    amount (trailing check numbers, memos); drops words between
     amount tokens (usually whitespace artifacts in column gaps)."""
+    date_start, date_end = date_range
     keep: list[str] = []
     seen_first_amount = False
     last_amount_idx = max(amount_idxs) if amount_idxs else -1
     for i, w in enumerate(row_words):
-        if i == date_idx:
+        if date_start <= i < date_end:
             continue
         if i in amount_idxs:
             seen_first_amount = True
@@ -552,9 +594,11 @@ def scan_pdf_for_transactions(
                     )
                 continue
 
-            date_idx, date_text = dates[0]
+            date_start, date_end, date_text = dates[0]
             amount_idxs = {idx for idx, _, _ in amount_tokens}
-            desc = _description_from_row(row_words, date_idx, amount_idxs)
+            desc = _description_from_row(
+                row_words, (date_start, date_end), amount_idxs,
+            )
 
             record: dict[str, Any] = {
                 "date": parse_date(date_text, date_formats) or date_text,
@@ -578,11 +622,58 @@ def scan_pdf_for_transactions(
     return out_rows, warnings
 
 
+def diagnose_pdf_lines(
+    pdf_bytes: bytes,
+    *,
+    allow_ocr: bool = True,
+    max_lines: int = 200,
+) -> tuple[list[dict[str, Any]], list[str]]:
+    """Dump every clustered text line from a PDF for diagnosis.
+
+    Surfaces what the scanner actually saw — including lines the
+    detector dropped because they lacked a date or amount. Use
+    when ``scan_pdf_for_transactions`` returns 0 rows so the user
+    can spot what's wrong (no extractable text → scanned PDF /
+    weird date format / amounts in a column the regex misses).
+
+    Returns ``(lines, warnings)`` where each line is::
+
+        {"page": int, "text": str,
+         "has_date": bool, "has_amount": bool}
+
+    At most *max_lines* lines are returned across all pages; the
+    "capped" warning is added only when a line was actually cut.
+    """
+    pages, warnings = extract_pages_auto(pdf_bytes, allow_ocr=allow_ocr)
+    out: list[dict[str, Any]] = []
+    for page in pages:
+        for row_words in cluster_rows(page.words):
+            text = " ".join(w.text for w in row_words).strip()
+            if not text:
+                continue
+            # Cap first: warn only when a line is really omitted.
+            if len(out) >= max_lines:
+                warnings.append(
+                    f"Diagnostic capped at {max_lines} lines. "
+                    "Larger PDFs aren't fully shown here — the full "
+                    "scan still runs in Scan mode."
+                )
+                return out, warnings
+            out.append({
+                "page": page.page_no,
+                "text": text,
+                "has_date": bool(_find_dates_in_words(row_words)),
+                "has_amount": bool(_find_amount_tokens(row_words)),
+            })
+    return out, warnings
+
+
 __all__ = [
     "PdfDependencyMissing",
     "Page",
     "WordBox",
     "cluster_rows",
+    "diagnose_pdf_lines",
     "extract_pages",
     "extract_pages_auto",
     "ocr_available",
diff --git a/tests/test_pdf_extract.py b/tests/test_pdf_extract.py
index 93abb2f..caec485 100644
--- a/tests/test_pdf_extract.py
+++ b/tests/test_pdf_extract.py
@@ -111,23 +111,54 @@ class TestClusterRows:
 
 
 class TestFindDatesInWords:
+    """Returns ``[(start, end, text)]`` — end is exclusive index of
+    words the date consumed."""
+
     def test_us_slash(self):
         row = [_w("01/15/2026", 0, 0), _w("Coffee", 100, 0)]
-        assert _find_dates_in_words(row) == [(0, "01/15/2026")]
+        assert _find_dates_in_words(row) == [(0, 1, "01/15/2026")]
 
     def test_two_digit_year(self):
         row = [_w("01/15/26", 0, 0), _w("Foo", 100, 0)]
         result = _find_dates_in_words(row)
-        assert result and result[0][1] == "01/15/26"
+        assert result and result[0][2] == "01/15/26"
 
     def test_iso(self):
         row = [_w("2026-01-15", 0, 0), _w("Tx", 100, 0)]
-        assert _find_dates_in_words(row) == [(0, "2026-01-15")]
+        assert _find_dates_in_words(row) == [(0, 1, "2026-01-15")]
 
-    def test_month_name(self):
+    def test_month_name_with_year_consumes_three_words(self):
         row = [_w("Jan", 0, 0), _w("15,", 25, 0), _w("2026", 50, 0)]
         result = _find_dates_in_words(row)
-        assert result and "Jan 15" in result[0][1]
+        assert result and "Jan 15" in result[0][2]
+        # Date consumes all 3 words so they don't leak to description.
+        assert result[0][1] == 3
+
+    def test_short_us_date_no_year(self):
+        """Chase-style ``01/13`` without a year still detects."""
+        row = [_w("01/13", 0, 0), _w("Coffee", 100, 0), _w("$4.50", 200, 0)]
+        result = _find_dates_in_words(row)
+        assert result and result[0][2] == "01/13"
+        assert result[0][1] == 1  # one word consumed
+
+    def test_short_month_name_no_year_consumes_two_words(self):
+        row = [_w("Jan", 0, 0), _w("13", 30, 0), _w("Coffee", 100, 0)]
+        result = _find_dates_in_words(row)
+        assert result
+        assert "Jan 13" in result[0][2]
+        assert result[0][1] == 2  # "Jan" + "13" both consumed
+
+    def test_short_pattern_does_not_shadow_full_year(self):
+        """If a full-year date is present, short patterns shouldn't
+        steal — e.g. ``Page 1/2 of 3 ... 01/13/2026 Coffee`` should
+        return the real ``01/13/2026``, not the ``1/2`` page marker."""
+        row = [
+            _w("Page", 0, 0), _w("1/2", 40, 0), _w("of", 80, 0),
+            _w("3", 100, 0),
+            _w("01/13/2026", 200, 0), _w("Coffee", 300, 0),
+        ]
+        result = _find_dates_in_words(row)
+        assert result and result[0][2] == "01/13/2026"
 
     def test_no_date(self):
         row = [_w("Just", 0, 0), _w("text", 50, 0)]