diff --git a/src/gui/pages/10_PDF_Extractor.py b/src/gui/pages/10_PDF_Extractor.py
index 611f2c0..cd81750 100644
--- a/src/gui/pages/10_PDF_Extractor.py
+++ b/src/gui/pages/10_PDF_Extractor.py
@@ -31,7 +31,12 @@ if str(_project_root) not in sys.path:
 
 from src.audit import log_event, log_page_open
 from src.gui.components import hide_streamlit_chrome, render_sticky_footer
-from src.pdf_extract import apply_template, extract_pages_auto, render_page_image
+from src.pdf_extract import (
+    apply_template,
+    extract_pages_auto,
+    ocr_available,
+    render_page_image,
+)
 from src.pdf_templates import (
     SCHEMA_VERSION,
     VALID_TARGETS,
@@ -89,12 +94,31 @@ st.caption(
     "every statement that follows the same layout."
 )
 
-mode = st.radio(
-    "Mode",
-    ["Extract", "Build template", "Manage templates"],
-    horizontal=True,
-    key=K_MODE,
-)
+# Check OCR availability up front; the flag also gates the extract-mode checkbox.
+_ocr_ok, _ocr_reason = ocr_available()
+c_mode, c_ocr = st.columns([3, 2])
+mode = c_mode.radio(
+    "Mode",
+    ["Extract", "Build template", "Manage templates"],
+    horizontal=True,
+    key=K_MODE,
+    label_visibility="collapsed",
+)
+if _ocr_ok:
+    c_ocr.caption("**OCR:** ready · scanned pages will be transcribed.")
+else:
+    # Install hints stay folded away (expanded=False) until the user opens them.
+    with c_ocr.expander("**OCR:** unavailable", expanded=False):
+        st.caption(
+            f"Reason: {_ocr_reason or 'unknown'}. Scanned (image-based) "
+            "statements will fall through with warnings. "
+            "To enable OCR, install Tesseract on this machine — "
+            "[Windows](https://github.com/UB-Mannheim/tesseract/wiki) · "
+            "macOS: ``brew install tesseract`` · "
+            "Linux: ``apt install tesseract-ocr``. "
+            "Modern text-based statements don't need OCR."
+        )
+
 st.divider()
 
 
@@ -127,7 +151,7 @@ def _render_extract_mode() -> None:
         ),
     )
 
-    c1, c2 = st.columns(2)
+    c1, c2, c3 = st.columns(3)
     sort_by_date = c1.checkbox(
         "Sort combined output by date",
         value=True,
@@ -146,6 +170,15 @@ def _render_extract_mode() -> None:
             "back into separate ledgers."
         ),
     )
+    use_ocr = c3.checkbox(
+        "Use OCR for scanned pages",
+        value=_ocr_ok,
+        disabled=not _ocr_ok,
+        help=(
+            "When a page has no extractable text (typically a scan), "
+            "OCR it with Tesseract. Disabled when OCR isn't installed."
+        ),
+    )
 
     run = st.button("Extract", type="primary", disabled=not uploads)
     if run and uploads:
@@ -166,7 +199,9 @@ def _render_extract_mode() -> None:
                 st.write(f"**{i}/{len(uploads)}** · {up.name}")
                 try:
                     pdf_bytes = up.read()
-                    pages, warns = extract_pages_auto(pdf_bytes, allow_ocr=True)
+                    pages, warns = extract_pages_auto(
+                        pdf_bytes, allow_ocr=use_ocr,
+                    )
                     df = apply_template(pages, tpl)
                     df.insert(0, "source_file", up.name)
                     per_file_frames.append(df)