diff --git a/src/cli_text_clean.py b/src/cli_text_clean.py
index a234166..bc5c163 100644
--- a/src/cli_text_clean.py
+++ b/src/cli_text_clean.py
@@ -280,6 +280,10 @@ def clean(
             encoding=encoding_override,
             header_row=header_row,
             sheet_name=sheet_arg if sheet_arg is not None else 0,
+            # Bypass byte-level repair so the user's preset/flag choices
+            # remain authoritative. The cell-level cleaner does the
+            # smart-quote / NUL / BOM work itself.
+            repair=False,
         )
         if not isinstance(df, pd.DataFrame):
             df = pd.concat(list(df), ignore_index=True)
diff --git a/src/core/io.py b/src/core/io.py
index d668e45..dd45b87 100644
--- a/src/core/io.py
+++ b/src/core/io.py
@@ -137,6 +137,7 @@ def read_file(
     header_row: Optional[int] = None,
     sheet_name: Optional[str | int] = 0,
     chunk_size: Optional[int] = None,
+    repair: bool = True,
 ) -> pd.DataFrame | Generator[pd.DataFrame, None, None]:
     """Read a CSV, TSV, or Excel file into a DataFrame.
 
@@ -147,7 +148,13 @@ def read_file(
     delimiter : override detected delimiter (CSV only)
     header_row : 0-based row index for the header; auto-detected if *None*
     sheet_name : Excel sheet (name or 0-based index). Ignored for CSV.
-    chunk_size : if set, return a generator of DataFrames (CSV only).
+    chunk_size : if set, return a generator of DataFrames (CSV only). When
+        *chunk_size* is set, *repair* is forced off because the pre-parse
+        pass loads the entire file into memory.
+    repair : run :func:`repair_bytes` over the raw CSV before parsing
+        (default ``True``). Skipped for Excel files and whenever
+        *chunk_size* is set. Pass ``repair=False`` when you specifically
+        need pandas' raw view of the input.
 
     Returns a DataFrame (or generator when *chunk_size* is set).
     """
@@ -165,6 +172,7 @@ def read_file(
             delimiter=delimiter,
             header_row=header_row,
             chunk_size=chunk_size,
+            repair=repair,
         )
 
 
@@ -175,15 +183,56 @@ def _read_csv(
     delimiter: Optional[str] = None,
     header_row: Optional[int] = None,
     chunk_size: Optional[int] = None,
+    repair: bool = True,
 ) -> pd.DataFrame | Generator[pd.DataFrame, None, None]:
     enc = encoding or detect_encoding(path)
     delim = delimiter or detect_delimiter(path, enc)
     hdr = header_row if header_row is not None else detect_header_row(path, enc, delim)
 
-    logger.debug("Reading CSV {} (encoding={}, delimiter={!r}, header_row={})",
-                 path.name, enc, delim, hdr)
+    logger.debug(
+        "Reading CSV {} (encoding={}, delimiter={!r}, header_row={}, repair={})",
+        path.name, enc, delim, hdr, repair and not chunk_size,
+    )
 
-    kwargs: dict = dict(
+    if chunk_size:
+        # The repair pass reads the whole file into memory, which defeats
+        # the point of streaming, so chunked reads always go straight to
+        # pandas -- even when the caller asked for repair=True.
+        return pd.read_csv(
+            filepath_or_buffer=path,
+            encoding=enc,
+            delimiter=delim,
+            header=hdr,
+            dtype=str,
+            keep_default_na=False,
+            on_bad_lines="warn",
+            chunksize=chunk_size,
+        )
+
+    if repair:
+        raw = path.read_bytes()
+        repair_result = repair_bytes(raw, encoding=enc, delimiter=delim)
+        if repair_result.changed:
+            logger.info(
+                "Pre-parse repair on {}: {}", path.name, repair_result.summary(),
+            )
+        if repair_result.unrepairable_lines:
+            logger.warning(
+                "Pre-parse repair on {}: {} unrepairable line(s) at {}",
+                path.name, len(repair_result.unrepairable_lines),
+                repair_result.unrepairable_lines[:10],
+            )
+        return pd.read_csv(
+            io.BytesIO(repair_result.repaired_bytes),
+            encoding="utf-8",
+            delimiter=delim,
+            header=hdr,
+            dtype=str,
+            keep_default_na=False,
+            on_bad_lines="warn",
+        )
+
+    return pd.read_csv(
         filepath_or_buffer=path,
         encoding=enc,
         delimiter=delim,
@@ -193,11 +242,6 @@ def _read_csv(
         on_bad_lines="warn",
     )
 
-    if chunk_size:
-        return pd.read_csv(**kwargs, chunksize=chunk_size)
-
-    return pd.read_csv(**kwargs)
-
 
 def _read_excel(
     path: Path,
diff --git a/tests/test_io.py b/tests/test_io.py
index 598b5ae..514b6d4 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -207,6 +207,40 @@ class TestRepairBytes:
         assert summary.get("strip_nul") == 1
 
 
+class TestReadFileWithRepair:
+    """``read_file(repair=True)`` (default) routes CSV through repair_bytes."""
+
+    def test_default_strips_bom_via_repair(self, tmp_path):
+        f = tmp_path / "bom.csv"
+        f.write_bytes(b"\xef\xbb\xbfid,name\n1,Alice\n")
+        df = read_file(f)
+        # First column header must be 'id', not '\ufeffid' (BOM-prefixed).
+        assert list(df.columns)[0] == "id"
+
+    def test_default_folds_smart_double_quotes(self, tmp_path):
+        # Curly quotes are *unquoted* here — outer ASCII quotes would create
+        # a CSV-quoting collision once the fold runs.
+        f = tmp_path / "quoted.csv"
+        f.write_bytes("id,note\n1,curly “hello” world\n".encode("utf-8"))
+        df = read_file(f)
+        assert df.iloc[0]["note"] == 'curly "hello" world'
+
+    def test_repair_false_preserves_smart_quotes(self, tmp_path):
+        f = tmp_path / "quoted.csv"
+        f.write_bytes("id,note\n1,curly “hello” world\n".encode("utf-8"))
+        df = read_file(f, repair=False)
+        assert "“" in df.iloc[0]["note"] or "”" in df.iloc[0]["note"]
+
+    def test_chunked_read_skips_repair(self, tmp_path):
+        # Chunked reads bypass repair (memory budget): quotes must stay curly.
+        rows = "id,name\n" + "\n".join(f"{i},“Al”" for i in range(1, 21))
+        f = tmp_path / "chunked.csv"
+        f.write_bytes(rows.encode("utf-8"))
+        chunks = list(read_file(f, chunk_size=5))
+        assert sum(len(c) for c in chunks) == 20
+        assert all("“" in v for c in chunks for v in c["name"])
+
+
 class TestReadCsvRepaired:
     def test_recovers_malformed_currency_row(self, tmp_path):
         f = tmp_path / "bad.csv"