"""Single-command release builder for DataTools. PyInstaller can't cross-compile — to produce a Windows .exe you run this on Windows, for a Mac .dmg you run it on macOS, for a Linux AppImage you run it on Linux. One script, one OS at a time. What this script does (in order): 1. Preflight — checks PyInstaller, Pillow, and the platform's packager (Inno Setup on Win / hdiutil + ditto on Mac / appimagetool on Linux) are reachable. Bails with install instructions if anything is missing. 2. Generates icon.ico / icon.icns / icon.png from the PNG asset. 3. Runs PyInstaller against build/datatools.spec. 4. Wraps the PyInstaller output into: * Windows: DataTools--win-setup.exe (Inno Setup) + DataTools--win-portable.zip * macOS: DataTools--mac.dmg + DataTools--mac-portable.zip * Linux: DataTools--linux-x86_64.AppImage 5. Prints what landed in dist/ and the byte sizes. Usage: python build/make_release.py # build everything for this OS python build/make_release.py --preflight # check tooling, don't build python build/make_release.py --skip-installer # only the portable zip python build/make_release.py --skip-portable # only the installer python build/make_release.py --clean # wipe dist/ first Run from the repo root or from build/ — either works. """ from __future__ import annotations import argparse import os import platform import re import shutil import subprocess import sys import urllib.request from pathlib import Path REPO = Path(__file__).resolve().parent.parent BUILD = REPO / "build" DIST = REPO / "dist" # Tesseract bundling. The runtime discovery code in # ``src/pdf_extract.py`` looks for the binary at # ``Path(sys._MEIPASS) / "tesseract" / "tesseract[.exe]"`` and tessdata # at ``... / "tesseract" / "tessdata" / "eng.traineddata"``. We stage # everything under ``build/_tesseract//`` (gitignored) and # the PyInstaller spec adds that staging dir to ``datas=`` so it lands # at the right place inside the frozen bundle. TESSERACT_VERSION = "5.5.0" TESSDATA_DIR = BUILD / "vendor" / "tessdata" TESSDATA_URL = ( "https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata" ) TESSERACT_STAGING = BUILD / "_tesseract" # --------------------------------------------------------------------------- # Output helpers — colourless so logs stay readable in any terminal/CI tail. # --------------------------------------------------------------------------- def _step(msg: str) -> None: print(f"\n==> {msg}", flush=True) def _ok(msg: str) -> None: print(f" ok: {msg}", flush=True) def _warn(msg: str) -> None: print(f" warn: {msg}", flush=True) def _err(msg: str) -> None: print(f" ERROR: {msg}", file=sys.stderr, flush=True) def _run(cmd: list[str], cwd: Path | None = None, env: dict | None = None) -> None: """Run *cmd*, stream output, exit on failure with a useful banner.""" printable = " ".join(map(str, cmd)) print(f" $ {printable}", flush=True) try: subprocess.run(cmd, check=True, cwd=cwd or REPO, env=env) except subprocess.CalledProcessError as e: _err(f"command failed (exit {e.returncode}): {printable}") sys.exit(e.returncode) except FileNotFoundError: _err(f"command not found: {cmd[0]}") sys.exit(127) # --------------------------------------------------------------------------- # Platform detection # --------------------------------------------------------------------------- def _detect_platform() -> str: """Return ``win`` / ``mac`` / ``linux`` based on sys.platform.""" p = sys.platform if p.startswith("win"): return "win" if p == "darwin": return "mac" if p.startswith("linux"): return "linux" _err(f"unsupported platform {p!r}; this script handles win/mac/linux only.") sys.exit(2) # --------------------------------------------------------------------------- # Version — single source of truth in src/__init__.py # --------------------------------------------------------------------------- def _read_version() -> str: init_py = (REPO / "src" / "__init__.py").read_text(encoding="utf-8") m = re.search(r'__version__\s*=\s*["\']([^"\']+)["\']', init_py) if not m: _err("could not parse __version__ from src/__init__.py") sys.exit(1) return m.group(1) # --------------------------------------------------------------------------- # Preflight — check tooling before doing anything destructive # --------------------------------------------------------------------------- def _have_module(name: str) -> bool: try: __import__(name) return True except ImportError: return False def _have_command(name: str) -> bool: return shutil.which(name) is not None # Per-platform install hints. The error messages quote these so a buyer # building from source isn't left guessing what to install next. _INSTALL_HINTS = { "pyinstaller": "pip install pyinstaller", "pil": "pip install pillow", "iscc": "Inno Setup (Windows): https://jrsoftware.org/isdl.php — install, then re-open the shell so iscc lands on PATH.", "hdiutil": "ships with macOS — if it's missing your Mac install is broken.", "ditto": "ships with macOS — if it's missing your Mac install is broken.", "appimagetool": "Linux: download appimagetool-x86_64.AppImage from https://github.com/AppImage/AppImageKit/releases, chmod +x, drop on PATH.", } def preflight(target: str) -> None: """Verify every tool the target build needs is reachable; exit if not.""" _step(f"preflight ({target})") missing: list[tuple[str, str]] = [] # Python-side deps — same on every platform. The ``_INSTALL_HINTS`` # lookup uses lowercase keys so module name capitalization doesn't # need to match. for mod in ("PyInstaller", "PIL"): if not _have_module(mod): hint = _INSTALL_HINTS.get(mod.lower(), f"pip install {mod}") missing.append((mod.lower(), hint)) else: _ok(f"{mod} importable") # PyInstaller's CLI must also be reachable as a binary, not just as # an importable module — the spec is invoked via the ``pyinstaller`` # command. ``python -m PyInstaller`` is a fine fallback so don't # hard-fail if only the CLI binary is missing. if _have_command("pyinstaller"): _ok("pyinstaller on PATH") else: _warn("pyinstaller binary not on PATH — will fall back to `python -m PyInstaller`") # Platform-specific packagers. if target == "win": if _have_command("iscc"): _ok("Inno Setup (iscc) on PATH") else: missing.append(("iscc", _INSTALL_HINTS["iscc"])) elif target == "mac": for tool in ("hdiutil", "ditto"): if _have_command(tool): _ok(f"{tool} on PATH") else: missing.append((tool, _INSTALL_HINTS[tool])) elif target == "linux": if _have_command("appimagetool"): _ok("appimagetool on PATH") else: missing.append(("appimagetool", _INSTALL_HINTS["appimagetool"])) if missing: _err("missing prerequisites:") for name, hint in missing: print(f" - {name}: {hint}", file=sys.stderr) sys.exit(1) _ok("all prerequisites present") # --------------------------------------------------------------------------- # Tesseract bundling — fetch the binary + tessdata at build time. # # We download (not vendor) because: # * Binaries are large (5-40 MB per platform) and license-encumbered # to keep current in git. # * tessdata is Apache-2.0 and ~16 MB — fine to redistribute but # bloats clones for contributors who don't touch OCR. # # Caching layout: # build/_tesseract/win/tesseract.exe + DLLs # build/_tesseract/mac/tesseract + dylibs # build/_tesseract/linux/tesseract + libs # build/vendor/tessdata/eng.traineddata (shared across platforms) # # The PyInstaller spec reads ``build/_tesseract//`` and the # tessdata dir, then bundles them under ``/tesseract/``. # --------------------------------------------------------------------------- def _download(url: str, dest: Path, *, expected_min_bytes: int = 1024) -> None: """Download *url* to *dest* atomically. Sanity-check the size.""" dest.parent.mkdir(parents=True, exist_ok=True) tmp = dest.with_suffix(dest.suffix + ".part") print(f" GET {url}", flush=True) try: with urllib.request.urlopen(url, timeout=120) as r, open(tmp, "wb") as f: shutil.copyfileobj(r, f) except Exception as e: # noqa: BLE001 — bubble any network error up if tmp.exists(): tmp.unlink() _err(f"download failed: {url}\n {e}") raise size = tmp.stat().st_size if size < expected_min_bytes: tmp.unlink() raise RuntimeError( f"downloaded file too small ({size} bytes < {expected_min_bytes}); " f"the URL probably 404'd into an HTML error page." ) tmp.replace(dest) _ok(f"downloaded {dest.name} ({size / (1024 * 1024):.1f} MB)") def fetch_tessdata() -> Path: """Ensure ``build/vendor/tessdata/eng.traineddata`` exists; return its path. Shared across platforms. Downloaded once and cached. The runtime expects this file at ``/tesseract/tessdata/eng.traineddata``; the PyInstaller spec handles the placement. """ _step("fetch tessdata (eng.traineddata)") TESSDATA_DIR.mkdir(parents=True, exist_ok=True) target = TESSDATA_DIR / "eng.traineddata" if target.exists() and target.stat().st_size > 1_000_000: _ok(f"already cached: {target.relative_to(REPO)} " f"({target.stat().st_size / (1024 * 1024):.1f} MB)") return target # ~16 MB on disk for the "best" model. Allow some slack on the # min-bytes check (3 MB) so we still catch HTML 404 pages. _download(TESSDATA_URL, target, expected_min_bytes=3 * 1024 * 1024) return target def _fetch_tesseract_windows(staging: Path) -> None: """Stage tesseract.exe + DLLs into *staging*. Strategy (no easy stand-alone Windows tarball exists — UB-Mannheim ships the canonical Windows builds as Inno Setup installers): 1. Download the installer .exe from the UB-Mannheim mirror. 2. Extract it with 7-Zip (which can read Inno Setup archives via the {app} group). 7-Zip is preinstalled on ``windows-latest`` GitHub Actions runners (`C:\\Program Files\\7-Zip\\7z.exe`). 3. Copy tesseract.exe + every DLL + the tessdata dir from the extraction into ``staging/``. The DLL set tesseract.exe needs at runtime (per UB-Mannheim's Inno Setup script): libtesseract-5.dll, libleptonica-6.dll, libgomp-1.dll, libstdc++-6.dll, libwinpthread-1.dll, libgcc_s_seh-1.dll, liblz4.dll, libjpeg-8.dll, libpng16-16.dll, libtiff-6.dll, libwebp-7.dll, libwebpmux-3.dll, libopenjp2-7.dll, zlib1.dll The whole {app} tree from the installer is ~120 MB; we copy just the .exe + .dll files (~50 MB) since the runtime only needs the binary and its direct deps. """ # UB-Mannheim posts builds under a versioned filename; the exact # build revision changes (5.5.0.20241111 at time of writing). # We pin a specific rev so reproducible builds don't drift. rev = "20241111" # patch rev for tesseract 5.5.0 on the UB-Mannheim mirror fname = f"tesseract-ocr-w64-setup-{TESSERACT_VERSION}.{rev}.exe" url = f"https://digi.bib.uni-mannheim.de/tesseract/{fname}" cache = TESSERACT_STAGING / fname if not cache.exists(): _download(url, cache, expected_min_bytes=20 * 1024 * 1024) # 7-Zip is preinstalled on windows-latest runners; on a dev box # the user installs it (choco install 7zip) or substitutes # innoextract. Locate it. sevenz = ( shutil.which("7z") or shutil.which("7z.exe") or r"C:\Program Files\7-Zip\7z.exe" ) if not Path(sevenz).exists() and not shutil.which("7z"): _err( "7-Zip not found. On Windows CI runners it's preinstalled; " "on a dev box install via ``choco install 7zip`` or extract " f"{cache} manually into {staging}/ and re-run with " "TESSERACT_SKIP_FETCH=1." ) raise FileNotFoundError("7z") extract = TESSERACT_STAGING / "win_extract" if extract.exists(): shutil.rmtree(extract) extract.mkdir(parents=True) _run([str(sevenz), "x", "-y", f"-o{extract}", str(cache)]) staging.mkdir(parents=True, exist_ok=True) # The Inno Setup payload lands under ``{app}/`` inside the # extraction. Recursively grab tesseract.exe + DLLs. found_exe = False for root, _dirs, files in os.walk(extract): for f in files: src = Path(root) / f if f.lower() == "tesseract.exe": shutil.copy2(src, staging / "tesseract.exe") found_exe = True elif f.lower().endswith(".dll"): shutil.copy2(src, staging / f) if not found_exe: raise RuntimeError( f"tesseract.exe not found inside extracted installer at {extract}" ) _ok(f"staged Windows tesseract into {staging.relative_to(REPO)}") def _fetch_tesseract_macos(staging: Path) -> None: """Stage tesseract + dylibs into *staging* on macOS. Strategy: use Homebrew. ``brew install tesseract`` is the sanctioned macOS path and the binary it installs is the same one every guide on the internet points at. We copy the binary + every dylib it links against into the staging dir, then run ``install_name_tool`` to rewrite the load paths so the binary works after relocation into the .app bundle. Caveat: ``brew`` must be on PATH (it is on ``macos-latest`` runners). If it isn't, we surface a helpful error rather than fail mysteriously. """ if not shutil.which("brew"): _err( "Homebrew not found. On macos-latest GitHub runners it's " "preinstalled; on a dev Mac install from https://brew.sh and " "re-run. Alternatively pre-stage tesseract into " f"{staging}/ and set TESSERACT_SKIP_FETCH=1." ) raise FileNotFoundError("brew") # ``brew install`` is idempotent — fine to run on every build. We # don't pin the version through brew because brew tracks its own # taps; instead we assert the version matches TESSERACT_VERSION # after install. _run(["brew", "install", "tesseract"]) # Find the binary brew just installed. tess_path = shutil.which("tesseract") if not tess_path: raise RuntimeError("brew install tesseract succeeded but tesseract not on PATH") staging.mkdir(parents=True, exist_ok=True) shutil.copy2(tess_path, staging / "tesseract") # Copy every non-system dylib the binary links against. The # ``otool -L`` output lists absolute paths under /opt/homebrew/ # (Apple Silicon) or /usr/local/ (Intel). We skip /usr/lib/* and # /System/* (Apple-shipped, present on every Mac). try: otool = subprocess.run( ["otool", "-L", str(staging / "tesseract")], check=True, capture_output=True, text=True, ) except subprocess.CalledProcessError as e: raise RuntimeError(f"otool failed: {e.stderr}") from e deps = [] for line in otool.stdout.splitlines()[1:]: path = line.strip().split(" ", 1)[0] if path.startswith(("/opt/homebrew/", "/usr/local/")): deps.append(path) # Copy each dep and its transitive deps. One level of recursion # is usually enough for the tesseract dep tree (libtesseract → # libleptonica → libpng/libjpeg/libtiff/libwebp). copied: set[str] = set() def _copy_with_deps(libpath: str) -> None: if libpath in copied or not Path(libpath).exists(): return copied.add(libpath) dest = staging / Path(libpath).name shutil.copy2(libpath, dest) # Rewrite the dest's own load path to @loader_path so the # bundle is relocatable. try: subprocess.run( ["install_name_tool", "-id", f"@loader_path/{Path(libpath).name}", str(dest)], check=True, capture_output=True, ) except subprocess.CalledProcessError: # Not fatal — install_name_tool refuses on already-relative # IDs. The dyld loader will still find them via # @loader_path rewrites on the consumer side. pass # Walk this lib's own deps. try: sub = subprocess.run( ["otool", "-L", libpath], check=True, capture_output=True, text=True, ) for sub_line in sub.stdout.splitlines()[1:]: sub_path = sub_line.strip().split(" ", 1)[0] if sub_path.startswith(("/opt/homebrew/", "/usr/local/")): _copy_with_deps(sub_path) except subprocess.CalledProcessError: pass for dep in deps: _copy_with_deps(dep) # Rewrite the tesseract binary's references to point at # @loader_path/ so it can find its deps inside the bundle. bin_path = staging / "tesseract" for dep in deps: try: subprocess.run( ["install_name_tool", "-change", dep, f"@loader_path/{Path(dep).name}", str(bin_path)], check=True, capture_output=True, ) except subprocess.CalledProcessError: pass _ok(f"staged macOS tesseract + {len(copied)} dylibs into {staging.relative_to(REPO)}") def _fetch_tesseract_linux(staging: Path) -> None: """Stage tesseract + .so files into *staging* on Linux. Strategy: ``apt-get install tesseract-ocr libtesseract5`` (preinstalled on most ubuntu-latest images; we run install anyway because the package is idempotent). Then copy the binary + every .so it links against into staging. ``patchelf`` rewrites RPATH so the bundle is relocatable. """ if not shutil.which("apt-get") and not shutil.which("tesseract"): _err( "Neither apt-get nor a pre-installed tesseract found. On " "ubuntu-latest runners both are present. On other distros " "install tesseract-ocr via your package manager and re-run " "with TESSERACT_SKIP_FETCH=1 after pre-staging the binary." ) raise FileNotFoundError("tesseract") if shutil.which("apt-get") and not shutil.which("tesseract"): _run(["sudo", "apt-get", "update"]) _run(["sudo", "apt-get", "install", "-y", "tesseract-ocr", "libtesseract5"]) tess_path = shutil.which("tesseract") if not tess_path: raise RuntimeError("apt-get install succeeded but tesseract not on PATH") staging.mkdir(parents=True, exist_ok=True) shutil.copy2(tess_path, staging / "tesseract") # Collect .so dependencies via ldd. Skip the dynamic linker and # libc/libpthread/libdl/libm/libstdc++/libgcc_s — those are # guaranteed to exist on every Linux target and shipping them can # cause GLIBC mismatch errors on older distros. The interesting # tesseract-specific deps are libtesseract, libleptonica, and the # image format libs (libpng, libjpeg, libtiff, libwebp, libgif). SKIP_PREFIXES = ( "linux-vdso", "/lib64/ld-linux", "/lib/ld-linux", "libc.so", "libdl.so", "libpthread.so", "libm.so", "librt.so", "libnsl.so", "libutil.so", ) try: ldd = subprocess.run( ["ldd", str(staging / "tesseract")], check=True, capture_output=True, text=True, ) except subprocess.CalledProcessError as e: raise RuntimeError(f"ldd failed: {e.stderr}") from e copied = 0 for line in ldd.stdout.splitlines(): # Format: " libfoo.so.N => /path/to/libfoo.so.N (0x...)" parts = line.split("=>") if len(parts) != 2: continue soname = parts[0].strip() if soname.startswith(SKIP_PREFIXES): continue path_part = parts[1].strip().split(" ", 1)[0] if not path_part or not Path(path_part).exists(): continue shutil.copy2(path_part, staging / Path(path_part).name) copied += 1 # patchelf is optional — if present, rewrite RPATH to $ORIGIN so # the binary finds its bundled .so files. If absent, the # PyInstaller LD_LIBRARY_PATH that the launcher sets will cover # it (we already chdir into _MEIPASS for the runtime). if shutil.which("patchelf"): try: _run(["patchelf", "--set-rpath", "$ORIGIN", str(staging / "tesseract")]) except SystemExit: _warn("patchelf rpath rewrite failed — relying on LD_LIBRARY_PATH at runtime") _ok(f"staged Linux tesseract + {copied} .so files into {staging.relative_to(REPO)}") def fetch_tesseract_for_platform(target: str) -> Path: """Stage the per-platform Tesseract binary + libs into ``build/_tesseract//``. Returns the staging dir path. The PyInstaller spec adds this dir (plus tessdata) to its ``datas=`` so the bundle ends up with everything under ``/tesseract/`` where the runtime discovery code expects it. Honours ``TESSERACT_SKIP_FETCH=1`` — set this when you've pre-staged the binary by hand (offline build, behind a proxy, custom build of tesseract, etc.). The script still verifies the binary is present and surfaces a helpful error if not. """ _step(f"fetch tesseract binary ({target})") staging = TESSERACT_STAGING / target exe_name = "tesseract.exe" if target == "win" else "tesseract" exe_path = staging / exe_name if os.environ.get("TESSERACT_SKIP_FETCH") == "1": if not exe_path.exists(): _err( f"TESSERACT_SKIP_FETCH=1 but {exe_path} is missing. " "Pre-stage the binary + its libs into that dir, then re-run." ) sys.exit(1) _ok(f"skipping fetch (TESSERACT_SKIP_FETCH=1); using {exe_path.relative_to(REPO)}") return staging if exe_path.exists(): _ok(f"already staged: {exe_path.relative_to(REPO)}") return staging if target == "win": _fetch_tesseract_windows(staging) elif target == "mac": _fetch_tesseract_macos(staging) elif target == "linux": _fetch_tesseract_linux(staging) else: _err(f"unknown target {target!r} for tesseract fetch") sys.exit(2) if not exe_path.exists(): _err( f"fetch step finished but {exe_path.relative_to(REPO)} is missing. " "Inspect the logs above; you may need to pre-stage the binary manually." ) sys.exit(1) return staging # --------------------------------------------------------------------------- # Build steps # --------------------------------------------------------------------------- def step_generate_icons() -> None: _step("generate icons") _run([sys.executable, str(BUILD / "generate_icons.py")]) def step_pyinstaller(clean: bool, *, target: str | None = None) -> None: _step("pyinstaller bundle") # Use ``python -m PyInstaller`` so we don't depend on the binary # being on PATH (Windows users frequently see this — pip's # Scripts/ dir isn't auto-added). cmd = [sys.executable, "-m", "PyInstaller", str(BUILD / "datatools.spec"), "--noconfirm"] if clean: cmd.append("--clean") # The spec reads ``DATATOOLS_TESS_STAGING`` to find the per-platform # tesseract staging dir. Passing it via env keeps the spec file # platform-agnostic — the spec doesn't need to detect win/mac/linux # itself; the orchestrator already did. env = os.environ.copy() if target: env["DATATOOLS_TESS_STAGING"] = str(TESSERACT_STAGING / target) _run(cmd, env=env) def step_package_win(version: str, do_installer: bool, do_portable: bool) -> list[Path]: out: list[Path] = [] if do_installer: _step("Windows installer (Inno Setup)") _run(["iscc", f"/DAppVersion={version}", str(BUILD / "installer.iss")]) out.append(DIST / f"DataTools-{version}-win-setup.exe") if do_portable: _step("Windows portable .zip") _run([sys.executable, str(BUILD / "build_portable_zip.py"), "win", version]) out.append(DIST / f"DataTools-{version}-win-portable.zip") return out def step_package_mac(version: str, do_installer: bool, do_portable: bool) -> list[Path]: out: list[Path] = [] if do_installer: _step("macOS DMG (installer)") _run(["bash", str(BUILD / "macos" / "build_dmg.sh"), version]) out.append(DIST / f"DataTools-{version}-mac.dmg") if do_portable: _step("macOS portable .zip") _run(["bash", str(BUILD / "macos" / "build_zip.sh"), version]) out.append(DIST / f"DataTools-{version}-mac-portable.zip") return out def step_package_linux(version: str, do_installer: bool, do_portable: bool) -> list[Path]: # On Linux the AppImage IS the portable. We ignore the two flags # and always produce the single file — splitting wouldn't add # value. if not (do_installer or do_portable): return [] _step("Linux AppImage") _run(["bash", str(BUILD / "appimage" / "build.sh"), version]) return [DIST / f"DataTools-{version}-linux-x86_64.AppImage"] # --------------------------------------------------------------------------- # Orchestration # --------------------------------------------------------------------------- def _summarise(outputs: list[Path]) -> None: _step("done — outputs") if not outputs: _warn("no files produced (everything skipped via flags)") return for p in outputs: if p.exists(): size_mb = p.stat().st_size / (1024 * 1024) print(f" {p.relative_to(REPO)} ({size_mb:.1f} MB)") else: _warn(f"expected output missing: {p.relative_to(REPO)}") def main() -> int: parser = argparse.ArgumentParser( prog="make_release.py", description=( "Build the installer + portable zip for the current OS. " "Cross-compilation isn't supported by PyInstaller — run " "this once per platform you want to target." ), formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( "--platform", choices=("auto", "win", "mac", "linux"), default="auto", help="Override OS detection (mostly for testing). Default: auto.", ) parser.add_argument( "--preflight", action="store_true", help="Check tooling and exit without building.", ) parser.add_argument( "--clean", action="store_true", help="Wipe dist/ before building.", ) parser.add_argument( "--skip-installer", action="store_true", help="Don't build the OS installer (.exe / .dmg).", ) parser.add_argument( "--skip-portable", action="store_true", help="Don't build the portable .zip.", ) args = parser.parse_args() target = _detect_platform() if args.platform == "auto" else args.platform version = _read_version() do_installer = not args.skip_installer do_portable = not args.skip_portable print(f"DataTools release builder") print(f" target: {target} (host: {platform.platform()})") print(f" version: {version}") print(f" installer: {'yes' if do_installer else 'no'}") print(f" portable: {'yes' if do_portable else 'no'}") print(f" dist dir: {DIST}") if target != _detect_platform(): _warn( f"--platform {target} but host is {_detect_platform()}. " "PyInstaller can't cross-compile — the bundle will be for " "the HOST, only the packaging step will follow your override. " "Useful only for testing the packager paths." ) preflight(target) if args.preflight: return 0 if args.clean and DIST.exists(): _step(f"cleaning {DIST}") shutil.rmtree(DIST) step_generate_icons() # Stage Tesseract OCR before PyInstaller runs. The spec reads # ``build/_tesseract//`` + ``build/vendor/tessdata/`` and # bundles them under ``/tesseract/`` so the runtime # discovery in src/pdf_extract.py finds them at: # Path(sys._MEIPASS) / "tesseract" / "tesseract[.exe]" # Path(sys._MEIPASS) / "tesseract" / "tessdata" / "eng.traineddata" fetch_tessdata() fetch_tesseract_for_platform(target) step_pyinstaller(clean=args.clean, target=target) if target == "win": outputs = step_package_win(version, do_installer, do_portable) elif target == "mac": outputs = step_package_mac(version, do_installer, do_portable) else: outputs = step_package_linux(version, do_installer, do_portable) _summarise(outputs) return 0 if __name__ == "__main__": sys.exit(main())