diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3a421ae..61e1ee8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,18 +1,17 @@ name: Build installers # Triggers: -# * Tag push (v*) → produces installers + portable zips, attaches them -# to a GitHub Release. -# * Manual dispatch → uploads everything as workflow artifacts only. +# * Tag push (v*) → produces installers, attaches them to a GitHub Release. +# * Manual dispatch → uploads the installers as workflow artifacts only. # # Outputs per platform (downloadable by buyers): -# * macOS: .dmg installer + portable .zip (signed .app inside). -# * Windows: .exe installer + portable .zip (no-install). -# * Linux: .AppImage (already portable; no separate zip). +# * macOS: .dmg installer +# * Windows: .exe installer +# * Linux: .AppImage (already portable; no separate installer step) # # Self-contained: every artifact ships its own Python interpreter + every -# runtime dep through PyInstaller. No pre/post install steps on the -# buyer's machine. +# runtime dep (including bundled Tesseract OCR) through PyInstaller. No +# pre/post install steps on the buyer's machine. # # What this workflow doesn't do (yet): # * Code signing (Mac Developer ID, Windows code-signing cert). 
@@ -40,16 +39,16 @@ jobs: include: - os: macos-latest platform: mac - installer_glob: dist/DataTools-*-mac.dmg - portable_glob: dist/DataTools-*-mac-portable.zip + artifact_name: DataTools-mac.dmg + artifact_path: dist/DataTools-*-mac.dmg - os: windows-latest platform: win - installer_glob: dist/DataTools-*-win-setup.exe - portable_glob: dist/DataTools-*-win-portable.zip + artifact_name: DataTools-win.exe + artifact_path: dist/DataTools-*-win-setup.exe - os: ubuntu-latest platform: linux - installer_glob: dist/DataTools-*-linux-x86_64.AppImage - portable_glob: '' # AppImage is already a portable single file + artifact_name: DataTools-linux.AppImage + artifact_path: dist/DataTools-*-linux-x86_64.AppImage runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 @@ -66,7 +65,7 @@ jobs: pip install pyinstaller pillow # ---- Tesseract bundling cache -------------------------------- - # The fetch logic inside build/make_release.py downloads: + # The fetch logic inside build/tesseract.py downloads: # * build/vendor/tessdata/eng.traineddata (~16 MB, shared) # * build/_tesseract// (binary + libs, 30-120 MB) # Cache both so iterative CI runs don't re-download. The @@ -80,9 +79,9 @@ jobs: build/vendor/tessdata key: tesseract-${{ runner.os }}-5.5.0-tessdata_best-v1 - # ---- Linux: install patchelf so make_release.py can rewrite + # ---- Linux: install patchelf so tesseract.py can rewrite # RPATH on the bundled tesseract binary. apt-get install - # tesseract-ocr is handled inside make_release.py itself. ----- + # tesseract-ocr is handled inside tesseract.py itself. -------- - name: Install Linux build prereqs for Tesseract bundling if: matrix.os == 'ubuntu-latest' run: | @@ -99,9 +98,9 @@ jobs: - name: Generate platform icons run: python build/generate_icons.py - # Stage Tesseract before PyInstaller. 
The make_release.py - # helpers handle the per-platform fetch (UB-Mannheim on Win, - # brew on Mac, apt on Linux) and stage the binary + libs into + # Stage Tesseract before PyInstaller. The tesseract.py helpers + # handle the per-platform fetch (UB-Mannheim on Win, brew on + # Mac, apt on Linux) and stage the binary + libs into # build/_tesseract// where the spec picks them up. # We invoke a tiny inline Python so the workflow doesn't have # to know the per-platform target string. @@ -113,7 +112,7 @@ jobs: python - <<'PY' import os, sys sys.path.insert(0, "build") - from make_release import fetch_tessdata, fetch_tesseract_for_platform + from tesseract import fetch_tessdata, fetch_tesseract_for_platform target = os.environ["DATATOOLS_PLATFORM"] fetch_tessdata() fetch_tesseract_for_platform(target) @@ -133,10 +132,6 @@ jobs: if: matrix.os == 'macos-latest' run: bash build/macos/build_dmg.sh "${{ steps.version.outputs.version }}" - - name: Package macOS portable .zip - if: matrix.os == 'macos-latest' - run: bash build/macos/build_zip.sh "${{ steps.version.outputs.version }}" - - name: Install Inno Setup (Windows) if: matrix.os == 'windows-latest' run: choco install innosetup --no-progress -y @@ -147,10 +142,6 @@ jobs: run: | iscc /DAppVersion=${{ steps.version.outputs.version }} build\installer.iss - - name: Package Windows portable .zip - if: matrix.os == 'windows-latest' - run: python build/build_portable_zip.py win ${{ steps.version.outputs.version }} - - name: Install AppImage tooling (Linux) if: matrix.os == 'ubuntu-latest' run: | @@ -168,29 +159,14 @@ jobs: - name: Upload installer artifact uses: actions/upload-artifact@v4 with: - name: DataTools-${{ matrix.platform }}-installer - path: ${{ matrix.installer_glob }} + name: ${{ matrix.artifact_name }} + path: ${{ matrix.artifact_path }} if-no-files-found: error - - name: Upload portable artifact - if: matrix.portable_glob != '' - uses: actions/upload-artifact@v4 - with: - name: DataTools-${{ matrix.platform 
}}-portable - path: ${{ matrix.portable_glob }} - if-no-files-found: error - - - name: Attach installer to Release (tag push only) + - name: Attach to Release (tag push only) if: startsWith(github.ref, 'refs/tags/v') uses: softprops/action-gh-release@v2 with: - files: ${{ matrix.installer_glob }} + files: ${{ matrix.artifact_path }} fail_on_unmatched_files: true generate_release_notes: true - - - name: Attach portable to Release (tag push only) - if: startsWith(github.ref, 'refs/tags/v') && matrix.portable_glob != '' - uses: softprops/action-gh-release@v2 - with: - files: ${{ matrix.portable_glob }} - fail_on_unmatched_files: true diff --git a/README.es.md b/README.es.md index 829a57b..33e7f6d 100644 --- a/README.es.md +++ b/README.es.md @@ -22,15 +22,15 @@ Cada página de herramienta incluye una ventana emergente de **Help** (a la dere ## Descarga (usuarios no técnicos) -Paquetes precompilados — sin instalar Python, sin permisos de administrador, sin internet en ejecución. Cada versión ofrece dos formatos por sistema operativo: un **instalador** que crea accesos directos en el escritorio + menú Inicio / Launchpad, y un **.zip portable** que descomprimes y haces doble clic. Elige el que te permita tu política de TI. +Paquetes precompilados — sin instalar Python, sin permisos de administrador, sin internet en ejecución. Cada versión ofrece un **instalador** por sistema operativo que crea accesos directos en el escritorio + menú Inicio / Launchpad. -| Plataforma | Instalador (recomendado) | Portable (sin instalar) | -|---|---|---| -| **macOS** | `DataTools-X.Y.Z-mac.dmg` — ábrelo, arrastra DataTools.app a /Applications, ejecútalo desde Launchpad. | `DataTools-X.Y.Z-mac-portable.zip` — descomprime donde quieras, doble clic en `DataTools.app`. | -| **Windows** | `DataTools-X.Y.Z-win-setup.exe` — ejecuta el instalador (por usuario, sin admin). Crea acceso directo en el escritorio + entrada en el menú Inicio. 
| `DataTools-X.Y.Z-win-portable.zip` — descomprime donde quieras, doble clic en `DataTools.exe`. | -| **Linux** | `DataTools-X.Y.Z-linux-x86_64.AppImage` — `chmod +x` y doble clic. | El AppImage ya es portable. | +| Plataforma | Instalador | +|---|---| +| **macOS** | `DataTools-X.Y.Z-mac.dmg` — ábrelo, arrastra DataTools.app a /Applications, ejecútalo desde Launchpad. | +| **Windows** | `DataTools-X.Y.Z-win-setup.exe` — ejecuta el instalador (por usuario, sin admin). Crea acceso directo en el escritorio + entrada en el menú Inicio. | +| **Linux** | `DataTools-X.Y.Z-linux-x86_64.AppImage` — `chmod +x` y doble clic. El AppImage ya es portable. | -Última versión: consulta [GitHub Releases](https://git.invixiom.com/giteadmin/datatools-dev/releases) (o el listado de Gumroad). Cada paquete ocupa ~300 MB descomprimido; al primer arranque la app levanta un servidor local en http://127.0.0.1:8501 y abre tu navegador predeterminado. Nada sale de tu equipo — instalador y portable son idénticos por dentro. +Última versión: consulta [GitHub Releases](https://git.invixiom.com/giteadmin/datatools-dev/releases) (o el listado de Gumroad). Cada paquete ocupa ~300 MB descomprimido; al primer arranque la app levanta un servidor local en http://127.0.0.1:8501 y abre tu navegador predeterminado. Nada sale de tu equipo. **Tesseract OCR viene incluido.** El soporte para PDFs escaneados del Extractor de PDF funciona sin configuración adicional en las tres plataformas — no hace falta instalar Tesseract por separado. Atribución de licencia: ver [`LICENSE_TESSERACT.txt`](LICENSE_TESSERACT.txt). diff --git a/README.md b/README.md index 44c95b9..7c23863 100644 --- a/README.md +++ b/README.md @@ -22,15 +22,15 @@ Every tool page has an in-tool **Help** popover (right of the title) with a comp ## Download (non-technical users) -Pre-built bundles — no Python install, no admin rights, no internet at runtime. 
Each release ships two flavors per OS: an **installer** that wires up Desktop + Start Menu / Launchpad shortcuts, and a **portable .zip** you unzip and double-click. Pick whichever your IT policy allows. +Pre-built bundles — no Python install, no admin rights, no internet at runtime. Each release ships an **installer** per OS that wires up Desktop + Start Menu / Launchpad shortcuts. -| Platform | Installer (recommended) | Portable (no install) | -|---|---|---| -| **macOS** | `DataTools-X.Y.Z-mac.dmg` — open, drag DataTools.app into /Applications, launch from Launchpad. | `DataTools-X.Y.Z-mac-portable.zip` — unzip anywhere, double-click `DataTools.app`. | -| **Windows** | `DataTools-X.Y.Z-win-setup.exe` — run installer (per-user, no admin). Desktop shortcut + Start Menu entry created. | `DataTools-X.Y.Z-win-portable.zip` — unzip anywhere, double-click `DataTools.exe`. | -| **Linux** | `DataTools-X.Y.Z-linux-x86_64.AppImage` — `chmod +x`, double-click. | The AppImage is already portable. | +| Platform | Installer | +|---|---| +| **macOS** | `DataTools-X.Y.Z-mac.dmg` — open, drag DataTools.app into /Applications, launch from Launchpad. | +| **Windows** | `DataTools-X.Y.Z-win-setup.exe` — run installer (per-user, no admin). Desktop shortcut + Start Menu entry created. | +| **Linux** | `DataTools-X.Y.Z-linux-x86_64.AppImage` — `chmod +x`, double-click. The AppImage is already portable. | -Latest release: see [GitHub Releases](https://git.invixiom.com/giteadmin/datatools-dev/releases) (or the Gumroad listing). Each bundle is ~300 MB unpacked; on first launch the app starts a local server at http://127.0.0.1:8501 and opens your default browser. Nothing leaves your machine — installers and portables are byte-identical inside. +Latest release: see [GitHub Releases](https://git.invixiom.com/giteadmin/datatools-dev/releases) (or the Gumroad listing). 
Each bundle is ~300 MB unpacked; on first launch the app starts a local server at http://127.0.0.1:8501 and opens your default browser. Nothing leaves your machine. **Tesseract OCR is bundled.** Scanned-PDF support in the PDF Extractor works out of the box on all three platforms — no separate Tesseract install required. License attribution: see [`LICENSE_TESSERACT.txt`](LICENSE_TESSERACT.txt). diff --git a/build/README.md b/build/README.md index fe31ec6..314dfcc 100644 --- a/build/README.md +++ b/build/README.md @@ -23,14 +23,12 @@ build/ ├── generate_icons.py Builds icon.ico / icon.icns / icon.png from │ src/gui/assets/datatools_icon_256.png. Run │ once before pyinstaller (CI does this). -├── build_portable_zip.py Cross-platform: zips dist/DataTools/ into a -│ no-install portable download. Used by the -│ Windows + Linux portable artifacts. +├── tesseract.py Fetches the per-platform Tesseract binary + +│ eng.traineddata at build time. CI imports +│ fetch_tessdata + fetch_tesseract_for_platform. ├── macos/ -│ ├── build_dmg.sh Wraps dist/DataTools.app into a .dmg with a -│ │ drag-to-/Applications layout (installer). -│ └── build_zip.sh Wraps dist/DataTools.app into a portable -│ .zip via ditto (preserves bundle metadata). +│ └── build_dmg.sh Wraps dist/DataTools.app into a .dmg with a +│ drag-to-/Applications layout (installer). ├── appimage/ │ ├── AppRun Entry point invoked when the AppImage runs. │ ├── datatools.desktop Linux desktop-entry metadata. 
@@ -43,17 +41,15 @@ build/ ## Distribution outputs per platform -Each CI run produces two downloads per platform — an installer for -buyers who want shortcuts wired automatically, and a portable .zip -for buyers (or IT-locked-down machines) that can't run installers: +Each CI run produces one installer per platform: -| Platform | Installer | Portable | -|----------|----------------------------------------|------------------------------------------------| -| macOS | `DataTools-<ver>-mac.dmg` | `DataTools-<ver>-mac-portable.zip` (ditto .app)| -| Windows | `DataTools-<ver>-win-setup.exe` | `DataTools-<ver>-win-portable.zip` | -| Linux | `DataTools-<ver>-linux-x86_64.AppImage`| (the AppImage IS the portable) | +| Platform | Installer | +|----------|----------------------------------------| +| macOS | `DataTools-<ver>-mac.dmg` | +| Windows | `DataTools-<ver>-win-setup.exe` | +| Linux | `DataTools-<ver>-linux-x86_64.AppImage` (already portable) | -All six outputs are self-contained: every dependency (Python, pandas, +All three outputs are self-contained: every dependency (Python, pandas, streamlit, pdfplumber, **Tesseract OCR + `eng.traineddata`**, the lot) is frozen into the bundle. The buyer does not need to install Python, pip, Tesseract, or anything else first. With Tesseract bundled, each @@ -76,47 +72,44 @@ the resulting installers to a GitHub Release. Manual ## Releasing -### Single-command local build (recommended for one-developer workflow) +### CI build (push tag → GitHub Release) — the release process -PyInstaller can't cross-compile, so a single machine produces one -platform's packages. 
Run this on each target OS: - -```bash -# One-time setup per machine: -pip install -r requirements.txt -pip install pyinstaller pillow -# Windows only: install Inno Setup from https://jrsoftware.org/isdl.php -# Linux only: drop appimagetool onto PATH (see preflight output) - -# Build everything for the current OS: -python build/make_release.py -``` - -Outputs land in `dist/`: -- Windows host → `DataTools--win-setup.exe` + `DataTools--win-portable.zip` -- macOS host → `DataTools--mac.dmg` + `DataTools--mac-portable.zip` -- Linux host → `DataTools--linux-x86_64.AppImage` - -Useful flags: - -```bash -python build/make_release.py --preflight # check tooling, build nothing -python build/make_release.py --clean # wipe dist/ first -python build/make_release.py --skip-installer # just the portable zip -python build/make_release.py --skip-portable # just the installer -``` - -### CI build (push tag → GitHub Release) - -If you have CI runners for all three OSes: +Releases are built by GitHub Actions (`.github/workflows/build.yml`), +not on a developer's machine. The matrix runs on +macos-latest / windows-latest / ubuntu-latest, stages Tesseract +(`build/tesseract.py`), runs PyInstaller, packages the per-platform +installer, and attaches it to a GitHub Release on tag push: 1. Bump `__version__` in `src/__init__.py`. 2. `git commit -am "release: vX.Y.Z" && git tag vX.Y.Z`. 3. `git push && git push --tags`. 4. CI builds all three platforms and creates a Release with the - installers + portable zips attached. + installers attached. 5. Mirror the Release assets to Gumroad (manual until v2). +A manual `workflow_dispatch` run does the same build but uploads the +installers as workflow artifacts instead of creating a Release — +useful for smoke-testing a build without cutting a tag. + +### Local build (single platform, for testing) + +PyInstaller can't cross-compile, so a local build produces only the +current OS's installer. 
This mirrors what CI does, by hand — use it to +debug the bundle before tagging. See the per-platform recipes below for +the exact commands; the short version is: + +```bash +pip install -r requirements.txt +pip install pyinstaller pillow +python build/generate_icons.py +python -c "import sys; sys.path.insert(0,'build'); \ + from tesseract import fetch_tessdata, fetch_tesseract_for_platform; \ + fetch_tessdata(); fetch_tesseract_for_platform('mac')" # win / mac / linux +pyinstaller build/datatools.spec --clean --noconfirm +# then run the matching packager: build/macos/build_dmg.sh, +# build/installer.iss (iscc), or build/appimage/build.sh +``` + ## Signing (Phase 2 — needs accounts/credentials) Both code-signing steps are intentionally not in CI yet because they @@ -321,17 +314,18 @@ The runtime resolver (in `src/`, owned by the runtime team) walks: (sourced from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best)). `datatools.spec` copies it into `tesseract/tessdata/`. - **Binary** — fetched per-platform at build time by - `build/make_release.py` from pinned upstream URLs. Current pin: - **Tesseract 5.5.0**. + `build/tesseract.py` from pinned upstream URLs. Current pin: + **Tesseract 5.5.0**. CI imports `fetch_tessdata` + + `fetch_tesseract_for_platform` from this module before PyInstaller. **Updating Tesseract**: 1. Bump the version pin and the per-platform fetch URLs in - `build/make_release.py`. + `build/tesseract.py`. 2. If the model schema changed upstream, refresh `build/vendor/tessdata/eng.traineddata` from `tessdata_best` at the matching tag. -3. Rebuild on each platform (`python build/make_release.py`) and +3. Trigger a manual `workflow_dispatch` run (no tag, no Release) so CI rebuilds all three platforms, then smoke-test a scanned PDF through the PDF Extractor. 4. Update `LICENSE_TESSERACT.txt` at the repo root if upstream license terms change (Apache-2.0 today). 
diff --git a/build/build_portable_zip.py b/build/build_portable_zip.py deleted file mode 100644 index 3f324c0..0000000 --- a/build/build_portable_zip.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Wrap the PyInstaller folder build into a portable .zip. - -Self-contained download: unzip → double-click the launcher → app runs. -No installer, no Python install, no admin rights required. - -Usage: - python build/build_portable_zip.py - -Where ``platform`` is one of ``win`` / ``mac`` / ``linux``. The -script just produces a generic ``dist/DataTools/`` zip; on macOS the -preferred portable format is the ``ditto``-wrapped .app — see -``build/macos/build_zip.sh`` for that flow. This helper exists mainly -for Windows + Linux, where there's no .app bundle to wrap. - -Output: - dist/DataTools---portable.zip - -The zip root is the ``DataTools/`` folder so an unzip produces a -self-contained dir the user can drop anywhere (Desktop, USB stick, -network share). On Windows, the launcher is ``DataTools.exe`` inside -that folder; on Linux, ``DataTools``. -""" - -from __future__ import annotations - -import shutil -import sys -from pathlib import Path - -REPO = Path(__file__).resolve().parent.parent -DIST_DIR = REPO / "dist" -BUNDLE_DIR = DIST_DIR / "DataTools" - - -def main() -> int: - if len(sys.argv) < 3: - sys.stderr.write( - "usage: python build/build_portable_zip.py \n" - ) - return 2 - platform = sys.argv[1] - version = sys.argv[2] - - if not BUNDLE_DIR.is_dir(): - sys.stderr.write( - f"Bundle dir not found at {BUNDLE_DIR}.\n" - "Run ``pyinstaller build/datatools.spec --clean --noconfirm`` first.\n" - ) - return 1 - - out_stem = DIST_DIR / f"DataTools-{version}-{platform}-portable" - # ``make_archive`` takes a base name (no extension) and produces - # ``.zip``. ``root_dir`` = parent of what we want compressed, - # ``base_dir`` = the folder name inside the archive root. 
This - # combo yields a single top-level ``DataTools/`` directory inside - # the .zip rather than dumping its contents loose. - archive = shutil.make_archive( - base_name=str(out_stem), - format="zip", - root_dir=str(DIST_DIR), - base_dir="DataTools", - ) - size_mb = Path(archive).stat().st_size / (1024 * 1024) - print(f"wrote {archive} ({size_mb:.1f} MB)") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/build/datatools.spec b/build/datatools.spec index b55dbbb..5a2401e 100644 --- a/build/datatools.spec +++ b/build/datatools.spec @@ -105,7 +105,7 @@ datas += [ ] # ----- Tesseract OCR bundle ---------------------------------------- -# ``build/make_release.py`` stages the per-platform Tesseract binary +# ``build/tesseract.py`` stages the per-platform Tesseract binary # + its runtime libs (DLLs/dylibs/sos) into # ``build/_tesseract//`` and the shared eng.traineddata into # ``build/vendor/tessdata/``. We add both to ``datas`` so PyInstaller @@ -119,16 +119,16 @@ datas += [ # from ``Path(sys._MEIPASS) / "tesseract" / ...``. Keep the two ends # in sync — if you rename "tesseract" here, update pdf_extract.py too. # -# The orchestrator (make_release.py) sets DATATOOLS_TESS_STAGING to -# the right per-platform dir before invoking PyInstaller. For ad-hoc -# `pyinstaller build/datatools.spec` runs without the orchestrator, -# fall back to the canonical staging path. +# CI (.github/workflows/build.yml) sets DATATOOLS_TESS_STAGING to the +# right per-platform dir before invoking PyInstaller. For ad-hoc +# `pyinstaller build/datatools.spec` runs without that env var, fall +# back to the canonical staging path. _tess_staging_env = os.environ.get("DATATOOLS_TESS_STAGING") if _tess_staging_env: _tess_staging = Path(_tess_staging_env) else: # Pick the obvious per-host staging dir as a fallback so spec-only - # builds (without the orchestrator) still work in dev. + # builds (without the CI env var) still work in dev. 
import sys as _sys_for_target _target_guess = ( "win" if _sys_for_target.platform.startswith("win") @@ -149,8 +149,8 @@ else: # though, since the OCR feature will silently fail at runtime. print( f"WARNING: {_tess_staging} is empty or missing — OCR will be " - "disabled in the bundle. Run build/make_release.py (which " - "calls fetch_tesseract_for_platform) before pyinstaller, or " + "disabled in the bundle. Run build/tesseract.py's " + "fetch_tesseract_for_platform before pyinstaller, or " "pre-stage the binary manually." ) @@ -159,8 +159,8 @@ if (_tessdata / "eng.traineddata").exists(): else: print( f"WARNING: {_tessdata}/eng.traineddata is missing — OCR will " - "have no language data at runtime. Run build/make_release.py " - "or fetch manually per build/vendor/README.md." + "have no language data at runtime. Run build/tesseract.py's " + "fetch_tessdata or fetch manually per build/vendor/README.md." ) # Bundle the Apache-2.0 LICENSE text alongside the binary. The docs diff --git a/build/macos/build_zip.sh b/build/macos/build_zip.sh deleted file mode 100755 index d979ca0..0000000 --- a/build/macos/build_zip.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -# Wrap dist/DataTools.app into a no-install portable .zip. -# -# Usage: -# bash build/macos/build_zip.sh -# -# Why a portable .zip in addition to the .dmg: -# * Buyers who don't want an installer can unzip and double-click the -# .app directly — no drag-to-/Applications step, no installer -# chrome. Self-contained: the .app holds Python + every dep. -# * IT-locked-down machines often block .dmg auto-mount but allow -# .zip download + extraction. -# -# Run after ``pyinstaller build/datatools.spec --clean --noconfirm`` -# has produced ``dist/DataTools.app``. Output goes to -# ``dist/DataTools--mac-portable.zip``. -# -# Tesseract bundling: no-op here. 
The bundled Tesseract binary + -# dylibs + tessdata are already inside DataTools.app/Contents/Resources/tesseract/ -# (placed by PyInstaller's BUNDLE/datas mechanism). ``ditto -c -k`` -# preserves the whole .app tree. - -set -euo pipefail - -VERSION="${1:-0.0.0-dev}" -APP="dist/DataTools.app" -ZIP="dist/DataTools-${VERSION}-mac-portable.zip" - -if [[ ! -d "$APP" ]]; then - echo "Error: $APP not found. Run pyinstaller build/datatools.spec first." >&2 - exit 1 -fi - -# ``ditto`` preserves the .app bundle's extended attributes and -# resource forks (a plain ``zip`` strips them and can break code -# signatures + Info.plist resolution on the buyer's machine). -# -# --sequesterRsrc keeps the AppleDouble metadata inside the archive -# rather than as parallel ._ files on disk after extraction. -rm -f "$ZIP" -ditto -c -k --sequesterRsrc --keepParent "$APP" "$ZIP" - -echo "Built $ZIP ($(du -h "$ZIP" | cut -f1))" diff --git a/build/make_release.py b/build/tesseract.py similarity index 59% rename from build/make_release.py rename to build/tesseract.py index 23ff242..f8efbfe 100644 --- a/build/make_release.py +++ b/build/tesseract.py @@ -1,40 +1,23 @@ -"""Single-command release builder for DataTools. +"""Tesseract bundling helpers for the release build. -PyInstaller can't cross-compile — to produce a Windows .exe you run -this on Windows, for a Mac .dmg you run it on macOS, for a Linux -AppImage you run it on Linux. One script, one OS at a time. +PDF Extractor OCR ships a per-platform Tesseract binary plus the English +``eng.traineddata`` model inside the frozen PyInstaller bundle so scanned +PDFs work without a separate user install. These helpers fetch the binary +and tessdata at build time; the GitHub Actions workflow +(``.github/workflows/build.yml``) imports ``fetch_tessdata`` and +``fetch_tesseract_for_platform`` and runs them before PyInstaller. -What this script does (in order): - 1. 
Preflight — checks PyInstaller, Pillow, and the platform's - packager (Inno Setup on Win / hdiutil + ditto on Mac / - appimagetool on Linux) are reachable. Bails with install - instructions if anything is missing. - 2. Generates icon.ico / icon.icns / icon.png from the PNG asset. - 3. Runs PyInstaller against build/datatools.spec. - 4. Wraps the PyInstaller output into: - * Windows: DataTools--win-setup.exe (Inno Setup) - + DataTools--win-portable.zip - * macOS: DataTools--mac.dmg - + DataTools--mac-portable.zip - * Linux: DataTools--linux-x86_64.AppImage - 5. Prints what landed in dist/ and the byte sizes. - -Usage: - python build/make_release.py # build everything for this OS - python build/make_release.py --preflight # check tooling, don't build - python build/make_release.py --skip-installer # only the portable zip - python build/make_release.py --skip-portable # only the installer - python build/make_release.py --clean # wipe dist/ first - -Run from the repo root or from build/ — either works. +Everything is staged under ``build/_tesseract/<target>/`` (gitignored). +The PyInstaller spec (``build/datatools.spec``) reads that staging dir plus +``build/vendor/tessdata/`` and bundles them under ``<bundle>/tesseract/``, +where the runtime discovery code in ``src/pdf_extract.py`` expects: + Path(sys._MEIPASS) / "tesseract" / "tesseract[.exe]" + Path(sys._MEIPASS) / "tesseract" / "tessdata" / "eng.traineddata" """ from __future__ import annotations -import argparse import os -import platform -import re import shutil import subprocess import sys @@ -43,7 +26,6 @@ from pathlib import Path REPO = Path(__file__).resolve().parent.parent BUILD = REPO / "build" -DIST = REPO / "dist" # Tesseract bundling. 
The runtime discovery code in # ``src/pdf_extract.py`` looks for the binary at @@ -95,119 +77,6 @@ def _run(cmd: list[str], cwd: Path | None = None, env: dict | None = None) -> No sys.exit(127) -# --------------------------------------------------------------------------- -# Platform detection -# --------------------------------------------------------------------------- - - -def _detect_platform() -> str: - """Return ``win`` / ``mac`` / ``linux`` based on sys.platform.""" - p = sys.platform - if p.startswith("win"): - return "win" - if p == "darwin": - return "mac" - if p.startswith("linux"): - return "linux" - _err(f"unsupported platform {p!r}; this script handles win/mac/linux only.") - sys.exit(2) - - -# --------------------------------------------------------------------------- -# Version — single source of truth in src/__init__.py -# --------------------------------------------------------------------------- - - -def _read_version() -> str: - init_py = (REPO / "src" / "__init__.py").read_text(encoding="utf-8") - m = re.search(r'__version__\s*=\s*["\']([^"\']+)["\']', init_py) - if not m: - _err("could not parse __version__ from src/__init__.py") - sys.exit(1) - return m.group(1) - - -# --------------------------------------------------------------------------- -# Preflight — check tooling before doing anything destructive -# --------------------------------------------------------------------------- - - -def _have_module(name: str) -> bool: - try: - __import__(name) - return True - except ImportError: - return False - - -def _have_command(name: str) -> bool: - return shutil.which(name) is not None - - -# Per-platform install hints. The error messages quote these so a buyer -# building from source isn't left guessing what to install next. 
-_INSTALL_HINTS = { - "pyinstaller": "pip install pyinstaller", - "pil": "pip install pillow", - "iscc": "Inno Setup (Windows): https://jrsoftware.org/isdl.php — install, then re-open the shell so iscc lands on PATH.", - "hdiutil": "ships with macOS — if it's missing your Mac install is broken.", - "ditto": "ships with macOS — if it's missing your Mac install is broken.", - "appimagetool": "Linux: download appimagetool-x86_64.AppImage from https://github.com/AppImage/AppImageKit/releases, chmod +x, drop on PATH.", -} - - -def preflight(target: str) -> None: - """Verify every tool the target build needs is reachable; exit if not.""" - _step(f"preflight ({target})") - - missing: list[tuple[str, str]] = [] - - # Python-side deps — same on every platform. The ``_INSTALL_HINTS`` - # lookup uses lowercase keys so module name capitalization doesn't - # need to match. - for mod in ("PyInstaller", "PIL"): - if not _have_module(mod): - hint = _INSTALL_HINTS.get(mod.lower(), f"pip install {mod}") - missing.append((mod.lower(), hint)) - else: - _ok(f"{mod} importable") - - # PyInstaller's CLI must also be reachable as a binary, not just as - # an importable module — the spec is invoked via the ``pyinstaller`` - # command. ``python -m PyInstaller`` is a fine fallback so don't - # hard-fail if only the CLI binary is missing. - if _have_command("pyinstaller"): - _ok("pyinstaller on PATH") - else: - _warn("pyinstaller binary not on PATH — will fall back to `python -m PyInstaller`") - - # Platform-specific packagers. 
- if target == "win": - if _have_command("iscc"): - _ok("Inno Setup (iscc) on PATH") - else: - missing.append(("iscc", _INSTALL_HINTS["iscc"])) - elif target == "mac": - for tool in ("hdiutil", "ditto"): - if _have_command(tool): - _ok(f"{tool} on PATH") - else: - missing.append((tool, _INSTALL_HINTS[tool])) - elif target == "linux": - if _have_command("appimagetool"): - _ok("appimagetool on PATH") - else: - missing.append(("appimagetool", _INSTALL_HINTS["appimagetool"])) - - if missing: - _err("missing prerequisites:") - for name, hint in missing: - print(f" - {name}: {hint}", file=sys.stderr) - sys.exit(1) - - _ok("all prerequisites present") - - # --------------------------------------------------------------------------- # Tesseract bundling — fetch the binary + tessdata at build time. # @@ -582,176 +451,3 @@ def fetch_tesseract_for_platform(target: str) -> Path: ) sys.exit(1) return staging - - -# --------------------------------------------------------------------------- -# Build steps -# --------------------------------------------------------------------------- - - -def step_generate_icons() -> None: - _step("generate icons") - _run([sys.executable, str(BUILD / "generate_icons.py")]) - - -def step_pyinstaller(clean: bool, *, target: str | None = None) -> None: - _step("pyinstaller bundle") - # Use ``python -m PyInstaller`` so we don't depend on the binary - # being on PATH (Windows users frequently see this — pip's - # Scripts/ dir isn't auto-added). - cmd = [sys.executable, "-m", "PyInstaller", - str(BUILD / "datatools.spec"), - "--noconfirm"] - if clean: - cmd.append("--clean") - # The spec reads ``DATATOOLS_TESS_STAGING`` to find the per-platform - # tesseract staging dir. Passing it via env keeps the spec file - # platform-agnostic — the spec doesn't need to detect win/mac/linux - # itself; the orchestrator already did. 
- env = os.environ.copy() - if target: - env["DATATOOLS_TESS_STAGING"] = str(TESSERACT_STAGING / target) - _run(cmd, env=env) - - -def step_package_win(version: str, do_installer: bool, do_portable: bool) -> list[Path]: - out: list[Path] = [] - if do_installer: - _step("Windows installer (Inno Setup)") - _run(["iscc", f"/DAppVersion={version}", str(BUILD / "installer.iss")]) - out.append(DIST / f"DataTools-{version}-win-setup.exe") - if do_portable: - _step("Windows portable .zip") - _run([sys.executable, str(BUILD / "build_portable_zip.py"), "win", version]) - out.append(DIST / f"DataTools-{version}-win-portable.zip") - return out - - -def step_package_mac(version: str, do_installer: bool, do_portable: bool) -> list[Path]: - out: list[Path] = [] - if do_installer: - _step("macOS DMG (installer)") - _run(["bash", str(BUILD / "macos" / "build_dmg.sh"), version]) - out.append(DIST / f"DataTools-{version}-mac.dmg") - if do_portable: - _step("macOS portable .zip") - _run(["bash", str(BUILD / "macos" / "build_zip.sh"), version]) - out.append(DIST / f"DataTools-{version}-mac-portable.zip") - return out - - -def step_package_linux(version: str, do_installer: bool, do_portable: bool) -> list[Path]: - # On Linux the AppImage IS the portable. We ignore the two flags - # and always produce the single file — splitting wouldn't add - # value. 
- if not (do_installer or do_portable): - return [] - _step("Linux AppImage") - _run(["bash", str(BUILD / "appimage" / "build.sh"), version]) - return [DIST / f"DataTools-{version}-linux-x86_64.AppImage"] - - -# --------------------------------------------------------------------------- -# Orchestration -# --------------------------------------------------------------------------- - - -def _summarise(outputs: list[Path]) -> None: - _step("done — outputs") - if not outputs: - _warn("no files produced (everything skipped via flags)") - return - for p in outputs: - if p.exists(): - size_mb = p.stat().st_size / (1024 * 1024) - print(f" {p.relative_to(REPO)} ({size_mb:.1f} MB)") - else: - _warn(f"expected output missing: {p.relative_to(REPO)}") - - -def main() -> int: - parser = argparse.ArgumentParser( - prog="make_release.py", - description=( - "Build the installer + portable zip for the current OS. " - "Cross-compilation isn't supported by PyInstaller — run " - "this once per platform you want to target." - ), - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument( - "--platform", choices=("auto", "win", "mac", "linux"), default="auto", - help="Override OS detection (mostly for testing). 
Default: auto.", - ) - parser.add_argument( - "--preflight", action="store_true", - help="Check tooling and exit without building.", - ) - parser.add_argument( - "--clean", action="store_true", - help="Wipe dist/ before building.", - ) - parser.add_argument( - "--skip-installer", action="store_true", - help="Don't build the OS installer (.exe / .dmg).", - ) - parser.add_argument( - "--skip-portable", action="store_true", - help="Don't build the portable .zip.", - ) - args = parser.parse_args() - - target = _detect_platform() if args.platform == "auto" else args.platform - version = _read_version() - do_installer = not args.skip_installer - do_portable = not args.skip_portable - - print(f"DataTools release builder") - print(f" target: {target} (host: {platform.platform()})") - print(f" version: {version}") - print(f" installer: {'yes' if do_installer else 'no'}") - print(f" portable: {'yes' if do_portable else 'no'}") - print(f" dist dir: {DIST}") - - if target != _detect_platform(): - _warn( - f"--platform {target} but host is {_detect_platform()}. " - "PyInstaller can't cross-compile — the bundle will be for " - "the HOST, only the packaging step will follow your override. " - "Useful only for testing the packager paths." - ) - - preflight(target) - if args.preflight: - return 0 - - if args.clean and DIST.exists(): - _step(f"cleaning {DIST}") - shutil.rmtree(DIST) - - step_generate_icons() - - # Stage Tesseract OCR before PyInstaller runs. 
The spec reads - # ``build/_tesseract//`` + ``build/vendor/tessdata/`` and - # bundles them under ``/tesseract/`` so the runtime - # discovery in src/pdf_extract.py finds them at: - # Path(sys._MEIPASS) / "tesseract" / "tesseract[.exe]" - # Path(sys._MEIPASS) / "tesseract" / "tessdata" / "eng.traineddata" - fetch_tessdata() - fetch_tesseract_for_platform(target) - - step_pyinstaller(clean=args.clean, target=target) - - if target == "win": - outputs = step_package_win(version, do_installer, do_portable) - elif target == "mac": - outputs = step_package_mac(version, do_installer, do_portable) - else: - outputs = step_package_linux(version, do_installer, do_portable) - - _summarise(outputs) - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/build/vendor/README.md b/build/vendor/README.md index 31763b8..b3961a9 100644 --- a/build/vendor/README.md +++ b/build/vendor/README.md @@ -4,9 +4,10 @@ This tree holds the third-party assets that get bundled into the PyInstaller artifacts but that we deliberately do **not** keep in git (too large / license-encumbered / re-fetchable on demand). -The build pipeline (`build/make_release.py`) populates everything in -here before the PyInstaller step. The contents are git-ignored except -for this README. +The build's Tesseract helper (`build/tesseract.py`) populates +everything in here before the PyInstaller step — CI +(`.github/workflows/build.yml`) calls it ahead of the build. The +contents are git-ignored except for this README. ## tessdata/ @@ -40,9 +41,9 @@ statements (the only OCR use case so far), the extra accuracy of the ### How it gets populated -`build/make_release.py::fetch_tessdata()` checks for +`build/tesseract.py::fetch_tessdata()` checks for `build/vendor/tessdata/eng.traineddata` on every run. If it's -missing, the script downloads it from the canonical URL above and +missing, it downloads the file from the canonical URL above and caches it here. Subsequent builds reuse the cached file. 
On CI, the directory is restored from the GitHub Actions cache so we diff --git a/docs/DEVELOPER.md b/docs/DEVELOPER.md index 62a67b1..67e3147 100644 --- a/docs/DEVELOPER.md +++ b/docs/DEVELOPER.md @@ -298,7 +298,7 @@ All `DataToolsError` subclasses extend stdlib `ValueError` or `OSError` so exist ## PDF Extractor — bundled Tesseract -Frozen builds (installer / portable .zip / AppImage) ship Tesseract OCR inside the bundle so scanned PDFs work without a separate system install. Source / `pip` developer environments still resolve Tesseract from `PATH`. +Frozen builds (installer / AppImage) ship Tesseract OCR inside the bundle so scanned PDFs work without a separate system install. Source / `pip` developer environments still resolve Tesseract from `PATH`. **Runtime layout (frozen bundles)**: @@ -318,13 +318,13 @@ Frozen builds (installer / portable .zip / AppImage) ship Tesseract OCR inside t **Where the bytes come from**: - **Tessdata** is vendored at `build/vendor/tessdata/eng.traineddata` — the "best" English model from [tessdata_best](https://github.com/tesseract-ocr/tessdata_best). PyInstaller's spec copies it into `tesseract/tessdata/` inside the bundle. -- **Tesseract binary** is fetched at build time by `build/make_release.py` — per-platform download URLs are pinned in that script. The current pin is **Tesseract 5.5.0**. +- **Tesseract binary** is fetched at build time by `build/tesseract.py` — per-platform download URLs are pinned in that module. The current pin is **Tesseract 5.5.0**. CI (`.github/workflows/build.yml`) imports `fetch_tessdata` + `fetch_tesseract_for_platform` and runs them before PyInstaller. **To update Tesseract**: -1. Bump the version pin + the per-platform fetch URLs in `build/make_release.py`. +1. Bump the version pin + the per-platform fetch URLs in `build/tesseract.py`. 2. If upstream changed the `eng.traineddata` schema, refresh `build/vendor/tessdata/eng.traineddata` from `tessdata_best` at the matching tag. -3. 
Rebuild on each platform (`python build/make_release.py`) and smoke-test a scanned-PDF run through the PDF Extractor before tagging the release. +3. Push a `v*` tag so CI rebuilds all three platforms, then smoke-test a scanned-PDF run through the PDF Extractor before publishing the release. 4. Update `LICENSE_TESSERACT.txt` at the repo root if the upstream license terms change (Tesseract is Apache-2.0 today). ## Tests diff --git a/docs/TECHNICAL.md b/docs/TECHNICAL.md index 505b7ae..2cb999b 100644 --- a/docs/TECHNICAL.md +++ b/docs/TECHNICAL.md @@ -124,7 +124,7 @@ Tag a release → 3 platform artifacts upload to GitHub Releases. Manual: copy t ### 3.10 Bundled Tesseract (PDF Extractor OCR) -Frozen builds ship Tesseract 5.5 + `eng.traineddata` inside the PyInstaller bundle so scanned PDFs work without a separate install. Per-platform binary URLs pinned in `build/make_release.py`; tessdata vendored at `build/vendor/tessdata/eng.traineddata`. License attribution in `LICENSE_TESSERACT.txt` at the repo root. +Frozen builds ship Tesseract 5.5 + `eng.traineddata` inside the PyInstaller bundle so scanned PDFs work without a separate install. Per-platform binary URLs pinned in `build/tesseract.py`; tessdata vendored at `build/vendor/tessdata/eng.traineddata`. License attribution in `LICENSE_TESSERACT.txt` at the repo root. **Discovery order at runtime** (see `docs/DEVELOPER.md` for the full Path layout): diff --git a/docs/USER-GUIDE.es.md b/docs/USER-GUIDE.es.md index 6e2e984..4505761 100644 --- a/docs/USER-GUIDE.es.md +++ b/docs/USER-GUIDE.es.md @@ -25,16 +25,11 @@ Para usar la misma licencia en otro equipo: desactiva éste (página Activar → ## 1. Instalación -No necesitas tener Python ni permisos de administrador — el paquete trae su propio intérprete y todas las dependencias. Dos formatos por sistema operativo, elige el que tu política de TI permita: - -- **Instalador** — crea automáticamente acceso directo en el escritorio + entrada en el menú Inicio / Launchpad. 
Recomendado para la mayoría. -- **.zip portable** — descomprime y haz doble clic. No toca el registro, se ejecuta desde cualquier lugar (escritorio, USB, recurso de red). Úsalo si no puedes ejecutar instaladores, quieres una instalación de una sola carpeta que puedas copiar entre equipos, o estás evaluando antes de instalar. - -Ambos formatos son idénticos por dentro: mismo Python, mismas dependencias, mismo comportamiento de arranque. +No necesitas tener Python ni permisos de administrador — el paquete trae su propio intérprete y todas las dependencias. Cada sistema operativo tiene un único instalador que crea automáticamente el acceso directo en el escritorio + la entrada en el menú Inicio / Launchpad. ### 1.1 Windows -**Opción A — Instalador (`DataTools--win-setup.exe`)** +**Instalador (`DataTools--win-setup.exe`)** 1. Descarga `DataTools--win-setup.exe` desde tu correo de licencia o GitHub Releases. 2. Doble clic en el instalador. La primera vez, Windows SmartScreen mostrará **"Windows protegió tu PC"** — pulsa **Más información** → **Ejecutar de todas formas**. (Este aviso solo aparece una vez por compilación hasta que tengamos un certificado EV de firma de código.) @@ -44,18 +39,11 @@ Ambos formatos son idénticos por dentro: mismo Python, mismas dependencias, mis Para anclarlo a la barra de tareas, lanza la app una vez, clic derecho en su icono de la barra de tareas, y **Anclar a la barra de tareas**. Windows requiere este paso manual — ningún instalador puede anclar por programa. -**Opción B — Portable (`DataTools--win-portable.zip`)** - -1. Descarga `DataTools--win-portable.zip`. -2. Clic derecho en el .zip → **Extraer todo…** → elige una carpeta (p. ej. `C:\Tools\DataTools`). -3. Abre la carpeta `DataTools\` extraída, doble clic en `DataTools.exe`. El aviso de SmartScreen aparece solo la primera vez. -4. Para crear tu propio acceso directo en el escritorio: clic derecho en `DataTools.exe` → **Enviar a → Escritorio (crear acceso directo)**. 
- -**Desinstalar** (solo instalador): Configuración → Aplicaciones → DataTools → Desinstalar. Portable: borra la carpeta. +**Desinstalar**: Configuración → Aplicaciones → DataTools → Desinstalar. ### 1.2 macOS -**Opción A — DMG instalador (`DataTools--mac.dmg`)** +**DMG instalador (`DataTools--mac.dmg`)** 1. Descarga `DataTools--mac.dmg`. 2. Doble clic en el .dmg. Se abre una ventana de Finder con el icono **DataTools** y un alias **Aplicaciones**. @@ -65,12 +53,6 @@ Para anclarlo a la barra de tareas, lanza la app una vez, clic derecho en su ico Para mantener DataTools en el Dock: lanza la app, clic derecho en su icono del Dock → **Opciones → Mantener en el Dock**. macOS no permite que los instaladores fijen al Dock automáticamente. -**Opción B — Portable (`DataTools--mac-portable.zip`)** - -1. Descarga `DataTools--mac-portable.zip`. Safari descomprime al descargar por defecto; en Finder verás `DataTools.app` directamente. -2. Mueve `DataTools.app` a **Aplicaciones** si quieres que aparezca en Launchpad — o déjalo en el escritorio, un USB o un recurso de red. La .app portable se ejecuta desde cualquier sitio. -3. Doble clic en `DataTools.app`. Clic derecho → **Abrir** la primera vez (misma rutina que con el DMG). - **Desinstalar**: arrastra `DataTools.app` a la Papelera. Tus archivos de datos siguen donde estén — la app no instala nada más. ### 1.3 Linux diff --git a/docs/USER-GUIDE.md b/docs/USER-GUIDE.md index 4d567de..75bddb9 100644 --- a/docs/USER-GUIDE.md +++ b/docs/USER-GUIDE.md @@ -25,16 +25,11 @@ To use the same license on a different machine: deactivate this one (Activate pa ## 1. Install -You don't need Python and you don't need admin rights — the bundle ships its own interpreter and every dependency. Two flavors per OS, pick whichever your IT policy allows: - -- **Installer** — wires up Desktop shortcut + Start Menu / Launchpad entry automatically. Recommended for most users. -- **Portable .zip** — unzip and double-click. 
No registry writes, runs from anywhere (Desktop, USB stick, network share). Use this if you can't run installers, want a single-folder install you can copy between machines, or are evaluating before committing to install. - -Both flavors are byte-identical inside: same Python, same dependencies, same launch behavior. +You don't need Python and you don't need admin rights — the bundle ships its own interpreter and every dependency. Each OS gets a single installer that wires up the Desktop shortcut + Start Menu / Launchpad entry automatically. ### 1.1 Windows -**Option A — Installer (`DataTools--win-setup.exe`)** +**Installer (`DataTools--win-setup.exe`)** 1. Download `DataTools--win-setup.exe` from your release email or GitHub Releases. 2. Double-click the installer. On the first run Windows SmartScreen will say **"Windows protected your PC"** — click **More info** → **Run anyway**. (This warning only appears once per build until we have an EV code-signing cert.) @@ -44,18 +39,11 @@ Both flavors are byte-identical inside: same Python, same dependencies, same lau To pin to the taskbar, launch the app once, right-click its icon in the taskbar, then **Pin to taskbar**. Windows requires this manual step — no installer is allowed to pin programmatically. -**Option B — Portable (`DataTools--win-portable.zip`)** - -1. Download `DataTools--win-portable.zip`. -2. Right-click the .zip → **Extract All…** → pick a folder (e.g. `C:\Tools\DataTools`). -3. Open the extracted `DataTools\` folder, double-click `DataTools.exe`. SmartScreen warning fires the first time only. -4. To create your own desktop shortcut later: right-click `DataTools.exe` → **Send to → Desktop (create shortcut)**. - -**Uninstall** (installer only): Settings → Apps → DataTools → Uninstall. Portable: delete the folder. +**Uninstall**: Settings → Apps → DataTools → Uninstall. ### 1.2 macOS -**Option A — Installer DMG (`DataTools--mac.dmg`)** +**Installer DMG (`DataTools--mac.dmg`)** 1. 
Download `DataTools--mac.dmg`. 2. Double-click the .dmg. A Finder window opens showing the **DataTools** icon and an **Applications** alias. @@ -65,12 +53,6 @@ To pin to the taskbar, launch the app once, right-click its icon in the taskbar, To keep DataTools in the Dock: launch the app, right-click its Dock icon → **Options → Keep in Dock**. macOS doesn't allow installers to pin to the Dock automatically. -**Option B — Portable (`DataTools--mac-portable.zip`)** - -1. Download `DataTools--mac-portable.zip`. Safari auto-unzips on download; in Finder you'll see `DataTools.app` directly. -2. Move `DataTools.app` to **Applications** if you want it discoverable via Launchpad — or keep it on your Desktop, a USB stick, or a network share. The portable .app runs from anywhere. -3. Double-click `DataTools.app`. Right-click → **Open** the first time (same unsigned-build dance as the DMG). - **Uninstall**: drag `DataTools.app` to the Trash. Your data files stay where you put them — nothing else is installed. ### 1.3 Linux