datatools-dev/requirements.txt

pandas>=2.2,<3
openpyxl>=3.1,<4
numpy>=1.26,<3
rapidfuzz>=3.6,<4
charset-normalizer>=3.3,<4
loguru>=0.7,<1
tqdm>=4.66,<5
typer>=0.12,<1
phonenumbers>=8.13,<9
streamlit>=1.35,<2
cryptography>=41,<49
# PDF Extractor stack — pinned to exact tested versions so a future
# upstream release can't quietly change pdfplumber's word-position
# behavior or pypdfium2's page rasterization (the images fed to OCR)
# mid-build. Bump these explicitly when re-testing against a new release.
#
# ``pypdfium2`` is here for the OCR fallback path only (rasterizing
# pages to images for Tesseract). The drawable-canvas dep was
# removed when the visual picker was ripped out — the scanner is
# pure heuristic now, no coordinate UI.
pdfplumber==0.11.9
pypdfium2==5.8.0
pytesseract==0.3.13