"""Build a deploy-ready ``landing/dist/`` from the source HTML. Run from the repo root after copying ``landing/deploy.config.example.json`` to ``landing/deploy.config.json`` and filling in the real URLs: python3 landing/deploy.py Output: landing/dist/index.html landing/dist/bookkeeper/index.html landing/dist/ap-1099/index.html landing/dist/ar-aging/index.html landing/dist/_shared/styles.css landing/dist/robots.txt landing/dist/sitemap.xml landing/dist/404.html landing/dist/favicon.svg Upload ``landing/dist/`` to Cloudflare Pages (drag-and-drop in the dashboard, or ``wrangler pages deploy landing/dist``). Why this script exists: The source HTML carries placeholder URLs (``{{demo_base_url}}``, ``{{gumroad_url}}``, ``{{support_email}}``, ``{{site_origin}}``) so the operator's actual demo / Gumroad / domain URLs aren't committed to the repo. This script reads the operator's config and produces a ready-to-upload bundle. It also stamps a sitemap.xml + robots.txt + 404.html and copies the shared CSS so the output directory is fully self-contained. """ from __future__ import annotations import json import re import shutil import sys from datetime import date from pathlib import Path LANDING = Path(__file__).resolve().parent REPO = LANDING.parent DIST = LANDING / "dist" CONFIG_PATH = LANDING / "deploy.config.json" EXAMPLE_PATH = LANDING / "deploy.config.example.json" # Files to substitute and copy. Order matters only for readability. HTML_PAGES = [ LANDING / "index.html", LANDING / "bookkeeper" / "index.html", LANDING / "ap-1099" / "index.html", LANDING / "ar-aging" / "index.html", ] SHARED = LANDING / "_shared" / "styles.css" def _load_config() -> dict: if not CONFIG_PATH.exists(): sys.stderr.write( f"\nERROR: {CONFIG_PATH.name} not found.\n" f" cp {EXAMPLE_PATH.name} {CONFIG_PATH.name}\n" f" edit {CONFIG_PATH.name} with your real URLs\n" f" re-run: python3 landing/deploy.py\n\n" ) sys.exit(2) cfg = json.loads(CONFIG_PATH.read_text()) required = ("site_origin", "demo_base_url", "gumroad_listing", "support_email") missing = [k for k in required if not cfg.get(k)] if missing: sys.stderr.write( f"\nERROR: {CONFIG_PATH.name} is missing required fields: {missing}\n" f" See {EXAMPLE_PATH.name} for the full template.\n\n" ) sys.exit(2) return cfg def _substitute(text: str, cfg: dict) -> str: """Replace placeholders + the demo / Gumroad URL patterns the source HTML uses today.""" site_origin = cfg["site_origin"].rstrip("/") demo_base = cfg["demo_base_url"].rstrip("/") gumroad_base = cfg["gumroad_listing"] support_email = cfg["support_email"] # Direct placeholder tokens (clean approach — used by future copy). text = text.replace("{{site_origin}}", site_origin) text = text.replace("{{demo_base_url}}", demo_base) text = text.replace("{{gumroad_url}}", gumroad_base) text = text.replace("{{support_email}}", support_email) # Backwards-compatible patterns: the source HTML in this repo carries # literal ``https://datatools.app`` and ``https://demo.datatools.app`` # so this script swaps those too. Once new pages adopt the # ``{{placeholder}}`` style above, this layer can be retired. text = re.sub( r"https://demo\.datatools\.app", demo_base, text, ) # Replace ``https://datatools.app/...`` for canonical / OG URLs but # do NOT swap ``https://datatools.app`` when it is followed by an # at-sign as part of an email address (no such case today; defensive). text = re.sub( r"https://datatools\.app", site_origin, text, ) # Gumroad URL family — preserve the ``?from=`` query. text = re.sub( r"https://gumroad\.com/l/datatools", gumroad_base.rstrip("/").replace("/l/datatools", "/l/datatools"), text, ) # Support email shows up only as ``mailto:hello@datatools.app``. text = text.replace("mailto:hello@datatools.app", f"mailto:{support_email}") text = text.replace("hello@datatools.app", support_email) return text def _stamp_sitemap(cfg: dict) -> str: site = cfg["site_origin"].rstrip("/") today = date.today().isoformat() urls = [site + "/"] + [ f"{site}/{p}/" for p in cfg.get("personas", ["bookkeeper", "ap-1099", "ar-aging"]) ] items = "\n".join( f" {u}{today}" for u in urls ) return ( '\n' '\n' f"{items}\n" "\n" ) def _robots_txt(cfg: dict) -> str: return ( "# Allow everything; we want every persona page indexable.\n" "User-agent: *\n" "Allow: /\n" f"Sitemap: {cfg['site_origin'].rstrip('/')}/sitemap.xml\n" ) def _favicon_svg() -> str: """Tiny self-contained SVG favicon — broom emoji-style mark.""" return ( '\n' '\n' ' \n' ' \n' "\n" ) def _build_404_html(cfg: dict) -> str: """Cloudflare Pages serves 404.html when a path doesn't match.""" site_origin = cfg["site_origin"].rstrip("/") return f""" Not found · DataTools
404

That page isn't here.

Pick a workflow below to land somewhere useful.

For bookkeepers   For AP / 1099   For AR

""" def main() -> int: cfg = _load_config() if DIST.exists(): shutil.rmtree(DIST) DIST.mkdir(parents=True) # Shared CSS (same path the source HTML expects: ``../_shared/styles.css``) (DIST / "_shared").mkdir() shutil.copy(SHARED, DIST / "_shared" / "styles.css") # Per-page substitutions page_count = 0 for src in HTML_PAGES: rel = src.relative_to(LANDING) dest = DIST / rel dest.parent.mkdir(parents=True, exist_ok=True) dest.write_text(_substitute(src.read_text(), cfg)) page_count += 1 # Stamped supporting files (DIST / "robots.txt").write_text(_robots_txt(cfg)) (DIST / "sitemap.xml").write_text(_stamp_sitemap(cfg)) (DIST / "404.html").write_text(_build_404_html(cfg)) (DIST / "favicon.svg").write_text(_favicon_svg()) # Final report print(f"\n✓ Built {page_count} HTML pages + sitemap + robots + 404 + favicon") print(f" Output: {DIST.relative_to(REPO)}/") print() print("Next steps:") print(" 1) wrangler pages deploy landing/dist # if you use Wrangler") print(" OR drag-and-drop landing/dist/ in the Cloudflare Pages dashboard") print(" 2) Configure custom domain on Cloudflare Pages → " f"{cfg['site_origin']}") print(" 3) Verify: open the deployed apex URL, click each persona " "card, click each demo iframe, click each buy button → Gumroad listing") print() return 0 if __name__ == "__main__": sys.exit(main())