# datatools-dev/landing/deploy.py

"""Build a deploy-ready ``landing/dist/`` from the source HTML.

Run from the repo root after copying ``landing/deploy.config.example.json``
to ``landing/deploy.config.json`` and filling in the real URLs:

    python3 landing/deploy.py

Output:
    landing/dist/index.html
    landing/dist/shopify-pet/index.html
    landing/dist/bookkeeper/index.html
    landing/dist/revops/index.html
    landing/dist/_shared/styles.css
    landing/dist/robots.txt
    landing/dist/sitemap.xml
    landing/dist/404.html
    landing/dist/favicon.svg

Upload ``landing/dist/`` to Cloudflare Pages (drag-and-drop in the
dashboard, or ``wrangler pages deploy landing/dist``).

Why this script exists:
    The source HTML carries placeholder URLs (``{{demo_base_url}}``,
    ``{{gumroad_url}}``, ``{{support_email}}``, ``{{site_origin}}``)
    so the operator's actual demo / Gumroad / domain URLs aren't
    committed to the repo. This script reads the operator's config
    and produces a ready-to-upload bundle.

    It also stamps a sitemap.xml + robots.txt + 404.html and copies
    the shared CSS so the output directory is fully self-contained.
"""

from __future__ import annotations

import json
import re
import shutil
import sys
from datetime import date
from pathlib import Path

# Every path below is anchored on the directory that contains this script,
# so the build does not depend on the current working directory.
LANDING = Path(__file__).resolve().parent
REPO = LANDING.parent
DIST = LANDING.joinpath("dist")

# Operator-specific config and the template it is copied from.
CONFIG_PATH = LANDING.joinpath("deploy.config.json")
EXAMPLE_PATH = LANDING.joinpath("deploy.config.example.json")


# Source pages that are substituted and copied: the apex page first, then
# one ``<persona>/index.html`` per persona directory.
HTML_PAGES = [LANDING / "index.html"] + [
    LANDING / persona / "index.html"
    for persona in ("shopify-pet", "bookkeeper", "revops")
]
# Stylesheet shared by every page; copied into the bundle unchanged.
SHARED = LANDING.joinpath("_shared", "styles.css")


def _load_config() -> dict:
    """Load and validate the operator's ``deploy.config.json``.

    Returns:
        The parsed config mapping. ``site_origin``, ``demo_base_url``,
        ``gumroad_listing`` and ``support_email`` are guaranteed to be
        present, non-empty strings.

    Raises:
        SystemExit: with status 2, after writing an explanation to stderr,
            when the file is missing, is not valid UTF-8 JSON, is not a JSON
            object, or a required field is absent, empty, or not a string.
    """
    if not CONFIG_PATH.exists():
        sys.stderr.write(
            f"\nERROR: {CONFIG_PATH.name} not found.\n"
            f"  cp {EXAMPLE_PATH.name} {CONFIG_PATH.name}\n"
            f"  edit {CONFIG_PATH.name} with your real URLs\n"
            f"  re-run: python3 landing/deploy.py\n\n"
        )
        sys.exit(2)

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale, and turn parse failures into the same friendly
    # exit-2 path as the other config errors instead of a traceback.
    try:
        cfg = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        sys.stderr.write(
            f"\nERROR: {CONFIG_PATH.name} is not valid JSON: {err}\n"
            f"  See {EXAMPLE_PATH.name} for the full template.\n\n"
        )
        sys.exit(2)

    # ``cfg.get`` below only exists on mappings; a top-level list or scalar
    # is a config mistake, not a programming error.
    if not isinstance(cfg, dict):
        sys.stderr.write(
            f"\nERROR: {CONFIG_PATH.name} must contain a JSON object at the top level.\n"
            f"  See {EXAMPLE_PATH.name} for the full template.\n\n"
        )
        sys.exit(2)

    required = ("site_origin", "demo_base_url", "gumroad_listing", "support_email")
    missing = [k for k in required if not cfg.get(k)]
    if missing:
        sys.stderr.write(
            f"\nERROR: {CONFIG_PATH.name} is missing required fields: {missing}\n"
            f"  See {EXAMPLE_PATH.name} for the full template.\n\n"
        )
        sys.exit(2)

    # The builders call string methods (``rstrip``) on these values, so a
    # number or nested object would otherwise fail later with AttributeError.
    not_text = [k for k in required if not isinstance(cfg[k], str)]
    if not_text:
        sys.stderr.write(
            f"\nERROR: {CONFIG_PATH.name} fields must be strings: {not_text}\n"
            f"  See {EXAMPLE_PATH.name} for the full template.\n\n"
        )
        sys.exit(2)
    return cfg


def _substitute(text: str, cfg: dict) -> str:
    """Replace placeholders + the demo / Gumroad URL patterns the source HTML uses today."""
    site_origin   = cfg["site_origin"].rstrip("/")
    demo_base     = cfg["demo_base_url"].rstrip("/")
    gumroad_base  = cfg["gumroad_listing"]
    support_email = cfg["support_email"]

    # Direct placeholder tokens (clean approach — used by future copy).
    text = text.replace("{{site_origin}}", site_origin)
    text = text.replace("{{demo_base_url}}", demo_base)
    text = text.replace("{{gumroad_url}}", gumroad_base)
    text = text.replace("{{support_email}}", support_email)

    # Backwards-compatible patterns: the source HTML in this repo carries
    # literal ``https://datatools.app`` and ``https://demo.datatools.app``
    # so this script swaps those too. Once new pages adopt the
    # ``{{placeholder}}`` style above, this layer can be retired.
    text = re.sub(
        r"https://demo\.datatools\.app",
        demo_base,
        text,
    )
    # Replace ``https://datatools.app/...`` for canonical / OG URLs but
    # do NOT swap ``https://datatools.app`` when it is followed by an
    # at-sign as part of an email address (no such case today; defensive).
    text = re.sub(
        r"https://datatools\.app",
        site_origin,
        text,
    )
    # Gumroad URL family — preserve the ``?from=<persona>`` query.
    text = re.sub(
        r"https://gumroad\.com/l/datatools",
        gumroad_base.rstrip("/").replace("/l/datatools", "/l/datatools"),
        text,
    )
    # Support email shows up only as ``mailto:hello@datatools.app``.
    text = text.replace("mailto:hello@datatools.app", f"mailto:{support_email}")
    text = text.replace("hello@datatools.app", support_email)

    return text


def _stamp_sitemap(cfg: dict) -> str:
    site = cfg["site_origin"].rstrip("/")
    today = date.today().isoformat()
    urls = [site + "/"] + [
        f"{site}/{p}/" for p in cfg.get("personas", ["shopify-pet", "bookkeeper", "revops"])
    ]
    items = "\n".join(
        f"  <url><loc>{u}</loc><lastmod>{today}</lastmod></url>"
        for u in urls
    )
    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        f"{items}\n"
        "</urlset>\n"
    )


def _robots_txt(cfg: dict) -> str:
    return (
        "# Allow everything; we want every persona page indexable.\n"
        "User-agent: *\n"
        "Allow: /\n"
        f"Sitemap: {cfg['site_origin'].rstrip('/')}/sitemap.xml\n"
    )


def _favicon_svg() -> str:
    """Tiny self-contained SVG favicon — broom emoji-style mark."""
    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64">\n'
        '  <rect width="64" height="64" rx="14" fill="#0f1115"/>\n'
        '  <circle cx="32" cy="32" r="9" fill="#6ee7b7"/>\n'
        "</svg>\n"
    )


def _build_404_html(cfg: dict) -> str:
    """Cloudflare Pages serves 404.html when a path doesn't match."""
    site_origin = cfg["site_origin"].rstrip("/")
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>Not found · DataTools</title>
  <link rel="stylesheet" href="/_shared/styles.css" />
</head>
<body>
<section class="hero" style="text-align: center;">
  <div class="container">
    <div class="eyebrow">404</div>
    <h1>That page isn't here.</h1>
    <p class="lead" style="margin: 0 auto 28px;">Pick a workflow below to land somewhere useful.</p>
    <p>
      <a class="btn" href="{site_origin}/shopify-pet/">For Shopify</a>
      &nbsp;
      <a class="btn" href="{site_origin}/bookkeeper/">For bookkeepers</a>
      &nbsp;
      <a class="btn" href="{site_origin}/revops/">For RevOps</a>
    </p>
  </div>
</section>
</body>
</html>
"""


def main() -> int:
    """Build the deploy bundle in ``DIST`` and print next-step instructions.

    Steps: load the config, verify the source files exist, recreate ``DIST``
    from scratch, copy the shared stylesheet, write each substituted HTML
    page, then write robots.txt, sitemap.xml, 404.html and favicon.svg.

    Returns:
        0 on success; 2 when a source page or the shared stylesheet is
        missing (nothing is deleted or written in that case).
    """
    cfg = _load_config()

    # Check inputs before wiping the previous build, so a missing source
    # never leaves the operator with a deleted or half-written dist/.
    absent = [path for path in (*HTML_PAGES, SHARED) if not path.is_file()]
    if absent:
        sys.stderr.write(
            "\nERROR: missing source file(s):\n"
            + "".join(f"  {path}\n" for path in absent)
            + "\n"
        )
        return 2

    if DIST.exists():
        shutil.rmtree(DIST)
    DIST.mkdir(parents=True)

    # Shared CSS (same path the source HTML expects: ``../_shared/styles.css``)
    (DIST / "_shared").mkdir()
    shutil.copy(SHARED, DIST / "_shared" / "styles.css")

    # Per-page substitutions. Text is read and written as UTF-8 explicitly:
    # the generated files declare UTF-8, so relying on the platform's locale
    # encoding could corrupt non-ASCII characters.
    for src in HTML_PAGES:
        dest = DIST / src.relative_to(LANDING)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(
            _substitute(src.read_text(encoding="utf-8"), cfg),
            encoding="utf-8",
        )
    page_count = len(HTML_PAGES)

    # Stamped supporting files
    (DIST / "robots.txt").write_text(_robots_txt(cfg), encoding="utf-8")
    (DIST / "sitemap.xml").write_text(_stamp_sitemap(cfg), encoding="utf-8")
    (DIST / "404.html").write_text(_build_404_html(cfg), encoding="utf-8")
    (DIST / "favicon.svg").write_text(_favicon_svg(), encoding="utf-8")

    # Final report
    print(f"\n✓ Built {page_count} HTML pages + sitemap + robots + 404 + favicon")
    print(f"  Output:  {DIST.relative_to(REPO)}/")
    print()
    print("Next steps:")
    print("  1) wrangler pages deploy landing/dist          # if you use Wrangler")
    print("     OR drag-and-drop landing/dist/ in the Cloudflare Pages dashboard")
    print("  2) Configure custom domain on Cloudflare Pages → "
          f"{cfg['site_origin']}")
    print("  3) Verify: open the deployed apex URL, click each persona "
          "card, click each demo iframe, click each buy button → Gumroad listing")
    print()
    return 0


if __name__ == "__main__":
    sys.exit(main())