datatools-dev/samples/demo/agency_leads_pipeline.json

{
  "steps": [
    {
      "tool": "text_clean",
      "options": {},
      "enabled": true,
      "name": "1. Clean text (whitespace + smart quotes from copy-paste)"
    },
    {
      "tool": "format_standardize",
      "options": {
        "column_types": {
          "First Name": "name",
          "Last Name": "name",
          "Company": "name",
          "Email": "email",
          "Phone": "phone"
        },
        "phone_country_column": "Country",
        "phone_format": "E164",
        "email_gmail_canonical": true
      },
      "enabled": true,
      "name": "2. E.164 phones (per-row country) · canonical emails · name casing"
    },
    {
      "tool": "missing",
      "options": {
        "strategy": "none",
        "standardize_sentinels": true,
        "sentinels": ["N/A", "n/a", "—", "?", "(unknown)", "unknown", "(blank)", "(none)", "TBD", "#N/A"]
      },
      "enabled": true,
      "name": "3. Standardize sentinels across vendor exports"
    },
    {
      "tool": "column_map",
      "options": {
        "schema": {
          "fields": [
            {"name": "Lead ID", "dtype": "string", "required": true},
            {"name": "First Name", "dtype": "string"},
            {"name": "Last Name", "dtype": "string"},
            {"name": "Company", "dtype": "string"},
            {"name": "Title", "dtype": "string"},
            {"name": "Email", "dtype": "string"},
            {"name": "Phone", "dtype": "string"},
            {"name": "Country", "dtype": "string"},
            {"name": "Source", "dtype": "string"},
            {"name": "Score", "dtype": "integer"},
            {"name": "Last Activity", "dtype": "date"},
            {"name": "Tags", "dtype": "string"}
          ]
        },
        "auto_infer": true,
        "unmapped": "keep",
        "coerce_types": true,
        "reorder_to_schema": true,
        "enforce_required": false
      },
      "enabled": true,
      "name": "4. Coerce types · reorder to canonical schema"
    },
    {
      "tool": "dedup",
      "options": {
        "survivor_rule": "most_complete",
        "merge": true
      },
      "enabled": true,
      "name": "5. Dedup leads across HubSpot / LinkedIn / Manual Scrape (fuzzy + merge)"
    }
  ]
}