feat: 3 new tools, format streaming, distribution-ready demo + landing pages

Tools shipped this batch (4 → 6 of 9 Ready):
  04 Missing Value Handler   src/core/missing.py + cli_missing.py + GUI
  05 Column Mapper           src/core/column_mapper.py + cli_column_map.py + GUI
  09 Pipeline Runner         src/core/pipeline.py + cli_pipeline.py + GUI
     with soft tool-dependency graph (recommended, not enforced) and
     JSON save/load for repeatable weekly cleanups.

Format Standardizer reworked for 1 GB international files:
  • Vectorised dispatch + LRU cache over phone/date/currency/boolean/email
  • Per-row country / address columns drive parsing
  • Audit cap (default 10 k rows, ~50 MB RAM)
  • standardize_file(): chunked streaming entry point (~165 k rows/sec)
  • currency_decimal="auto" for EU comma-decimal locales
  • R$ / kr / zł multi-char currency prefixes
  • cli_format.py with auto-stream above 100 MB inputs

Encoding detection arbiter + language-aware probe:
  Closes the last 4 xfails (cp1250 / mac_iceland / shift_jis_2004 / lying-BOM)
  via tied-confidence arbiter + Cyrillic / EE-Latin coverage probes.

Distribution-readiness assets:
  • streamlit_app.py — Streamlit Community Cloud entry shim
  • src/gui/app_demo.py — single-page demo, ?p=<persona> routing, 100-row cap + watermark, free-vs-paid boundary enforced at surface
  • samples/demo/ — 3 niche datasets + pre-tuned pipeline JSONs
  • landing/ — 4 static HTML pages (apex chooser + 3 niche), shared CSS, deploy.py URL-substitution script, auto-generated robots.txt + sitemap.xml + 404.html + favicon
  • docs/PLAN.md, DEMO-PLAN.md, DEPLOYMENT.md, POST-LAUNCH.md, NEXT-STEPS.md — full strategy + measurement + deployment + master checklist

Test counts:
  before: 1,520 passed · 4 skipped · 17 xfailed
  after:  1,729 passed · 0 skipped · 0 xfailed

Tier-1 corpora added:
  • missing-corpus 3 use cases + 16 edge cases
  • column-mapper-corpus 3 use cases + 5 edge cases
  • format-cleaner intl 20-row 13-country stress fixture

Engine hardening flushed out by the corpora:
  • interpolate guards against object-dtype columns
  • mean/median skip all-NaN columns (silences numpy warning)
  • fillna runs under future.no_silent_downcasting (silences pandas warning)
  • mojibake test no longer skips when ftfy installed (monkeypatch path)
  • drop-row threshold semantics: strict-greater (consistent across rows / cols)
  • currency_decimal validator allow-set updated for "auto"

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 22:31:26 +00:00
parent d18b95880d
commit 966af8ef94
89 changed files with 12039 additions and 284 deletions
--- a/src/core/column_mapper.py
+++ b/src/core/column_mapper.py
@@ -0,0 +1,633 @@
+"""DataTools Column Mapper.
+
+Rename columns, enforce a target schema, coerce types, drop / add /
+reorder columns. Designed for the three buyer profiles the toolkit
+already serves:
+
+1. **Schema enforcement** — analyst receives a CSV that has to fit a
+   known target shape (a CRM import format, a database schema, a
+   mailing-list contract). Map source columns to target names, coerce
+   each to the declared type, drop the extras, fail clearly when a
+   required target field is missing.
+2. **Multi-source unification** — operator merges vendor/partner
+   exports where every file uses different column names ("First Name"
+   / "first_name" / "FirstName"). The fuzzy auto-mapper proposes a
+   mapping; the user reviews and overrides.
+3. **Type coercion** — quick conversion of mis-typed columns (string
+   "123" → int, "true"/"yes" → bool, "2024-01-15" → date) without
+   leaving the tool, with errors surfaced row-by-row.
+
+Public API
+----------
+Types:
+    TargetField, TargetSchema, ColumnMapping, MapOptions, MapResult,
+    ColumnDtype
+
+Functions:
+    map_columns(df, options) -> MapResult
+    infer_mapping(df, schema, *, threshold=0.6) -> dict[src, target]
+    coerce_series(series, dtype) -> (Series, n_failures)
+
+Presets:
+    PRESETS = {"rename-only", "strict-schema", "lenient-schema"}
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any, Iterable, Literal, Optional
+
+import numpy as np
+import pandas as pd
+from loguru import logger
+from pandas.api import types as pdtypes
+
+from .errors import ConfigError, InputValidationError, ensure_choice, ensure_dataframe
+
+
+# ---------------------------------------------------------------------------
+# Types
+# ---------------------------------------------------------------------------
+
+# Closed set of dtype names a ``TargetField`` may declare. Every value
+# except ``"auto"`` selects one conversion branch in ``coerce_series``.
+ColumnDtype = Literal[
+    "string",
+    "integer",
+    "float",
+    "boolean",
+    "date",
+    "datetime",
+    "category",
+    "auto",        # leave dtype alone
+]
+
+# Runtime counterpart of ``ColumnDtype``: ``Literal`` is only visible to
+# type checkers, so schema loading validates against this set instead.
+# The two lists must be kept in sync by hand.
+_VALID_DTYPES: frozenset[str] = frozenset({
+    "string", "integer", "float", "boolean", "date", "datetime",
+    "category", "auto",
+})
+
+
+@dataclass
+class TargetField:
+    """One field in a target schema.
+
+    A required field that has neither a matching source column nor a
+    ``default`` is recorded in ``MapResult.missing_required_targets``
+    (and makes ``map_columns`` raise when ``enforce_required`` is on)
+    rather than being silently created as an all-missing column.
+    """
+
+    # Column name this field has in the output frame.
+    name: str
+    # Type applied by ``coerce_series`` when type coercion is enabled;
+    # ``"auto"`` leaves the column's dtype untouched.
+    dtype: ColumnDtype = "auto"
+    # Marks the field as mandatory for ``map_columns``.
+    required: bool = False
+    # Alternative source names; ``infer_mapping`` scores each alias as
+    # if it were the target name and keeps the best score.
+    aliases: list[str] = field(default_factory=list)
+    # Fill value used by ``map_columns`` when no source column maps to
+    # this field. ``None`` means "no default".
+    default: Any = None
+
+
+@dataclass
+class TargetSchema:
+    """Ordered list of target fields. Ordering survives into the result DataFrame.
+
+    Schemas round-trip through plain dicts / JSON files. Files are
+    always read and written as UTF-8 so hand-edited schemas containing
+    non-ASCII names or aliases load identically on every platform.
+    """
+
+    fields: list[TargetField]
+
+    def field_names(self) -> list[str]:
+        """Return the target names in schema order."""
+        return [f.name for f in self.fields]
+
+    def get(self, name: str) -> Optional[TargetField]:
+        """Return the first field called *name*, or ``None`` when absent."""
+        return next((f for f in self.fields if f.name == name), None)
+
+    def to_dict(self) -> dict:
+        """Return a JSON-friendly ``{"fields": [...]}`` representation."""
+        return {"fields": [asdict(f) for f in self.fields]}
+
+    def to_file(self, path: str | Path) -> Path:
+        """Write the schema to *path* as indented JSON and return the path.
+
+        ``default=str`` stringifies any non-JSON-native ``default`` value
+        (e.g. a date) instead of failing the write.
+        """
+        out = Path(path)
+        out.write_text(
+            json.dumps(self.to_dict(), indent=2, default=str),
+            encoding="utf-8",
+        )
+        return out
+
+    @classmethod
+    def from_dict(cls, data: dict) -> TargetSchema:
+        """Build a schema from a ``{"fields": [...]}`` mapping.
+
+        Each entry is either a bare string (field name, all defaults) or
+        a dict with ``name`` plus optional ``dtype``, ``required``,
+        ``aliases`` and ``default`` keys. ``aliases`` may be a list, a
+        single string, or null.
+
+        Raises:
+            ConfigError: *data* has no ``fields`` key, an entry is not a
+                string/dict or lacks ``name``, or a ``dtype`` is unknown.
+        """
+        if not isinstance(data, dict) or "fields" not in data:
+            raise ConfigError(
+                "Target schema must contain a 'fields' list",
+                operation="TargetSchema.from_dict",
+                suggestion='Example: {"fields": [{"name": "email", "dtype": "string", "required": true}, ...]}',
+            )
+        fields = []
+        for entry in data["fields"]:
+            if isinstance(entry, str):
+                fields.append(TargetField(name=entry))
+                continue
+            # Anything that is not a mapping cannot carry a 'name' key;
+            # report it as a config problem rather than a TypeError.
+            if not isinstance(entry, dict) or "name" not in entry:
+                raise ConfigError(
+                    f"Schema field is missing 'name': {entry!r}",
+                    operation="TargetSchema.from_dict",
+                )
+            dtype = entry.get("dtype", "auto")
+            if dtype not in _VALID_DTYPES:
+                raise ConfigError(
+                    f"Schema field {entry['name']!r}: unknown dtype {dtype!r}",
+                    operation="TargetSchema.from_dict",
+                    suggestion=f"Valid: {sorted(_VALID_DTYPES)}",
+                )
+            raw_aliases = entry.get("aliases") or []
+            if isinstance(raw_aliases, str):
+                # A lone string is one alias; list("cust_id") would
+                # otherwise explode it into single characters.
+                raw_aliases = [raw_aliases]
+            fields.append(TargetField(
+                name=entry["name"],
+                dtype=dtype,
+                required=bool(entry.get("required", False)),
+                aliases=list(raw_aliases),
+                default=entry.get("default"),
+            ))
+        return cls(fields=fields)
+
+    @classmethod
+    def from_file(cls, path: str | Path) -> TargetSchema:
+        """Load a schema from the UTF-8 JSON file at *path*."""
+        return cls.from_dict(json.loads(Path(path).read_text(encoding="utf-8")))
+
+
+# ---------------------------------------------------------------------------
+# Fuzzy column-name matching
+# ---------------------------------------------------------------------------
+
+# Vendors disagree on whitespace, punctuation and letter case, so both
+# sides of a comparison are reduced to lowercase ASCII alphanumerics
+# first. Any run of other characters counts as a separator.
+_NORM_RE = re.compile(r"[^a-z0-9]+")
+
+
+def _normalize_name(name: str) -> str:
+    """Return *name* lowercased with every separator run removed.
+
+    ``First Name`` becomes ``firstname``. Anything that is not a
+    ``str`` yields the empty string.
+    """
+    if isinstance(name, str):
+        lowered = name.strip().lower()
+        return _NORM_RE.sub("", lowered)
+    return ""
+
+
+def _token_set(name: str) -> frozenset[str]:
+    """Split *name* into its set of lowercase alphanumeric tokens.
+
+    Splitting happens on the same separator runs ``_NORM_RE`` matches;
+    empty fragments are discarded. Non-string input gives an empty set.
+    """
+    if not isinstance(name, str):
+        return frozenset()
+    fragments = _NORM_RE.split(name.strip().lower())
+    return frozenset(filter(None, fragments))
+
+
+def _name_similarity(a: str, b: str) -> float:
+    """Cheap similarity score in [0.0, 1.0].
+
+    Returns 1.0 when both names are equal after normalisation, otherwise
+    the larger of the token Jaccard index and a character-level ratio.
+    The ratio comes from ``rapidfuzz`` when it can be imported and from
+    stdlib ``difflib`` otherwise, so the mapper still works in trimmed
+    builds (the two back-ends may give slightly different scores).
+
+    Names that contain no ASCII letters or digits (e.g. Cyrillic or CJK
+    headers) all normalise to the empty string. They are compared on
+    their raw, case-folded text instead, so two *different* such names
+    score 0.0 rather than being treated as an exact match.
+
+    Non-string or empty arguments score 0.0.
+    """
+    if not isinstance(a, str) or not isinstance(b, str):
+        return 0.0
+    if not a or not b:
+        return 0.0
+    na, nb = _normalize_name(a), _normalize_name(b)
+    if not na or not nb:
+        # Normalisation erased at least one side, so "na == nb" would
+        # carry no information. Fall back to a raw equality check.
+        return 1.0 if a.strip().casefold() == b.strip().casefold() else 0.0
+    if na == nb:
+        return 1.0
+
+    ta, tb = _token_set(a), _token_set(b)
+    jaccard = (len(ta & tb) / len(ta | tb)) if (ta or tb) else 0.0
+
+    try:
+        from rapidfuzz import fuzz
+        seq = fuzz.ratio(na, nb) / 100.0
+    except ImportError:
+        from difflib import SequenceMatcher
+        seq = SequenceMatcher(None, na, nb).ratio()
+
+    return max(jaccard, seq)
+
+
+def infer_mapping(
+    df: pd.DataFrame,
+    schema: TargetSchema,
+    *,
+    threshold: float = 0.6,
+) -> dict[str, str]:
+    """Best-guess source-column → target-field mapping.
+
+    Returns a dict keyed by source-column name. A source column is
+    omitted from the result when no candidate scores at or above
+    *threshold*. Each target is matched at most once: the
+    highest-scoring source wins, ties broken by source-column order in
+    *df*.
+
+    Aliases declared on a :class:`TargetField` are scored as if they
+    were target names — useful for vendor-specific synonyms
+    (``["customer_id", "cust_id", "client_no"]``).
+
+    Column labels that are not strings (e.g. integer positions) carry
+    no name to match on and are never included in the result.
+    """
+    ensure_dataframe(df, function="infer_mapping")
+    targets = schema.fields
+
+    # All (score, source position, source, target) candidates at or above
+    # the threshold. The position is recorded up front so the sort below
+    # needs no per-item list search and labels are never stringified.
+    scored: list[tuple[float, int, str, str]] = []
+    for position, src in enumerate(df.columns):
+        if not isinstance(src, str):
+            continue
+        for tgt in targets:
+            best = max(
+                _name_similarity(src, candidate)
+                for candidate in (tgt.name, *tgt.aliases)
+            )
+            if best >= threshold:
+                scored.append((best, position, src, tgt.name))
+
+    # Best score first, earlier source column first on ties, so a greedy
+    # walk picks the strongest available pairings.
+    scored.sort(key=lambda item: (-item[0], item[1]))
+
+    mapping: dict[str, str] = {}
+    used_targets: set[str] = set()
+    for _score, _position, src, tgt_name in scored:
+        if src in mapping or tgt_name in used_targets:
+            continue
+        mapping[src] = tgt_name
+        used_targets.add(tgt_name)
+    return mapping
+
+
+# ---------------------------------------------------------------------------
+# Type coercion
+# ---------------------------------------------------------------------------
+
+# Case-insensitive spellings accepted as True / False after stripping.
+_TRUTHY = frozenset({"true", "t", "yes", "y", "1"})
+_FALSY = frozenset({"false", "f", "no", "n", "0"})
+
+
+def _coerce_boolean(value: Any) -> Any:
+    """Convert one cell to ``True`` / ``False`` / ``pd.NA``.
+
+    * Python and NumPy booleans pass through as plain ``bool``.
+    * Missing markers (``None``, NaN, ``pd.NA``, ``pd.NaT``) become
+      ``pd.NA`` — they are "no value", not a failed conversion.
+    * Python and NumPy numbers use ordinary truthiness (0 is False).
+    * Strings are matched against ``_TRUTHY`` / ``_FALSY``.
+
+    Raises:
+        ValueError: *value* is none of the above (e.g. ``"maybe"``).
+    """
+    if isinstance(value, (bool, np.bool_)):
+        return bool(value)
+    if value is None or value is pd.NA or value is pd.NaT:
+        return pd.NA
+    if isinstance(value, (float, np.floating)) and pd.isna(value):
+        return pd.NA
+    # np.integer / np.floating are listed explicitly: NumPy integer
+    # scalars do not subclass int, and object columns can hold them.
+    if isinstance(value, (int, float, np.integer, np.floating)):
+        return bool(value)
+    if isinstance(value, str):
+        v = value.strip().lower()
+        if v in _TRUTHY:
+            return True
+        if v in _FALSY:
+            return False
+    raise ValueError(f"cannot coerce to boolean: {value!r}")
+
+
+def coerce_series(series: pd.Series, dtype: ColumnDtype) -> tuple[pd.Series, int]:
+    """Convert *series* to *dtype* and report how many cells were lost.
+
+    Returns ``(converted, n_failures)``. A failure is a cell that held a
+    value before conversion and is missing afterwards; the boolean path
+    counts the cells ``_coerce_boolean`` rejects. ``map_columns`` exposes
+    these counts through ``MapResult.coercion_failures``.
+
+    ``"auto"`` hands the input back untouched; ``"string"`` and
+    ``"category"`` are plain ``astype`` casts and never report failures.
+
+    Raises:
+        InputValidationError: *dtype* is not a recognised dtype name.
+    """
+
+    def _newly_missing(converted: pd.Series) -> int:
+        # Populated in the input, null in the output.
+        return int((converted.isna() & series.notna()).sum())
+
+    if dtype == "auto":
+        return series, 0
+
+    if dtype in ("string", "category"):
+        return series.astype(dtype), 0
+
+    if dtype == "integer":
+        # Nullable Int64 keeps unparseable cells as <NA> instead of
+        # forcing the whole column to float.
+        as_int = pd.to_numeric(series, errors="coerce").round().astype("Int64")
+        return as_int, _newly_missing(as_int)
+
+    if dtype == "float":
+        as_float = pd.to_numeric(series, errors="coerce").astype("Float64")
+        return as_float, _newly_missing(as_float)
+
+    if dtype == "boolean":
+        cells: list[Any] = []
+        rejected = 0
+        for raw in series.tolist():
+            try:
+                parsed = _coerce_boolean(raw)
+            except ValueError:
+                parsed = pd.NA
+                rejected += 1
+            cells.append(parsed)
+        return pd.Series(cells, index=series.index, dtype="boolean"), rejected
+
+    if dtype in {"date", "datetime"}:
+        stamps = pd.to_datetime(series, errors="coerce", utc=False)
+        lost = _newly_missing(stamps)
+        if dtype == "date":
+            # Zero the time-of-day but stay datetime64 so sorting and
+            # date arithmetic keep working downstream.
+            stamps = stamps.dt.normalize()
+        return stamps, lost
+
+    raise InputValidationError(
+        f"Unknown dtype {dtype!r}",
+        operation="coerce_series",
+        suggestion=f"Valid: {sorted(_VALID_DTYPES)}",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Options / result dataclasses
+# ---------------------------------------------------------------------------
+
+# Strategy for handling source columns that don't appear in the target
+# schema. ``keep`` leaves them in the output; ``drop`` removes them;
+# ``error`` makes ``map_columns`` raise an InputValidationError.
+UnmappedStrategy = Literal["keep", "drop", "error"]
+
+# Named option bundles. Each value is passed verbatim as keyword
+# arguments to ``MapOptions`` by ``MapOptions.from_preset``, so every
+# key must be a ``MapOptions`` field name.
+PRESETS: dict[str, dict[str, Any]] = {
+    # Rename only: extra columns stay, dtypes and order are untouched.
+    "rename-only": {
+        "auto_infer": True,
+        "unmapped": "keep",
+        "coerce_types": False,
+        "reorder_to_schema": False,
+    },
+    # Project onto the schema: extras dropped, types coerced, schema order.
+    "strict-schema": {
+        "auto_infer": True,
+        "unmapped": "drop",
+        "coerce_types": True,
+        "reorder_to_schema": True,
+    },
+    # Like strict-schema, but extra source columns are kept.
+    "lenient-schema": {
+        "auto_infer": True,
+        "unmapped": "keep",
+        "coerce_types": True,
+        "reorder_to_schema": True,
+    },
+}
+
+
+@dataclass
+class MapOptions:
+    """Toggles for column mapping.
+
+    Defaults match the ``rename-only`` preset: best-effort fuzzy match
+    against the schema (if provided), keep unmapped source columns
+    where they are, no type coercion, no reorder.
+
+    Option files are always read and written as UTF-8 so mappings that
+    contain non-ASCII column names load identically on every platform.
+    """
+
+    # Either pass an explicit ``mapping`` dict or a ``schema`` (and let
+    # the engine infer the mapping). Explicit mapping wins when both
+    # are set.
+    mapping: dict[str, str] = field(default_factory=dict)
+    schema: Optional[TargetSchema] = None
+
+    # When True (default), missing entries in ``mapping`` are filled in
+    # by ``infer_mapping`` against ``schema``. When False, only the
+    # explicit mapping is honoured.
+    auto_infer: bool = True
+    fuzzy_threshold: float = 0.6
+
+    # What to do with source columns that aren't in the mapping.
+    unmapped: UnmappedStrategy = "keep"
+
+    # Apply target-field dtypes from the schema after rename.
+    coerce_types: bool = False
+
+    # Reorder output to match schema.fields order. Unmapped survivors
+    # (when unmapped="keep") are appended at the end in their original
+    # source order.
+    reorder_to_schema: bool = False
+
+    # Required-target enforcement. When True (default), a required
+    # target field that has no source column and no default raises an
+    # InputValidationError. When False, ``map_columns`` does not raise:
+    # the field is left out of the output and listed in
+    # ``MapResult.missing_required_targets``.
+    enforce_required: bool = True
+
+    @classmethod
+    def from_preset(cls, name: str) -> MapOptions:
+        """Build options from one of the named ``PRESETS``.
+
+        Raises:
+            ConfigError: *name* is not a known preset.
+        """
+        if name not in PRESETS:
+            raise ConfigError(
+                f"Unknown preset '{name}'",
+                operation="MapOptions.from_preset",
+                suggestion=f"Available: {sorted(PRESETS)}",
+            )
+        return cls(**PRESETS[name])
+
+    @classmethod
+    def from_dict(cls, data: dict) -> MapOptions:
+        """Build options from a plain dict, ignoring unknown keys.
+
+        A nested ``schema`` dict is converted with
+        ``TargetSchema.from_dict``. A null ``mapping`` is treated as
+        "not provided" so the default empty mapping applies.
+        """
+        known = set(cls.__dataclass_fields__)
+        kwargs = {k: v for k, v in data.items() if k in known}
+        if "mapping" in kwargs and kwargs["mapping"] is None:
+            # JSON ``"mapping": null`` would otherwise store None and
+            # break every later ``dict(options.mapping)`` call.
+            del kwargs["mapping"]
+        if "schema" in kwargs and isinstance(kwargs["schema"], dict):
+            kwargs["schema"] = TargetSchema.from_dict(kwargs["schema"])
+        return cls(**kwargs)
+
+    def to_dict(self) -> dict:
+        """Return a JSON-friendly dict; ``schema`` is included only when set."""
+        out: dict[str, Any] = {
+            "mapping": dict(self.mapping),
+            "auto_infer": self.auto_infer,
+            "fuzzy_threshold": self.fuzzy_threshold,
+            "unmapped": self.unmapped,
+            "coerce_types": self.coerce_types,
+            "reorder_to_schema": self.reorder_to_schema,
+            "enforce_required": self.enforce_required,
+        }
+        if self.schema is not None:
+            out["schema"] = self.schema.to_dict()
+        return out
+
+    def to_file(self, path: str | Path) -> Path:
+        """Write the options to *path* as indented UTF-8 JSON; return the path."""
+        out = Path(path)
+        out.write_text(
+            json.dumps(self.to_dict(), indent=2, default=str),
+            encoding="utf-8",
+        )
+        return out
+
+    @classmethod
+    def from_file(cls, path: str | Path) -> MapOptions:
+        """Load options from the UTF-8 JSON file at *path*."""
+        return cls.from_dict(json.loads(Path(path).read_text(encoding="utf-8")))
+
+    def validate(self) -> None:
+        """Check ``unmapped`` and ``fuzzy_threshold``.
+
+        ``unmapped`` is checked through ``ensure_choice``.
+
+        Raises:
+            ConfigError: ``fuzzy_threshold`` lies outside [0.0, 1.0].
+        """
+        ensure_choice(
+            self.unmapped, name="unmapped",
+            choices=("keep", "drop", "error"),
+            function="MapOptions.validate",
+        )
+        if not (0.0 <= self.fuzzy_threshold <= 1.0):
+            raise ConfigError(
+                f"fuzzy_threshold must be in [0.0, 1.0], got {self.fuzzy_threshold!r}",
+                operation="MapOptions.validate",
+            )
+
+
+@dataclass
+class MapResult:
+    """Output of ``map_columns``."""
+
+    # Transformed copy of the input frame.
+    mapped_df: pd.DataFrame
+    mapping: dict[str, str]                # source → target
+    inferred_pairs: dict[str, str]         # subset of mapping that was auto-inferred
+    # Number of mapping entries whose target differs from the source name.
+    columns_renamed: int
+    # Unmapped source columns removed because unmapped="drop".
+    columns_dropped: list[str]
+    # Schema fields absent from the input that were created, filled with
+    # the field's default (or a missing value when it has none).
+    columns_added: list[str]
+    coercion_failures: dict[str, int]       # column → n_rows_that_failed_coercion
+    # Unmapped source columns retained because unmapped="keep".
+    unmapped_kept: list[str]
+    # Required schema fields with neither a source column nor a default.
+    missing_required_targets: list[str]
+
+
+# ---------------------------------------------------------------------------
+# Main entry point
+# ---------------------------------------------------------------------------
+
+def map_columns(
+    df: pd.DataFrame,
+    options: Optional[MapOptions] = None,
+) -> MapResult:
+    """Apply *options* to *df* and return a :class:`MapResult`.
+
+    The input frame is never modified; all work happens on a copy.
+
+    Pipeline placement (recommended, not enforced)
+    ----------------------------------------------
+    Two natural slots:
+      * **Early** — header alignment for multi-vendor unification.
+        Each vendor uses different column names; rename to a canonical
+        schema before any other tool runs.
+      * **Late** — schema enforcement for output. After cleaning, coerce
+        types and project to the target shape (CRM import contract,
+        database schema). Run after format / missing so the coerced
+        data is canonical first.
+    The pipeline runner does not enforce a position; place by use case.
+
+    Pipeline:
+      1. Compose mapping (explicit ``options.mapping`` ∪ inferred
+         pairs from ``options.schema``).
+      2. Reject duplicate target names — two source columns mapped to
+         the same target is a user error, not a silent overwrite.
+      3. Decide what to do with unmapped source columns
+         (``keep`` / ``drop`` / ``error``), and reject a target name
+         that would collide with a kept unmapped column.
+      4. Rename, then handle missing required targets, then coerce
+         types, then reorder.
+
+    Raises:
+        InputValidationError: the mapping names a column that is not in
+            *df*; two sources share a target; a target collides with a
+            kept unmapped column; unmapped columns exist with
+            ``unmapped="error"``; or a required field is missing while
+            ``enforce_required`` is on.
+    """
+    ensure_dataframe(df, function="map_columns")
+    options = options or MapOptions()
+    options.validate()
+
+    # ------------------------------------------------------------------
+    # 1. Compose the effective mapping
+    # ------------------------------------------------------------------
+    explicit = dict(options.mapping)
+    inferred: dict[str, str] = {}
+    if options.schema is not None and options.auto_infer:
+        all_inferred = infer_mapping(df, options.schema, threshold=options.fuzzy_threshold)
+        # Explicit user pairings always win.
+        used_targets = set(explicit.values())
+        for src, tgt in all_inferred.items():
+            if src in explicit:
+                continue
+            if tgt in used_targets:
+                continue
+            inferred[src] = tgt
+            used_targets.add(tgt)
+
+    mapping: dict[str, str] = {**inferred, **explicit}
+
+    # ------------------------------------------------------------------
+    # 2. Validate mapping coherence
+    # ------------------------------------------------------------------
+    unknown_sources = [s for s in mapping if s not in df.columns]
+    if unknown_sources:
+        raise InputValidationError(
+            f"Mapping references columns not in input: {unknown_sources}",
+            operation="map_columns",
+            suggestion=f"Available source columns: {list(df.columns)}",
+        )
+    target_counts: dict[str, int] = {}
+    for tgt in mapping.values():
+        target_counts[tgt] = target_counts.get(tgt, 0) + 1
+    duplicates = [t for t, n in target_counts.items() if n > 1]
+    if duplicates:
+        raise InputValidationError(
+            f"Multiple source columns mapped to the same target(s): {duplicates}",
+            operation="map_columns",
+            suggestion="Each target name must be unique. Drop or rename the conflicting source columns.",
+        )
+
+    # ------------------------------------------------------------------
+    # 3. Handle unmapped source columns
+    # ------------------------------------------------------------------
+    unmapped_sources = [c for c in df.columns if c not in mapping]
+    unmapped_kept: list[str] = []
+    columns_dropped: list[str] = []
+    if unmapped_sources:
+        if options.unmapped == "drop":
+            columns_dropped = list(unmapped_sources)
+        elif options.unmapped == "error":
+            raise InputValidationError(
+                f"Source columns have no mapping and unmapped='error': {unmapped_sources}",
+                operation="map_columns",
+                suggestion=(
+                    "Either add explicit mapping entries, set unmapped='keep' / 'drop', "
+                    "or include the columns in the target schema."
+                ),
+            )
+        else:
+            unmapped_kept = list(unmapped_sources)
+
+    # A target that equals the name of a column we are about to keep
+    # would leave two columns with the same label after the rename —
+    # the same silent-overwrite hazard step 2 guards against.
+    kept_names = set(unmapped_kept)
+    collisions = [tgt for tgt in mapping.values() if tgt in kept_names]
+    if collisions:
+        raise InputValidationError(
+            f"Mapping target(s) collide with unmapped source column(s) that are kept: {collisions}",
+            operation="map_columns",
+            suggestion=(
+                "Map or drop the existing column(s) (unmapped='drop'), "
+                "or choose a different target name."
+            ),
+        )
+
+    # ------------------------------------------------------------------
+    # 4. Apply rename and drop
+    # ------------------------------------------------------------------
+    out = df.copy()
+    if columns_dropped:
+        out = out.drop(columns=columns_dropped)
+    if mapping:
+        out = out.rename(columns=mapping)
+    # Identity pairs (src == tgt) are part of the mapping but not renames.
+    columns_renamed = sum(1 for src, tgt in mapping.items() if src != tgt)
+
+    # ------------------------------------------------------------------
+    # 5. Handle the schema's required + default fields
+    # ------------------------------------------------------------------
+    columns_added: list[str] = []
+    missing_required: list[str] = []
+    if options.schema is not None:
+        present = set(out.columns)
+        for tf in options.schema.fields:
+            if tf.name in present:
+                continue
+            # Required with no default: report it, never fabricate it.
+            if tf.required and tf.default is None:
+                missing_required.append(tf.name)
+                continue
+            # Add with default value (NaN if no default).
+            out[tf.name] = tf.default if tf.default is not None else pd.NA
+            columns_added.append(tf.name)
+
+    if missing_required and options.enforce_required:
+        raise InputValidationError(
+            f"Required target field(s) missing from input: {missing_required}",
+            operation="map_columns",
+            suggestion=(
+                "Either add explicit mapping entries, lower fuzzy_threshold, "
+                "supply a default in the schema, or set enforce_required=False."
+            ),
+        )
+
+    # ------------------------------------------------------------------
+    # 6. Coerce types per the schema
+    # ------------------------------------------------------------------
+    coercion_failures: dict[str, int] = {}
+    if options.coerce_types and options.schema is not None:
+        for tf in options.schema.fields:
+            if tf.name not in out.columns or tf.dtype == "auto":
+                continue
+            try:
+                series, fails = coerce_series(out[tf.name], tf.dtype)
+            except (ValueError, TypeError) as e:
+                # Best effort: a column that cannot be converted at all
+                # keeps its current dtype and the run continues.
+                logger.warning(
+                    "map_columns: coerce of {!r} → {} failed: {}",
+                    tf.name, tf.dtype, e,
+                )
+                continue
+            out[tf.name] = series
+            if fails:
+                coercion_failures[tf.name] = fails
+
+    # ------------------------------------------------------------------
+    # 7. Reorder
+    # ------------------------------------------------------------------
+    if options.reorder_to_schema and options.schema is not None:
+        ordered = [f.name for f in options.schema.fields if f.name in out.columns]
+        # Append survivors (kept-unmapped originals) in their pre-rename order.
+        survivors = [c for c in out.columns if c not in ordered]
+        out = out.loc[:, ordered + survivors]
+
+    return MapResult(
+        mapped_df=out,
+        mapping=mapping,
+        inferred_pairs=inferred,
+        columns_renamed=columns_renamed,
+        columns_dropped=columns_dropped,
+        columns_added=columns_added,
+        coercion_failures=coercion_failures,
+        unmapped_kept=unmapped_kept,
+        missing_required_targets=missing_required,
+    )