Files
datatools-dev/src/cli_analyze.py
Michael e435103113 feat(license): registration + 1-year licenses + tier scaffolding
A complete offline licensing layer (no internet at any step):

Core
- src/license/ — schema (License, Tier, FeatureFlag), HMAC crypto,
  JSON storage, LicenseManager singleton with activate/renew/
  deactivate/issue_trial. Tier-scaffolded so future SKUs can carve
  per-tool feature sets without consumer-code edits.
- scripts/generate_license.py — creator-only key generator. Mints a
  DTLIC1: blob the buyer pastes into the activation page.

GUI
- New activation form component (src/gui/components/activation.py).
- hide_streamlit_chrome() now inline-renders the activation form when
  no valid license is present (every page short-circuits to the form
  until activated).
- Sidebar shows tier + days remaining; renewal warning under 30 days.
- New pages/_Activate.py for revisiting the form after activation.

CLI
- src/license_cli.py — activate / renew / status / trial / deactivate
  commands. Exempt from the guard.
- src/cli_license_guard.py — drop-in guard call added to every tool
  CLI's main(). Lets --help through; respects DATATOOLS_DEV_MODE.

i18n
- New activation.* and license.* keys in en.json + es.json
  (page title, form labels, status badges, renewal warnings, error
  messages). Pack parity test stays green.

Test infrastructure
- tests/conftest.py autouse fixture sets DATATOOLS_DEV_MODE=1 so the
  existing 1916 tests continue to pass.
- isolated_license_path / activated_license_manager /
  unactivated_license_manager fixtures for tests that want to drive
  the real check.

Tests (+79)
- tests/test_license.py (40): schema, crypto roundtrip, blob
  encode/decode, tier→feature mapping, activation flow, name/email
  mismatch rejection, tamper detection, expiration, renewal,
  dev-mode bypass.
- tests/test_license_cli.py (26): every license_cli command +
  subprocess tests confirming every tool CLI refuses to run without
  a license, --help always works, DEV_MODE bypasses.
- tests/gui/test_activation.py (13): gate blocks without license,
  passes with trial, activation form submission unlocks the gate,
  sidebar status, renewal warning, i18n.

Total: 1916 → 1995 tests. All pass under the strict warning filter.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 16:54:23 +00:00

165 lines
5.2 KiB
Python

"""CLI for the DataTools upload-time analyzer.
Usage:
python -m src.cli_analyze input.csv # human-readable report
python -m src.cli_analyze input.csv --json # JSON to stdout
python -m src.cli_analyze input.csv --sample-rows 5000
The analyzer is purely advisory; exit code is always 0 on a successful scan
even when findings are present. Use --strict to exit non-zero on warnings.
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Optional
import typer
from rich.console import Console
from rich.table import Table
from src.core.analyze import analyze, findings_by_tool, to_dict
# Help text handed to the Typer app: a one-paragraph summary followed by
# worked command-line examples.
_APP_HELP = (
    "Scan a CSV or Excel file and report data quality issues with the "
    "tools that can fix each one. Read-only and advisory.\n\n"
    "Examples:\n\n"
    " # Default scan (first 1000 rows, human-readable)\n"
    " python -m src.cli_analyze customers.csv\n\n"
    " # Machine-readable output for piping\n"
    " python -m src.cli_analyze customers.csv --json\n\n"
    " # Scan more rows on a large file\n"
    " python -m src.cli_analyze big.csv --sample-rows 50000\n\n"
    " # Exit non-zero when warnings exist (CI gate)\n"
    " python -m src.cli_analyze customers.csv --strict\n"
)

# The Typer application object. Shell-completion options are disabled and an
# invocation without arguments prints help instead of erroring.
app = typer.Typer(
    name="analyze",
    help=_APP_HELP,
    no_args_is_help=True,
    add_completion=False,
)
# Friendly display name for each tool id. The CLI keeps its own copy of this
# table (the GUI has a separate one); both must stay in lockstep with the
# actual script lineup.
_TOOL_DISPLAY: dict[str, str] = {
    "01_deduplicator": "Deduplicator",
    "02_text_cleaner": "Text Cleaner",
    "03_format_standardizer": "Format Standardizer",
    "04_missing_handler": "Missing Value Handler",
    "05_column_mapper": "Column Mapper",
    "06_outlier_detector": "Outlier Detector",
    "07_multi_file_merger": "Multi-File Merger",
    "08_validator_reporter": "Validator & Reporter",
    "09_pipeline_runner": "Pipeline Runner",
}
def _tool_label(tool_id: str) -> str:
    """Return the display name for *tool_id*.

    A falsy id yields an empty string; an id missing from ``_TOOL_DISPLAY``
    is returned unchanged.
    """
    if not tool_id:
        return ""
    return _TOOL_DISPLAY.get(tool_id, tool_id)
# Rich style name used to colour each finding severity in console output.
_SEVERITY_STYLE: dict[str, str] = {
    "info": "cyan",
    "warn": "yellow",
    "error": "red",
}
@app.command()
def scan(
    input_file: str = typer.Argument(
        ..., help="Path to the CSV or Excel file to scan.",
    ),
    sample_rows: int = typer.Option(
        1000, "--sample-rows", "-n",
        help="Cap on rows scanned. Default 1000.",
    ),
    json_out: bool = typer.Option(
        False, "--json",
        help="Print findings as a JSON array on stdout.",
    ),
    strict: bool = typer.Option(
        False, "--strict",
        help="Exit non-zero when any 'warn' or 'error' finding is reported.",
    ),
) -> None:
    # Documented with comments rather than a docstring on purpose: Typer uses
    # a command function's docstring as help text, so adding one could change
    # what ``--help`` prints.
    #
    # Analyze ``input_file`` and report the findings, either as a JSON array
    # on stdout (``--json``) or as a summary line plus one Rich table per tool.
    #
    # Exit codes: 2 when the path does not exist or is a directory; 1 when
    # ``--strict`` is set and a 'warn'/'error' finding exists; otherwise the
    # function returns normally.
    from rich.markup import escape

    path = Path(input_file)
    if not path.exists():
        typer.echo(f"File not found: {path}", err=True)
        raise typer.Exit(code=2)
    if path.is_dir():
        # A directory passes exists() but cannot be scanned; fail cleanly
        # here instead of handing it to the analyzer.
        typer.echo(f"Not a file: {path}", err=True)
        raise typer.Exit(code=2)

    findings = analyze(path, sample_rows=sample_rows)

    if json_out:
        typer.echo(json.dumps([to_dict(f) for f in findings], indent=2))
        _maybe_strict_exit(findings, strict)
        return

    console = Console()
    # File names may contain square brackets, which Rich would otherwise
    # interpret as markup tags.
    display_name = escape(path.name)

    if not findings:
        console.print(f"[green]✓[/green] No issues detected in {display_name}.")
        return

    # NOTE(review): assumes findings_by_tool() leaves out findings that have
    # no tool; otherwise those would be rendered twice below -- confirm.
    grouped = findings_by_tool(findings)
    untargeted = [f for f in findings if not f.tool]

    # Top-line summary: count per severity, most severe first.
    by_sev: dict[str, int] = {}
    for f in findings:
        by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
    summary_parts = [
        f"[{_SEVERITY_STYLE[s]}]{by_sev[s]} {s}[/{_SEVERITY_STYLE[s]}]"
        for s in ("error", "warn", "info") if by_sev.get(s)
    ]
    console.print(
        f"[bold]Scanned[/bold] {display_name}: "
        f"{len(findings)} finding(s) ({', '.join(summary_parts)})."
    )
    console.print()

    # Per-tool tables — surface what each downstream tool would need to do.
    for tool_id in sorted(grouped):
        _render_tool_table(console, tool_id, grouped[tool_id])
    if untargeted:
        _render_tool_table(console, "", untargeted, header="Informational / file-level")

    _maybe_strict_exit(findings, strict)
def _render_tool_table(console: Console, tool_id: str, items, header: str | None = None) -> None:
    """Print one Rich table of findings, followed by a blank line.

    Args:
        console: Rich console the table is printed to.
        tool_id: Tool id used to derive the table title when ``header`` is
            not given.
        items: Iterable of findings; each must expose ``severity``, ``id``,
            ``count`` and ``description`` attributes.
        header: Optional explicit table title; overrides the tool label.
    """
    from rich.markup import escape

    def _cell(value):
        # Finding text is data, not markup: escape strings so square brackets
        # (e.g. in column names) are shown literally. Non-strings pass through
        # untouched so Rich handles them exactly as before.
        return escape(value) if isinstance(value, str) else value

    label = header or _tool_label(tool_id)
    table = Table(title=label, title_style="bold", show_lines=False, expand=True)
    table.add_column("Severity", width=8)
    table.add_column("Finding", width=32)
    table.add_column("Count", justify="right", width=7)
    table.add_column("Description")

    for f in items:
        sev_text = escape(str(f.severity))
        # An unrecognised severity is rendered unstyled rather than raising
        # KeyError and aborting the whole report.
        style = _SEVERITY_STYLE.get(f.severity)
        sev = f"[{style}]{sev_text}[/{style}]" if style else sev_text
        table.add_row(sev, _cell(f.id), str(f.count), _cell(f.description))

    console.print(table)
    console.print()
def _maybe_strict_exit(findings, strict: bool) -> None:
    """Exit with status 1 in strict mode when a serious finding is present.

    Does nothing unless ``strict`` is true and at least one finding has a
    severity of 'warn' or 'error'.

    Raises:
        typer.Exit: with ``code=1`` when the condition above is met.
    """
    if strict:
        for finding in findings:
            if finding.severity in ("warn", "error"):
                raise typer.Exit(code=1)
def main() -> None:
    """Run the license guard, then hand control to the Typer app."""
    # The guard is imported here, at call time, rather than at module import.
    from src.cli_license_guard import guard as license_guard

    license_guard()
    app()


# Script entry point for ``python -m src.cli_analyze``. Typer's
# ``no_args_is_help`` handles an invocation without arguments; the single
# command is exposed at the top level for convenience, e.g.
# ``python -m src.cli_analyze input.csv``.
if __name__ == "__main__":
    main()