A complete offline licensing layer (no internet at any step):

Core
- src/license/ — schema (License, Tier, FeatureFlag), HMAC crypto, JSON storage, LicenseManager singleton with activate/renew/deactivate/issue_trial. Tier-scaffolded so future SKUs can carve per-tool feature sets without consumer-code edits.
- scripts/generate_license.py — creator-only key generator. Mints a DTLIC1: blob the buyer pastes into the activation page.

GUI
- New activation form component (src/gui/components/activation.py).
- hide_streamlit_chrome() now inline-renders the activation form when no valid license is present (every page short-circuits to the form until activated).
- Sidebar shows tier + days remaining; renewal warning under 30 days.
- New pages/_Activate.py for revisiting the form after activation.

CLI
- src/license_cli.py — activate / renew / status / trial / deactivate commands. Exempt from the guard.
- src/cli_license_guard.py — drop-in guard call added to every tool CLI's main(). Lets --help through; respects DATATOOLS_DEV_MODE.

i18n
- New activation.* and license.* keys in en.json + es.json (page title, form labels, status badges, renewal warnings, error messages). Pack parity test stays green.

Test infrastructure
- tests/conftest.py autouse fixture sets DATATOOLS_DEV_MODE=1 so the existing 1916 tests continue to pass.
- isolated_license_path / activated_license_manager / unactivated_license_manager fixtures for tests that want to drive the real check.

Tests (+79)
- tests/test_license.py (40): schema, crypto roundtrip, blob encode/decode, tier→feature mapping, activation flow, name/email mismatch rejection, tamper detection, expiration, renewal, dev-mode bypass.
- tests/test_license_cli.py (26): every license_cli command + subprocess tests confirming every tool CLI refuses to run without a license, --help always works, DEV_MODE bypasses.
- tests/gui/test_activation.py (13): gate blocks without license, passes with trial, activation form submission unlocks the gate, sidebar status, renewal warning, i18n.

Total: 1916 → 1995 tests. All pass under the strict warning filter.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
165 lines
5.2 KiB
Python
165 lines
5.2 KiB
Python
"""CLI for the DataTools upload-time analyzer.
|
|
|
|
Usage:
|
|
python -m src.cli_analyze input.csv # human-readable report
|
|
python -m src.cli_analyze input.csv --json # JSON to stdout
|
|
python -m src.cli_analyze input.csv --sample-rows 5000
|
|
|
|
The analyzer is purely advisory; exit code is always 0 on a successful scan
even when findings are present. Use --strict to exit non-zero when any
'warn' or 'error' finding is reported.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import typer
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
from src.core.analyze import analyze, findings_by_tool, to_dict
|
|
|
|
# Description and usage examples attached to the Typer application. Kept in a
# named constant so the constructor call below stays short.
# NOTE(review): only one command is registered on this app in the visible
# code — confirm the installed Typer version still renders app-level help
# (rather than only the command's own help) in that configuration.
_APP_HELP = (
    "Scan a CSV or Excel file and report data quality issues with the "
    "tools that can fix each one. Read-only and advisory.\n\n"
    "Examples:\n\n"
    " # Default scan (first 1000 rows, human-readable)\n"
    " python -m src.cli_analyze customers.csv\n\n"
    " # Machine-readable output for piping\n"
    " python -m src.cli_analyze customers.csv --json\n\n"
    " # Scan more rows on a large file\n"
    " python -m src.cli_analyze big.csv --sample-rows 50000\n\n"
    " # Exit non-zero when warnings exist (CI gate)\n"
    " python -m src.cli_analyze customers.csv --strict\n"
)

# The Typer application object that the command below registers itself on.
app = typer.Typer(
    name="analyze",
    help=_APP_HELP,
    no_args_is_help=True,
    add_completion=False,
)
|
|
|
|
|
|
# Maps each tool id to the friendly name printed by this CLI. The GUI keeps
# its own copy of this table, so both must be updated together whenever the
# script lineup changes.
_TOOL_DISPLAY = dict(
    (
        ("01_deduplicator", "Deduplicator"),
        ("02_text_cleaner", "Text Cleaner"),
        ("03_format_standardizer", "Format Standardizer"),
        ("04_missing_handler", "Missing Value Handler"),
        ("05_column_mapper", "Column Mapper"),
        ("06_outlier_detector", "Outlier Detector"),
        ("07_multi_file_merger", "Multi-File Merger"),
        ("08_validator_reporter", "Validator & Reporter"),
        ("09_pipeline_runner", "Pipeline Runner"),
    )
)
|
|
|
|
|
|
def _tool_label(tool_id: str) -> str:
    """Return the display name for ``tool_id``.

    A falsy id (empty string or ``None``) yields an em dash. An id that is
    missing from ``_TOOL_DISPLAY`` is returned unchanged.
    """
    if not tool_id:
        return "—"
    return _TOOL_DISPLAY.get(tool_id, tool_id)
|
|
|
|
|
|
# Rich style name used to colour each finding severity in console output.
_SEVERITY_STYLE = dict(
    info="cyan",
    warn="yellow",
    error="red",
)
|
|
|
|
|
|
@app.command()
def scan(
    input_file: str = typer.Argument(
        ..., help="Path to the CSV or Excel file to scan.",
    ),
    sample_rows: int = typer.Option(
        1000, "--sample-rows", "-n",
        help="Cap on rows scanned. Default 1000.",
    ),
    json_out: bool = typer.Option(
        False, "--json",
        help="Print findings as a JSON array on stdout.",
    ),
    strict: bool = typer.Option(
        False, "--strict",
        help="Exit non-zero when any 'warn' or 'error' finding is reported.",
    ),
) -> None:
    """Scan a CSV or Excel file and report data quality findings.

    Exit codes: 0 when the scan completes, 1 with --strict when any 'warn'
    or 'error' finding is reported, 2 when the input path does not exist or
    is not a regular file.
    """
    # Local import keeps this block self-contained; rich is already a
    # dependency of this module (Console, Table).
    from rich.markup import escape

    path = Path(input_file)
    if not path.exists():
        typer.echo(f"File not found: {path}", err=True)
        raise typer.Exit(code=2)
    if not path.is_file():
        # A directory (or other non-file) passes exists(); reject it here
        # with the same exit code instead of letting analyze() fail on it.
        typer.echo(f"Not a file: {path}", err=True)
        raise typer.Exit(code=2)

    findings = analyze(path, sample_rows=sample_rows)

    if json_out:
        # Machine-readable mode: emit only the JSON array, then apply the
        # --strict exit rule.
        typer.echo(json.dumps([to_dict(f) for f in findings], indent=2))
        _maybe_strict_exit(findings, strict)
        return

    console = Console()
    # The file name is user input; escape it so square brackets in the name
    # are printed literally instead of being parsed as Rich markup tags.
    shown_name = escape(path.name)

    if not findings:
        console.print(f"[green]✓[/green] No issues detected in {shown_name}.")
        return

    grouped = findings_by_tool(findings)
    untargeted = [f for f in findings if not f.tool]

    # Top-line summary: count findings per severity, listing the most
    # severe level first and omitting levels with no findings.
    by_sev: dict[str, int] = {}
    for f in findings:
        by_sev[f.severity] = by_sev.get(f.severity, 0) + 1
    summary_parts = [
        f"[{_SEVERITY_STYLE[s]}]{by_sev[s]} {s}[/{_SEVERITY_STYLE[s]}]"
        for s in ("error", "warn", "info") if by_sev.get(s)
    ]
    console.print(
        f"[bold]Scanned[/bold] {shown_name}: "
        f"{len(findings)} finding(s) ({', '.join(summary_parts)})."
    )
    console.print()

    # Per-tool tables — surface what each downstream tool would need to do.
    for tool_id in sorted(grouped):
        _render_tool_table(console, tool_id, grouped[tool_id])

    # Findings with no associated tool get their own trailing table.
    if untargeted:
        _render_tool_table(console, "", untargeted, header="Informational / file-level")

    _maybe_strict_exit(findings, strict)
|
|
|
|
|
|
def _render_tool_table(console: Console, tool_id: str, items, header: str | None = None) -> None:
    """Print one Rich table of findings, followed by a blank line.

    The table title is ``header`` when it is truthy; otherwise it is an arrow
    followed by the display label for ``tool_id``. Each item contributes one
    row built from its ``severity``, ``id``, ``count`` and ``description``
    attributes.
    """
    if header:
        title = header
    else:
        title = f"→ {_tool_label(tool_id)}"

    table = Table(title=title, title_style="bold", show_lines=False, expand=True)

    # Column layout: (heading, keyword options passed to add_column).
    column_specs = (
        ("Severity", {"width": 8}),
        ("Finding", {"width": 32}),
        ("Count", {"justify": "right", "width": 7}),
        ("Description", {}),
    )
    for heading, options in column_specs:
        table.add_column(heading, **options)

    for finding in items:
        level = finding.severity
        colour = _SEVERITY_STYLE[level]
        # NOTE(review): id and description are handed to Rich unescaped —
        # confirm they can never contain square brackets taken from user data.
        table.add_row(
            f"[{colour}]{level}[/{colour}]",
            finding.id,
            str(finding.count),
            finding.description,
        )

    console.print(table)
    console.print()
|
|
|
|
|
|
def _maybe_strict_exit(findings, strict: bool) -> None:
    """Raise ``typer.Exit(code=1)`` in strict mode when a finding warrants it.

    Does nothing when ``strict`` is false. Otherwise the first finding whose
    severity is 'warn' or 'error' triggers the exit; if none match, the
    function returns normally.
    """
    if strict:
        for finding in findings:
            level = finding.severity
            if level == "warn" or level == "error":
                raise typer.Exit(code=1)
|
|
|
|
|
|
def main() -> None:
    """Run the license guard, then hand control to the Typer app."""
    # The guard is imported inside the function, so the guard module is only
    # loaded when the CLI is actually run through main().
    from src.cli_license_guard import guard as _license_guard

    _license_guard()
    app()
|
|
|
|
|
|
# Script entrypoint for `python -m src.cli_analyze`. The app is created with
# no_args_is_help, so running it with no arguments prints the help text. The
# single command is exposed at the top level for convenience, i.e.
# ``python -m src.cli_analyze input.csv`` with no subcommand name.
if __name__ == "__main__":
    main()
|