Add multilingual audit CI pipeline + extract mandelblog_content_guard

2026-03-29 20:49:42 +02:00
parent 2a51989fa4
commit 1f05011a63
104 changed files with 3372 additions and 6 deletions
--- a/mandelblog_content_guard/management/commands/audit_locales.py
+++ b/mandelblog_content_guard/management/commands/audit_locales.py
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+import json
+from collections import defaultdict
+
+from django.core.management.base import BaseCommand
+
+from ...settings import audit_default_locales
+from ...validators.multilingual import audit_locales
+
+
+class Command(BaseCommand):
+    help = "Audit all public locale pages for multilingual integrity issues."
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--locale",
+            action="append",
+            dest="locales",
+            help="Limit the audit to one or more locale codes. Repeat the flag for multiple locales.",
+        )
+        parser.add_argument(
+            "--url",
+            action="append",
+            dest="urls",
+            help="Limit the audit to one or more public page URLs. Repeat the flag for multiple URLs.",
+        )
+        parser.add_argument(
+            "--fix",
+            action="store_true",
+            help="Apply known safe replacements and republish changed content.",
+        )
+        parser.add_argument(
+            "--rewrite",
+            action="store_true",
+            help="Rewrite flagged content through the locale agent system.",
+        )
+        parser.add_argument(
+            "--dry-run",
+            action="store_true",
+            help="Preview rewrite changes without saving content.",
+        )
+        parser.add_argument(
+            "--format",
+            choices=["text", "json"],
+            default="text",
+            help="Output format.",
+        )
+
+    def handle(self, *args, **options):
+        locale_codes = options["locales"] or audit_default_locales()
+        run = audit_locales(
+            locale_codes,
+            fix=options["fix"],
+            rewrite=options["rewrite"],
+            dry_run=options["dry_run"],
+            url_filters=options["urls"],
+        )
+        grouped = defaultdict(list)
+        for issue in run.issues.all().order_by("locale_code", "url", "field_path"):
+            grouped[issue.locale_code].append(issue)
+
+        grouped_compact = defaultdict(list)
+        for locale_code, issues in grouped.items():
+            bucket = {}
+            for issue in issues:
+                key = (
+                    issue.url,
+                    issue.issue_type,
+                    issue.bad_value,
+                    issue.replacement,
+                )
+                extra = issue.extra or {}
+                if key not in bucket:
+                    bucket[key] = {
+                        "url": issue.url,
+                        "title": issue.title,
+                        "severity": issue.severity,
+                        "issue_type": issue.issue_type,
+                        "field_paths": set([issue.field_path] if issue.field_path else []),
+                        "bad_value": issue.bad_value,
+                        "replacement": issue.replacement,
+                        "fixed": issue.fixed,
+                        "sources": set([extra.get("source")] if extra.get("source") else []),
+                        "count": extra.get("count", 1),
+                    }
+                else:
+                    if issue.field_path:
+                        bucket[key]["field_paths"].add(issue.field_path)
+                    if extra.get("source"):
+                        bucket[key]["sources"].add(extra["source"])
+                    bucket[key]["count"] += extra.get("count", 1)
+            grouped_compact[locale_code] = [
+                {
+                    **entry,
+                    "field_paths": sorted(entry["field_paths"]),
+                    "sources": sorted(entry["sources"]),
+                }
+                for entry in bucket.values()
+            ]
+
+        if options["format"] == "json":
+            payload = {
+                "run_id": run.pk,
+                "total_urls_checked": run.total_urls_checked,
+                "issues_found": run.issues_found,
+                "summary": run.summary,
+                "issues": {
+                    locale_code: grouped_compact.get(locale_code, [])
+                    for locale_code in locale_codes
+                },
+            }
+            self.stdout.write(json.dumps(payload, indent=2, ensure_ascii=False))
+            return
+
+        for locale_code in locale_codes:
+            locale_summary = run.summary.get(locale_code, {})
+            self.stdout.write(f"Locale: {locale_code}")
+            self.stdout.write(
+                f"URLs checked: {locale_summary.get('total_urls_checked', 0)}"
+            )
+            self.stdout.write(
+                f"Issues found: {locale_summary.get('issues_found', 0)}"
+            )
+            self.stdout.write(
+                f"Severity: {locale_summary.get('by_severity', {})}"
+            )
+            if options["fix"]:
+                self.stdout.write(
+                    f"Issues auto-fixed: {locale_summary.get('issues_fixed', 0)}"
+                )
+            if options["rewrite"]:
+                self.stdout.write(
+                    f"Rewrite mode: {'dry-run' if options['dry_run'] else 'apply'}"
+                )
+            for issue in grouped_compact.get(locale_code, []):
+                target = issue["url"] or issue["title"] or "object"
+                self.stdout.write(
+                    f"- {target} -> {issue['issue_type']}: {issue['bad_value']}"
+                )
+                if issue.get("replacement"):
+                    self.stdout.write(f"  after: {issue['replacement']}")
+                if issue.get("field_paths"):
+                    self.stdout.write(f"  fields: {', '.join(issue['field_paths'][:5])}")
+                if issue.get("sources"):
+                    self.stdout.write(f"  sources: {', '.join(issue['sources'])}")
+                if issue.get("count"):
+                    self.stdout.write(f"  count: {issue['count']}")
+            if not grouped_compact.get(locale_code):
+                self.stdout.write("- no issues found")
+            self.stdout.write("")
+
+        snippet_summary = run.summary.get("snippets") or {}
+        if snippet_summary:
+            self.stdout.write("Snippet issues:")
+            for model_name, count in snippet_summary.items():
+                self.stdout.write(f"- {model_name}: {count}")
+
+        self.stdout.write(
+            self.style.SUCCESS(
+                f"Audit run {run.pk} completed. Total URLs checked: {run.total_urls_checked}. Issues found: {run.issues_found}."
+            )
+        )