Add multilingual audit CI pipeline + extract mandelblog_content_guard
This commit is contained in:
163
mandelblog_content_guard/management/commands/audit_locales.py
Normal file
163
mandelblog_content_guard/management/commands/audit_locales.py
Normal file
@@ -0,0 +1,163 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections import defaultdict
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from ...settings import audit_default_locales
|
||||
from ...validators.multilingual import audit_locales
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Run the multilingual audit and report its findings on stdout.

    The audit itself is delegated to ``audit_locales``; this command only
    parses the CLI options, collapses duplicate issue records into compact
    entries and renders them either as human-readable text or as JSON.
    """

    help = "Audit all public locale pages for multilingual integrity issues."

    def add_arguments(self, parser):
        """Register the command-line options understood by the audit."""
        parser.add_argument(
            "--locale",
            action="append",
            dest="locales",
            help="Limit the audit to one or more locale codes. Repeat the flag for multiple locales.",
        )
        parser.add_argument(
            "--url",
            action="append",
            dest="urls",
            help="Limit the audit to one or more public page URLs. Repeat the flag for multiple URLs.",
        )
        parser.add_argument(
            "--fix",
            action="store_true",
            help="Apply known safe replacements and republish changed content.",
        )
        parser.add_argument(
            "--rewrite",
            action="store_true",
            help="Rewrite flagged content through the locale agent system.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Preview rewrite changes without saving content.",
        )
        parser.add_argument(
            "--format",
            choices=["text", "json"],
            default="text",
            help="Output format.",
        )

    def handle(self, *args, **options):
        """Run the audit and write the report in the requested format.

        Falls back to ``audit_default_locales()`` when no ``--locale`` flag
        was given. Nothing is returned; all output goes to ``self.stdout``.
        """
        locale_codes = options["locales"] or audit_default_locales()
        run = audit_locales(
            locale_codes,
            fix=options["fix"],
            rewrite=options["rewrite"],
            dry_run=options["dry_run"],
            url_filters=options["urls"],
        )
        # The ordering here determines the order of entries in the report.
        compact = self._compact_issues(
            run.issues.all().order_by("locale_code", "url", "field_path")
        )

        if options["format"] == "json":
            self._write_json(run, locale_codes, compact)
            return

        self._write_text(
            run,
            locale_codes,
            compact,
            fix=options["fix"],
            rewrite=options["rewrite"],
            dry_run=options["dry_run"],
        )

    @staticmethod
    def _compact_issues(issues):
        """Collapse issue records into one entry per distinct problem.

        Records of the same locale that share URL, issue type, bad value and
        replacement are merged: their field paths and sources are unioned and
        their counts summed. Returns ``{locale_code: [entry, ...]}`` with
        entries in first-seen order and ``field_paths`` / ``sources`` sorted.
        """
        buckets = {}
        for issue in issues:
            extra = issue.extra or {}
            source = extra.get("source")
            key = (issue.url, issue.issue_type, issue.bad_value, issue.replacement)
            per_locale = buckets.setdefault(issue.locale_code, {})
            entry = per_locale.get(key)

            if entry is None:
                per_locale[key] = {
                    "url": issue.url,
                    "title": issue.title,
                    "severity": issue.severity,
                    "issue_type": issue.issue_type,
                    "field_paths": {issue.field_path} if issue.field_path else set(),
                    "bad_value": issue.bad_value,
                    "replacement": issue.replacement,
                    "fixed": issue.fixed,
                    "sources": {source} if source else set(),
                    "count": extra.get("count", 1),
                }
                continue

            if issue.field_path:
                entry["field_paths"].add(issue.field_path)
            if source:
                entry["sources"].add(source)
            entry["count"] += extra.get("count", 1)
            # A merged entry is only "fixed" when every record folded into it
            # is fixed; keeping just the first record's flag could report a
            # partially fixed group as fully fixed.
            entry["fixed"] = entry["fixed"] and issue.fixed

        # Sets are not JSON-serialisable; emit deterministic sorted lists.
        return {
            locale_code: [
                {
                    **entry,
                    "field_paths": sorted(entry["field_paths"]),
                    "sources": sorted(entry["sources"]),
                }
                for entry in per_locale.values()
            ]
            for locale_code, per_locale in buckets.items()
        }

    def _write_json(self, run, locale_codes, compact):
        """Write the run totals, summary and compacted issues as JSON."""
        payload = {
            "run_id": run.pk,
            "total_urls_checked": run.total_urls_checked,
            "issues_found": run.issues_found,
            "summary": run.summary,
            # Every requested locale gets a key, even when it has no issues.
            "issues": {
                locale_code: compact.get(locale_code, [])
                for locale_code in locale_codes
            },
        }
        self.stdout.write(json.dumps(payload, indent=2, ensure_ascii=False))

    def _write_text(self, run, locale_codes, compact, *, fix, rewrite, dry_run):
        """Write a per-locale plain-text report followed by the run totals."""
        # Tolerate a run whose summary is empty or unset.
        summary = run.summary or {}

        for locale_code in locale_codes:
            locale_summary = summary.get(locale_code, {})
            entries = compact.get(locale_code, [])

            self.stdout.write(f"Locale: {locale_code}")
            self.stdout.write(
                f"URLs checked: {locale_summary.get('total_urls_checked', 0)}"
            )
            self.stdout.write(
                f"Issues found: {locale_summary.get('issues_found', 0)}"
            )
            self.stdout.write(
                f"Severity: {locale_summary.get('by_severity', {})}"
            )
            if fix:
                self.stdout.write(
                    f"Issues auto-fixed: {locale_summary.get('issues_fixed', 0)}"
                )
            if rewrite:
                self.stdout.write(
                    f"Rewrite mode: {'dry-run' if dry_run else 'apply'}"
                )

            for entry in entries:
                self._write_entry(entry)
            if not entries:
                self.stdout.write("- no issues found")
            self.stdout.write("")

        snippet_summary = summary.get("snippets") or {}
        if snippet_summary:
            self.stdout.write("Snippet issues:")
            for model_name, count in snippet_summary.items():
                self.stdout.write(f"- {model_name}: {count}")

        self.stdout.write(
            self.style.SUCCESS(
                f"Audit run {run.pk} completed. Total URLs checked: {run.total_urls_checked}. Issues found: {run.issues_found}."
            )
        )

    def _write_entry(self, entry):
        """Write one compacted issue entry and its optional detail lines."""
        target = entry["url"] or entry["title"] or "object"
        self.stdout.write(
            f"- {target} -> {entry['issue_type']}: {entry['bad_value']}"
        )
        if entry.get("replacement"):
            self.stdout.write(f" after: {entry['replacement']}")
        if entry.get("field_paths"):
            # Only the first five field paths are listed to keep lines short.
            self.stdout.write(f" fields: {', '.join(entry['field_paths'][:5])}")
        if entry.get("sources"):
            self.stdout.write(f" sources: {', '.join(entry['sources'])}")
        if entry.get("count"):
            self.stdout.write(f" count: {entry['count']}")
|
||||
Reference in New Issue
Block a user