Add multilingual audit CI pipeline + extract mandelblog_content_guard

This commit is contained in:
2026-03-29 20:49:42 +02:00
parent 2a51989fa4
commit 1f05011a63
104 changed files with 3372 additions and 6 deletions

View File

@@ -0,0 +1,99 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
from pathlib import Path
def load_json(path: Path) -> dict:
    """Read and parse the JSON document stored at *path*.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    locale encoding, so non-ASCII content parses the same way regardless of
    the locale the CI runner happens to use.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` / ``json.JSONDecodeError``).
    """
    return json.loads(path.read_text(encoding="utf-8"))
def locale_rows(payload: dict) -> list[tuple[str, dict]]:
    """Return the ``(locale, data)`` pairs found under ``payload["summary"]``.

    The ``"snippets"`` key is not treated as a locale row and is skipped.
    A missing, null or empty summary yields an empty list.
    """
    # ``or {}`` also covers an explicit ``"summary": null`` in the JSON, which
    # ``.get("summary", {})`` would pass through as ``None``.
    summary = payload.get("summary") or {}
    return [(locale, data) for locale, data in summary.items() if locale != "snippets"]
def print_error(payload: dict) -> int:
    """Report a top-level audit error recorded in *payload*, if there is one.

    Returns:
        2 after printing an ``AUDIT ERROR`` line when ``payload["error"]`` is
        truthy; 0 (printing nothing) otherwise.
    """
    message = payload.get("error")
    if not message:
        return 0
    print(f"AUDIT ERROR: {message}")
    return 2
def print_summary(payload: dict) -> tuple[int, int]:
    """Print one ``LOCALE`` line per locale row and total the severities.

    Returns:
        A ``(block_total, warn_total)`` tuple summed over every locale row.
    """
    block_counts = []
    warn_counts = []
    for locale, data in locale_rows(payload):
        severity = data.get("by_severity", {})
        # ``or 0`` maps null / empty counts to zero before the int conversion.
        block, warn, log = (
            int(severity.get(level, 0) or 0) for level in ("block", "warn", "log")
        )
        block_counts.append(block)
        warn_counts.append(warn)
        found = data.get("issues_found", 0)
        remaining = data.get("remaining_issues", 0)
        print(
            f"LOCALE {locale}: issues_found={found} "
            f"issues_remaining={remaining} "
            f"block={block} warn={warn} log={log}"
        )
    return sum(block_counts), sum(warn_counts)
def print_regressions(current: dict, previous: dict) -> None:
    """Print the locales whose issue counts grew between two audit payloads.

    For every locale row of *current*, the remaining-issue count and the
    ``block`` / ``warn`` / ``log`` severity counts are compared with the same
    locale in *previous*; a locale absent from *previous* is compared against
    zeros. A locale is listed when at least one of those deltas is positive.
    Prints ``REGRESSIONS: none`` when no locale qualifies.
    """

    def count(mapping: dict, key: str) -> int:
        # Missing, null and empty values all count as zero.
        return int(mapping.get(key, 0) or 0)

    baseline = dict(locale_rows(previous))
    worse = []
    for locale, data in locale_rows(current):
        before = baseline.get(locale, {})
        delta = {
            "remaining": count(data, "remaining_issues") - count(before, "remaining_issues"),
        }
        now_sev = data.get("by_severity", {})
        before_sev = before.get("by_severity", {})
        for level in ("block", "warn", "log"):
            delta[level] = count(now_sev, level) - count(before_sev, level)
        if max(delta.values()) > 0:
            worse.append((locale, delta))

    if not worse:
        print("REGRESSIONS: none")
        return

    print("REGRESSIONS:")
    for locale, delta in worse:
        print(
            f"- {locale}: remaining={delta['remaining']:+d} block={delta['block']:+d} "
            f"warn={delta['warn']:+d} log={delta['log']:+d}"
        )
def main(argv: list[str] | None = None) -> int:
    """Summarise a multilingual audit JSON file and map it to an exit status.

    Args:
        argv: Command-line arguments to parse; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        2 when the current audit file cannot be loaded, when it carries a
        top-level ``error``, or when any ``block`` severity issue is present;
        1 when only ``warn`` severity issues are present; 0 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--json", required=True, help="Current multilingual audit JSON file")
    parser.add_argument("--previous-json", help="Optional previous audit JSON file for regression comparison")
    args = parser.parse_args(argv)

    try:
        current = load_json(Path(args.json))
    except (OSError, ValueError) as exc:
        # An uncaught exception would end the process with status 1, which
        # this script uses for "warnings only"; report a hard failure instead.
        print(f"AUDIT ERROR: cannot read {args.json}: {exc}")
        return 2
    if not isinstance(current, dict):
        print(f"AUDIT ERROR: {args.json} does not contain a JSON object")
        return 2

    error_status = print_error(current)
    if error_status:
        return error_status

    total_block, total_warn = print_summary(current)

    if args.previous_json:
        # The regression comparison is informational only: a missing or
        # damaged previous artifact must not change the exit status.
        try:
            previous = load_json(Path(args.previous_json))
        except (FileNotFoundError, NotADirectoryError):
            print("REGRESSIONS: previous artifact not found")
        except (OSError, ValueError) as exc:
            print(f"REGRESSIONS: previous artifact unreadable ({exc})")
        else:
            if isinstance(previous, dict):
                print_regressions(current, previous)
            else:
                print("REGRESSIONS: previous artifact unreadable (not a JSON object)")

    if total_block > 0:
        return 2
    if total_warn > 0:
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# Runs the multilingual audit on the staging host over SSH and stores the JSON
# report (or a failure placeholder) under artifacts/.
set -euo pipefail

# Required configuration: abort with the given message when unset or empty.
: "${STAGING_AUDIT_HOST:?STAGING_AUDIT_HOST is required}"
: "${STAGING_AUDIT_PROJECT_DIR:?STAGING_AUDIT_PROJECT_DIR is required}"
: "${STAGING_AUDIT_MANAGE:?STAGING_AUDIT_MANAGE is required}"

# The report is written to a temporary file first and moved into place later.
OUT_FILE="artifacts/multilingual-audit.json"
TMP_FILE="${OUT_FILE}.tmp"
mkdir -p artifacts

# Optional configuration with defaults.
: "${AUDIT_TIMEOUT_SECONDS:=300}"
: "${SSH_OPTS:=-o StrictHostKeyChecking=accept-new}"

# NOTE: SSH_OPTS is one flat string that is word-split before being handed to
# ssh, so a key file path containing whitespace will not survive intact.
if [[ -n "${STAGING_SSH_KEYFILE:-}" ]]; then
  SSH_OPTS+=" -i ${STAGING_SSH_KEYFILE}"
fi
# Write a placeholder audit report to $OUT_FILE that records a failed run.
#   $1 - human-readable failure message, stored under the "error" key.
# The message is handed to Python through the environment and the heredoc is
# quoted, so the text is never spliced into Python source code (shell quoting
# is not valid Python quoting for messages containing quotes or newlines).
write_failure_json() {
  FAILURE_MESSAGE="$1" python3 - <<'PY' > "$OUT_FILE"
import json
import os

print(json.dumps({
    "run_id": None,
    "total_urls_checked": 0,
    "issues_found": 0,
    "summary": {},
    "issues": {},
    "error": os.environ["FAILURE_MESSAGE"],
}, indent=2))
PY
}
# Command executed by the remote shell. Both values are wrapped in plain
# single quotes, so they must not themselves contain a single quote.
REMOTE_CMD="cd '${STAGING_AUDIT_PROJECT_DIR}' && '${STAGING_AUDIT_MANAGE}' audit_locales --format=json"

# errexit is suspended around the remote call so that a failure can be turned
# into a failure report below instead of aborting the script right here.
set +e
SSH_OPTS="$SSH_OPTS" STAGING_AUDIT_HOST="$STAGING_AUDIT_HOST" REMOTE_CMD="$REMOTE_CMD" AUDIT_TIMEOUT_SECONDS="$AUDIT_TIMEOUT_SECONDS" python3 - <<'PY' > "$TMP_FILE"
import os
import shlex
import subprocess
import sys


def as_text(data):
    """Return captured process output as str.

    TimeoutExpired carries bytes (or None) even when text=True was requested,
    so the value has to be normalised before it is written to a text stream.
    """
    if data is None:
        return ""
    if isinstance(data, bytes):
        return data.decode(errors="replace")
    return data


ssh_opts = shlex.split(os.environ["SSH_OPTS"])
cmd = ["ssh", *ssh_opts, os.environ["STAGING_AUDIT_HOST"], os.environ["REMOTE_CMD"]]
try:
    proc = subprocess.run(
        cmd,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=int(os.environ["AUDIT_TIMEOUT_SECONDS"]),
    )
except subprocess.TimeoutExpired as exc:
    sys.stderr.write(as_text(exc.stderr))
    # 124 is the status the calling shell code treats as "timed out".
    raise SystemExit(124)
except subprocess.CalledProcessError as exc:
    sys.stdout.write(as_text(exc.stdout))
    sys.stderr.write(as_text(exc.stderr))
    raise SystemExit(exc.returncode)
else:
    sys.stdout.write(proc.stdout)
    sys.stderr.write(proc.stderr)
PY
rc=$?
set -e

# Success: publish the captured report.
if [[ $rc -eq 0 ]]; then
  mv "$TMP_FILE" "$OUT_FILE"
  exit 0
fi

# Failure: drop the partial output and publish an error report in its place,
# then propagate the original status.
rm -f "$TMP_FILE"
if [[ $rc -eq 124 ]]; then
  write_failure_json "Remote multilingual audit timed out after ${AUDIT_TIMEOUT_SECONDS}s"
else
  write_failure_json "Remote multilingual audit failed with exit status ${rc}"
fi
exit "$rc"