Add multilingual audit CI pipeline + extract mandelblog_content_guard

This commit is contained in:
2026-03-29 20:49:42 +02:00
parent 2a51989fa4
commit 1f05011a63
104 changed files with 3372 additions and 6 deletions

44
Jenkinsfile vendored
View File

@@ -9,6 +9,10 @@ pipeline {
environment { environment {
PYENVPIPELINE_VIRTUALENV = '1' PYENVPIPELINE_VIRTUALENV = '1'
GIT_SSH_COMMAND = 'ssh -o StrictHostKeyChecking=accept-new' GIT_SSH_COMMAND = 'ssh -o StrictHostKeyChecking=accept-new'
STAGING_AUDIT_HOST = 'root@49.12.204.96'
STAGING_AUDIT_PROJECT_DIR = '/home/www-mandelstudio/mandelstudio'
STAGING_AUDIT_MANAGE = '/var/lib/virtualenv/mandelstudio/bin/manage.py'
STAGING_AUDIT_SSH_CREDENTIALS_ID = 'staging-root-ssh'
} }
stages { stages {
@@ -74,7 +78,7 @@ pipeline {
steps { steps {
sh ''' sh '''
. .venv/bin/activate . .venv/bin/activate
python -m compileall -q setup.py mandelstudio python -m compileall -q setup.py mandelstudio mandelblog_content_guard
''' '''
} }
post { post {
@@ -86,6 +90,40 @@ pipeline {
} }
} }
} }
stage('Deploy Staging') {
steps {
echo 'Triggering staging deploy for mandelstudio after successful CI build.'
build job: 'deploy-project-stg',
wait: true,
propagate: true,
parameters: [string(name: 'PROJECT_NAME', value: 'mandelstudio')]
}
}
stage('Post-Deploy Multilingual Audit') {
options {
timeout(time: 10, unit: 'MINUTES')
}
steps {
sh 'mkdir -p artifacts'
withCredentials([sshUserPrivateKey(credentialsId: env.STAGING_AUDIT_SSH_CREDENTIALS_ID, keyFileVariable: 'STAGING_SSH_KEYFILE')]) {
sh './scripts/run_remote_multilingual_audit.sh'
}
script {
int status = sh(script: 'python3 scripts/multilingual_audit_ci.py --json artifacts/multilingual-audit.json', returnStatus: true)
if (status == 2) {
error('Block-level multilingual issues detected or audit execution failed.')
}
if (status == 1) {
unstable('Warn-level multilingual issues detected.')
}
}
}
post {
always {
archiveArtifacts artifacts: 'artifacts/multilingual-audit.json', onlyIfSuccessful: false
}
}
}
} }
post { post {
always { always {
@@ -97,10 +135,6 @@ pipeline {
. .venv/bin/activate . .venv/bin/activate
pip install coverage pip install coverage
''' '''
echo 'Triggering staging deploy for mandelstudio after successful CI build.'
build job: 'deploy-project-stg',
wait: false,
parameters: [string(name: 'PROJECT_NAME', value: 'mandelstudio')]
} }
failure { failure {
emailext subject: "JENKINS-NOTIFICATION: ${currentBuild.currentResult}: Job '${env.JOB_NAME} #${env.BUILD_NUMBER}'", emailext subject: "JENKINS-NOTIFICATION: ${currentBuild.currentResult}: Job '${env.JOB_NAME} #${env.BUILD_NUMBER}'",

View File

@@ -0,0 +1,62 @@
#!/usr/bin/env groovy
// Nightly multilingual audit: checks out master, runs the remote audit script
// against the staging host, then evaluates the resulting JSON report.
pipeline {
    agent { label 'external_pool' }
    triggers {
        // Once per night during the 02:00 hour; 'H' lets Jenkins choose the minute.
        cron('H 2 * * *')
    }
    options {
        disableConcurrentBuilds()
        // The repository is fetched manually in the 'Checkout' stage below.
        skipDefaultCheckout(true)
    }
    environment {
        STAGING_AUDIT_HOST = 'root@49.12.204.96'
        STAGING_AUDIT_PROJECT_DIR = '/home/www-mandelstudio/mandelstudio'
        STAGING_AUDIT_MANAGE = '/var/lib/virtualenv/mandelstudio/bin/manage.py'
        STAGING_AUDIT_SSH_CREDENTIALS_ID = 'staging-root-ssh'
    }
    stages {
        stage('Checkout') {
            steps {
                withCredentials([sshUserPrivateKey(credentialsId: 'gitea-ssh', keyFileVariable: 'GIT_KEYFILE')]) {
                    // Shell text is kept flush-left so the script string stays exactly as authored.
                    sh '''
export GIT_SSH_COMMAND="ssh -i $GIT_KEYFILE -o StrictHostKeyChecking=accept-new"
if [ -d .git ]; then
git remote set-url origin ssh://git@git.mandelblog.com:2222/salt/mandelstudio.git
git fetch --tags --force --progress origin +refs/heads/master:refs/remotes/origin/master
else
git clone ssh://git@git.mandelblog.com:2222/salt/mandelstudio.git .
git fetch --tags --force --progress origin +refs/heads/master:refs/remotes/origin/master
fi
git checkout -f refs/remotes/origin/master
'''
                }
            }
        }
        stage('Nightly Multilingual Audit') {
            options {
                timeout(time: 10, unit: 'MINUTES')
            }
            steps {
                // Keep the report left in the workspace by the previous run so the CI
                // script can compare against it; a missing file is not an error.
                sh 'mkdir -p artifacts && [ -f artifacts/multilingual-audit.json ] && cp artifacts/multilingual-audit.json artifacts/previous-multilingual-audit.json || true'
                withCredentials([sshUserPrivateKey(credentialsId: env.STAGING_AUDIT_SSH_CREDENTIALS_ID, keyFileVariable: 'STAGING_SSH_KEYFILE')]) {
                    sh './scripts/run_remote_multilingual_audit.sh'
                }
                script {
                    int status = sh(script: 'python3 scripts/multilingual_audit_ci.py --json artifacts/multilingual-audit.json --previous-json artifacts/previous-multilingual-audit.json', returnStatus: true)
                    if (status == 1) {
                        unstable('Warn-level multilingual issues detected.')
                    } else if (status != 0) {
                        // Exit code 2 is the documented "block" result, but any other
                        // non-zero code (interpreter missing, killed process, ...) also
                        // means the audit did not complete and must not pass silently.
                        error('Block-level multilingual issues detected or audit execution failed.')
                    }
                }
            }
            post {
                always {
                    archiveArtifacts artifacts: 'artifacts/multilingual-audit.json,artifacts/previous-multilingual-audit.json', onlyIfSuccessful: false
                }
            }
        }
    }
}

View File

@@ -0,0 +1 @@
# Dotted path of the AppConfig class Django should use for this app.
# NOTE(review): Django 3.2+ auto-discovers a single AppConfig subclass and
# deprecates `default_app_config` — confirm whether this is still needed for
# the Django versions the project supports.
default_app_config = "mandelblog_content_guard.apps.MandelblogContentGuardConfig"

View File

@@ -0,0 +1,25 @@
from .base import BaseLanguageAgent
from .de import GermanAgent
from .en import EnglishAgent
from .es import SpanishAgent
from .fr import FrenchAgent
from .it import ItalianAgent
from .nl import DutchAgent
from .pt import PortugueseAgent
from .ru import RussianAgent
# Maps a two-letter language code to the agent class that handles that locale.
# Codes missing from this table are served by BaseLanguageAgent (see
# get_language_agent).
AGENT_REGISTRY = {
"nl": DutchAgent,
"en": EnglishAgent,
"de": GermanAgent,
"fr": FrenchAgent,
"es": SpanishAgent,
"it": ItalianAgent,
"pt": PortugueseAgent,
"ru": RussianAgent,
}
def get_language_agent(locale_code: str) -> BaseLanguageAgent:
    """Return a fresh language agent instance for *locale_code*.

    The lookup is case-insensitive and accepts regional variants: ``"pt-BR"``
    or ``"en_GB"`` resolve to the agent registered for the primary language
    subtag (``"pt"`` / ``"en"``). Codes with no registered agent fall back to
    ``BaseLanguageAgent``.
    """
    normalized = (locale_code or "").strip().lower().replace("_", "-")
    agent_class = AGENT_REGISTRY.get(normalized)
    if agent_class is None:
        # Regional variant or unknown code: retry with the primary subtag
        # before giving up and using the generic base agent.
        primary_subtag = normalized.partition("-")[0]
        agent_class = AGENT_REGISTRY.get(primary_subtag, BaseLanguageAgent)
    return agent_class()

View File

@@ -0,0 +1,187 @@
from __future__ import annotations
import re
from collections import defaultdict
from typing import Any
from django.utils.module_loading import import_string
from ..settings import get_rewrite_backend
class BaseLanguageAgent:
    """Locale-aware cleanup and rewrite helper for website copy.

    Subclasses customise behaviour through class attributes (``locale``,
    ``tone``, ``preferred_formality``, ``cta_defaults``, ``vocabulary_map``,
    ``contextual_vocabulary_map``) and by overriding ``post_cleanup_text``.
    An optional rewrite backend (a callable resolved from settings) is applied
    as the last step of ``rewrite``.
    """

    locale = "nl"
    tone = "business"
    preferred_formality = "neutral"
    # keyword (searched in the lower-cased text) -> canonical CTA label.
    cta_defaults: dict[str, str] = {}
    # source text -> replacement, applied regardless of field path.
    vocabulary_map: dict[str, str] = {}
    # field-path token -> {source text -> replacement}; used only when the
    # token occurs in the lower-cased field path.
    contextual_vocabulary_map: dict[str, dict[str, str]] = {}
    # (pattern, template) pairs that strip translator commentary and keep only
    # the captured ``quote`` group.
    cleanup_patterns: tuple[tuple[re.Pattern[str], str], ...] = (
        (
            re.compile(
                r"""^.*?\bis\s+(?:German|Spanish|French|Italian|Portuguese|Dutch),\s+not\s+Dutch.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\btranslation\s+from\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\btraducid[oa]\s+al\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\bперевод\s+с\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^\s*La\s+entrada\s+\"?(?P<quote>.+?)\"?\s+está\s+en\s+alemán.*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
    )

    def __init__(self) -> None:
        self.backend = self._load_backend()

    def _load_backend(self):
        """Import and return the configured rewrite backend, or ``None`` if unset."""
        backend_path = get_rewrite_backend()
        if not backend_path:
            return None
        return import_string(backend_path)

    def backend_prompt(self, field_path: str, text: str) -> str:
        """Build the instruction prompt handed to the rewrite backend."""
        return (
            f"Rewrite the following {self.locale} website copy for a small-business "
            f"website in a natural, professional, sales-driven tone. Preserve meaning, "
            f"remove translation artifacts, keep it concise, and do not add commentary.\n"
            f"Field: {field_path}\n"
            f"Locale: {self.locale}\n"
            f"Tone: {self.tone}\n"
            f"Formality: {self.preferred_formality}\n"
            f"Text: {text}"
        )

    def _contextual_replacements(self, field_path: str) -> dict[str, str]:
        """Collect replacements whose context token occurs in *field_path*."""
        lowered = field_path.lower()
        replacements: dict[str, str] = {}
        for token, mapping in self.contextual_vocabulary_map.items():
            if token in lowered:
                replacements.update(mapping)
        return replacements

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Hook for locale-specific normalisation; the base agent is a no-op."""
        return text

    @staticmethod
    def _strip_dangling_leading_quote(text: str) -> str:
        """Drop a leading ``"`` that has no closing partner.

        The cleanup patterns consume an optional trailing quote but not the
        opening one that follows ``: ``, which would otherwise leave results
        such as ``"Hello world``. Text containing further quotes is left as is.
        """
        if text.startswith('"') and '"' not in text[1:]:
            return text[1:].lstrip()
        return text

    def _apply_replacements(self, text: str, replacements: dict[str, str]) -> str:
        """Apply *replacements* to *text*, longest source first.

        Multi-word sources are replaced as plain substrings; single tokens are
        replaced only at word boundaries so they never rewrite part of a
        longer word.
        """
        cleaned = text
        phrase_replacements = {}
        token_replacements = {}
        for source, target in replacements.items():
            if not source:
                continue
            if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
                token_replacements[source] = target
            else:
                phrase_replacements[source] = target
        for source, target in sorted(phrase_replacements.items(), key=lambda item: len(item[0]), reverse=True):
            cleaned = cleaned.replace(source, target)
        for source, target in sorted(token_replacements.items(), key=lambda item: len(item[0]), reverse=True):
            pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
            # A callable replacement inserts the target literally; a plain
            # string would be parsed as a template (backslashes, \g<...>).
            cleaned = pattern.sub(lambda _match, _target=target: _target, cleaned)
        return cleaned

    def cleanup_text(self, text: str, field_path: str = "") -> str:
        """Strip translator commentary, apply vocabulary, collapse whitespace.

        Contextual replacements selected by *field_path* override entries of
        the general ``vocabulary_map`` that share the same source text.
        """
        cleaned = text.strip()
        for pattern, replacement in self.cleanup_patterns:
            match = pattern.match(cleaned)
            if not match:
                continue
            cleaned = self._strip_dangling_leading_quote(
                replacement.format(**match.groupdict()).strip()
            )
        # One merged pass: applying the general map first would consume the
        # source text before a more specific contextual override could match.
        replacements = {**self.vocabulary_map, **self._contextual_replacements(field_path)}
        cleaned = self._apply_replacements(cleaned, replacements)
        cleaned = self.post_cleanup_text(cleaned, field_path=field_path)
        return re.sub(r"\s+", " ", cleaned).strip()

    def normalize_cta(self, text: str, field_path: str = "") -> str:
        """Return the default CTA for the first keyword found, else the cleaned text."""
        normalized = self.cleanup_text(text, field_path=field_path)
        lowered = normalized.lower()
        for keyword, replacement in self.cta_defaults.items():
            if keyword in lowered:
                return replacement
        return normalized

    def rewrite(self, text: str, field_path: str = "", issues: list[Any] | None = None) -> str:
        """Clean *text* and, when a backend is configured, let it rewrite the result.

        CTA-like field paths are mapped onto ``cta_defaults``. *issues* items
        are expected to expose an ``issue_type`` attribute. A backend result
        is used only if it is a non-blank string.
        """
        cleaned = self.cleanup_text(text, field_path=field_path)
        lowered_path = field_path.lower()
        if any(token in lowered_path for token in ("cta", "button", "link_text", "submit")):
            cleaned = self.normalize_cta(cleaned, field_path=field_path)
        elif issues and any(
            issue.issue_type in {"generic_badge_label", "foreign_ui_label", "weak_marketing_copy", "mixed_locale_heading"}
            for issue in issues
        ):
            cleaned = self.cleanup_text(cleaned, field_path=field_path)
        if self.backend:
            rewritten = self.backend(
                locale=self.locale,
                field_path=field_path,
                text=cleaned,
                prompt=self.backend_prompt(field_path, cleaned),
            )
            if isinstance(rewritten, str) and rewritten.strip():
                cleaned = rewritten.strip()
        return cleaned

    def process_block(
        self,
        block_data: Any,
        field_path: str = "",
        issue_map: dict[str, list[Any]] | None = None,
    ) -> tuple[Any, bool]:
        """Recursively clean or rewrite every string inside *block_data*.

        Returns ``(new_data, changed)``. Dict keys extend the path as
        ``parent.key`` and list items as ``parent[index]``. Values that are
        not dict, list or str are returned untouched.
        """
        issue_map = issue_map or {}
        if isinstance(block_data, dict):
            changed = False
            output = {}
            for key, value in block_data.items():
                child_path = f"{field_path}.{key}" if field_path else str(key)
                new_value, child_changed = self.process_block(value, child_path, issue_map)
                output[key] = new_value
                changed = changed or child_changed
            return output, changed
        if isinstance(block_data, list):
            changed = False
            output = []
            for index, value in enumerate(block_data):
                child_path = f"{field_path}[{index}]"
                new_value, child_changed = self.process_block(value, child_path, issue_map)
                output.append(new_value)
                changed = changed or child_changed
            return output, changed
        if isinstance(block_data, str):
            issues = issue_map.get(field_path, [])
            # Match path tokens case-insensitively, consistent with rewrite().
            lowered_path = field_path.lower()
            needs_rewrite = bool(issues) or any(
                token in lowered_path for token in ("cta", "button", "label", "placeholder", "help_text")
            )
            if not needs_rewrite:
                cleaned = self.cleanup_text(block_data)
                return cleaned, cleaned != block_data
            rewritten = self.rewrite(block_data, field_path=field_path, issues=issues)
            return rewritten, rewritten != block_data
        return block_data, False

    def build_issue_map(self, issues: list[Any]) -> dict[str, list[Any]]:
        """Group *issues* by their ``field_path``; issues without a path are skipped."""
        issue_map: dict[str, list[Any]] = defaultdict(list)
        for issue in issues:
            if issue.field_path:
                issue_map[issue.field_path].append(issue)
        return issue_map

View File

@@ -0,0 +1,23 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_de_text
from ..system_strings import build_system_vocabulary
class GermanAgent(BaseLanguageAgent):
    """Language agent for German (``de``) copy.

    Supplies German CTA defaults and system vocabulary, and delegates the
    post-cleanup step to ``normalize_de_text``.
    """

    locale = "de"
    tone = "professional and trustworthy"
    preferred_formality = "formal Sie"
    # The meaning of the key tuple is defined by build_system_vocabulary
    # (system_strings module, not visible here).
    vocabulary_map = dict(build_system_vocabulary("de", ("transparent_investment",)))
    cta_defaults = dict(
        starter="Starter-Gespräch planen",
        business="Beratungsgespräch planen",
        support="Support anfragen",
        service="Dienstleistungen anzeigen",
        project="Projekt starten",
        kontakt="Einführungsgespräch planen",
    )

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the German normalizer over already-cleaned text."""
        normalized = normalize_de_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,34 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_en_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class EnglishAgent(BaseLanguageAgent):
    """Language agent for English (``en``) copy.

    Supplies English CTA defaults plus general and contextual system
    vocabulary, and delegates the post-cleanup step to ``normalize_en_text``.
    """

    locale = "en"
    tone = "business-friendly and direct"
    preferred_formality = "neutral"
    vocabulary_map = dict(
        build_system_vocabulary(
            "en",
            ("plan_badge", "services_badge", "transparent_label", "transparent_investment"),
        )
    )
    _system_contextual = build_contextual_system_vocabulary(
        "en",
        ("plan_badge", "services_badge", "transparent_label"),
    )
    # Each context token gets its own copy of the system mapping.
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "title": dict(_system_contextual.get("title", {})),
        "heading": dict(_system_contextual.get("heading", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }
    cta_defaults = dict(
        starter="Book starter call",
        business="Book business call",
        support="View support",
        service="View services",
        project="Start your project",
        quote="Request a quote",
        contact="Book intro call",
    )

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the English normalizer over already-cleaned text."""
        normalized = normalize_en_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,43 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_es_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class SpanishAgent(BaseLanguageAgent):
    """Language agent for Spanish (``es``) copy.

    Supplies Spanish CTA defaults plus general and contextual system
    vocabulary, and delegates the post-cleanup step to ``normalize_es_text``.
    """

    locale = "es"
    tone = "clear and business-focused"
    preferred_formality = "formal"
    vocabulary_map = dict(
        build_system_vocabulary(
            "es",
            (
                "plan_badge",
                "response_time",
                "without_commitment",
                "transparent_label",
                "transparent_investment",
            ),
        )
    )
    _system_contextual = build_contextual_system_vocabulary("es", ("plan_badge", "transparent_label"))
    # Each context token gets its own copy of the system mapping.
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "title": dict(_system_contextual.get("title", {})),
        "heading": dict(_system_contextual.get("heading", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }
    cta_defaults = dict(
        starter="Reservar llamada inicial",
        business="Reservar llamada comercial",
        support="Solicitar soporte",
        service="Mostrar los servicios",
        project="Inicia tu proyecto",
        quote="Solicitar propuesta",
        contact="Planificar la reunión inicial",
    )

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Spanish normalizer over already-cleaned text."""
        normalized = normalize_es_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,66 @@
from .base import BaseLanguageAgent
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class FrenchAgent(BaseLanguageAgent):
    """Language agent for French (``fr``) copy.

    Supplies French CTA defaults, a vocabulary map that replaces leftover
    English/German/Dutch strings with French copy, and contextual overrides
    keyed by field-path token. No locale-specific normalizer is applied.
    """

    locale = "fr"
    tone = "professional and commercial"
    preferred_formality = "formal"
    # Elided articles need their apostrophe ("l'entretien", "l'échange");
    # without it the labels are misspelled French.
    cta_defaults = {
        "starter": "Planifier l'entretien de départ",
        "business": "Planifier l'entretien commercial",
        "support": "Voir le support",
        "service": "Afficher les services",
        "project": "Lancez votre projet",
        "devis": "Demander un devis",
        "contact": "Planifier l'échange",
    }
    vocabulary_map = {
        **build_system_vocabulary("fr"),
        "SERVICES": "PRESTATIONS",
        "New": "Nouveau",
        "Popular": "Populaire",
        "Erstes Produktionsprojekt erfolgreich abgeschlossen.": "Premier projet de production livré avec succès.",
        "Von Kickoff bis zum Launch mit einem klaren Umfang.": "Du cadrage au lancement avec un périmètre clair.",
        "Demande d'admission initiale": "Planifier un échange initial",
        "Geschäftsprozess besprechen": "Échanger sur votre processus métier",
        "Entretien d'accueil": "Entretien initial",
        "Vraag over diensten": "Question sur les services",
        "Konkrete erste Schätzung": "Première estimation concrète",
        "Ansatz, der zu Ihrem Budget passt": "Approche adaptée à votre budget",
        "Detailliertes Seitenlayout": "Structure détaillée des pages",
        "Investition": "investissement",
        "Unverbindliches Gespräch, klares Angebot": "Sans engagement, offre claire",
        "Bereit, mit der Business-Website zu starten?": "Prêt à démarrer votre site d'entreprise ?",
        "Planifier un échange business": "Planifier un échange commercial",
        "Aucune carte bancaire requise": "Sans engagement",
    }
    _system_contextual = build_contextual_system_vocabulary("fr")
    # Overrides applied only when the token occurs in the field path.
    contextual_vocabulary_map = {
        "badge": {
            **_system_contextual.get("badge", {}),
            "Popular": "Le plus demandé",
        },
        "label": {
            **_system_contextual.get("label", {}),
            "Popular": "Le plus demandé",
        },
        "metric": {
            **_system_contextual.get("metric", {}),
        },
        "stat": {
            **_system_contextual.get("stat", {}),
        },
        "title": {
            **_system_contextual.get("title", {}),
            "SERVICES": "PRESTATIONS",
        },
        "heading": {
            **_system_contextual.get("heading", {}),
            "SERVICES": "PRESTATIONS",
        },
        "rendered": {
            **_system_contextual.get("rendered", {}),
            "SERVICES": "PRESTATIONS",
        },
    }

View File

@@ -0,0 +1,42 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_it_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class ItalianAgent(BaseLanguageAgent):
    """Language agent for Italian (``it``) copy.

    Supplies Italian CTA defaults plus general and contextual system
    vocabulary, and delegates the post-cleanup step to ``normalize_it_text``.
    """

    locale = "it"
    tone = "professional and approachable"
    preferred_formality = "polite"
    vocabulary_map = dict(
        build_system_vocabulary(
            "it",
            (
                "weeks_1_2",
                "without_commitment",
                "transparent_label",
                "transparent_investment",
                "customization_integrations",
                "multilingual_rollout",
            ),
        )
    )
    _system_contextual = build_contextual_system_vocabulary("it", ("transparent_label",))
    # Each context token gets its own copy of the system mapping.
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }
    cta_defaults = dict(
        starter="Prenota una call iniziale",
        business="Pianifica la call business",
        support="Richiedi supporto",
        service="Mostra i servizi",
        project="Avvia il tuo progetto",
        quote="Richiedi una proposta",
        contact="Pianifica la riunione introduttiva",
    )

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Italian normalizer over already-cleaned text."""
        normalized = normalize_it_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,20 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_nl_text
class DutchAgent(BaseLanguageAgent):
    """Language agent for Dutch (``nl``) copy.

    Supplies Dutch CTA defaults and delegates the post-cleanup step to
    ``normalize_nl_text``; it defines no vocabulary maps of its own.
    """

    locale = "nl"
    tone = "zakelijk en duidelijk"
    preferred_formality = "je/jij professioneel"
    cta_defaults = dict(
        starter="Plan startergesprek",
        business="Plan zakelijk gesprek",
        support="Bekijk support",
        service="Bekijk diensten",
        project="Start jouw project",
        contact="Plan kennismaking",
        offerte="Vraag voorstel aan",
    )

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Dutch normalizer over already-cleaned text."""
        normalized = normalize_nl_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,111 @@
from .base import BaseLanguageAgent
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
# Language agent for Portuguese ("pt") copy. It defines CTA defaults, a large
# vocabulary map that replaces leftover Spanish/French/German/Italian/English
# strings with Portuguese copy, and contextual overrides keyed by field-path
# token. No locale-specific normalizer (post_cleanup_text) is overridden here.
class PortugueseAgent(BaseLanguageAgent):
locale = "pt"
tone = "business-focused and practical"
preferred_formality = "neutral"
# keyword searched in the lower-cased text -> canonical Portuguese CTA label.
cta_defaults = {
"starter": "Agendar chamada inicial",
"business": "Agendar chamada comercial",
"support": "Ver suporte",
"service": "Ver serviços",
"project": "Iniciar o seu projeto",
"proposta": "Pedir proposta",
"contact": "Agendar reunião introdutória",
}
# System vocabulary first, then hand-maintained fixes; later keys win on
# duplicates. Several entries are whole concatenated page fragments so that a
# full rendered block can be replaced in one step.
vocabulary_map = {
**build_system_vocabulary("pt"),
"SERVICES": "SERVIÇOS",
"New": "Novo",
"Popular": "Em destaque",
"Siti web e negozi online": "Sites e lojas online",
"Siti web e negozi online che sono rapidamente online e facili da gestire": "Sites e lojas online que ficam no ar rapidamente e são fáceis de gerir",
"Caso de cliente en directo": "Caso real de cliente",
"El primer proyecto de producción finalizado con éxito.": "O primeiro projeto de produção foi concluído com sucesso.",
"Más sobre el proceso": "Mais sobre o processo",
"Modifiez simplement vous-même.": "Edite facilmente por conta própria.",
"Opciones de la tienda web Mantenimiento y soporte Suporte mensal opcional para atualizações e estabilidade.": "Opções da loja online Manutenção e suporte Suporte mensal opcional para atualizações e estabilidade.",
"Opciones de la tienda web": "Opções da loja online",
"Planes de soporte": "Planos de suporte",
"Multilingüe": "Multilingue",
"Suivi + corrections": "Acompanhamento e correções",
"Mejoras mensuales": "Melhorias mensais",
"¿A qué velocidad puede comenzar?": "Com que rapidez podem começar?",
"¿Puedo editar textos e imágenes yo mismo?": "Posso editar textos e imagens por conta própria?",
"Einzelhandelsunternehmer": "Comerciante",
"lifestyle": "estilo de vida",
"À partir de 3 750 €": "A partir de 3.750 €",
"Transparente sobre o planejamento, o processo e a gestão.": "Clareza sobre o planeamento, o processo e a gestão.",
"Einzelhandelsinhaber Petite boutique en ligne Forfaits de services (à partir de) Pontos de partida transparentes.": "Comerciantes Pequena loja online Pacotes de serviço (a partir de) Pontos de partida claros.",
"Unsere Serviços": "Os nossos serviços",
"Unsere Serviços: vom schnellen Start bis zu skalierbarem Wachstum": "Os nossos serviços: do lançamento rápido ao crescimento escalável",
"Elija el camino": "Escolha o caminho certo",
"Elija el camino que corresponda a su fase: sitio de inicio, sitio empresarial, tienda en línea o soporte continuo.": "Escolha o caminho certo para a sua fase: site inicial, site empresarial, loja online ou suporte contínuo.",
"Début en direct": "Lançamento rápido",
"Demande d'admission initiale": "Agendar conversa inicial",
"Site Web d'Entreprise": "Site empresarial",
"Hablar sobre el proceso empresarial": "Falar sobre o processo do negócio",
"Mise en place de boutique en ligne": "Implementação de loja online",
"Maintenance & gestion": "Manutenção e gestão",
"Afficher le plan de soutien": "Ver suporte",
"Introducción multilingüe": "Lançamento multilingue",
"Forfaits de services (à partir de)": "Pacotes de serviço (a partir de)",
"Schnell online mit einer starken Basis": "Rápido online com uma base sólida",
"Startseite + Kernseiten": "Página inicial + páginas essenciais",
"Optimizado para móviles": "Otimizado para mobile",
"Gestisca lei stesso il contenuto": "Gerir o conteúdo com autonomia",
"Detailliertes Seitenlayout": "Estrutura detalhada das páginas",
"Unverbindliches Gespräch, klares Angebot": "Sem compromisso, proposta clara",
"Mehr Struktur und Konversion": "Mais estrutura e foco em conversão",
"Sections axées sur la conversion": "Secções orientadas para conversão",
"Base prête pour le SEO": "Base pronta para SEO",
"Katalog + Kasse": "Catálogo + checkout",
"Zahlungen und Auftragsfluss": "Pagamentos e fluxo de encomendas",
"Wachstumsbereite Grundlage": "Base pronta para crescimento",
"Soporte y crecimiento": "Suporte e crescimento",
"Amélioration continue": "Melhoria contínua",
"Desde 149 € al mes.": "Desde 149 € por mês.",
"Ab 2.250 €": "A partir de 2.250 €",
"Boutique en ligne": "Loja online",
"Sales-ready mit skalierbarem Stack": "Preparada para vender com uma base escalável",
"Agendar conversa sobre o serviço Ver resultados do projeto 1-2 Wochen Début en direct 4.9/5 Kundenschätzung 100% Bearbeitbar Visão geral dos serviços Cada serviço é projetado para melhorar a faturação, a confiança e a controlabilidade.": "Agendar conversa sobre o serviço Ver resultados do projeto 1 a 2 semanas Lançamento rápido 4.9/5 Avaliação dos clientes 100% Editável Visão geral dos serviços Cada serviço foi concebido para aumentar a faturação, reforçar a confiança e dar mais controlo à sua equipa.",
"Site inicial Schnell online mit einer starken Basis A partir de 1.250 € Agendar chamada inicial Startseite + Kernseiten Optimizado para móviles Gestisca lei stesso il contenuto Recomendado Site Web d'Entreprise Mehr Struktur und Konversion Ab 2.250 € Agendar chamada comercial Detailliertes Seitenlayout Sections axées sur la conversion Base prête pour le SEO Boutique en ligne Sales-ready mit skalierbarem Stack À partir de 3 750 € Iniciar o processo da loja online Katalog + Kasse Zahlungen und Auftragsfluss Wachstumsbereite Grundlage Soporte y crecimiento Amélioration continue Desde 149 € al mes.": "Site inicial Rápido online com uma base sólida A partir de 1.250 € Agendar chamada inicial Página inicial + páginas essenciais Otimizado para mobile Gerir o conteúdo com autonomia Recomendado Site empresarial Mais estrutura e foco em conversão A partir de 2.250 € Agendar chamada comercial Estrutura detalhada das páginas Secções orientadas para conversão Base pronta para SEO Loja online Preparada para vender com uma base escalável A partir de 3.750 € Iniciar o processo da loja online Catálogo + checkout Pagamentos e fluxo de encomendas Base pronta para crescimento Suporte e crescimento Melhoria contínua Desde 149 € por mês.",
"Perguntas frequentes Transparente sobre o planejamento, o processo e a gestão.": "Perguntas frequentes Clareza sobre o planeamento, o processo e a gestão.",
'Ver serviços New La entrada "Unterstützung oder Erweiterung" está en alemán, no en neerlandés.': "Ver serviços Novo Suporte ou expansão",
"Unterstützung oder Erweiterung": "Suporte ou expansão",
'La entrada "Unterstützung oder Erweiterung"': "Suporte ou expansão",
'La entrada "Unterstützung oder Erweiterung" está en alemán, no en neerlandés. Traducido al francés, es: "Suporte ou expansão".': "Suporte ou expansão",
"Sem cartão de crédito": "Sem compromisso",
}
_system_contextual = build_contextual_system_vocabulary("pt")
# Overrides applied only when the token occurs in the field path.
# NOTE(review): "title" and "heading" do not merge _system_contextual, unlike
# the other tokens here — confirm this is intentional.
contextual_vocabulary_map = {
"badge": {
**_system_contextual.get("badge", {}),
"Popular": "Escolha frequente",
},
"label": {
**_system_contextual.get("label", {}),
"Popular": "Escolha frequente",
},
"metric": {
**_system_contextual.get("metric", {}),
},
"stat": {
**_system_contextual.get("stat", {}),
},
"title": {
"SERVICES": "SERVIÇOS",
"Popular": "Em destaque",
},
"heading": {
"SERVICES": "SERVIÇOS",
"Popular": "Em destaque",
},
"rendered": {
**_system_contextual.get("rendered", {}),
"SERVICES": "SERVIÇOS",
"Popular": "Em destaque",
},
}

View File

@@ -0,0 +1,39 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_ru_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class RussianAgent(BaseLanguageAgent):
    """Language agent for Russian (``ru``) copy.

    Supplies Russian CTA defaults plus general and contextual system
    vocabulary, and delegates the post-cleanup step to ``normalize_ru_text``.
    """

    locale = "ru"
    tone = "professional and confident"
    preferred_formality = "neutral polite"
    vocabulary_map = dict(
        build_system_vocabulary(
            "ru",
            (
                "customization_integrations",
                "detailed_page_structure",
                "without_commitment",
            ),
        )
    )
    _system_contextual = build_contextual_system_vocabulary("ru", ("plan_badge", "transparent_label"))
    # Each context token gets its own copy of the system mapping.
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }
    cta_defaults = dict(
        starter="Запланировать стартовую консультацию",
        business="Обсудить бизнес-проект",
        support="Посмотреть поддержку",
        service="Посмотреть услуги",
        project="Запустить свой проект",
        contact="Отправить запрос",
        quote="Получить предложение",
    )

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Russian normalizer over already-cleaned text."""
        normalized = normalize_ru_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
from .agents import get_language_agent
from .validators.multilingual import validate_ai_text_or_raise
def guard_ai_output(locale_code: str, field_path: str, value: str) -> str:
    """Validate AI-generated *value* and hand it back unchanged.

    Validation is delegated to ``validate_ai_text_or_raise``; whatever it
    raises propagates to the caller.
    """
    validate_ai_text_or_raise(locale_code, field_path, value)
    # Reaching this point means validation did not raise.
    return value
def rewrite_ai_output(locale_code: str, field_path: str, value: str) -> str:
    """Rewrite AI-generated *value* with the locale's agent, then validate it.

    Returns the rewritten text; whatever ``validate_ai_text_or_raise`` raises
    for the rewritten text propagates to the caller.
    """
    candidate = get_language_agent(locale_code).rewrite(value, field_path=field_path)
    validate_ai_text_or_raise(locale_code, field_path, candidate)
    return candidate

View File

@@ -0,0 +1,10 @@
from django.apps import AppConfig
class MandelblogContentGuardConfig(AppConfig):
    """Django application configuration for the content guard app."""

    name = "mandelblog_content_guard"
    verbose_name = "MandelBlog Content Guard"
    default_auto_field = "django.db.models.BigAutoField"

    def ready(self):
        """Import the ``signals`` module once the app registry is ready."""
        # Imported only for its import-time side effects.
        from . import signals  # noqa: F401

View File

@@ -0,0 +1,3 @@
from .visible_text import VisibleTextExtractor, extract_visible_rendered_text, normalize_text
__all__ = ["VisibleTextExtractor", "extract_visible_rendered_text", "normalize_text"]

View File

@@ -0,0 +1,85 @@
from __future__ import annotations
import html
import re
from html.parser import HTMLParser
# Elements whose text content is collected as visible copy.
VISIBLE_TEXT_TAGS = {"h1", "h2", "h3", "h4", "h5", "h6", "p", "button", "a", "label", "li"}
# Elements whose entire subtree is skipped.
IGNORED_TAGS = {"script", "style", "noscript", "template"}
# HTML void elements: they never have an end tag, so they must not be tracked
# on the open-element stacks.
VOID_TAGS = frozenset(
    {"area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"}
)


def html_unescape(value: str) -> str:
    """Convert HTML character references in *value* to their characters."""
    return html.unescape(value)


def normalize_text(value: str) -> str:
    """Unescape HTML entities and collapse all whitespace runs to one space."""
    return re.sub(r"\s+", " ", html_unescape(value)).strip()


class VisibleTextExtractor(HTMLParser):
    """Collect the text of visible text-bearing elements, one entry per element.

    Text is gathered only inside ``VISIBLE_TEXT_TAGS`` elements that are not
    within an ignored or hidden subtree. Each closing visible element flushes
    the text gathered so far into ``lines``.
    """

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.ignored_depth = 0
        # Parallel stacks, one entry per currently open (non-void, non-ignored)
        # element: tag name, whether it is itself hidden, whether it counts as
        # a visible text container.
        self._open_tags: list[str] = []
        self.hidden_stack: list[bool] = []
        self._visible_flags: list[bool] = []
        self.visible_tag_stack: list[str] = []
        self.current_chunks: list[str] = []
        self.lines: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        lowered = tag.lower()
        if lowered in IGNORED_TAGS:
            self.ignored_depth += 1
            return
        if lowered in VOID_TAGS:
            # No end tag will ever arrive for these; tracking them would leave
            # a stale entry behind and shift every later pop by one.
            return
        attrs_dict = {key.lower(): (value or "") for key, value in attrs}
        self._open_tags.append(lowered)
        self.hidden_stack.append(self._is_hidden(attrs_dict))
        is_visible = (
            lowered in VISIBLE_TEXT_TAGS
            and not self.ignored_depth
            and not any(self.hidden_stack)
        )
        self._visible_flags.append(is_visible)
        if is_visible:
            self.visible_tag_stack.append(lowered)

    def handle_endtag(self, tag: str) -> None:
        lowered = tag.lower()
        if lowered in IGNORED_TAGS:
            if self.ignored_depth:
                self.ignored_depth -= 1
            return
        if lowered in VOID_TAGS or lowered not in self._open_tags:
            # Self-closed void element or stray end tag: nothing to unwind.
            return
        # Unwind to the matching start tag, implicitly closing any elements
        # left open inside it (e.g. <li> without </li>).
        while self._open_tags:
            open_tag = self._open_tags.pop()
            self.hidden_stack.pop()
            if self._visible_flags.pop():
                # Only elements that were pushed as visible may pop the
                # visible stack; a hidden <a> must not close its parent <p>.
                self.visible_tag_stack.pop()
                self._flush_line()
            if open_tag == lowered:
                break

    def handle_data(self, data: str) -> None:
        if self.ignored_depth or any(self.hidden_stack) or not self.visible_tag_stack:
            return
        normalized = normalize_text(data)
        if normalized:
            self.current_chunks.append(normalized)

    def handle_comment(self, data: str) -> None:
        # Comments never contribute visible text.
        return

    def close(self) -> None:
        super().close()
        # Emit text of elements that were still open at end of input.
        self._flush_line()

    def _flush_line(self) -> None:
        """Join the pending chunks into one line and append it to ``lines``."""
        if not self.current_chunks:
            return
        line = normalize_text(" ".join(self.current_chunks))
        if line:
            self.lines.append(line)
        self.current_chunks = []

    @staticmethod
    def _is_hidden(attrs: dict[str, str]) -> bool:
        """Return True for ``hidden``, ``aria-hidden="true"`` or a hiding inline style."""
        if "hidden" in attrs:
            return True
        if attrs.get("aria-hidden", "").lower() == "true":
            return True
        style = attrs.get("style", "").replace(" ", "").lower()
        return "display:none" in style or "visibility:hidden" in style


def extract_visible_rendered_text(body: str) -> str:
    """Return the visible text of HTML *body*, one extracted line per row."""
    parser = VisibleTextExtractor()
    parser.feed(body)
    parser.close()
    return "\n".join(parser.lines)

View File

@@ -0,0 +1,95 @@
from __future__ import annotations
from django.contrib import messages
from django.http import HttpResponseRedirect
from wagtail import hooks
from .types import format_issue, split_issues
from .validators.multilingual import validate_page, validate_posted_snippet, validate_snippet_instance
def _flash_issues(request, level, prefix: str, issues):
    """Flash up to six issues as messages, plus a summary line for the rest."""
    shown = issues[:6]
    for item in shown:
        messages.add_message(request, level, f"{prefix}: {format_issue(item)}")
    remaining = len(issues) - len(shown)
    if remaining <= 0:
        return
    # Keep the message list short; only report how many were left out.
    messages.add_message(request, level, f"{prefix}: {remaining} more issue(s) not shown.")
@hooks.register("before_publish_page")
def prevent_corrupt_multilingual_publish(request, page):
    """Flash validation results before publish and block on blocking issues.

    Returns a redirect to the current path when blocking issues exist, which
    stops the publish; otherwise returns ``None`` so publishing continues.
    """
    blocking, warnings = split_issues(validate_page(page))
    if warnings:
        _flash_issues(request, messages.WARNING, "Content guard warning", warnings)
    if blocking:
        _flash_issues(request, messages.ERROR, "Publishing blocked", blocking)
        return HttpResponseRedirect(request.path)
    return None
@hooks.register("after_edit_page")
def warn_on_corrupt_multilingual_draft(request, page):
    """After a draft edit, surface all validation issues as warnings only."""
    # Blocking issues are downgraded to warnings here: drafts are never blocked.
    for group in split_issues(validate_page(page)):
        if group:
            _flash_issues(request, messages.WARNING, "Draft warning", group)
def _snippet_locale_code(instance, request) -> str:
posted_locale = request.POST.get("locale") if request.method == "POST" else None
if posted_locale:
return posted_locale
locale = getattr(instance, "locale", None)
if locale is not None and getattr(locale, "language_code", None):
return locale.language_code
return "nl"
def _validate_snippet_request(request, instance):
    """Validate posted snippet data; redirect back when blocking issues exist.

    Non-POST requests are ignored (returns ``None``). Warnings are flashed in
    every case; blocking issues are flashed as errors and answered with an
    ``HttpResponseRedirect`` to the current request path.
    """
    if request.method != "POST":
        return None
    locale_code = _snippet_locale_code(instance, request)
    posted_data = request.POST.dict()
    blocking, warnings = split_issues(validate_posted_snippet(locale_code, posted_data))
    if warnings:
        _flash_issues(request, messages.WARNING, "Snippet warning", warnings)
    if blocking:
        _flash_issues(request, messages.ERROR, "Snippet save blocked", blocking)
        return HttpResponseRedirect(request.path)
    return None
@hooks.register("before_create_snippet")
def prevent_corrupt_snippet_create(request, model):
    """Validate posted data for a snippet that is about to be created.

    Args:
        request: The current admin request.
        model: The snippet model class being created.

    Returns:
        ``None`` to let the create view continue, or the redirect produced by
        ``_validate_snippet_request`` when blocking issues were found.
    """
    # Only POST requests are validated, so skip the instance construction and
    # the locale query for everything else.
    if request.method != "POST":
        return None
    instance = model()
    posted_locale = request.GET.get("locale") or request.POST.get("locale")
    if posted_locale and hasattr(instance, "locale_id"):
        from wagtail.models import Locale

        # The locale code is user-supplied: an unknown code must not raise
        # Locale.DoesNotExist (HTTP 500). Leave the instance locale untouched.
        locale = Locale.objects.filter(language_code=posted_locale).first()
        if locale is not None:
            instance.locale = locale
    return _validate_snippet_request(request, instance)
@hooks.register("before_edit_snippet")
def prevent_corrupt_snippet_edit(request, instance):
    """Validate posted data for an existing snippet before the edit is saved."""
    response = _validate_snippet_request(request, instance)
    return response
def _warn_saved_snippet(request, instance):
    """Flash all validation issues of an already-saved snippet as warnings.

    Blocking issues are reported first, then the non-blocking ones; both use
    the warning level because the save has already happened.
    """
    for group in split_issues(validate_snippet_instance(instance)):
        if group:
            _flash_issues(request, messages.WARNING, "Snippet integrity warning", group)
@hooks.register("after_create_snippet")
def warn_on_saved_snippet_create(request, instance):
    """Report integrity issues for a snippet that was just created."""
    _warn_saved_snippet(request=request, instance=instance)
@hooks.register("after_edit_snippet")
def warn_on_saved_snippet_edit(request, instance):
    """Report integrity issues for a snippet that was just edited."""
    _warn_saved_snippet(request=request, instance=instance)

View File

@@ -0,0 +1,163 @@
from __future__ import annotations
import json
from collections import defaultdict
from django.core.management.base import BaseCommand
from ...settings import audit_default_locales
from ...validators.multilingual import audit_locales
class Command(BaseCommand):
    """Run the multilingual audit and print the result as text or JSON."""

    help = "Audit all public locale pages for multilingual integrity issues."

    def add_arguments(self, parser):
        """Register the command-line options of the audit command."""
        parser.add_argument(
            "--locale",
            action="append",
            dest="locales",
            help="Limit the audit to one or more locale codes. Repeat the flag for multiple locales.",
        )
        parser.add_argument(
            "--url",
            action="append",
            dest="urls",
            help="Limit the audit to one or more public page URLs. Repeat the flag for multiple URLs.",
        )
        parser.add_argument(
            "--fix",
            action="store_true",
            help="Apply known safe replacements and republish changed content.",
        )
        parser.add_argument(
            "--rewrite",
            action="store_true",
            help="Rewrite flagged content through the locale agent system.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Preview rewrite changes without saving content.",
        )
        parser.add_argument(
            "--format",
            choices=["text", "json"],
            default="text",
            help="Output format.",
        )

    def handle(self, *args, **options):
        """Run the audit, then write either a JSON payload or a text report."""
        locale_codes = options["locales"] or audit_default_locales()
        run = audit_locales(
            locale_codes,
            fix=options["fix"],
            rewrite=options["rewrite"],
            dry_run=options["dry_run"],
            url_filters=options["urls"],
        )

        grouped = defaultdict(list)
        for issue in run.issues.all().order_by("locale_code", "url", "field_path"):
            grouped[issue.locale_code].append(issue)
        grouped_compact = {
            locale_code: self._compact_issues(issues)
            for locale_code, issues in grouped.items()
        }

        if options["format"] == "json":
            payload = {
                "run_id": run.pk,
                "total_urls_checked": run.total_urls_checked,
                "issues_found": run.issues_found,
                "summary": run.summary,
                "issues": {
                    locale_code: grouped_compact.get(locale_code, [])
                    for locale_code in locale_codes
                },
            }
            self.stdout.write(json.dumps(payload, indent=2, ensure_ascii=False))
            return

        for locale_code in locale_codes:
            self._write_locale_report(
                locale_code,
                run.summary.get(locale_code, {}),
                grouped_compact.get(locale_code, []),
                options,
            )

        snippet_summary = run.summary.get("snippets") or {}
        if snippet_summary:
            self.stdout.write("Snippet issues:")
            for model_name, count in snippet_summary.items():
                self.stdout.write(f"- {model_name}: {count}")
        self.stdout.write(
            self.style.SUCCESS(
                f"Audit run {run.pk} completed. Total URLs checked: {run.total_urls_checked}. Issues found: {run.issues_found}."
            )
        )

    @staticmethod
    def _compact_issues(issues):
        """Merge issues sharing (url, issue_type, bad_value, replacement) into one entry.

        Field paths and sources are collected into sorted lists and the
        per-issue counts are summed. ``title``, ``severity`` and ``fixed`` are
        taken from the first issue of each group.
        """
        bucket = {}
        for issue in issues:
            key = (issue.url, issue.issue_type, issue.bad_value, issue.replacement)
            extra = issue.extra or {}
            entry = bucket.get(key)
            if entry is None:
                # Key order is kept stable because it is the JSON output order.
                entry = bucket[key] = {
                    "url": issue.url,
                    "title": issue.title,
                    "severity": issue.severity,
                    "issue_type": issue.issue_type,
                    "field_paths": set(),
                    "bad_value": issue.bad_value,
                    "replacement": issue.replacement,
                    "fixed": issue.fixed,
                    "sources": set(),
                    "count": 0,
                }
            if issue.field_path:
                entry["field_paths"].add(issue.field_path)
            if extra.get("source"):
                entry["sources"].add(extra["source"])
            entry["count"] += extra.get("count", 1)
        return [
            {
                **entry,
                "field_paths": sorted(entry["field_paths"]),
                "sources": sorted(entry["sources"]),
            }
            for entry in bucket.values()
        ]

    def _write_locale_report(self, locale_code, locale_summary, entries, options):
        """Write the text report section for a single locale."""
        write = self.stdout.write
        write(f"Locale: {locale_code}")
        write(f"URLs checked: {locale_summary.get('total_urls_checked', 0)}")
        write(f"Issues found: {locale_summary.get('issues_found', 0)}")
        write(f"Severity: {locale_summary.get('by_severity', {})}")
        if options["fix"]:
            write(f"Issues auto-fixed: {locale_summary.get('issues_fixed', 0)}")
        if options["rewrite"]:
            write(f"Rewrite mode: {'dry-run' if options['dry_run'] else 'apply'}")
        for entry in entries:
            target = entry["url"] or entry["title"] or "object"
            write(f"- {target} -> {entry['issue_type']}: {entry['bad_value']}")
            if entry.get("replacement"):
                write(f" after: {entry['replacement']}")
            if entry.get("field_paths"):
                # Only the first five field paths are listed to keep the report short.
                write(f" fields: {', '.join(entry['field_paths'][:5])}")
            if entry.get("sources"):
                write(f" sources: {', '.join(entry['sources'])}")
            if entry.get("count"):
                write(f" count: {entry['count']}")
        if not entries:
            write("- no issues found")
        write("")

View File

@@ -0,0 +1,19 @@
from __future__ import annotations
from django.core.exceptions import ValidationError
class MultilingualValidationMixin:
    """Opt-in mixin for project models that want explicit clean()-time enforcement."""

    def clean(self):
        """Run the parent ``clean()`` (if any), then reject blocking issues.

        Raises:
            ValidationError: When ``validate_snippet_instance`` reports at
                least one blocking issue. The messages are raised as non-field
                errors.
        """
        # Imported lazily inside the method, as in the original implementation.
        from .types import format_issue
        from .validators.multilingual import validate_snippet_instance

        super_clean = getattr(super(), "clean", None)
        if callable(super_clean):
            super_clean()

        blocking = [issue for issue in validate_snippet_instance(self) if issue.blocks]
        if blocking:
            # A dict-style ValidationError must be keyed by real field names;
            # a key such as "content_guard" makes ModelForm.add_error() raise
            # ValueError. A plain list is stored under NON_FIELD_ERRORS.
            raise ValidationError([format_issue(issue) for issue in blocking])

View File

@@ -0,0 +1,15 @@
from .de import normalize_de_text
from .en import normalize_en_text
from .es import normalize_es_text
from .it import normalize_it_text
from .nl import normalize_nl_text
from .ru import normalize_ru_text
# Public API of this package: the per-locale normalizer functions imported above.
__all__ = [
"normalize_de_text",
"normalize_en_text",
"normalize_es_text",
"normalize_it_text",
"normalize_nl_text",
"normalize_ru_text",
]

View File

@@ -0,0 +1,58 @@
from __future__ import annotations
import re
# Whole-string replacements for German: normalize_de_text returns the mapped
# value when the complete input equals one of these keys.
DE_LINE_REPLACEMENTS = {
"Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.": "Häufig gestellte Fragen Klarheit über Planung, Vorgehensweise und Management.",
"Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.": "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
"Einführungsmeeting planen Dienstleistungen anzeigen Verbindlich und klar Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.": "Erstgespräch planen · Dienstleistungen anzeigen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
"Steuern 0,00 € Korb ansehen Kasse Kontakt KONTAKT Lass uns dein Projekt konkret machen Einführungsmeeting planen Dienstleistungen anzeigen So können Sie Kontakt aufnehmen Wählen Sie die Route, die zu Ihrer Frage passt.": "Steuern 0,00 € Korb ansehen Kasse Kontakt KONTAKT Lassen Sie uns Ihr Projekt konkret machen Erstgespräch planen Dienstleistungen anzeigen So können Sie Kontakt aufnehmen Wählen Sie den Weg, der zu Ihrer Frage passt.",
"Steuern 0,00 € Korb ansehen Kasse Starter Website PLAN Starter Website Plan Starter-Gespräch planen Alle Dienstleistungen anzeigen Was du bekommst Startseite + Kernseiten Professionelle Basis, die sofort Vertrauen schafft.": "Steuern 0,00 € Korb ansehen Kasse Starter-Website PLAN Starter-Website Starter-Gespräch planen Alle Dienstleistungen anzeigen Was Sie erhalten Startseite + Kernseiten Professionelle Basis, die sofort Vertrauen schafft.",
"Steuern 0,00 € Korb ansehen Kasse Business Website PLAN Business Website Plan Beratungsgespräch planen Alle Dienstleistungen anzeigen Was du bekommst Detailliertes Seitenlayout Mehr Platz für Dienstleistungen, Fälle und Lead-Flows.": "Steuern 0,00 € Korb ansehen Kasse Business-Website PLAN Business-Website Beratungsgespräch planen Alle Dienstleistungen anzeigen Was Sie erhalten Detailliertes Seitenlayout Mehr Platz für Dienstleistungen, Referenzen und Lead-Flows.",
}
# Substring/token replacements for German, passed by normalize_de_text to
# _apply_boundary_replacements when no whole-string replacement matched.
# NOTE(review): "Support & Wachstum" maps to itself, so that entry changes
# nothing — confirm whether it is an intentional placeholder.
DE_PHRASE_REPLACEMENTS = {
"New": "Neu",
"Einführungsmeeting": "Erstgespräch",
"Intakegespräch": "Erstgespräch",
"SEO-ready basis": "SEO-optimierte Basis",
"Sales-ready mit skalierbarem Stack": "Verkaufsbereit mit skalierbarer Architektur",
"Continuous Verbesserung": "Kontinuierliche Verbesserung",
"Was du bekommst": "Was Sie erhalten",
"Starter Website": "Starter-Website",
"Business Website": "Business-Website",
"Support & Wachstum": "Support & Wachstum",
"Lass uns dein Projekt konkret machen": "Lassen Sie uns Ihr Projekt konkret machen",
"Wählen Sie die Route, die zu Ihrer Frage passt.": "Wählen Sie den Weg, der zu Ihrer Frage passt.",
"Verbindlich und klar": "Unverbindliches Gespräch mit klarem Angebot",
"Unverbindliches Gespräch, klares Angebot": "Unverbindliches Gespräch mit klarem Angebot",
}
def _apply_boundary_replacements(text: str, replacements: dict[str, str]) -> str:
cleaned = text
phrase_replacements = {}
token_replacements = {}
for source, target in replacements.items():
if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
token_replacements[source] = target
else:
phrase_replacements[source] = target
for source, target in sorted(phrase_replacements.items(), key=lambda item: len(item[0]), reverse=True):
cleaned = cleaned.replace(source, target)
for source, target in sorted(token_replacements.items(), key=lambda item: len(item[0]), reverse=True):
pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
cleaned = pattern.sub(target, cleaned)
return cleaned
def normalize_de_text(text: str, field_path: str = "") -> str:
    """Normalize German copy.

    Returns the whole-string replacement when *text* equals a key of
    DE_LINE_REPLACEMENTS; otherwise applies DE_PHRASE_REPLACEMENTS.
    *field_path* is accepted but not used.
    """
    whole_line = DE_LINE_REPLACEMENTS.get(text)
    if whole_line is not None:
        return whole_line
    return _apply_boundary_replacements(text, DE_PHRASE_REPLACEMENTS)

View File

@@ -0,0 +1,28 @@
from __future__ import annotations
import re
# Whole-string replacements for English: normalize_en_text returns the mapped
# value when the complete input equals one of these keys.
EN_LINE_REPLACEMENTS = {
"Service packages (from) Transparent starting points.": "Service packages (from) Clear starting points.",
"Frequently Asked Questions Transparent about planning, approach, and management.": "Frequently Asked Questions Clear guidance on planning, approach, and management.",
"After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Book business call Ready to start with Business Website?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Book business call Ready to start with Business Website?",
"After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Book starter call Ready to start with Starter Website?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Book starter call Ready to start with Starter Website?",
"After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Request support plan Ready to start with Support & Growth?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Request support plan Ready to start with Support & Growth?",
"After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Start webshop project Ready to start with Webshop?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Start webshop project Ready to start with Webshop?",
}
# Substring replacements for English, applied by normalize_en_text (longest
# key first) when no whole-string replacement matched.
EN_PHRASE_REPLACEMENTS = {
"Transparent investment": "Transparent pricing",
"Transparent about planning, approach, and management.": "Clear guidance on planning, approach, and management.",
"Transparent starting points.": "Clear starting points.",
}
def normalize_en_text(text: str, field_path: str = "") -> str:
    """Normalize English copy.

    A whole-string match in EN_LINE_REPLACEMENTS is returned as is. Otherwise
    the EN_PHRASE_REPLACEMENTS substrings are replaced (longest source first),
    whitespace runs are collapsed to single spaces and the result is stripped.
    *field_path* is accepted but not used.
    """
    whole_line = EN_LINE_REPLACEMENTS.get(text)
    if whole_line is not None:
        return whole_line
    result = text
    for phrase in sorted(EN_PHRASE_REPLACEMENTS, key=len, reverse=True):
        result = result.replace(phrase, EN_PHRASE_REPLACEMENTS[phrase])
    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()

View File

@@ -0,0 +1,31 @@
from __future__ import annotations
import re
# Whole-string replacements for Spanish: normalize_es_text returns the mapped
# value when the complete input equals one of these keys.
ES_LINE_REPLACEMENTS = {
"Transparente sobre la planificación, el proceso y la gestión.": "Transparencia sobre la planificación, el proceso y la gestión.",
"<p>Transparente sobre la planificación, el proceso y la gestión.</p>": "<p>Transparencia sobre la planificación, el proceso y la gestión.</p>",
"Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.": "Preguntas frecuentes Transparencia sobre la planificación, el proceso y la gestión.",
"Preguntas frecuentes Transparenteee sobre la planificación, el proceso y la gestión.": "Preguntas frecuentes Transparencia sobre la planificación, el proceso y la gestión.",
"Planificar la reunión inicial Mostrar los proyectos Unverbindliches Gespräch, klares Angebot Construimos sitios web y tiendas online rápidas que tu equipo puede gestionar sin complicaciones.": "Planificar la reunión inicial · Mostrar los proyectos · Conversación sin compromiso con propuesta clara. Construimos sitios web y tiendas online rápidas que tu equipo puede gestionar sin complicaciones.",
}
# Replacements applied by normalize_es_text (longest key first) when no
# whole-string replacement matched. Single-token keys are matched on token
# boundaries there; keys containing spaces or punctuation are plain substrings.
ES_PHRASE_REPLACEMENTS = {
"Transparenteee": "Transparente",
"Transparent": "Transparente",
"Unverbindliches Gespräch, klares Angebot": "Conversación sin compromiso con propuesta clara",
}
def normalize_es_text(text: str, field_path: str = "") -> str:
    """Normalize Spanish copy.

    A whole-string match in ES_LINE_REPLACEMENTS is returned as is. Otherwise
    ES_PHRASE_REPLACEMENTS is applied, longest source first: single-token
    sources are replaced only on token boundaries, other sources as plain
    substrings. Finally whitespace runs are collapsed and the result stripped.
    *field_path* is accepted but not used.
    """
    if text in ES_LINE_REPLACEMENTS:
        return ES_LINE_REPLACEMENTS[text]
    cleaned = text
    ordered = sorted(ES_PHRASE_REPLACEMENTS.items(), key=lambda item: len(item[0]), reverse=True)
    for source, target in ordered:
        if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
            pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
            # A callable keeps the target literal; a plain string would be
            # parsed as a regex replacement template (backslashes, group refs).
            cleaned = pattern.sub(lambda _match, literal=target: literal, cleaned)
        else:
            cleaned = cleaned.replace(source, target)
    return re.sub(r"\s+", " ", cleaned).strip()

View File

@@ -0,0 +1,24 @@
from __future__ import annotations
import re
# Whole-string replacements for Italian: normalize_it_text returns the mapped
# value when the complete input equals one of these keys.
IT_LINE_REPLACEMENTS = {
"Richiedi un piano di supporto Mostra i progetti Unverbindliches Gespräch, klares Angebot Realizziamo siti web e negozi online veloci che il tuo team può gestire in autonomia.": "Richiedi un piano di supporto · Mostra i progetti · Colloquio senza impegno con proposta chiara. Realizziamo siti web e negozi online veloci che il tuo team può gestire in autonomia.",
"Dopo il colloquio iniziale Obiettivi chiari e tappe Planificación clara Transparente Investition Nome * Email * Azienda * Dettagli del progetto Richiedi un piano di supporto Pronto a iniziare con supporto e crescita?": "Dopo il colloquio iniziale Obiettivi chiari e tappe Pianificazione chiara Investimento trasparente Nome * Email * Azienda * Dettagli del progetto Richiedi un piano di supporto Pronto a iniziare con supporto e crescita?",
"Mehrsprachiger Rollout-Plan Anpassung & Integrationen Integrazioni API, flussi di lavoro specifici e blocchi personalizzati adattati alla sua azienda.": "Piano di lancio multilingue Personalizzazioni e integrazioni Integrazioni API, flussi di lavoro specifici e blocchi personalizzati adattati alla sua azienda.",
}
# Substring replacements for Italian, applied by normalize_it_text (longest
# key first) when no whole-string replacement matched.
IT_PHRASE_REPLACEMENTS = {
"Planificación clara": "Pianificazione chiara",
"Unverbindliches Gespräch, klares Angebot": "Colloquio senza impegno con proposta chiara",
}
def normalize_it_text(text: str, field_path: str = "") -> str:
    """Normalize Italian copy.

    A whole-string match in IT_LINE_REPLACEMENTS is returned as is. Otherwise
    the IT_PHRASE_REPLACEMENTS substrings are replaced (longest source first),
    whitespace runs are collapsed to single spaces and the result is stripped.
    *field_path* is accepted but not used.
    """
    whole_line = IT_LINE_REPLACEMENTS.get(text)
    if whole_line is not None:
        return whole_line
    result = text
    for phrase in sorted(IT_PHRASE_REPLACEMENTS, key=len, reverse=True):
        result = result.replace(phrase, IT_PHRASE_REPLACEMENTS[phrase])
    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
import re
# Substring replacements for Dutch, applied by normalize_nl_text.
# NOTE(review): the only entry maps "PLAN" to itself, so it changes nothing —
# confirm whether it is an intentional placeholder.
NL_PHRASE_REPLACEMENTS = {
"PLAN": "PLAN",
}
def normalize_nl_text(text: str, field_path: str = "") -> str:
    """Normalize Dutch copy.

    Applies the NL_PHRASE_REPLACEMENTS substrings in mapping order, collapses
    whitespace runs to single spaces and strips the result. *field_path* is
    accepted but not used.
    """
    result = text
    for phrase in NL_PHRASE_REPLACEMENTS:
        result = result.replace(phrase, NL_PHRASE_REPLACEMENTS[phrase])
    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()

View File

@@ -0,0 +1,24 @@
from __future__ import annotations
import re
# Whole-string replacements for Russian: normalize_ru_text returns the mapped
# value when the complete input equals one of these keys.
RU_LINE_REPLACEMENTS = {
"План многоязычного запуска Anpassung & Integrationen Интеграции API, специфические рабочие процессы и индивидуальные блоки, адаптированные под вашу компанию.": "План многоязычного запуска Настройка и интеграции Интеграции API, специфические рабочие процессы и индивидуальные блоки, адаптированные под вашу компанию.",
"Запланировать звонок по бизнес-сайту Detailliertes Seitenlayout Разделы, ориентированные на конверсию Base prête pour le SEO Boutique en ligne Для проектов с товарами, оплатой и дальнейшим развитием e-commerce.": "Запланировать звонок по бизнес-сайту Детальная структура страниц Разделы, ориентированные на конверсию Основа, готовая для SEO Интернет-магазин Для проектов с товарами, оплатой и дальнейшим развитием e-commerce.",
"Связаться с нами Посмотреть проекты Unverbindliches Gespräch, klares Angebot Мы создаём быстрые сайты и интернет-магазины, которыми ваша команда может управлять самостоятельно.": "Связаться с нами · Посмотреть проекты · Без обязательств, понятное предложение. Мы создаём быстрые сайты и интернет-магазины, которыми ваша команда может управлять самостоятельно.",
}
# Substring replacements for Russian, applied by normalize_ru_text (longest
# key first) when no whole-string replacement matched.
RU_PHRASE_REPLACEMENTS = {
"Base prête pour le SEO": "Основа, готовая для SEO",
"Unverbindliches Gespräch, klares Angebot": "Без обязательств, понятное предложение",
}
def normalize_ru_text(text: str, field_path: str = "") -> str:
    """Normalize Russian copy.

    A whole-string match in RU_LINE_REPLACEMENTS is returned as is. Otherwise
    the RU_PHRASE_REPLACEMENTS substrings are replaced (longest source first),
    whitespace runs are collapsed to single spaces and the result is stripped.
    *field_path* is accepted but not used.
    """
    whole_line = RU_LINE_REPLACEMENTS.get(text)
    if whole_line is not None:
        return whole_line
    result = text
    for phrase in sorted(RU_PHRASE_REPLACEMENTS, key=len, reverse=True):
        result = result.replace(phrase, RU_PHRASE_REPLACEMENTS[phrase])
    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()

View File

@@ -0,0 +1,79 @@
"""
Reusable configuration helpers for mandelblog_content_guard.

Supported Django settings:
- CONTENT_GUARD_STRICT: bool
- CONTENT_GUARD_BLOCK_MEDIUM: bool
- CONTENT_GUARD_LOCALES: list[str]
- CONTENT_GUARD_REWRITE_ENABLED: bool
- CONTENT_GUARD_REWRITE_BACKEND: dotted path | None
"""
# The docstring must be the first statement to become the module's __doc__;
# a ``from __future__`` import is still allowed directly after it.
from __future__ import annotations

from django.conf import settings
# Locale codes returned by audit_default_locales() when the
# CONTENT_GUARD_LOCALES setting is not defined.
DEFAULT_LOCALES = ["nl", "en", "de", "fr", "es", "it", "pt", "ru"]
# Maps an issue level to the severity string returned by severity_for_issue().
SEVERITY = {
"CRITICAL": "block",
"HIGH": "block",
"MEDIUM": "warn",
"LOW": "log",
}
# Maps an issue type to its level; classify_issue() falls back to "LOW" for
# issue types that are not listed here.
ISSUE_LEVELS = {
"known_bad_pattern": "CRITICAL",
"wrong_language_fragment": "CRITICAL",
"rendered_bad_pattern": "CRITICAL",
"rendered_wrong_language": "CRITICAL",
"render_status": "CRITICAL",
"language_heuristic": "CRITICAL",
"cta_language_mismatch": "HIGH",
"form_language_mismatch": "HIGH",
"empty_form_copy": "HIGH",
"placeholder_value": "HIGH",
"rewrite_candidate": "MEDIUM",
"weak_marketing_copy": "MEDIUM",
"foreign_ui_label": "MEDIUM",
"generic_badge_label": "MEDIUM",
"mixed_locale_heading": "MEDIUM",
"cta_tone_check": "MEDIUM",
}
def strict_mode_enabled() -> bool:
    """Return the CONTENT_GUARD_STRICT setting (True when it is not defined)."""
    strict = getattr(settings, "CONTENT_GUARD_STRICT", True)
    return strict
def block_medium_enabled() -> bool:
    """Return the CONTENT_GUARD_BLOCK_MEDIUM setting (False when it is not defined)."""
    block_medium = getattr(settings, "CONTENT_GUARD_BLOCK_MEDIUM", False)
    return block_medium
def audit_default_locales() -> list[str]:
    """Return a new list of the locales to audit.

    Reads CONTENT_GUARD_LOCALES and falls back to DEFAULT_LOCALES; the result
    is always a copy, so callers may modify it.
    """
    configured = getattr(settings, "CONTENT_GUARD_LOCALES", DEFAULT_LOCALES)
    return list(configured)
def rewrite_enabled() -> bool:
    """Return the CONTENT_GUARD_REWRITE_ENABLED setting (True when it is not defined)."""
    enabled = getattr(settings, "CONTENT_GUARD_REWRITE_ENABLED", True)
    return enabled
def get_rewrite_backend() -> str | None:
    """Return the CONTENT_GUARD_REWRITE_BACKEND setting, or None when it is not defined."""
    backend = getattr(settings, "CONTENT_GUARD_REWRITE_BACKEND", None)
    return backend
def classify_issue(issue_type: str) -> str:
    """Return the level registered for *issue_type* in ISSUE_LEVELS, or "LOW" if unlisted."""
    level = ISSUE_LEVELS.get(issue_type, "LOW")
    return level
def severity_for_issue(issue_type: str) -> str:
    """Return the SEVERITY entry for the level of *issue_type*."""
    level = classify_issue(issue_type)
    return SEVERITY[level]
def should_block_issue(issue_type: str) -> bool:
    """Decide whether an issue of *issue_type* blocks.

    CRITICAL and HIGH levels always block. MEDIUM blocks only when both
    block_medium_enabled() and strict_mode_enabled() are truthy. Every other
    level never blocks.
    """
    level = classify_issue(issue_type)
    if level == "MEDIUM":
        return block_medium_enabled() and strict_mode_enabled()
    return level in {"CRITICAL", "HIGH"}

View File

@@ -0,0 +1,26 @@
from __future__ import annotations
from functools import lru_cache
from django.db.models.signals import pre_save
from django.dispatch import receiver
from wagtail.models import Page
from wagtail.snippets.models import get_snippet_models
from .validators.multilingual import validate_instance_or_raise
@lru_cache(maxsize=1)
def _snippet_models():
    """Return the snippet models from get_snippet_models() as a tuple.

    The result is cached after the first call.
    """
    registered = get_snippet_models()
    return tuple(registered)
def _is_snippet_instance(instance) -> bool:
    """Return True when the exact class of *instance* is one of the snippet models."""
    # Matches on the instance's own class only; subclasses are not considered.
    return instance.__class__ in _snippet_models()
@receiver(pre_save)
def enforce_multilingual_integrity(sender, instance, **kwargs):
    """Validate every Page and snippet instance right before it is saved.

    The receiver is connected without a sender, so it runs for all models and
    returns early for anything that is neither a Page nor a snippet.
    """
    # NOTE(review): the ``raw`` flag in kwargs (fixture loading) is not
    # checked, so validation also runs during loaddata — confirm this is intended.
    guarded = isinstance(instance, Page) or _is_snippet_instance(instance)
    if not guarded:
        return
    validate_instance_or_raise(instance)

View File

@@ -0,0 +1,368 @@
from __future__ import annotations
from collections.abc import Iterable
# Catalog of system strings, keyed by an internal spec name. Each spec holds:
# - "sources": tuple of source strings the spec applies to
# - "issue_type": issue type reported for those sources
# - "translations": locale code -> replacement string
# - "canonical_by_locale" (optional): locale code -> strings treated as canonical there
# - "contexts" (optional): locale code -> {context name -> replacement string}
SYSTEM_STRING_SPECS = {
"plan_badge": {
"sources": ("PLAN",),
"issue_type": "generic_badge_label",
"translations": {
"en": "Package",
"fr": "FORFAIT",
"es": "Paquete",
"ru": "Пакет",
},
"canonical_by_locale": {
"de": ("PLAN",),
"nl": ("PLAN",),
"it": ("PIANO",),
},
"contexts": {
"en": {
"badge": "Package",
"label": "Package",
"title": "Package",
"heading": "Package",
"rendered": "Package",
},
"fr": {
"badge": "FORFAIT",
"label": "FORFAIT",
"title": "FORFAIT",
"heading": "FORFAIT",
"rendered": "FORFAIT",
},
"es": {
"badge": "Paquete",
"label": "Paquete",
"title": "Paquete",
"heading": "Paquete",
"rendered": "Paquete",
},
"ru": {
"badge": "Пакет",
"label": "Пакет",
"title": "Пакет",
"heading": "Пакет",
"rendered": "Пакет",
},
},
},
"services_badge": {
"sources": ("SERVICES",),
"issue_type": "generic_badge_label",
"translations": {
"en": "Services",
"fr": "PRESTATIONS",
"pt": "SERVIÇOS",
},
"contexts": {
"en": {
"badge": "Services",
"label": "Services",
"title": "Services",
"heading": "Services",
"rendered": "Services",
},
"fr": {
"badge": "PRESTATIONS",
"label": "PRESTATIONS",
"title": "PRESTATIONS",
"heading": "PRESTATIONS",
"rendered": "PRESTATIONS",
},
"pt": {
"badge": "SERVIÇOS",
"label": "SERVIÇOS",
"title": "SERVIÇOS",
"heading": "SERVIÇOS",
"rendered": "SERVIÇOS",
},
},
},
"response_time": {
"sources": ("Reaktionszeit",),
"issue_type": "foreign_ui_label",
"translations": {
"en": "Response time",
"fr": "Temps de réponse",
"es": "Tiempo de respuesta",
"it": "Tempo di risposta",
"ru": "Время ответа",
},
},
"average_delivery": {
"sources": ("Durchschnittliche Lieferung",),
"issue_type": "foreign_ui_label",
"translations": {
"en": "Average delivery time",
"fr": "Délai moyen de livraison",
"es": "Plazo medio de entrega",
"it": "Tempo medio di consegna",
"ru": "Средний срок запуска",
},
},
"without_commitment": {
"sources": ("Unverbindlich",),
"issue_type": "foreign_ui_label",
"translations": {
"en": "No obligation",
"fr": "Sans engagement",
"es": "Sin compromiso",
"it": "Senza impegno",
"pt": "Sem compromisso",
"ru": "Без обязательств",
},
},
"transparent_label": {
"sources": ("Transparent",),
"issue_type": "foreign_ui_label",
"translations": {
"en": "Clear",
"fr": "Clair",
"es": "Transparente",
"it": "Chiaro",
"pt": "Transparente",
"ru": "Прозрачно",
},
"contexts": {
"en": {
"badge": "Clear",
"label": "Clear",
"metric": "Clear",
"stat": "Clear",
"rendered": "Clear",
},
"fr": {
"badge": "Clair",
"label": "Clair",
"metric": "Clair",
"stat": "Clair",
"rendered": "Clair",
},
"es": {
"badge": "Transparente",
"label": "Transparente",
"metric": "Transparente",
"stat": "Transparente",
"rendered": "Transparente",
},
"it": {
"badge": "Chiaro",
"label": "Chiaro",
"metric": "Chiaro",
"stat": "Chiaro",
"rendered": "Chiaro",
},
"pt": {
"badge": "Clara",
"label": "Clara",
"metric": "Investimento claro",
"stat": "Investimento claro",
"rendered": "Investimento claro",
},
"ru": {
"badge": "Прозрачно",
"label": "Прозрачно",
"metric": "Прозрачно",
"stat": "Прозрачно",
"rendered": "Прозрачно",
},
},
},
"weeks_1_2": {
"sources": ("1-2 Wochen",),
"issue_type": "weak_marketing_copy",
"translations": {
"fr": "1 à 2 semaines",
"es": "1-2 semanas",
"it": "1-2 settimane",
"pt": "1 a 2 semanas",
},
"contexts": {
"fr": {
"metric": "1 à 2 semaines",
"stat": "1 à 2 semaines",
},
"es": {
"metric": "1-2 semanas",
"stat": "1-2 semanas",
},
"it": {
"metric": "1-2 settimane",
"stat": "1-2 settimane",
},
"pt": {
"metric": "1 a 2 semanas",
"stat": "1 a 2 semanas",
},
},
},
"weeks_2_4": {
"sources": ("2-4 Wochen",),
"issue_type": "foreign_ui_label",
"translations": {
"fr": "2 à 4 semaines",
},
"contexts": {
"fr": {
"metric": "2 à 4 semaines",
"stat": "2 à 4 semaines",
},
},
},
"days_label": {
"sources": ("Tages",),
"issue_type": "weak_marketing_copy",
"translations": {
"fr": "jours",
"pt": "dias",
},
},
"customer_reviews": {
"sources": ("Kundenschätzung",),
"issue_type": "foreign_ui_label",
"translations": {
"en": "Customer rating",
"fr": "Avis clients",
"es": "Valoración de clientes",
"it": "Valutazione clienti",
"pt": "Avaliação dos clientes",
"ru": "Оценка клиентов",
},
},
"editable_label": {
"sources": ("Bearbeitbar",),
"issue_type": "foreign_ui_label",
"translations": {
"en": "Editable",
"fr": "Modifiable",
"es": "Editable",
"it": "Modificabile",
"pt": "Editável",
"ru": "Редактируемо",
},
},
"core_pages_label": {
"sources": ("Startseite + Kernseiten",),
"issue_type": "foreign_ui_label",
"translations": {
"pt": "Página inicial + páginas essenciais",
},
},
"detailed_page_structure": {
"sources": ("Detailliertes Seitenlayout",),
"issue_type": "foreign_ui_label",
"translations": {
"fr": "Structure détaillée des pages",
"es": "Estructura detallada de páginas",
"it": "Struttura dettagliata delle pagine",
"pt": "Estrutura detalhada das páginas",
"ru": "Детальная структура страниц",
},
},
"business_process_cta": {
"sources": ("Geschäftsprozess besprechen",),
"issue_type": "foreign_ui_label",
"translations": {
"fr": "Échanger sur votre processus métier",
"es": "Hablar sobre el proceso del negocio",
"pt": "Falar sobre o processo do negócio",
},
},
"multilingual_rollout": {
"sources": ("Mehrsprachige Einführung", "Mehrsprachiger Rollout-Plan"),
"issue_type": "foreign_ui_label",
"translations": {
"fr": "Déploiement multilingue",
"it": "Lancio multilingue",
"ru": "Многоязычный запуск",
},
},
"customization_integrations": {
"sources": ("Anpassung & Integrationen",),
"issue_type": "foreign_ui_label",
"translations": {
"fr": "Personnalisation & intégrations",
"es": "Personalización e integraciones",
"it": "Personalizzazioni e integrazioni",
"pt": "Personalização e integrações",
"ru": "Настройка и интеграции",
},
},
"transparent_investment": {
"sources": ("Transparente Investition",),
"issue_type": "foreign_ui_label",
"translations": {
"de": "Transparente Investition",
"en": "Transparent pricing",
"fr": "Investissement transparent",
"es": "Inversión transparente",
"it": "Investimento trasparente",
"pt": "Investimento transparente",
"ru": "Прозрачный бюджет",
},
},
}
def build_system_vocabulary(locale_code: str, keys: Iterable[str] | None = None) -> dict[str, str]:
    """Map every source string to its translation for *locale_code*.

    Args:
        locale_code: Locale whose translations are looked up.
        keys: Spec names to include; all specs are used when this is falsy.

    Returns:
        A ``{source: translation}`` dict; specs without a translation for the
        locale are left out.
    """
    vocabulary: dict[str, str] = {}
    for spec_name in tuple(keys or SYSTEM_STRING_SPECS.keys()):
        spec = SYSTEM_STRING_SPECS[spec_name]
        translation = spec.get("translations", {}).get(locale_code)
        if translation:
            vocabulary.update(dict.fromkeys(spec["sources"], translation))
    return vocabulary
def build_contextual_system_vocabulary(locale_code: str, keys: Iterable[str] | None = None) -> dict[str, dict[str, str]]:
    """Build ``{context name: {source: replacement}}`` for *locale_code*.

    Args:
        locale_code: Locale whose context replacements are looked up.
        keys: Spec names to include; all specs are used when this is falsy.

    Returns:
        A nested dict covering only specs that define contexts for the locale.
    """
    contextual: dict[str, dict[str, str]] = {}
    for spec_name in tuple(keys or SYSTEM_STRING_SPECS.keys()):
        spec = SYSTEM_STRING_SPECS[spec_name]
        per_context = spec.get("contexts", {}).get(locale_code, {})
        if per_context:
            # NOTE(review): only the first source of a spec is registered here,
            # while build_system_vocabulary maps every source — confirm intent.
            primary_source = spec["sources"][0]
            for context_name, replacement in per_context.items():
                contextual.setdefault(context_name, {})[primary_source] = replacement
    return contextual
def build_system_rewrite_candidates(keys: Iterable[str] | None = None) -> dict[str, str]:
    """Map every source string to the issue type of its spec.

    Args:
        keys: Spec names to include; all specs are used when this is falsy.
    """
    candidates: dict[str, str] = {}
    for spec_name in tuple(keys or SYSTEM_STRING_SPECS.keys()):
        spec = SYSTEM_STRING_SPECS[spec_name]
        candidates.update(dict.fromkeys(spec["sources"], spec["issue_type"]))
    return candidates
def all_system_sources() -> set[str]:
    """Return the set of all source strings across SYSTEM_STRING_SPECS."""
    return {
        source
        for spec in SYSTEM_STRING_SPECS.values()
        for source in spec["sources"]
    }
def is_canonical_system_string(locale_code: str, source: str) -> bool:
    """Tell whether *source* is already the correct string for *locale_code*.

    True when any spec lists *source* under ``canonical_by_locale`` for the
    locale, when the locale is ``"de"`` and *source* is a known system source,
    or when the locale's replacement for *source* equals *source* itself.
    """
    listed_as_canonical = any(
        source in spec.get("canonical_by_locale", {}).get(locale_code, ())
        for spec in SYSTEM_STRING_SPECS.values()
    )
    if listed_as_canonical:
        return True
    if locale_code == "de":
        return source in all_system_sources()
    replacement = system_string_replacement(locale_code, source)
    return bool(replacement) and replacement == source
def system_string_replacement(locale_code: str, source: str) -> str:
    """Return the translation of *source* for *locale_code*.

    Only the first spec whose sources contain *source* is consulted. Returns
    an empty string when no spec matches or that spec has no translation for
    the locale.
    """
    for spec in SYSTEM_STRING_SPECS.values():
        if source in spec["sources"]:
            return spec.get("translations", {}).get(locale_code, "")
    return ""

View File

@@ -0,0 +1,56 @@
from __future__ import annotations
import json
from django.test import SimpleTestCase
from mandelblog_content_guard.agents import get_language_agent
from mandelblog_content_guard.extractors.visible_text import extract_visible_rendered_text
from mandelblog_content_guard.system_strings import build_system_rewrite_candidates, build_system_vocabulary
from mandelblog_content_guard.validators.multilingual import validate_text_nodes
class PackageLevelContentGuardTests(SimpleTestCase):
    """Package-level checks for the content guard that need no database."""

    def test_system_string_replacement_catalog(self):
        """The vocabulary and rewrite-candidate builders expose known catalog entries."""
        fr_vocabulary = build_system_vocabulary("fr")
        pt_vocabulary = build_system_vocabulary("pt")
        candidates = build_system_rewrite_candidates()
        self.assertEqual(fr_vocabulary["PLAN"], "FORFAIT")
        self.assertEqual(pt_vocabulary["Unverbindlich"], "Sem compromisso")
        self.assertEqual(candidates["PLAN"], "generic_badge_label")

    def test_canonical_source_suppression(self):
        """Strings that are canonical for a locale are not reported as issues."""
        nl_issues = validate_text_nodes("nl", [("body.badge", "PLAN")])
        it_issues = validate_text_nodes("it", [("body.badge", "PIANO")])
        nl_bad_values = [issue.bad_value for issue in nl_issues]
        it_bad_values = [issue.bad_value for issue in it_issues]
        self.assertFalse("PLAN" in nl_bad_values)
        self.assertFalse("PIANO" in it_bad_values)

    def test_visible_text_extraction(self):
        """Script content and aria-hidden elements are excluded from the extracted text."""
        html = """
<html><body>
<script>var x = 1;</script>
<style>.hidden{display:none}</style>
<h1>Visible heading</h1>
<p aria-hidden="true">Invisible text</p>
<a href="#">Visible link</a>
</body></html>
"""
        extracted = extract_visible_rendered_text(html)
        for expected in ("Visible heading", "Visible link"):
            self.assertIn(expected, extracted)
        for unexpected in ("Invisible text", "var x"):
            self.assertNotIn(unexpected, extracted)

    def test_locale_normalizers(self):
        """The German and English language agents rewrite known strings."""
        german = get_language_agent("de")
        english = get_language_agent("en")
        self.assertEqual(german.rewrite("Was du bekommst", "body.heading"), "Was Sie erhalten")
        self.assertEqual(english.rewrite("PLAN", "body.badge"), "Package")

    def test_audit_json_contract_shape(self):
        """A sample payload keeps its keys through a JSON round trip."""
        # NOTE(review): this only round-trips a hand-written dict and never
        # runs the audit command — confirm whether that is the intended coverage.
        en_summary = {
            "total_urls_checked": 1,
            "issues_found": 0,
            "issues_fixed": 0,
            "remaining_issues": 0,
            "by_severity": {"block": 0, "warn": 0, "log": 0},
        }
        payload = {
            "run_id": 1,
            "summary": {"en": en_summary},
            "issues": {"en": []},
        }
        parsed = json.loads(json.dumps(payload))
        self.assertEqual(sorted(parsed.keys()), ["issues", "run_id", "summary"])
        self.assertIn("by_severity", parsed["summary"]["en"])

View File

@@ -0,0 +1,65 @@
from __future__ import annotations
from dataclasses import asdict, dataclass
from typing import Any
from .settings import classify_issue, severity_for_issue, should_block_issue
@dataclass
class AuditIssue:
    """One finding produced by the content-guard validators."""

    # Severity label; the value "block" makes ``blocks`` true on its own.
    severity: str
    # Issue category, handed to the settings helpers for classification.
    issue_type: str
    # Path of the field in which the value was found.
    field_path: str
    # Offending text or diagnostic message.
    bad_value: str
    # Suggested replacement; empty when none is known.
    replacement: str = ""
    # Optional free-form metadata.
    extra: dict[str, Any] | None = None

    @property
    def level(self) -> str:
        """Classification that ``classify_issue`` returns for the issue type."""
        return classify_issue(self.issue_type)

    @property
    def blocks(self) -> bool:
        """Whether the issue blocks, by severity or by issue-type policy."""
        if self.severity == "block":
            return True
        return should_block_issue(self.issue_type)

    def asdict(self) -> dict[str, Any]:
        """Return the fields as a dict, plus ``level``; ``extra`` is never falsy-None."""
        payload = asdict(self)
        if not payload.get("extra"):
            payload["extra"] = {}
        payload["level"] = self.level
        return payload
def make_issue(issue_type: str, field_path: str, bad_value: str, replacement: str = "", extra: dict[str, Any] | None = None) -> AuditIssue:
    """Build an ``AuditIssue`` whose severity is derived from ``issue_type``.

    A missing or empty ``extra`` is stored as a fresh empty dict.
    """
    severity = severity_for_issue(issue_type)
    metadata = extra if extra else {}
    return AuditIssue(
        severity=severity,
        issue_type=issue_type,
        field_path=field_path,
        bad_value=bad_value,
        replacement=replacement,
        extra=metadata,
    )
def dedupe_issues(issues: list[AuditIssue]) -> list[AuditIssue]:
    """Drop repeated issues, keeping the first occurrence of each in order.

    Two issues are duplicates when severity, issue type, field path and bad
    value are all equal; ``replacement`` and ``extra`` are not compared.
    """
    first_by_identity = {}
    for candidate in issues:
        identity = (
            candidate.severity,
            candidate.issue_type,
            candidate.field_path,
            candidate.bad_value,
        )
        # setdefault keeps the earliest issue and preserves insertion order.
        first_by_identity.setdefault(identity, candidate)
    return list(first_by_identity.values())
def split_issues(issues: list[AuditIssue]) -> tuple[list[AuditIssue], list[AuditIssue]]:
    """Partition ``issues`` into ``(blocking, warnings)`` by their ``blocks`` flag."""
    blocking = []
    warnings = []
    for issue in issues:
        bucket = blocking if issue.blocks else warnings
        bucket.append(issue)
    return blocking, warnings
def format_issue(issue: AuditIssue) -> str:
    """Render ``issue`` as ``[level] field_path: bad_value`` with an optional `` -> replacement``."""
    parts = [f"[{issue.level}] {issue.field_path}: {issue.bad_value}"]
    if issue.replacement:
        parts.append(f" -> {issue.replacement}")
    return "".join(parts)

View File

@@ -0,0 +1,452 @@
from __future__ import annotations
import logging
import re
from collections import Counter
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
from django.core.exceptions import ValidationError
from django.utils import timezone
from wagtail.models import Page, Site
from wagtail.snippets.models import get_snippet_models
from ..agents import get_language_agent
from ..extractors.visible_text import extract_visible_rendered_text, normalize_text
from ..settings import audit_default_locales, rewrite_enabled
from ..types import dedupe_issues, format_issue, make_issue
from .rules.cta import validate_cta
from .rules.forms import validate_form_copy
from .rules.language import detect_language_mismatch
from .rules.patterns import (
GLOBAL_BAD_PATTERNS,
KNOWN_REPLACEMENTS,
LOCALE_FORBIDDEN,
validate_patterns,
)
from mandelstudio.models import LocaleAuditIssue, LocaleAuditRun
# Module-level logger; audit_locales logs the id of each completed audit run here.
logger = logging.getLogger("mandelstudio.multilingual")
def expected_locale(instance: Any) -> str:
    """Return ``instance.locale.language_code``, or ``"nl"`` when it is missing or empty."""
    language_code = getattr(getattr(instance, "locale", None), "language_code", None)
    return language_code if language_code else "nl"
def iter_text_nodes(value: Any, path: str = ""):
    """Yield ``(path, text)`` for every string nested inside ``value``.

    Lists extend the path with ``[index]`` and dicts with ``.key`` (or the bare
    key at the root). Objects exposing ``raw_data`` are walked as a list of
    that data. Values of any other type yield nothing.
    """
    if value is None:
        return
    if isinstance(value, str):
        yield path, value
    elif hasattr(value, "raw_data"):
        yield from iter_text_nodes(list(value.raw_data), path)
    elif isinstance(value, list):
        for position, element in enumerate(value):
            yield from iter_text_nodes(element, f"{path}[{position}]")
    elif isinstance(value, dict):
        for name, element in value.items():
            nested_path = f"{path}.{name}" if path else str(name)
            yield from iter_text_nodes(element, nested_path)
def extract_instance_text(instance: Any) -> list[tuple[str, str]]:
    """Collect ``(field_path, text)`` pairs from the text-bearing fields of ``instance``.

    ``title``, ``seo_title`` and ``search_description`` are included when they
    are non-blank strings. ``body``, ``content``, ``footer`` and
    ``mini_footer`` are walked recursively when the attribute exists.
    """
    collected = []
    for attr in ("title", "seo_title", "search_description"):
        text = getattr(instance, attr, None)
        if isinstance(text, str) and text.strip():
            collected.append((attr, text))
    for attr in ("body", "content", "footer", "mini_footer"):
        if not hasattr(instance, attr):
            continue
        collected.extend(iter_text_nodes(getattr(instance, attr), attr))
    return collected
def validate_text_nodes(locale_code: str, nodes: list[tuple[str, str]]):
    """Run every text rule over ``nodes`` and return the de-duplicated issues.

    Each node is normalised first and skipped when nothing remains. The
    pattern, CTA and form rules always run; the language heuristic only runs
    on normalised text of at least 80 characters.
    """
    rule_checks = (validate_patterns, validate_cta, validate_form_copy)
    found = []
    for field_path, raw_text in nodes:
        text = normalize_text(raw_text)
        if not text:
            continue
        for check in rule_checks:
            found.extend(check(locale_code, field_path, text))
        if len(text) < 80:
            continue
        mismatch = detect_language_mismatch(locale_code, text)
        if mismatch:
            found.append(make_issue("language_heuristic", field_path, mismatch["message"]))
    return dedupe_issues(found)
# Issue types for which annotate_rewrite_previews asks the language agent for a
# suggested replacement; issues of any other type are left untouched by it.
REWRITE_REVIEW_TYPES = {
    "known_bad_pattern",
    "wrong_language_fragment",
    "rendered_bad_pattern",
    "rendered_wrong_language",
    "rewrite_candidate",
    "weak_marketing_copy",
    "foreign_ui_label",
    "generic_badge_label",
    "mixed_locale_heading",
    "cta_language_mismatch",
}
def validate_page(page: Page):
    """Validate the text of ``page.specific`` against the locale of ``page``."""
    locale_code = expected_locale(page)
    text_nodes = extract_instance_text(page.specific)
    return validate_text_nodes(locale_code, text_nodes)
def validate_snippet_instance(instance: Any):
    """Validate the text fields of a snippet ``instance`` against its own locale."""
    locale_code = expected_locale(instance)
    text_nodes = extract_instance_text(instance)
    return validate_text_nodes(locale_code, text_nodes)
def validate_posted_snippet(locale_code: str, payload: dict[str, Any]):
    """Validate the string values of a posted ``payload``; non-string values are ignored."""
    text_nodes = []
    for name, candidate in payload.items():
        if isinstance(candidate, str):
            text_nodes.append((name, candidate))
    return validate_text_nodes(locale_code, text_nodes)
def _replace_known_strings(value: Any, locale_code: str):
changes = []
if isinstance(value, str):
new = value
for bad, replacements in KNOWN_REPLACEMENTS.items():
replacement = replacements.get(locale_code)
if replacement and bad in new:
new = new.replace(bad, replacement)
changes.append({"bad": bad, "replacement": replacement})
return new, changes, new != value
if isinstance(value, list):
out = []
changed = False
for item in value:
new_item, item_changes, item_changed = _replace_known_strings(item, locale_code)
out.append(new_item)
changes.extend(item_changes)
changed = changed or item_changed
return out, changes, changed
if isinstance(value, dict):
out = {}
changed = False
for key, item in value.items():
new_item, item_changes, item_changed = _replace_known_strings(item, locale_code)
out[key] = new_item
changes.extend(item_changes)
changed = changed or item_changed
return out, changes, changed
return value, changes, False
def apply_known_replacements(instance: Any, locale_code: str):
    """Apply the known-string replacements to ``instance`` and persist them.

    Plain text fields and rich fields (``raw_data`` holders or strings) are
    rewritten in place. When anything changed, a ``Page`` gets a new revision
    (published if the page is live) and any other object is saved.

    Returns:
        A list of ``{"field", "bad", "replacement"}`` dicts, empty when nothing
        was replaced.
    """
    applied = []

    def _substitute(attr, current):
        # Rewrite one field value and remember which replacements were made.
        updated, hits, modified = _replace_known_strings(current, locale_code)
        if not modified:
            return
        setattr(instance, attr, updated)
        for hit in hits:
            applied.append({"field": attr, **hit})

    for attr in ("title", "seo_title", "search_description"):
        current = getattr(instance, attr, None)
        if isinstance(current, str):
            _substitute(attr, current)

    for attr in ("body", "content", "footer", "mini_footer"):
        if not hasattr(instance, attr):
            continue
        current = getattr(instance, attr)
        if hasattr(current, "raw_data"):
            _substitute(attr, list(current.raw_data))
        elif isinstance(current, str):
            _substitute(attr, current)

    if not applied:
        return []
    if not isinstance(instance, Page):
        instance.save()
        return applied
    revision = instance.save_revision()
    if instance.live:
        revision.publish()
    return applied
def rewrite_with_agent(instance: Any, locale_code: str, issues, *, dry_run: bool = False):
    """Let the locale's language agent rewrite the text fields of ``instance``.

    Returns an empty list when rewriting is disabled. Otherwise returns one
    change dict per rewritten field; plain text fields carry ``before`` and
    ``after``, ``raw_data`` fields only the field name and method.

    NOTE(review): with ``dry_run=True`` the rewritten values are still assigned
    to the in-memory instance; only the save / revision step is skipped.
    """
    if not rewrite_enabled():
        return []
    agent = get_language_agent(locale_code)
    issue_map = agent.build_issue_map(issues)
    changes = []

    def _rewrite_plain(attr, original):
        # Rewrite a single string field and log the before/after pair.
        candidate = agent.rewrite(original, field_path=attr, issues=issue_map.get(attr, []))
        if candidate == original:
            return
        setattr(instance, attr, candidate)
        changes.append({"field": attr, "before": original, "after": candidate, "method": "agent"})

    for attr in ("title", "seo_title", "search_description"):
        current = getattr(instance, attr, None)
        if isinstance(current, str):
            _rewrite_plain(attr, current)

    for attr in ("body", "content", "footer", "mini_footer"):
        if not hasattr(instance, attr):
            continue
        current = getattr(instance, attr)
        if hasattr(current, "raw_data"):
            rewritten, block_changed = agent.process_block(list(current.raw_data), attr, issue_map)
            if block_changed:
                setattr(instance, attr, rewritten)
                changes.append({"field": attr, "method": "agent"})
        elif isinstance(current, str):
            _rewrite_plain(attr, current)

    if dry_run or not changes:
        return changes
    if not isinstance(instance, Page):
        instance.save()
        return changes
    revision = instance.save_revision()
    if instance.live:
        revision.publish()
    return changes
def enumerate_public_pages(locale_codes: list[str] | None = None, url_filters: list[str] | None = None):
    """Return ``{locale_code: [page, ...]}`` for live, public pages.

    Pages are taken in ``path`` order. A page is kept when it has a URL, lies
    under the locale's root path (the translated root of the first site, or
    that site's root itself) and, if ``url_filters`` is given, its URL is one
    of the filters. ``locale_codes`` defaults to ``audit_default_locales()``.
    """
    first_site = Site.objects.order_by("id").first()
    site_root = getattr(first_site, "root_page", None)
    wanted_urls = set(url_filters or [])

    def _keep(page, root_path):
        url = getattr(page, "url", None)
        if not url:
            return False
        if root_path and not page.path.startswith(root_path):
            return False
        return not wanted_urls or url in wanted_urls

    pages_by_locale = {}
    for locale_code in (locale_codes or audit_default_locales()):
        root_path = None
        if site_root is not None:
            translated_root = (
                Page.objects.filter(
                    translation_key=site_root.translation_key,
                    locale__language_code=locale_code,
                )
                .specific()
                .first()
            )
            root_path = getattr(translated_root or site_root, "path", None)
        candidates = (
            Page.objects.filter(locale__language_code=locale_code)
            .live()
            .public()
            .specific()
            .order_by("path")
        )
        pages_by_locale[locale_code] = [page for page in candidates if _keep(page, root_path)]
    return pages_by_locale
def fetch_rendered_text(page: Page):
    """Fetch the public URL of ``page`` and return ``(status, visible_text)``.

    Relative page URLs are prefixed with the ``root_url`` of the page's site,
    falling back to the first site. The status is the HTTP status code, or a
    sentinel: 598 when no URL or site root is available (the text is then a
    short reason), 599 when the request failed at transport level (the text is
    then derived from the error message).
    """
    # Function-scope import: http.client is not imported at module level.
    from http.client import HTTPException

    page_url = getattr(page, "url", None)
    if not page_url:
        return 598, "missing page URL"
    if str(page_url).startswith("http"):
        full_url = page_url
    else:
        try:
            site = page.get_site()
        except Site.DoesNotExist:
            site = None
        site = site or Site.objects.order_by("id").first()
        if site is None or not getattr(site, "root_url", None):
            return 598, "missing site root_url"
        full_url = f"{site.root_url}{page_url}"
    request = Request(full_url, headers={"User-Agent": "mandelstudio-audit/1.0"})
    try:
        with urlopen(request, timeout=30) as response:
            status = response.getcode()
            body = response.read().decode("utf-8", errors="replace")
    except HTTPError as exc:
        # HTTP error responses still carry a body worth inspecting.
        status = exc.code
        body = exc.read().decode("utf-8", errors="replace")
    except (URLError, OSError, HTTPException) as exc:
        # URLError only covers failures while opening the connection. A read
        # timeout or connection reset surfaces as a plain OSError and a
        # truncated body as HTTPException; one unreachable page must not abort
        # the whole audit, so all of them map to the 599 sentinel.
        status = 599
        body = str(exc)
    text = extract_visible_rendered_text(body)
    return status, text
def iter_rendered_lines(rendered_text: str) -> list[str]:
    """Split rendered text into normalised, non-empty chunks.

    Chunks are separated by whitespace following ``.``, ``!`` or ``?``, or by
    any run of two or more whitespace characters.
    """
    pieces = re.split(r"(?<=[\.\!\?])\s+|\s{2,}", rendered_text)
    cleaned = (normalize_text(piece) for piece in pieces)
    return [line for line in cleaned if line]
def validate_rendered_output(locale_code: str, rendered_text: str, status_code: int):
    """Validate the visible text of a rendered page.

    Produces a ``render_status`` issue for any status other than 200, the
    pattern issues of every rendered line (with the whole line as
    ``bad_value`` and an occurrence count in ``extra``), and whole-text
    ``rendered_bad_pattern`` / ``rendered_wrong_language`` issues.

    Returns:
        The de-duplicated list of issues.
    """
    # Function-scope import: the helper is not imported at module level.
    from .rules.patterns import _contains_fragment

    issues = []
    if status_code != 200:
        issues.append(make_issue("render_status", "rendered", str(status_code)))
    source_counter = Counter()
    for line in iter_rendered_lines(rendered_text):
        line_issues = validate_patterns(locale_code, "rendered", line)
        for issue in line_issues:
            # Report the whole line so reviewers see the fragment in context.
            issue.bad_value = line
            issue.extra = {**(issue.extra or {}), "source": "rendered"}
            source_counter[(issue.issue_type, issue.bad_value)] += 1
        issues.extend(line_issues)
    for issue in issues:
        if issue.extra is not None:
            issue.extra["count"] = source_counter.get((issue.issue_type, issue.bad_value), 1)
    whole_text_scans = (
        ("rendered_bad_pattern", GLOBAL_BAD_PATTERNS),
        ("rendered_wrong_language", LOCALE_FORBIDDEN.get(locale_code, ())),
    )
    for issue_type, fragments in whole_text_scans:
        for fragment in fragments:
            # Use the same word-boundary-aware matcher as the per-line rules:
            # a plain substring test reports single-word fragments such as
            # "None" inside unrelated words like "Nonetheless".
            if not _contains_fragment(rendered_text, fragment):
                continue
            issue = make_issue(issue_type, "rendered", fragment, KNOWN_REPLACEMENTS.get(fragment, {}).get(locale_code, ""))
            issue.extra = {"source": "rendered", "count": 1}
            issues.append(issue)
    return dedupe_issues(issues)
def annotate_rewrite_previews(locale_code: str, issues):
    """Attach agent-suggested replacements to reviewable issues, in place.

    Only issues whose type is in ``REWRITE_REVIEW_TYPES`` and that have no
    replacement yet are considered. A suggestion is stored when the agent
    returns a non-empty text that differs from the bad value; such issues are
    also marked with ``extra["review_candidate"] = True``. Returns ``issues``.
    """
    agent = get_language_agent(locale_code)
    for issue in issues:
        needs_preview = issue.issue_type in REWRITE_REVIEW_TYPES and not issue.replacement
        if not needs_preview:
            continue
        suggestion = agent.rewrite(issue.bad_value, field_path=issue.field_path, issues=[issue])
        if not suggestion or suggestion == issue.bad_value:
            continue
        issue.replacement = suggestion
        issue.extra = {**(issue.extra or {}), "review_candidate": True}
    return issues
def validate_instance_or_raise(instance: Any):
    """Validate a page or snippet and raise if any issue blocks.

    Returns:
        All issues when none of them blocks.

    Raises:
        ValidationError: Keyed by ``"content_guard"``, listing the formatted
            blocking issues.
    """
    if isinstance(instance, Page):
        issues = validate_page(instance)
    else:
        issues = validate_snippet_instance(instance)
    blocking = [issue for issue in issues if issue.blocks]
    if blocking:
        raise ValidationError({"content_guard": [format_issue(issue) for issue in blocking]})
    return issues
def validate_ai_text_or_raise(locale_code: str, field_path: str, value: str):
    """Validate a single text value and raise if any issue blocks.

    Returns:
        All issues when none of them blocks.

    Raises:
        ValidationError: Keyed by ``"content_guard"``, listing the formatted
            blocking issues.
    """
    single_node = [(field_path, value)]
    issues = validate_text_nodes(locale_code, single_node)
    blocking = [issue for issue in issues if issue.blocks]
    if blocking:
        raise ValidationError({"content_guard": [format_issue(issue) for issue in blocking]})
    return issues
def record_issues(run: LocaleAuditRun, locale_code: str, obj: Any, issues, *, fixed: bool = False) -> None:
    """Store one ``LocaleAuditIssue`` row per issue for ``obj`` under ``run``.

    The stored title is ``obj.title`` (or ``str(obj)`` when the attribute is
    absent), cut to 255 characters.

    NOTE(review): a ``title`` attribute holding ``None`` would fail on the
    slice; confirm whether any audited model allows that.
    """
    for issue in issues:
        row = {
            "run": run,
            "locale_code": locale_code,
            "object_id": getattr(obj, "pk", None),
            "object_type": obj.__class__.__name__,
            "url": getattr(obj, "url", "") or "",
            "title": getattr(obj, "title", str(obj))[:255],
            "severity": issue.severity,
            "issue_type": issue.issue_type,
            "field_path": issue.field_path,
            "bad_value": issue.bad_value,
            "replacement": issue.replacement,
            "fixed": fixed,
            "extra": issue.extra or {},
        }
        LocaleAuditIssue.objects.create(**row)
def _issue_identity(issue) -> tuple[str, str, str, str]:
    """Return the tuple that identifies an issue across validation passes."""
    return (issue.severity, issue.issue_type, issue.field_path, issue.bad_value)


def _resolved_issues(before, after):
    """Return the issues of ``before`` whose identity is absent from ``after``."""
    still_open = {_issue_identity(issue) for issue in after}
    return [issue for issue in before if _issue_identity(issue) not in still_open]


def _collect_page_issues(page: Page, locale_code: str, *, annotate: bool):
    """Validate stored and rendered content of ``page``; return de-duplicated issues."""
    status_code, rendered = fetch_rendered_text(page)
    issues = dedupe_issues(validate_page(page) + validate_rendered_output(locale_code, rendered, status_code))
    if annotate:
        issues = annotate_rewrite_previews(locale_code, issues)
    return issues


def audit_locales(locale_codes: list[str], fix: bool = False, rewrite: bool = False, dry_run: bool = False, url_filters: list[str] | None = None) -> LocaleAuditRun:
    """Audit all public pages and all snippets and store the result as a run.

    Args:
        locale_codes: Locales whose pages are audited.
        fix: Apply the known-string replacements to pages with issues.
        rewrite: Attach rewrite previews and let the language agent rewrite
            pages and snippets with issues.
        dry_run: With ``rewrite``, compute agent changes without persisting.
        url_filters: Optional page URLs to restrict the page audit to.

    Returns:
        The finished ``LocaleAuditRun`` with totals and per-locale summary.

    Each issue is recorded once per run: issues that disappear after a fix or
    rewrite are stored with ``fixed=True``, whatever remains is stored as
    unfixed. A dry run resolves nothing, so its issues are stored as unfixed
    only.
    """
    run = LocaleAuditRun.objects.create(locale_codes=locale_codes, fix_enabled=fix or rewrite)
    pages_by_locale = enumerate_public_pages(locale_codes, url_filters=url_filters)
    summary: dict[str, Any] = {}
    total_checked = 0
    total_issues = 0
    pages_with_issues = 0
    for locale_code, pages in pages_by_locale.items():
        locale_summary = {"total_urls_checked": len(pages), "issues_found": 0, "issues_fixed": 0, "remaining_issues": 0, "by_severity": {"block": 0, "warn": 0, "log": 0}}
        for page in pages:
            total_checked += 1
            issues = _collect_page_issues(page, locale_code, annotate=rewrite)
            initial_issue_count = len(issues)
            if issues and fix:
                fixed_changes = apply_known_replacements(page.specific, locale_code)
                if fixed_changes:
                    previous = issues
                    issues = _collect_page_issues(page.specific, locale_code, annotate=rewrite)
                    # Only what the re-check no longer reports counts as fixed;
                    # the survivors are recorded once, as unfixed, below.
                    record_issues(run, locale_code, page, _resolved_issues(previous, issues), fixed=True)
            if issues and rewrite:
                rewrite_changes = rewrite_with_agent(page.specific, locale_code, issues, dry_run=dry_run)
                # A dry run persists nothing, so there is nothing to re-check
                # and nothing to mark as fixed.
                if rewrite_changes and not dry_run:
                    previous = issues
                    issues = _collect_page_issues(page.specific, locale_code, annotate=True)
                    record_issues(run, locale_code, page, _resolved_issues(previous, issues), fixed=True)
            if issues:
                pages_with_issues += 1
                record_issues(run, locale_code, page, issues)
            locale_summary["issues_found"] += initial_issue_count
            locale_summary["issues_fixed"] += initial_issue_count - len(issues)
            locale_summary["remaining_issues"] += len(issues)
            for issue in issues:
                locale_summary["by_severity"][issue.severity] = locale_summary["by_severity"].get(issue.severity, 0) + 1
            total_issues += initial_issue_count
        summary[locale_code] = locale_summary
    snippet_summary = {}
    for model in get_snippet_models():
        count = 0
        for instance in model.objects.all():
            snippet_locale = expected_locale(instance)
            issues = validate_snippet_instance(instance)
            if rewrite:
                issues = annotate_rewrite_previews(snippet_locale, issues)
            if issues and rewrite:
                rewrite_changes = rewrite_with_agent(instance, snippet_locale, issues, dry_run=dry_run)
                if rewrite_changes and not dry_run:
                    issues = validate_snippet_instance(instance)
            if not issues:
                continue
            count += len(issues)
            record_issues(run, snippet_locale, instance, issues)
        if count:
            snippet_summary[model.__name__] = count
            total_issues += count
    summary["snippets"] = snippet_summary
    run.total_urls_checked = total_checked
    run.issues_found = total_issues
    run.pages_with_issues = pages_with_issues
    run.summary = summary
    run.finished_at = timezone.now()
    run.save(update_fields=["total_urls_checked", "issues_found", "pages_with_issues", "summary", "finished_at"])
    logger.info("Completed multilingual audit run %s", run.pk)
    return run

View File

@@ -0,0 +1,146 @@
from __future__ import annotations
import re
from ...types import make_issue
# Per-locale regular expressions for call-to-action texts. validate_cta accepts
# a CTA as soon as any pattern of the locale matches (via re.search; every
# pattern is anchored at the start with "^").
CTA_RULES = {
    "nl": (
        r"^Plan ",
        r"^Bekijk ",
        r"^Vraag ",
        r"^Bespreek ",
        r"^Contact$",
        r"^Start ",
        r"^Meer ",
        r"^Verstuur ",
        r"^Neem ",
    ),
    "en": (
        r"^Book ",
        r"^View ",
        r"^Schedule ",
        r"^Start ",
        r"^Talk ",
        r"^Discuss ",
        r"^Contact$",
        r"^Explore ",
        r"^Learn ",
        r"^Request ",
        r"^Send ",
    ),
    "de": (
        r"^Plan",
        r"^Mehr",
        r"^Support",
        r"^Start",
        r"^Kontakt",
        r"^Gespr",
        r"^Kostenlose",
        r"^Anfrage",
        r"^Projekte",
        r"^Verein",
        r"^Besprech",
        r"^Anzeig",
        r"^Ansehen",
        r"^Technisch",
        r"^Unterst",
        r"^Unsere",
        r"^Service",
        r"^Dienstleistungen",
        r"^Erstgespräch",
        r"^Einführ",
        r"^Anpassung",
        r"^Ansichts",
        r"^Prozess",
        r"^Pakete",
        r"^Demo",
        r"^Alle ",
        r"^Ein ",
        r"^Webshop",
    ),
    "fr": (
        r"^Planifier",
        r"^Voir",
        r"^Découvrir",
        r"^Demander",
        r"^Lancer",
        r"^Démarrer",
        r"^Contacter",
        r"^Contact$",
        r"^Parler",
        r"^Lancez",
        r"^Prendre",
        r"^Envoyer",
        r"^Afficher",
    ),
    "es": (
        r"^Reservar",
        r"^Ver",
        r"^Solicitar",
        r"^Inicia",
        r"^Hablar",
        r"^Descubrir",
        r"^Contactar",
        r"^Planificar",
        r"^Programe",
        r"^Concertar",
        r"^Enviar",
        r"^Mostrar",
        r"^Comenta",
    ),
    "it": (
        r"^Prenota",
        r"^Vedi",
        r"^Avvia",
        r"^Richiedi",
        r"^Contatta",
        r"^Contatto$",
        r"^Scopri",
        r"^Pianifica",
        r"^Invia",
        r"^Mostra",
        r"^Parla",
        r"^Parliamo",
    ),
    "pt": (
        r"^Agendar",
        r"^Ver",
        r"^Iniciar",
        r"^Pedir",
        r"^Contactar",
        r"^Falar",
        r"^Explorar",
        r"^Marcar",
        r"^Solicitar",
        r"^Enviar",
        r"^Mostrar",
    ),
    "ru": (
        r"^Заплан",
        r"^Посмотр",
        r"^Запуст",
        r"^Связ",
        r"^Подробнее",
        r"^Показать",
        r"^Отправ",
        r"^Получ",
        r"^Запрос",
    ),
}
# Field names treated as call-to-action texts; validate_cta compares them with
# the last dot-separated segment of the field path.
CTA_FIELDS = {
    "cta_text",
    "primary_cta_text",
    "secondary_cta_text",
    "submit_button_text",
}
def validate_cta(locale_code: str, field_path: str, normalized: str):
    """Check that a call-to-action text matches one of the locale's CTA patterns.

    Only fields whose last path segment is in ``CTA_FIELDS`` are checked.
    Returns an empty list when the field is not a CTA or a pattern matches,
    otherwise a single ``cta_language_mismatch`` issue.
    """
    leaf = field_path.rpartition(".")[2]
    if leaf not in CTA_FIELDS:
        return []
    for pattern in CTA_RULES.get(locale_code, ()):
        if re.search(pattern, normalized):
            return []
    return [make_issue("cta_language_mismatch", field_path, normalized)]

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from ...types import make_issue
from .patterns import PLACEHOLDER_VALUES
from .language import detect_language_mismatch
# Field names treated as form copy; validate_form_copy compares them with the
# last dot-separated segment of the field path.
FORM_FIELDS = {"label", "placeholder", "help_text"}
def validate_form_copy(locale_code: str, field_path: str, normalized: str):
    """Check form labels, placeholders and help texts.

    Only fields whose last path segment is in ``FORM_FIELDS`` are checked.
    Reports ``empty_form_copy`` for empty or placeholder values and
    ``form_language_mismatch`` when the language heuristic fires.
    """
    leaf = field_path.rpartition(".")[2]
    if leaf not in FORM_FIELDS:
        return []
    found = []
    is_placeholder = normalized in PLACEHOLDER_VALUES
    if is_placeholder or normalized == "":
        found.append(make_issue("empty_form_copy", field_path, normalized))
    mismatch = detect_language_mismatch(locale_code, normalized)
    if mismatch:
        found.append(make_issue("form_language_mismatch", field_path, mismatch["message"]))
    return found

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
import re
# Ten lower-case stop words per locale. detect_language_mismatch counts how
# many tokens of a text fall into each set and compares the counts.
STOPWORDS = {
    "nl": {"de", "het", "een", "en", "voor", "met", "van", "je", "wij", "niet"},
    "en": {"the", "and", "for", "with", "your", "you", "from", "that", "this", "not"},
    "de": {"der", "die", "das", "und", "mit", "für", "nicht", "eine", "ist", "sie"},
    "fr": {"le", "la", "les", "et", "avec", "pour", "vous", "une", "pas", "des"},
    "es": {"el", "la", "los", "las", "con", "para", "una", "que", "del", "por"},
    "it": {"il", "la", "con", "per", "una", "che", "del", "non", "gli", "dei"},
    "pt": {"o", "a", "os", "as", "com", "para", "uma", "que", "não", "dos"},
    "ru": {"и", "в", "на", "с", "для", "что", "это", "как", "по", "не"},
}
def _tokenize(text: str) -> list[str]:
text = re.sub(r"<[^>]+>", " ", text)
return re.findall(r"[\w\u0400-\u04FF']+", text.lower())
def detect_language_mismatch(locale_code: str, text: str):
    """Guess from stop-word counts whether ``text`` is written in another locale.

    Returns ``None`` for texts with fewer than 12 tokens, or when the expected
    locale scores at least as high as every other one. Otherwise returns a
    dict with ``"severity"`` and ``"message"``: ``"block"`` when the foreign
    score is at least 6 and at least 4 above the expected score, ``"warn"``
    when the expected score is 0 and the foreign score is at least 5. Any
    other case yields ``None``.
    """
    tokens = _tokenize(text)
    if len(tokens) < 12:
        return None
    scores = {}
    for code, words in STOPWORDS.items():
        scores[code] = sum(token in words for token in tokens)
    expected = scores.get(locale_code, 0)
    # max() keeps the first locale in table order among equal scores.
    foreign_locale = max(scores, key=scores.get)
    foreign_score = scores[foreign_locale]
    if foreign_locale == locale_code or expected >= foreign_score:
        return None
    message = f"expected={locale_code}, detected={foreign_locale}, score={foreign_score}, expected_score={expected}"
    if foreign_score >= 6 and foreign_score >= expected + 4:
        return {"severity": "block", "message": message}
    if expected == 0 and foreign_score >= 5:
        return {"severity": "warn", "message": message}
    return None

View File

@@ -0,0 +1,269 @@
from __future__ import annotations
import re
from ...types import make_issue
from ...system_strings import (
build_system_rewrite_candidates,
is_canonical_system_string,
system_string_replacement,
)
# Fragments that validate_patterns reports as "known_bad_pattern" in every
# locale.
GLOBAL_BAD_PATTERNS = (
    "The Spanish translation",
    "The Spanish translation of",
    "As the input",
    "The input",
    "Poiché l'input",
    'Unternehmen" è tedesco',
    "Support anzeigen",
    "Starter intake",
    "Business intake",
    "Plan Starter intake",
    "Plan Business intake",
    "Plan de admisión",
    "None",
)
# Per-locale fragments that validate_patterns reports as
# "wrong_language_fragment" when they occur in content of that locale.
LOCALE_FORBIDDEN = {
    "nl": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Plan de admisión"),
    "en": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión"),
    "de": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión"),
    "fr": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Plan de admisión", "Support anzeigen"),
    "es": ("Poiché", 'Unternehmen" è tedesco', "Support anzeigen", "Questions fréquemment posées"),
    "it": ("Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios", "Questions fréquentes", "Plan de admisión", "Correo electrónico"),
    "pt": ("Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios", "Correo electrónico", 'Unternehmen" è tedesco', "Questions fréquemment posées"),
    "ru": ("Poiché l'input", "Consulta inicial sin compromiso", "Correo electrónico", 'Unternehmen" è tedesco', "Mostrar los servicios"),
}
# Values that stand in for missing content; the forms rule module imports this
# set and reports matching form copy as "empty_form_copy".
PLACEHOLDER_VALUES = {"None", "-", "N/A", "null"}
# Badge texts that validate_patterns reports as "generic_badge_label" when the
# whole normalised text equals one of them and it is not the canonical system
# string for the locale.
GENERIC_BADGE_LABELS = {
    "New",
    "Popular",
    "PLAN",
    "PIANO",
    "SERVICES",
}
# Mapping of fragment -> issue type checked in every locale; validate_patterns
# skips a fragment that is the canonical system string for the locale.
# NOTE(review): the names below are presumably system-string spec keys passed
# to build_system_rewrite_candidates; confirm against system_strings.
GLOBAL_REWRITE_CANDIDATES = {
    **build_system_rewrite_candidates(
        (
            "days_label",
            "average_delivery",
            "response_time",
            "without_commitment",
            "transparent_label",
            "weeks_1_2",
            "customer_reviews",
            "editable_label",
            "core_pages_label",
            "detailed_page_structure",
            "business_process_cta",
            "multilingual_rollout",
            "customization_integrations",
            "transparent_investment",
        )
    ),
}
# Per-locale mapping of fragment -> issue type; validate_patterns reports a
# fragment with the mapped issue type when it occurs in content of that locale.
LOCALE_REWRITE_CANDIDATES = {
    "en": {
        "Service packages (from) Transparent starting points.": "foreign_ui_label",
        "Frequently Asked Questions Transparent about planning, approach, and management.": "foreign_ui_label",
        "Transparent investment": "foreign_ui_label",
    },
    "de": {
        "New": "weak_marketing_copy",
        "Intakegespräch": "weak_marketing_copy",
        "SEO-ready basis": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
        "Continuous Verbesserung": "foreign_ui_label",
        "Was du bekommst": "weak_marketing_copy",
        "Einführungsmeeting": "weak_marketing_copy",
        "Starter Website": "weak_marketing_copy",
        "Business Website": "weak_marketing_copy",
        "Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.": "foreign_ui_label",
    },
    "es": {
        "Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "pt": {
        "Siti web e negozi online": "mixed_locale_heading",
        "Caso de cliente en directo": "weak_marketing_copy",
        "El primer proyecto de producción finalizado con éxito.": "weak_marketing_copy",
        "Más sobre el proceso": "foreign_ui_label",
        "Modifiez simplement vous-même.": "foreign_ui_label",
        "Opciones de la tienda web": "foreign_ui_label",
        "Planes de soporte": "foreign_ui_label",
        "Multilingüe": "foreign_ui_label",
        "Unsere Serviços": "mixed_locale_heading",
        "Elija el camino": "mixed_locale_heading",
        "Début en direct": "foreign_ui_label",
        "Demande d'admission initiale": "foreign_ui_label",
        "Site Web d'Entreprise": "foreign_ui_label",
        "Hablar sobre el proceso empresarial": "foreign_ui_label",
        "Mise en place de boutique en ligne": "foreign_ui_label",
        "Maintenance & gestion": "foreign_ui_label",
        "Afficher le plan de soutien": "foreign_ui_label",
        "Introducción multilingüe": "foreign_ui_label",
        "Forfaits de services (à partir de)": "mixed_locale_heading",
        "Kundenschätzung": "foreign_ui_label",
        "Gestisca lei stesso il contenuto": "foreign_ui_label",
        "Optimizado para móviles": "foreign_ui_label",
        "Schnell online mit einer starken Basis": "weak_marketing_copy",
        "La entrada \"Unterstützung oder Erweiterung\"": "foreign_ui_label",
        "Suivi + corrections": "foreign_ui_label",
        "Mejoras mensuales": "foreign_ui_label",
        "¿A qué velocidad puede comenzar?": "foreign_ui_label",
        "¿Puedo editar textos e imágenes yo mismo?": "foreign_ui_label",
        "Transparente sobre o planejamento, o processo e a gestão.": "foreign_ui_label",
        "Ab 2.250 €": "foreign_ui_label",
        "Boutique en ligne": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
    },
    "fr": {
        "Erstes Produktionsprojekt erfolgreich abgeschlossen.": "weak_marketing_copy",
        "Von Kickoff bis zum Launch mit einem klaren Umfang.": "foreign_ui_label",
        "Demande d'admission initiale": "weak_marketing_copy",
        "Entretien d'accueil": "weak_marketing_copy",
        "Vraag over diensten": "foreign_ui_label",
        "Konkrete erste Schätzung": "foreign_ui_label",
        "Ansatz, der zu Ihrem Budget passt": "foreign_ui_label",
        **build_system_rewrite_candidates(("weeks_2_4",)),
        "Bereit, mit der Business-Website zu starten?": "foreign_ui_label",
    },
    "it": {
        "Planificación clara": "foreign_ui_label",
        "Mehrsprachiger Rollout-Plan": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "ru": {
        "Base prête pour le SEO": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
}
# Maps a known bad fragment to its replacement text per locale. Lookups go
# through ``.get(locale_code)``, so a locale missing from an entry simply has
# no replacement for that fragment.
KNOWN_REPLACEMENTS = {
    "Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        "fr": "Planifier l'entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l'entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    "Plan Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        "fr": "Planifier l'entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Plan Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l'entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    "Mostrar los servicios": {
        "es": "Mostrar los servicios",
        "it": "Vedi servizi",
        "pt": "Ver serviços",
        "ru": "Показать услуги",
    },
    "Correo electrónico": {"pt": "E-mail", "ru": "Электронная почта"},
    'Unternehmen" è tedesco, non olandese. La traduzione spagnola di "Unternehmen" è "empresa".': {
        "pt": "Empresa",
        "ru": "Компания",
    },
    'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco (non in olandese), la traduzione in spagnolo è: "Consulta inicial sin compromiso".': {
        "it": "Senza impegno",
        "pt": "Sem compromisso",
        "ru": "Без обязательств",
        "es": "Consulta inicial sin compromiso",
    },
}
def _contains_fragment(text: str, fragment: str) -> bool:
if re.fullmatch(r"[\wÀ-ÿ-]+", fragment, flags=re.UNICODE):
pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(fragment)}(?![\wÀ-ÿ-])", re.UNICODE)
return bool(pattern.search(text))
return fragment in text
def validate_patterns(locale_code: str, field_path: str, normalized: str):
    """Report every known problematic fragment found in ``normalized``.

    Checks, in this order: global bad patterns, fragments forbidden for the
    locale, generic badge labels (exact match only), global rewrite candidates
    and locale rewrite candidates. Canonical system strings of the locale are
    not reported as badge labels or global rewrite candidates.

    Returns:
        The issues in the order they were found; duplicates are not removed.
    """
    issues = []

    def _flag(issue_type, fragment, replacement):
        issues.append(make_issue(issue_type, field_path, fragment, replacement))

    def _known_replacement(fragment):
        return KNOWN_REPLACEMENTS.get(fragment, {}).get(locale_code, "")

    for fragment in GLOBAL_BAD_PATTERNS:
        if _contains_fragment(normalized, fragment):
            _flag("known_bad_pattern", fragment, _known_replacement(fragment))

    for fragment in LOCALE_FORBIDDEN.get(locale_code, ()):
        if _contains_fragment(normalized, fragment):
            _flag("wrong_language_fragment", fragment, _known_replacement(fragment))

    is_badge = normalized in GENERIC_BADGE_LABELS
    if is_badge and not is_canonical_system_string(locale_code, normalized):
        _flag("generic_badge_label", normalized, system_string_replacement(locale_code, normalized))

    for fragment, issue_type in GLOBAL_REWRITE_CANDIDATES.items():
        if not _contains_fragment(normalized, fragment):
            continue
        if is_canonical_system_string(locale_code, fragment):
            continue
        _flag(issue_type, fragment, system_string_replacement(locale_code, fragment))

    locale_candidates = LOCALE_REWRITE_CANDIDATES.get(locale_code, {})
    for fragment, issue_type in locale_candidates.items():
        if _contains_fragment(normalized, fragment):
            _flag(issue_type, fragment, system_string_replacement(locale_code, fragment))

    return issues

7
mandelstudio/apps.py Normal file
View File

@@ -0,0 +1,7 @@
from django.apps import AppConfig
class MandelstudioConfig(AppConfig):
    """Django application configuration for the ``mandelstudio`` app."""

    # Models without an explicit primary key get a BigAutoField.
    default_auto_field = "django.db.models.BigAutoField"
    name = "mandelstudio"
    verbose_name = "Mandelstudio"

View File

@@ -0,0 +1 @@
from mandelblog_content_guard import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.base import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.de import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.en import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.es import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.fr import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.it import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.nl import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.pt import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.agents.ru import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.ai import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.hooks import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.mixins import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.normalizers import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.normalizers.de import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.normalizers.en import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.normalizers.es import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.normalizers.it import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.normalizers.nl import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.normalizers.ru import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.settings import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.signals import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.system_strings import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.types import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.validators import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.validators.multilingual import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.validators.rules import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.validators.rules.cta import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.validators.rules.forms import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.validators.rules.language import * # noqa: F401,F403

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.validators.rules.patterns import * # noqa: F401,F403

View File

@@ -1,5 +1,11 @@
import os import os
import sys import sys
from pathlib import Path
# Directory two levels above this file; prepended to sys.path (once) so that
# packages living next to the project package can be imported.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))
def _patch_legacy_django_translation_aliases(): def _patch_legacy_django_translation_aliases():

View File

View File

@@ -0,0 +1 @@
from mandelblog_content_guard.management.commands.audit_locales import Command # noqa: F401

101
mandelstudio/models.py Normal file
View File

@@ -0,0 +1,101 @@
import uuid
from django.db import models
from django.utils.translation import gettext_lazy as _
from wagtail.admin.panels import FieldPanel
from wagtail.blocks import RichTextBlock
from wagtail.contrib.settings.models import BaseSiteSetting
from wagtail.fields import StreamField
from wagtail.models import Locale, Site, TranslatableMixin
from wagtail.snippets.models import register_snippet
from ocyan.plugin.wagtail.block_plugin import get_extra_ocyan_settings_blocks
from ocyan.plugin.wagtail.blocks import (
AboutUsBlock,
HeadedPagelistBlock,
HeadedRichTextBlock,
)
from mandelblog_content_guard.mixins import MultilingualValidationMixin
@register_snippet
class LocalizedFooterContent(MultilingualValidationMixin, TranslatableMixin, models.Model):
    """Snippet holding footer and mini-footer stream content for one site and locale.

    A unique constraint allows at most one row per ``(site, locale)`` pair.
    """

    title = models.CharField(max_length=120, default="Footer content")
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
        related_name="localized_footer_contents",
    )
    # NOTE(review): TranslatableMixin presumably already provides ``locale`` and
    # ``translation_key``; they look redeclared here so that ``locale`` can be
    # exposed through a FieldPanel below - confirm before removing either.
    locale = models.ForeignKey(Locale, on_delete=models.PROTECT, related_name="+")
    translation_key = models.UUIDField(default=uuid.uuid4, editable=False)

    # Main footer: three fixed block types plus whatever the ocyan plugin
    # registry contributes via get_extra_ocyan_settings_blocks().
    footer = StreamField(
        [
            ("about_us", AboutUsBlock()),
            ("text", HeadedRichTextBlock()),
            ("page_list", HeadedPagelistBlock()),
        ]
        + get_extra_ocyan_settings_blocks(),
        default=list,
        use_json_field=True,
    )
    # Secondary footer made of rich-text blocks only.
    mini_footer = StreamField(
        [("text", RichTextBlock())],
        default=list,
        use_json_field=True,
    )

    panels = [
        FieldPanel("title"),
        FieldPanel("site"),
        FieldPanel("locale"),
        FieldPanel("footer"),
        FieldPanel("mini_footer"),
    ]

    class Meta(TranslatableMixin.Meta):
        verbose_name = _("Localized footer content")
        verbose_name_plural = _("Localized footer contents")
        constraints = [
            models.UniqueConstraint(
                fields=["site", "locale"],
                name="unique_localized_footer_per_site_locale",
            ),
        ]

    def __str__(self):
        """Return ``"<site hostname> [<locale language code>]"``."""
        hostname = self.site.hostname
        language_code = self.locale.language_code
        return f"{hostname} [{language_code}]"
class LocaleAuditRun(models.Model):
    """One recorded locale-audit execution together with its aggregate counters.

    Individual findings are attached through the ``issues`` reverse relation
    declared on ``LocaleAuditIssue.run``.
    """

    # Set automatically when the row is first saved (auto_now_add).
    started_at = models.DateTimeField(auto_now_add=True)
    # Nullable; empty until a finish time is stored.
    finished_at = models.DateTimeField(null=True, blank=True)
    locale_codes = models.JSONField(default=list, blank=True)
    fix_enabled = models.BooleanField(default=False)

    # Aggregate counters for the run.
    total_urls_checked = models.PositiveIntegerField(default=0)
    issues_found = models.PositiveIntegerField(default=0)
    pages_with_issues = models.PositiveIntegerField(default=0)
    # Free-form JSON summary; defaults to an empty dict.
    summary = models.JSONField(default=dict, blank=True)

    class Meta:
        # Newest runs first.
        ordering = ["-started_at"]

    def __str__(self):
        """Return a short label with the run id and its headline counters."""
        return (
            f"Locale audit run #{self.pk}: "
            f"{self.issues_found} issue(s) across {self.total_urls_checked} URL(s)"
        )
class LocaleAuditIssue(models.Model):
    """A single finding recorded for a ``LocaleAuditRun``.

    Rows are deleted together with their run (``on_delete=CASCADE``) and are
    reachable from the run through ``run.issues``.
    """

    run = models.ForeignKey(
        LocaleAuditRun, related_name="issues", on_delete=models.CASCADE
    )
    locale_code = models.CharField(max_length=12)

    # Optional reference to the audited object; all of these may be left blank.
    object_id = models.PositiveIntegerField(null=True, blank=True)
    object_type = models.CharField(max_length=128, blank=True)
    url = models.TextField(blank=True)
    title = models.CharField(max_length=255, blank=True)

    # Classification of the finding.
    severity = models.CharField(max_length=16)
    issue_type = models.CharField(max_length=64)

    # Where the offending value was found and what it should become.
    field_path = models.CharField(max_length=512, blank=True)
    bad_value = models.TextField(blank=True)
    replacement = models.TextField(blank=True)
    fixed = models.BooleanField(default=False)
    # Free-form JSON payload; defaults to an empty dict.
    extra = models.JSONField(default=dict, blank=True)

    class Meta:
        ordering = ["locale_code", "url", "field_path"]

    def __str__(self):
        """Return a short label: locale, severity, issue type and location."""
        location = self.field_path or self.url
        return f"[{self.locale_code}] {self.severity} {self.issue_type}: {location}"

View File

@@ -9,6 +9,7 @@ https://docs.djangoproject.com/en/2.0/ref/settings/
""" """
from pathlib import Path from pathlib import Path
import sys
from configtype.jsonconfig import setup_search_paths from configtype.jsonconfig import setup_search_paths
@@ -20,7 +21,10 @@ setup_search_paths("/etc/ocyan/", str(_project_app_path))
from ocyan.main.settings import * # pylint:disable=W0401,W0614 from ocyan.main.settings import * # pylint:disable=W0401,W0614
INSTALLED_APPS = ["mandelstudio"] + INSTALLED_APPS INSTALLED_APPS = [
"mandelblog_content_guard.apps.MandelblogContentGuardConfig",
"mandelstudio",
] + INSTALLED_APPS
# Enable request language negotiation. # Enable request language negotiation.
if "django.middleware.locale.LocaleMiddleware" not in MIDDLEWARE: if "django.middleware.locale.LocaleMiddleware" not in MIDDLEWARE:
@@ -64,3 +68,14 @@ ACTIVE_VERTICAL = "agency"
# Wagtail content internationalization in admin # Wagtail content internationalization in admin
WAGTAIL_I18N_ENABLED = True WAGTAIL_I18N_ENABLED = True
WAGTAIL_CONTENT_LANGUAGES = LANGUAGES WAGTAIL_CONTENT_LANGUAGES = LANGUAGES
# Content-guard configuration.
# NOTE(review): the meaning of each flag is defined by mandelblog_content_guard,
# which is not visible here - confirm semantics there before changing values.
CONTENT_GUARD_STRICT = True
CONTENT_GUARD_BLOCK_MEDIUM = False
# One entry per language code declared in LANGUAGES.
CONTENT_GUARD_LOCALES = [code for code, _label in LANGUAGES]
CONTENT_GUARD_REWRITE_ENABLED = True
CONTENT_GUARD_REWRITE_BACKEND = None
# When "test" appears on the command line, copy any existing MIGRATION_MODULES
# mapping and point the "template_engine" app at the project-local migration
# package instead.
# NOTE(review): indentation was lost in this view; TEST_RUNNER presumably also
# belongs inside the `if` body - confirm against the real file.
if "test" in sys.argv:
MIGRATION_MODULES = globals().get("MIGRATION_MODULES", {}).copy()
MIGRATION_MODULES["template_engine"] = "mandelstudio.test_migrations.template_engine"
TEST_RUNNER = "django.test.runner.DiscoverRunner"

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0001_initial").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0002_templateenginesitesettings").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0003_templateenginesitesettings_nav_items").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0004_alter_basehomepage_body_alter_basestandardpage_body").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0005_templateenginesitesettings_header_variant_and_more").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0006_templateenginesitesettings_footer_dynamic_fields").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0007_templateenginesitesettings_header_cta_fields").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0008_templateenginesitesettings_footer_bottom_links_and_more").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0009_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0010_enginepage_and_more").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0011_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0012_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0013_engineblockpreset").Migration

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0014_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration

View File

@@ -0,0 +1,21 @@
from django.db import migrations
def _ensure_navitem_table(apps, schema_editor):
try:
model = apps.get_model("template_engine", "TemplateEngineNavItem")
except LookupError:
return
existing = set(schema_editor.connection.introspection.table_names())
if model._meta.db_table not in existing:
schema_editor.create_model(model)
class Migration(migrations.Migration):
    """Data-free migration that runs ``_ensure_navitem_table`` on apply.

    The reverse direction is a no-op, so unapplying leaves any created table
    in place.
    """

    # Runs after template_engine migration 0014.
    dependencies = [
        (
            "template_engine",
            "0014_alter_basehomepage_body_alter_basestandardpage_body_and_more",
        ),
    ]

    operations = [
        migrations.RunPython(
            code=_ensure_navitem_table,
            reverse_code=migrations.RunPython.noop,
        ),
    ]

View File

@@ -0,0 +1,2 @@
from importlib import import_module
# Re-export the upstream ocyan migration class unchanged. import_module() is
# required because a module name that starts with a digit cannot be written in
# a regular import statement.
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0016_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,181 @@
from __future__ import annotations
import json
from io import StringIO
from unittest import mock
from django.core.management import call_command
from django.test import SimpleTestCase, override_settings
from mandelblog_content_guard.agents import get_language_agent
from mandelblog_content_guard.ai import rewrite_ai_output
from mandelblog_content_guard.system_strings import build_system_rewrite_candidates, build_system_vocabulary
from mandelblog_content_guard.types import split_issues
from mandelblog_content_guard.validators.multilingual import extract_visible_rendered_text, validate_text_nodes
class ContentGuardRuleTests(SimpleTestCase):
    """Rule-level tests for the content-guard validators, agents and system strings."""

    def test_mixed_language_detection_blocks(self):
        """Mixed-language text in a ``pt`` field yields a blocking known_bad_pattern issue."""
        nodes = [("body.hero_text", 'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco')]
        found = validate_text_nodes("pt", nodes)
        blocking, _ignored = split_issues(found)
        self.assertTrue(blocking)
        self.assertTrue(any(entry.issue_type == "known_bad_pattern" for entry in blocking))

    def test_cta_mismatch_detection_blocks(self):
        """A foreign CTA label in an ``en`` field yields a blocking cta_language_mismatch."""
        found = validate_text_nodes("en", [("body.cta_text", "Plan kennismaking")])
        blocking, _ignored = split_issues(found)
        self.assertTrue(any(entry.issue_type == "cta_language_mismatch" for entry in blocking))

    def test_form_validation_blocks_wrong_language(self):
        """A foreign form label in a ``ru`` field blocks under either accepted issue type."""
        found = validate_text_nodes("ru", [("body.form.label", "Correo electrónico")])
        blocking, _ignored = split_issues(found)
        accepted_types = {"known_bad_pattern", "form_language_mismatch"}
        self.assertTrue(any(entry.issue_type in accepted_types for entry in blocking))

    @override_settings(CONTENT_GUARD_BLOCK_MEDIUM=True)
    def test_medium_can_be_blocked_in_strict_mode(self):
        """With CONTENT_GUARD_BLOCK_MEDIUM on, a language_heuristic issue is blocking."""
        sample = "le la les et avec pour vous une pas des extra words to trigger heuristic"
        found = validate_text_nodes("en", [("body.summary", sample)])
        blocking, _ignored = split_issues(found)
        self.assertTrue(any(entry.issue_type == "language_heuristic" for entry in blocking))

    def test_language_agent_registry(self):
        """The registry returns a ``pt`` agent that normalizes the contact CTA."""
        pt_agent = get_language_agent("pt")
        self.assertEqual(pt_agent.locale, "pt")
        self.assertEqual(pt_agent.normalize_cta("contact"), "Agendar reunião introdutória")

    def test_agent_rewrite_cleans_explanation_artifact(self):
        """The ``ru`` agent reduces an explanation artifact to the translated phrase."""
        ru_agent = get_language_agent("ru")
        artifact = 'Soporte y crecimiento" is Spanish, not Dutch. The translation from Spanish to Russian is: "Поддержка и рост".'
        self.assertEqual(ru_agent.rewrite(artifact, "body.headline"), "Поддержка и рост")

    def test_portuguese_agent_contextual_badge_rewrite(self):
        """The ``pt`` agent rewrites badge and metric labels."""
        pt_agent = get_language_agent("pt")
        cases = (
            ("SERVICES", "body.cards[0].badge", "SERVIÇOS"),
            ("Transparent", "body.metrics[0].label", "Investimento claro"),
        )
        for source, field_path, expected in cases:
            self.assertEqual(pt_agent.rewrite(source, field_path), expected)

    def test_french_agent_contextual_badge_rewrite(self):
        """The ``fr`` agent rewrites badge and card labels."""
        fr_agent = get_language_agent("fr")
        cases = (
            ("PLAN", "body.cards[0].badge", "FORFAIT"),
            ("Transparent", "body.cards[0].label", "Clair"),
        )
        for source, field_path, expected in cases:
            self.assertEqual(fr_agent.rewrite(source, field_path), expected)

    def test_german_agent_normalizes_non_system_copy(self):
        """The ``de`` agent normalizes short labels, headings and rendered copy."""
        de_agent = get_language_agent("de")
        cases = (
            ("New", "body.cards[0].badge", "Neu"),
            ("Intakegespräch", "body.stats[0].label", "Erstgespräch"),
            ("Was du bekommst", "body.heading", "Was Sie erhalten"),
            (
                "Sales-ready mit skalierbarem Stack",
                "body.cards[0].text",
                "Verkaufsbereit mit skalierbarer Architektur",
            ),
            (
                "Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
                "rendered",
                "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
            ),
        )
        for source, field_path, expected in cases:
            self.assertEqual(de_agent.rewrite(source, field_path), expected)

    def test_rewrite_ai_output_validates_result(self):
        """rewrite_ai_output turns an explanation artifact on a ``pt`` CTA into the expected label."""
        artifact = 'Sem cartão de crédito" is not Dutch; . The translation from German to Spanish is: "Sem cartão de crédito".'
        result = rewrite_ai_output("pt", "body.cta_text", artifact)
        self.assertEqual(result, "Sem compromisso")

    def test_portuguese_rewrite_candidates_are_detected(self):
        """Foreign hero copy in a ``pt`` field is reported as mixed_locale_heading."""
        sample = "Siti web e negozi online che sono rapidamente online e facili da gestire"
        found = validate_text_nodes("pt", [("body.hero_text", sample)])
        self.assertTrue(any(entry.issue_type == "mixed_locale_heading" for entry in found))

    def test_french_foreign_ui_label_is_detected(self):
        """A foreign metric label in a ``fr`` field is reported as foreign_ui_label."""
        found = validate_text_nodes("fr", [("body.metric_label", "Durchschnittliche Lieferung")])
        self.assertTrue(any(entry.issue_type == "foreign_ui_label" for entry in found))

    def test_de_canonical_system_strings_are_not_rewrite_candidates(self):
        """Canonical ``de`` system strings are never reported as bad values."""
        nodes = [("body.metric_label", "Durchschnittliche Lieferung"), ("body.badge", "PLAN")]
        found = validate_text_nodes("de", nodes)
        for canonical in ("Durchschnittliche Lieferung", "PLAN"):
            self.assertFalse(any(entry.bad_value == canonical for entry in found))

    def test_extract_visible_rendered_text_ignores_hidden_script_and_style(self):
        """Only visible text survives; style, script, hidden and aria-hidden text is dropped."""
        html = """
<html><body>
<style>.x{color:red}</style>
<script>var foo = 'bar';</script>
<h1>Visible title</h1>
<p style="display:none">Hidden copy</p>
<div aria-hidden="true"><p>Also hidden</p></div>
<a href="#">Visible link</a>
<noscript>Nope</noscript>
</body></html>
"""
        visible_text = extract_visible_rendered_text(html)
        for expected in ("Visible title", "Visible link"):
            self.assertIn(expected, visible_text)
        for unexpected in ("Hidden copy", "Also hidden", "var foo"):
            self.assertNotIn(unexpected, visible_text)

    def test_system_strings_are_centralized_for_fr_and_pt(self):
        """System vocabularies map known labels per locale and expose rewrite candidates."""
        vocabulary_cases = (
            ("fr", "PLAN", "FORFAIT"),
            ("fr", "Reaktionszeit", "Temps de réponse"),
            ("pt", "Transparent", "Transparente"),
            ("fr", "Transparente Investition", "Investissement transparent"),
            ("pt", "Transparente Investition", "Investimento transparente"),
        )
        for locale_code, source, expected in vocabulary_cases:
            self.assertEqual(build_system_vocabulary(locale_code)[source], expected)
        candidates = build_system_rewrite_candidates()
        self.assertEqual(candidates["Durchschnittliche Lieferung"], "foreign_ui_label")
class AuditLocalesCommandTests(SimpleTestCase):
    """Tests for the ``audit_locales`` management command with the audit itself mocked."""

    @mock.patch("mandelblog_content_guard.management.commands.audit_locales.audit_locales")
    def test_json_output(self, audit_locales_mock):
        """``--format=json`` emits the run id and the issues grouped by locale."""
        recorded_issue = mock.Mock()
        recorded_issue.url = "/en/contact/"
        recorded_issue.title = "Contact"
        recorded_issue.severity = "block"
        recorded_issue.issue_type = "wrong_language_fragment"
        recorded_issue.field_path = "body.form.label"
        recorded_issue.bad_value = "Correo electrónico"
        recorded_issue.replacement = "Email"
        recorded_issue.fixed = False

        fake_run = mock.Mock(
            pk=12,
            total_urls_checked=2,
            issues_found=1,
            summary={"en": {"total_urls_checked": 2, "issues_found": 1, "by_severity": {"block": 1}}},
        )
        fake_run.issues.all.return_value.order_by.return_value = [recorded_issue]
        audit_locales_mock.return_value = fake_run

        buffer = StringIO()
        call_command("audit_locales", "--locale", "en", "--format=json", stdout=buffer)
        payload = json.loads(buffer.getvalue().strip())

        self.assertEqual(payload["run_id"], 12)
        self.assertEqual(payload["issues"]["en"][0]["bad_value"], "Correo electrónico")

    @mock.patch("mandelblog_content_guard.management.commands.audit_locales.audit_locales")
    def test_rewrite_flags_are_forwarded(self, audit_locales_mock):
        """``--rewrite --dry-run`` is passed to ``audit_locales`` as keyword flags."""
        fake_run = mock.Mock(
            pk=13,
            total_urls_checked=1,
            issues_found=0,
            summary={"pt": {"total_urls_checked": 1, "issues_found": 0, "issues_fixed": 0, "by_severity": {"block": 0, "warn": 0, "log": 0}}},
        )
        fake_run.issues.all.return_value.order_by.return_value = []
        audit_locales_mock.return_value = fake_run

        buffer = StringIO()
        call_command("audit_locales", "--locale", "pt", "--rewrite", "--dry-run", stdout=buffer)

        audit_locales_mock.assert_called_once_with(["pt"], fix=False, rewrite=True, dry_run=True)

View File

Some files were not shown because too many files have changed in this diff Show More