Add multilingual audit CI pipeline + extract mandelblog_content_guard

This commit is contained in:
2026-03-29 20:49:42 +02:00
parent 2a51989fa4
commit 1f05011a63
104 changed files with 3372 additions and 6 deletions

View File

@@ -0,0 +1,25 @@
from .base import BaseLanguageAgent
from .de import GermanAgent
from .en import EnglishAgent
from .es import SpanishAgent
from .fr import FrenchAgent
from .it import ItalianAgent
from .nl import DutchAgent
from .pt import PortugueseAgent
from .ru import RussianAgent
# Locale code -> agent class. Codes missing from this table are handled by
# the fallback in get_language_agent().
AGENT_REGISTRY = {
    "nl": DutchAgent,
    "en": EnglishAgent,
    "de": GermanAgent,
    "fr": FrenchAgent,
    "es": SpanishAgent,
    "it": ItalianAgent,
    "pt": PortugueseAgent,
    "ru": RussianAgent,
}
def get_language_agent(locale_code: str) -> BaseLanguageAgent:
    """Return a freshly constructed language agent for ``locale_code``.

    Lookup order:

    1. the code exactly as given (original behaviour);
    2. the code lower-cased with ``_`` replaced by ``-`` (``"EN"`` -> ``"en"``);
    3. the primary language subtag (``"pt-BR"`` / ``"de_DE"`` -> ``"pt"`` / ``"de"``);
    4. ``BaseLanguageAgent`` when nothing matches (including ``None`` or ``""``).

    Args:
        locale_code: Locale identifier such as ``"en"``, ``"pt-BR"`` or ``"de_DE"``.

    Returns:
        A new instance of the matching agent class.
    """
    agent_class = AGENT_REGISTRY.get(locale_code)
    if agent_class is None:
        # Regional variants and differently-cased codes would otherwise fall
        # through to the base agent, whose locale is hard-wired to "nl".
        normalized = str(locale_code or "").strip().lower().replace("_", "-")
        primary = normalized.split("-", 1)[0]
        agent_class = (
            AGENT_REGISTRY.get(normalized)
            or AGENT_REGISTRY.get(primary)
            or BaseLanguageAgent
        )
    return agent_class()

View File

@@ -0,0 +1,187 @@
from __future__ import annotations
import re
from collections import defaultdict
from typing import Any
from django.utils.module_loading import import_string
from ..settings import get_rewrite_backend
class BaseLanguageAgent:
    """Locale-aware cleanup and rewrite helper for website copy.

    Subclasses customise behaviour through class attributes and by
    overriding :meth:`post_cleanup_text`.

    Class attributes:
        locale: Locale code inserted into the backend prompt.
        tone / preferred_formality: Free-text hints inserted into the prompt.
        cta_defaults: keyword -> canonical call-to-action label. A keyword
            matches when it is a substring of the lower-cased cleaned text;
            the first match in dict order wins.
        vocabulary_map: source -> target replacements applied to every text.
        contextual_vocabulary_map: field-path token -> (source -> target).
            A mapping is active when its token is a substring of the
            lower-cased field path.
        cleanup_patterns: ``(compiled regex, format template)`` pairs. When a
            regex matches the whole text, the text is replaced by the template
            formatted with the regex's named groups.
    """

    locale = "nl"
    tone = "business"
    preferred_formality = "neutral"
    cta_defaults: dict[str, str] = {}
    vocabulary_map: dict[str, str] = {}
    contextual_vocabulary_map: dict[str, dict[str, str]] = {}
    # NOTE(review): when the quoted text directly follows ": ", the opening
    # quote character appears to be captured as part of ``quote`` -- confirm
    # whether that is intended.
    cleanup_patterns: tuple[tuple[re.Pattern[str], str], ...] = (
        (
            re.compile(
                r"""^.*?\bis\s+(?:German|Spanish|French|Italian|Portuguese|Dutch),\s+not\s+Dutch.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\btranslation\s+from\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\btraducid[oa]\s+al\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\bперевод\s+с\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^\s*La\s+entrada\s+\"?(?P<quote>.+?)\"?\s+está\s+en\s+alemán.*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
    )

    def __init__(self) -> None:
        """Resolve the optional rewrite backend once per instance."""
        self.backend = self._load_backend()

    def _load_backend(self):
        """Import the configured rewrite backend, or return ``None`` if unset."""
        backend_path = get_rewrite_backend()
        if not backend_path:
            return None
        return import_string(backend_path)

    def backend_prompt(self, field_path: str, text: str) -> str:
        """Build the instruction prompt handed to the rewrite backend."""
        return (
            f"Rewrite the following {self.locale} website copy for a small-business "
            f"website in a natural, professional, sales-driven tone. Preserve meaning, "
            f"remove translation artifacts, keep it concise, and do not add commentary.\n"
            f"Field: {field_path}\n"
            f"Locale: {self.locale}\n"
            f"Tone: {self.tone}\n"
            f"Formality: {self.preferred_formality}\n"
            f"Text: {text}"
        )

    def _contextual_replacements(self, field_path: str) -> dict[str, str]:
        """Merge every contextual mapping whose token occurs in ``field_path``.

        Matching is a case-insensitive substring test; later tokens override
        earlier ones for duplicate source strings.
        """
        lowered = field_path.lower()
        replacements: dict[str, str] = {}
        for token, mapping in self.contextual_vocabulary_map.items():
            if token in lowered:
                replacements.update(mapping)
        return replacements

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Hook for locale-specific normalisation; the base class is a no-op."""
        return text

    def _apply_replacements(self, text: str, replacements: dict[str, str]) -> str:
        """Apply ``replacements`` to ``text``, longest source first.

        Sources made only of word characters and hyphens are replaced as
        whole tokens (not inside longer words); every other source is
        replaced as a plain substring. Empty sources are ignored.
        """
        phrase_replacements: dict[str, str] = {}
        token_replacements: dict[str, str] = {}
        for source, target in replacements.items():
            if not source:
                continue
            if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
                token_replacements[source] = target
            else:
                phrase_replacements[source] = target

        cleaned = text
        for source, target in sorted(phrase_replacements.items(), key=lambda item: len(item[0]), reverse=True):
            cleaned = cleaned.replace(source, target)
        for source, target in sorted(token_replacements.items(), key=lambda item: len(item[0]), reverse=True):
            pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
            # A callable replacement inserts the target literally. Passing the
            # string itself would make ``re`` interpret backslashes in it as
            # escapes / group references (or raise ``re.error``).
            cleaned = pattern.sub(lambda _match, _target=target: _target, cleaned)
        return cleaned

    def cleanup_text(self, text: str, field_path: str = "") -> str:
        """Strip translation commentary, apply vocabulary, collapse whitespace.

        Steps, in order: cleanup patterns, global vocabulary, contextual
        vocabulary for ``field_path``, :meth:`post_cleanup_text`, and
        whitespace normalisation.
        """
        cleaned = text.strip()
        for pattern, replacement in self.cleanup_patterns:
            match = pattern.match(cleaned)
            if not match:
                continue
            cleaned = replacement.format(**match.groupdict()).strip()

        contextual = self._contextual_replacements(field_path)
        # A contextual entry must win over a global entry with the same
        # source. If the global entry ran first, the source text would
        # already be gone and the contextual override could never match.
        general = {
            source: target
            for source, target in self.vocabulary_map.items()
            if source not in contextual
        }
        cleaned = self._apply_replacements(cleaned, general)
        cleaned = self._apply_replacements(cleaned, contextual)
        cleaned = self.post_cleanup_text(cleaned, field_path=field_path)
        return re.sub(r"\s+", " ", cleaned).strip()

    def normalize_cta(self, text: str, field_path: str = "") -> str:
        """Return the canonical CTA label for ``text``, or the cleaned text."""
        normalized = self.cleanup_text(text, field_path=field_path)
        lowered = normalized.lower()
        for keyword, replacement in self.cta_defaults.items():
            if keyword in lowered:
                return replacement
        return normalized

    def rewrite(self, text: str, field_path: str = "", issues: list[Any] | None = None) -> str:
        """Clean ``text`` and, if a backend is configured, let it rewrite it.

        CTA-like fields (path contains ``cta``, ``button``, ``link_text`` or
        ``submit``) are mapped to canonical CTA labels first. The backend
        result is used only when it is a non-blank string.
        """
        cleaned = self.cleanup_text(text, field_path=field_path)
        lowered_path = field_path.lower()
        if any(token in lowered_path for token in ("cta", "button", "link_text", "submit")):
            cleaned = self.normalize_cta(cleaned, field_path=field_path)
        elif issues and any(
            issue.issue_type in {"generic_badge_label", "foreign_ui_label", "weak_marketing_copy", "mixed_locale_heading"}
            for issue in issues
        ):
            cleaned = self.cleanup_text(cleaned, field_path=field_path)
        if self.backend:
            rewritten = self.backend(
                locale=self.locale,
                field_path=field_path,
                text=cleaned,
                prompt=self.backend_prompt(field_path, cleaned),
            )
            if isinstance(rewritten, str) and rewritten.strip():
                cleaned = rewritten.strip()
        return cleaned

    def process_block(
        self,
        block_data: Any,
        field_path: str = "",
        issue_map: dict[str, list[Any]] | None = None,
    ) -> tuple[Any, bool]:
        """Recursively clean or rewrite every string inside ``block_data``.

        Dict keys extend the path as ``parent.key`` and list items as
        ``parent[index]``. Non-string leaves are returned untouched.

        Returns:
            ``(new_value, changed)`` where ``changed`` is true when any string
            in the subtree differs from its original.
        """
        issue_map = issue_map or {}
        if isinstance(block_data, dict):
            changed = False
            output = {}
            for key, value in block_data.items():
                child_path = f"{field_path}.{key}" if field_path else str(key)
                new_value, child_changed = self.process_block(value, child_path, issue_map)
                output[key] = new_value
                changed = changed or child_changed
            return output, changed
        if isinstance(block_data, list):
            changed = False
            output = []
            for index, value in enumerate(block_data):
                child_path = f"{field_path}[{index}]"
                new_value, child_changed = self.process_block(value, child_path, issue_map)
                output.append(new_value)
                changed = changed or child_changed
            return output, changed
        if isinstance(block_data, str):
            issues = issue_map.get(field_path, [])
            # Lower-case the path so token detection agrees with rewrite()
            # and _contextual_replacements(), which are case-insensitive.
            lowered_path = field_path.lower()
            needs_rewrite = bool(issues) or any(
                token in lowered_path for token in ("cta", "button", "label", "placeholder", "help_text")
            )
            if not needs_rewrite:
                # NOTE(review): field_path is not forwarded here, so
                # contextual vocabulary is skipped for unflagged fields --
                # confirm this is intentional.
                cleaned = self.cleanup_text(block_data)
                return cleaned, cleaned != block_data
            rewritten = self.rewrite(block_data, field_path=field_path, issues=issues)
            return rewritten, rewritten != block_data
        return block_data, False

    def build_issue_map(self, issues: list[Any]) -> dict[str, list[Any]]:
        """Group ``issues`` by ``field_path``, skipping issues without one."""
        issue_map: dict[str, list[Any]] = defaultdict(list)
        for issue in issues:
            if issue.field_path:
                issue_map[issue.field_path].append(issue)
        return issue_map

View File

@@ -0,0 +1,23 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_de_text
from ..system_strings import build_system_vocabulary
class GermanAgent(BaseLanguageAgent):
    """Language agent for the ``de`` locale."""

    locale = "de"
    tone = "professional and trustworthy"
    preferred_formality = "formal Sie"

    # Global replacements are supplied entirely by build_system_vocabulary().
    vocabulary_map = dict(build_system_vocabulary("de", ("transparent_investment",)))

    # keyword -> canonical German call-to-action label
    cta_defaults = {
        "starter": "Starter-Gespräch planen",
        "business": "Beratungsgespräch planen",
        "support": "Support anfragen",
        "service": "Dienstleistungen anzeigen",
        "project": "Projekt starten",
        "kontakt": "Einführungsgespräch planen",
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the German normalizer as the final cleanup step."""
        normalized = normalize_de_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,34 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_en_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class EnglishAgent(BaseLanguageAgent):
    """Language agent for the ``en`` locale."""

    locale = "en"
    tone = "business-friendly and direct"
    preferred_formality = "neutral"

    # Global replacements are supplied entirely by build_system_vocabulary().
    vocabulary_map = dict(
        build_system_vocabulary(
            "en",
            ("plan_badge", "services_badge", "transparent_label", "transparent_investment"),
        )
    )

    # Contextual replacements, copied per field-path token so the shared
    # source mapping is never aliased.
    _system_contextual = build_contextual_system_vocabulary(
        "en",
        ("plan_badge", "services_badge", "transparent_label"),
    )
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "title": dict(_system_contextual.get("title", {})),
        "heading": dict(_system_contextual.get("heading", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    # keyword -> canonical English call-to-action label
    cta_defaults = {
        "starter": "Book starter call",
        "business": "Book business call",
        "support": "View support",
        "service": "View services",
        "project": "Start your project",
        "quote": "Request a quote",
        "contact": "Book intro call",
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the English normalizer as the final cleanup step."""
        normalized = normalize_en_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,43 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_es_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class SpanishAgent(BaseLanguageAgent):
    """Language agent for the ``es`` locale."""

    locale = "es"
    tone = "clear and business-focused"
    preferred_formality = "formal"

    # Global replacements are supplied entirely by build_system_vocabulary().
    vocabulary_map = dict(
        build_system_vocabulary(
            "es",
            (
                "plan_badge",
                "response_time",
                "without_commitment",
                "transparent_label",
                "transparent_investment",
            ),
        )
    )

    # Contextual replacements, copied per field-path token.
    _system_contextual = build_contextual_system_vocabulary("es", ("plan_badge", "transparent_label"))
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "title": dict(_system_contextual.get("title", {})),
        "heading": dict(_system_contextual.get("heading", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    # keyword -> canonical Spanish call-to-action label
    cta_defaults = {
        "starter": "Reservar llamada inicial",
        "business": "Reservar llamada comercial",
        "support": "Solicitar soporte",
        "service": "Mostrar los servicios",
        "project": "Inicia tu proyecto",
        "quote": "Solicitar propuesta",
        "contact": "Planificar la reunión inicial",
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Spanish normalizer as the final cleanup step."""
        normalized = normalize_es_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,66 @@
from .base import BaseLanguageAgent
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class FrenchAgent(BaseLanguageAgent):
    """Language agent for the ``fr`` locale.

    Unlike most sibling agents, this class does not override
    ``post_cleanup_text``; it relies on the vocabulary tables below.
    """

    locale = "fr"
    tone = "professional and commercial"
    preferred_formality = "formal"

    # keyword -> canonical French call-to-action label.
    # The elided forms (l'entretien, l'échange) had lost their apostrophes,
    # which produced non-words in user-facing copy.
    cta_defaults = {
        "starter": "Planifier l'entretien de départ",
        "business": "Planifier l'entretien commercial",
        "support": "Voir le support",
        "service": "Afficher les services",
        "project": "Lancez votre projet",
        "devis": "Demander un devis",
        "contact": "Planifier l'échange",
    }

    # System vocabulary first, then hand-written fixes for stray
    # English/German/Dutch fragments and awkward French phrasing.
    vocabulary_map = {
        **build_system_vocabulary("fr"),
        "SERVICES": "PRESTATIONS",
        "New": "Nouveau",
        "Popular": "Populaire",
        "Erstes Produktionsprojekt erfolgreich abgeschlossen.": "Premier projet de production livré avec succès.",
        "Von Kickoff bis zum Launch mit einem klaren Umfang.": "Du cadrage au lancement avec un périmètre clair.",
        "Demande d'admission initiale": "Planifier un échange initial",
        "Geschäftsprozess besprechen": "Échanger sur votre processus métier",
        "Entretien d'accueil": "Entretien initial",
        "Vraag over diensten": "Question sur les services",
        "Konkrete erste Schätzung": "Première estimation concrète",
        "Ansatz, der zu Ihrem Budget passt": "Approche adaptée à votre budget",
        "Detailliertes Seitenlayout": "Structure détaillée des pages",
        "Investition": "investissement",
        "Unverbindliches Gespräch, klares Angebot": "Sans engagement, offre claire",
        # "d'entreprise" had lost its apostrophe as well.
        "Bereit, mit der Business-Website zu starten?": "Prêt à démarrer votre site d'entreprise ?",
        "Planifier un échange business": "Planifier un échange commercial",
        "Aucune carte bancaire requise": "Sans engagement",
    }

    # Contextual replacements: system entries plus per-context overrides.
    _system_contextual = build_contextual_system_vocabulary("fr")
    contextual_vocabulary_map = {
        "badge": {
            **_system_contextual.get("badge", {}),
            "Popular": "Le plus demandé",
        },
        "label": {
            **_system_contextual.get("label", {}),
            "Popular": "Le plus demandé",
        },
        "metric": {
            **_system_contextual.get("metric", {}),
        },
        "stat": {
            **_system_contextual.get("stat", {}),
        },
        "title": {
            **_system_contextual.get("title", {}),
            "SERVICES": "PRESTATIONS",
        },
        "heading": {
            **_system_contextual.get("heading", {}),
            "SERVICES": "PRESTATIONS",
        },
        "rendered": {
            **_system_contextual.get("rendered", {}),
            "SERVICES": "PRESTATIONS",
        },
    }

View File

@@ -0,0 +1,42 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_it_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class ItalianAgent(BaseLanguageAgent):
    """Language agent for the ``it`` locale."""

    locale = "it"
    tone = "professional and approachable"
    preferred_formality = "polite"

    # Global replacements are supplied entirely by build_system_vocabulary().
    vocabulary_map = dict(
        build_system_vocabulary(
            "it",
            (
                "weeks_1_2",
                "without_commitment",
                "transparent_label",
                "transparent_investment",
                "customization_integrations",
                "multilingual_rollout",
            ),
        )
    )

    # Contextual replacements, copied per field-path token.
    _system_contextual = build_contextual_system_vocabulary("it", ("transparent_label",))
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    # keyword -> canonical Italian call-to-action label
    cta_defaults = {
        "starter": "Prenota una call iniziale",
        "business": "Pianifica la call business",
        "support": "Richiedi supporto",
        "service": "Mostra i servizi",
        "project": "Avvia il tuo progetto",
        "quote": "Richiedi una proposta",
        "contact": "Pianifica la riunione introduttiva",
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Italian normalizer as the final cleanup step."""
        normalized = normalize_it_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,20 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_nl_text
class DutchAgent(BaseLanguageAgent):
    """Language agent for the ``nl`` locale.

    Defines no vocabulary tables of its own; only CTA labels and the Dutch
    normalizer differ from the base class.
    """

    locale = "nl"
    tone = "zakelijk en duidelijk"
    preferred_formality = "je/jij professioneel"

    # keyword -> canonical Dutch call-to-action label
    cta_defaults = {
        "starter": "Plan startergesprek",
        "business": "Plan zakelijk gesprek",
        "support": "Bekijk support",
        "service": "Bekijk diensten",
        "project": "Start jouw project",
        "contact": "Plan kennismaking",
        "offerte": "Vraag voorstel aan",
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Dutch normalizer as the final cleanup step."""
        normalized = normalize_nl_text(text, field_path=field_path)
        return normalized

View File

@@ -0,0 +1,111 @@
from .base import BaseLanguageAgent
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class PortugueseAgent(BaseLanguageAgent):
    """Language agent for the ``pt`` locale.

    Does not override ``post_cleanup_text``; cleanup relies on the large
    hand-maintained vocabulary table below, which maps stray Italian,
    Spanish, French, German and English fragments to Portuguese copy.
    """

    locale = "pt"
    tone = "business-focused and practical"
    preferred_formality = "neutral"

    # keyword -> canonical Portuguese call-to-action label
    cta_defaults = {
        "starter": "Agendar chamada inicial",
        "business": "Agendar chamada comercial",
        "support": "Ver suporte",
        "service": "Ver serviços",
        "project": "Iniciar o seu projeto",
        "proposta": "Pedir proposta",
        "contact": "Agendar reunião introdutória",
    }

    # System vocabulary first, then explicit overrides. Entry order is kept
    # as-is: the replacement routine sorts by source length with a stable
    # sort, so order only breaks ties between equally long sources.
    vocabulary_map = {
        **build_system_vocabulary("pt"),
        "SERVICES": "SERVIÇOS",
        "New": "Novo",
        "Popular": "Em destaque",
        "Siti web e negozi online": "Sites e lojas online",
        "Siti web e negozi online che sono rapidamente online e facili da gestire": "Sites e lojas online que ficam no ar rapidamente e são fáceis de gerir",
        "Caso de cliente en directo": "Caso real de cliente",
        "El primer proyecto de producción finalizado con éxito.": "O primeiro projeto de produção foi concluído com sucesso.",
        "Más sobre el proceso": "Mais sobre o processo",
        "Modifiez simplement vous-même.": "Edite facilmente por conta própria.",
        "Opciones de la tienda web Mantenimiento y soporte Suporte mensal opcional para atualizações e estabilidade.": "Opções da loja online Manutenção e suporte Suporte mensal opcional para atualizações e estabilidade.",
        "Opciones de la tienda web": "Opções da loja online",
        "Planes de soporte": "Planos de suporte",
        "Multilingüe": "Multilingue",
        "Suivi + corrections": "Acompanhamento e correções",
        "Mejoras mensuales": "Melhorias mensais",
        "¿A qué velocidad puede comenzar?": "Com que rapidez podem começar?",
        "¿Puedo editar textos e imágenes yo mismo?": "Posso editar textos e imagens por conta própria?",
        "Einzelhandelsunternehmer": "Comerciante",
        "lifestyle": "estilo de vida",
        "À partir de 3 750 €": "A partir de 3.750 €",
        "Transparente sobre o planejamento, o processo e a gestão.": "Clareza sobre o planeamento, o processo e a gestão.",
        "Einzelhandelsinhaber Petite boutique en ligne Forfaits de services (à partir de) Pontos de partida transparentes.": "Comerciantes Pequena loja online Pacotes de serviço (a partir de) Pontos de partida claros.",
        "Unsere Serviços": "Os nossos serviços",
        "Unsere Serviços: vom schnellen Start bis zu skalierbarem Wachstum": "Os nossos serviços: do lançamento rápido ao crescimento escalável",
        "Elija el camino": "Escolha o caminho certo",
        "Elija el camino que corresponda a su fase: sitio de inicio, sitio empresarial, tienda en línea o soporte continuo.": "Escolha o caminho certo para a sua fase: site inicial, site empresarial, loja online ou suporte contínuo.",
        "Début en direct": "Lançamento rápido",
        "Demande d'admission initiale": "Agendar conversa inicial",
        "Site Web d'Entreprise": "Site empresarial",
        "Hablar sobre el proceso empresarial": "Falar sobre o processo do negócio",
        "Mise en place de boutique en ligne": "Implementação de loja online",
        "Maintenance & gestion": "Manutenção e gestão",
        "Afficher le plan de soutien": "Ver suporte",
        "Introducción multilingüe": "Lançamento multilingue",
        "Forfaits de services (à partir de)": "Pacotes de serviço (a partir de)",
        "Schnell online mit einer starken Basis": "Rápido online com uma base sólida",
        "Startseite + Kernseiten": "Página inicial + páginas essenciais",
        "Optimizado para móviles": "Otimizado para mobile",
        "Gestisca lei stesso il contenuto": "Gerir o conteúdo com autonomia",
        "Detailliertes Seitenlayout": "Estrutura detalhada das páginas",
        "Unverbindliches Gespräch, klares Angebot": "Sem compromisso, proposta clara",
        "Mehr Struktur und Konversion": "Mais estrutura e foco em conversão",
        "Sections axées sur la conversion": "Secções orientadas para conversão",
        "Base prête pour le SEO": "Base pronta para SEO",
        "Katalog + Kasse": "Catálogo + checkout",
        "Zahlungen und Auftragsfluss": "Pagamentos e fluxo de encomendas",
        "Wachstumsbereite Grundlage": "Base pronta para crescimento",
        "Soporte y crecimiento": "Suporte e crescimento",
        "Amélioration continue": "Melhoria contínua",
        "Desde 149 € al mes.": "Desde 149 € por mês.",
        "Ab 2.250 €": "A partir de 2.250 €",
        "Boutique en ligne": "Loja online",
        "Sales-ready mit skalierbarem Stack": "Preparada para vender com uma base escalável",
        "Agendar conversa sobre o serviço Ver resultados do projeto 1-2 Wochen Début en direct 4.9/5 Kundenschätzung 100% Bearbeitbar Visão geral dos serviços Cada serviço é projetado para melhorar a faturação, a confiança e a controlabilidade.": "Agendar conversa sobre o serviço Ver resultados do projeto 1 a 2 semanas Lançamento rápido 4.9/5 Avaliação dos clientes 100% Editável Visão geral dos serviços Cada serviço foi concebido para aumentar a faturação, reforçar a confiança e dar mais controlo à sua equipa.",
        "Site inicial Schnell online mit einer starken Basis A partir de 1.250 € Agendar chamada inicial Startseite + Kernseiten Optimizado para móviles Gestisca lei stesso il contenuto Recomendado Site Web d'Entreprise Mehr Struktur und Konversion Ab 2.250 € Agendar chamada comercial Detailliertes Seitenlayout Sections axées sur la conversion Base prête pour le SEO Boutique en ligne Sales-ready mit skalierbarem Stack À partir de 3 750 € Iniciar o processo da loja online Katalog + Kasse Zahlungen und Auftragsfluss Wachstumsbereite Grundlage Soporte y crecimiento Amélioration continue Desde 149 € al mes.": "Site inicial Rápido online com uma base sólida A partir de 1.250 € Agendar chamada inicial Página inicial + páginas essenciais Otimizado para mobile Gerir o conteúdo com autonomia Recomendado Site empresarial Mais estrutura e foco em conversão A partir de 2.250 € Agendar chamada comercial Estrutura detalhada das páginas Secções orientadas para conversão Base pronta para SEO Loja online Preparada para vender com uma base escalável A partir de 3.750 € Iniciar o processo da loja online Catálogo + checkout Pagamentos e fluxo de encomendas Base pronta para crescimento Suporte e crescimento Melhoria contínua Desde 149 € por mês.",
        "Perguntas frequentes Transparente sobre o planejamento, o processo e a gestão.": "Perguntas frequentes Clareza sobre o planeamento, o processo e a gestão.",
        'Ver serviços New La entrada "Unterstützung oder Erweiterung" está en alemán, no en neerlandés.': "Ver serviços Novo Suporte ou expansão",
        "Unterstützung oder Erweiterung": "Suporte ou expansão",
        'La entrada "Unterstützung oder Erweiterung"': "Suporte ou expansão",
        'La entrada "Unterstützung oder Erweiterung" está en alemán, no en neerlandés. Traducido al francés, es: "Suporte ou expansão".': "Suporte ou expansão",
        "Sem cartão de crédito": "Sem compromisso",
    }

    # Contextual replacements per field-path token.
    # NOTE(review): "title" and "heading" do not merge the system contextual
    # entries, unlike the other tokens here -- confirm this is deliberate.
    _system_contextual = build_contextual_system_vocabulary("pt")
    contextual_vocabulary_map = {
        "badge": {
            **_system_contextual.get("badge", {}),
            "Popular": "Escolha frequente",
        },
        "label": {
            **_system_contextual.get("label", {}),
            "Popular": "Escolha frequente",
        },
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "title": {
            "SERVICES": "SERVIÇOS",
            "Popular": "Em destaque",
        },
        "heading": {
            "SERVICES": "SERVIÇOS",
            "Popular": "Em destaque",
        },
        "rendered": {
            **_system_contextual.get("rendered", {}),
            "SERVICES": "SERVIÇOS",
            "Popular": "Em destaque",
        },
    }

View File

@@ -0,0 +1,39 @@
from .base import BaseLanguageAgent
from ..normalizers import normalize_ru_text
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
class RussianAgent(BaseLanguageAgent):
    """Language agent for the ``ru`` locale."""

    locale = "ru"
    tone = "professional and confident"
    preferred_formality = "neutral polite"

    # Global replacements are supplied entirely by build_system_vocabulary().
    vocabulary_map = dict(
        build_system_vocabulary(
            "ru",
            (
                "customization_integrations",
                "detailed_page_structure",
                "without_commitment",
            ),
        )
    )

    # Contextual replacements, copied per field-path token.
    _system_contextual = build_contextual_system_vocabulary("ru", ("plan_badge", "transparent_label"))
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    # keyword -> canonical Russian call-to-action label
    cta_defaults = {
        "starter": "Запланировать стартовую консультацию",
        "business": "Обсудить бизнес-проект",
        "support": "Посмотреть поддержку",
        "service": "Посмотреть услуги",
        "project": "Запустить свой проект",
        "contact": "Отправить запрос",
        "quote": "Получить предложение",
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Run the Russian normalizer as the final cleanup step."""
        normalized = normalize_ru_text(text, field_path=field_path)
        return normalized