Add multilingual audit CI pipeline + extract mandelblog_content_guard

This commit is contained in:
2026-03-29 20:49:42 +02:00
parent 643aca26d0
commit e3bafd3a73
104 changed files with 3372 additions and 6 deletions

View File

@@ -0,0 +1,146 @@
from __future__ import annotations
import re
from ...types import make_issue
CTA_RULES = {
"nl": (
r"^Plan ",
r"^Bekijk ",
r"^Vraag ",
r"^Bespreek ",
r"^Contact$",
r"^Start ",
r"^Meer ",
r"^Verstuur ",
r"^Neem ",
),
"en": (
r"^Book ",
r"^View ",
r"^Schedule ",
r"^Start ",
r"^Talk ",
r"^Discuss ",
r"^Contact$",
r"^Explore ",
r"^Learn ",
r"^Request ",
r"^Send ",
),
"de": (
r"^Plan",
r"^Mehr",
r"^Support",
r"^Start",
r"^Kontakt",
r"^Gespr",
r"^Kostenlose",
r"^Anfrage",
r"^Projekte",
r"^Verein",
r"^Besprech",
r"^Anzeig",
r"^Ansehen",
r"^Technisch",
r"^Unterst",
r"^Unsere",
r"^Service",
r"^Dienstleistungen",
r"^Erstgespräch",
r"^Einführ",
r"^Anpassung",
r"^Ansichts",
r"^Prozess",
r"^Pakete",
r"^Demo",
r"^Alle ",
r"^Ein ",
r"^Webshop",
),
"fr": (
r"^Planifier",
r"^Voir",
r"^Découvrir",
r"^Demander",
r"^Lancer",
r"^Démarrer",
r"^Contacter",
r"^Contact$",
r"^Parler",
r"^Lancez",
r"^Prendre",
r"^Envoyer",
r"^Afficher",
),
"es": (
r"^Reservar",
r"^Ver",
r"^Solicitar",
r"^Inicia",
r"^Hablar",
r"^Descubrir",
r"^Contactar",
r"^Planificar",
r"^Programe",
r"^Concertar",
r"^Enviar",
r"^Mostrar",
r"^Comenta",
),
"it": (
r"^Prenota",
r"^Vedi",
r"^Avvia",
r"^Richiedi",
r"^Contatta",
r"^Contatto$",
r"^Scopri",
r"^Pianifica",
r"^Invia",
r"^Mostra",
r"^Parla",
r"^Parliamo",
),
"pt": (
r"^Agendar",
r"^Ver",
r"^Iniciar",
r"^Pedir",
r"^Contactar",
r"^Falar",
r"^Explorar",
r"^Marcar",
r"^Solicitar",
r"^Enviar",
r"^Mostrar",
),
"ru": (
r"^Заплан",
r"^Посмотр",
r"^Запуст",
r"^Связ",
r"^Подробнее",
r"^Показать",
r"^Отправ",
r"^Получ",
r"^Запрос",
),
}
CTA_FIELDS = {
"cta_text",
"primary_cta_text",
"secondary_cta_text",
"submit_button_text",
}
def validate_cta(locale_code: str, field_path: str, normalized: str):
last_segment = field_path.split(".")[-1]
if last_segment not in CTA_FIELDS:
return []
if any(re.search(pattern, normalized) for pattern in CTA_RULES.get(locale_code, ())):
return []
return [make_issue("cta_language_mismatch", field_path, normalized)]

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from ...types import make_issue
from .patterns import PLACEHOLDER_VALUES
from .language import detect_language_mismatch
FORM_FIELDS = {"label", "placeholder", "help_text"}
def validate_form_copy(locale_code: str, field_path: str, normalized: str):
last_segment = field_path.split(".")[-1]
if last_segment not in FORM_FIELDS:
return []
issues = []
if normalized in PLACEHOLDER_VALUES or normalized == "":
issues.append(make_issue("empty_form_copy", field_path, normalized))
mismatch = detect_language_mismatch(locale_code, normalized)
if mismatch:
issues.append(make_issue("form_language_mismatch", field_path, mismatch["message"]))
return issues

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
import re
STOPWORDS = {
"nl": {"de", "het", "een", "en", "voor", "met", "van", "je", "wij", "niet"},
"en": {"the", "and", "for", "with", "your", "you", "from", "that", "this", "not"},
"de": {"der", "die", "das", "und", "mit", "für", "nicht", "eine", "ist", "sie"},
"fr": {"le", "la", "les", "et", "avec", "pour", "vous", "une", "pas", "des"},
"es": {"el", "la", "los", "las", "con", "para", "una", "que", "del", "por"},
"it": {"il", "la", "con", "per", "una", "che", "del", "non", "gli", "dei"},
"pt": {"o", "a", "os", "as", "com", "para", "uma", "que", "não", "dos"},
"ru": {"и", "в", "на", "с", "для", "что", "это", "как", "по", "не"},
}
def _tokenize(text: str) -> list[str]:
text = re.sub(r"<[^>]+>", " ", text)
return re.findall(r"[\w\u0400-\u04FF']+", text.lower())
def detect_language_mismatch(locale_code: str, text: str):
tokens = _tokenize(text)
if len(tokens) < 12:
return None
scores = {code: sum(1 for token in tokens if token in words) for code, words in STOPWORDS.items()}
expected = scores.get(locale_code, 0)
foreign_locale, foreign_score = max(scores.items(), key=lambda item: item[1])
if foreign_locale == locale_code:
return None
if expected >= foreign_score:
return None
if foreign_score >= 6 and foreign_score >= expected + 4:
return {
"severity": "block",
"message": f"expected={locale_code}, detected={foreign_locale}, score={foreign_score}, expected_score={expected}",
}
if expected == 0 and foreign_score >= 5:
return {
"severity": "warn",
"message": f"expected={locale_code}, detected={foreign_locale}, score={foreign_score}, expected_score={expected}",
}
return None

View File

@@ -0,0 +1,269 @@
from __future__ import annotations
import re
from ...types import make_issue
from ...system_strings import (
build_system_rewrite_candidates,
is_canonical_system_string,
system_string_replacement,
)
GLOBAL_BAD_PATTERNS = (
"The Spanish translation",
"The Spanish translation of",
"As the input",
"The input",
"Poiché l'input",
'Unternehmen" è tedesco',
"Support anzeigen",
"Starter intake",
"Business intake",
"Plan Starter intake",
"Plan Business intake",
"Plan de admisión",
"None",
)
LOCALE_FORBIDDEN = {
"nl": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Plan de admisión"),
"en": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión"),
"de": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión"),
"fr": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Plan de admisión", "Support anzeigen"),
"es": ("Poiché", 'Unternehmen" è tedesco', "Support anzeigen", "Questions fréquemment posées"),
"it": ("Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios", "Questions fréquentes", "Plan de admisión", "Correo electrónico"),
"pt": ("Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios", "Correo electrónico", 'Unternehmen" è tedesco', "Questions fréquemment posées"),
"ru": ("Poiché l'input", "Consulta inicial sin compromiso", "Correo electrónico", 'Unternehmen" è tedesco', "Mostrar los servicios"),
}
PLACEHOLDER_VALUES = {"None", "-", "N/A", "null"}
GENERIC_BADGE_LABELS = {
"New",
"Popular",
"PLAN",
"PIANO",
"SERVICES",
}
GLOBAL_REWRITE_CANDIDATES = {
**build_system_rewrite_candidates(
(
"days_label",
"average_delivery",
"response_time",
"without_commitment",
"transparent_label",
"weeks_1_2",
"customer_reviews",
"editable_label",
"core_pages_label",
"detailed_page_structure",
"business_process_cta",
"multilingual_rollout",
"customization_integrations",
"transparent_investment",
)
),
}
LOCALE_REWRITE_CANDIDATES = {
"en": {
"Service packages (from) Transparent starting points.": "foreign_ui_label",
"Frequently Asked Questions Transparent about planning, approach, and management.": "foreign_ui_label",
"Transparent investment": "foreign_ui_label",
},
"de": {
"New": "weak_marketing_copy",
"Intakegespräch": "weak_marketing_copy",
"SEO-ready basis": "foreign_ui_label",
"Sales-ready mit skalierbarem Stack": "foreign_ui_label",
"Continuous Verbesserung": "foreign_ui_label",
"Was du bekommst": "weak_marketing_copy",
"Einführungsmeeting": "weak_marketing_copy",
"Starter Website": "weak_marketing_copy",
"Business Website": "weak_marketing_copy",
"Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.": "foreign_ui_label",
},
"es": {
"Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.": "foreign_ui_label",
"Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
},
"pt": {
"Siti web e negozi online": "mixed_locale_heading",
"Caso de cliente en directo": "weak_marketing_copy",
"El primer proyecto de producción finalizado con éxito.": "weak_marketing_copy",
"Más sobre el proceso": "foreign_ui_label",
"Modifiez simplement vous-même.": "foreign_ui_label",
"Opciones de la tienda web": "foreign_ui_label",
"Planes de soporte": "foreign_ui_label",
"Multilingüe": "foreign_ui_label",
"Unsere Serviços": "mixed_locale_heading",
"Elija el camino": "mixed_locale_heading",
"Début en direct": "foreign_ui_label",
"Demande d'admission initiale": "foreign_ui_label",
"Site Web d'Entreprise": "foreign_ui_label",
"Hablar sobre el proceso empresarial": "foreign_ui_label",
"Mise en place de boutique en ligne": "foreign_ui_label",
"Maintenance & gestion": "foreign_ui_label",
"Afficher le plan de soutien": "foreign_ui_label",
"Introducción multilingüe": "foreign_ui_label",
"Forfaits de services (à partir de)": "mixed_locale_heading",
"Kundenschätzung": "foreign_ui_label",
"Gestisca lei stesso il contenuto": "foreign_ui_label",
"Optimizado para móviles": "foreign_ui_label",
"Schnell online mit einer starken Basis": "weak_marketing_copy",
"La entrada \"Unterstützung oder Erweiterung\"": "foreign_ui_label",
"Suivi + corrections": "foreign_ui_label",
"Mejoras mensuales": "foreign_ui_label",
"¿A qué velocidad puede comenzar?": "foreign_ui_label",
"¿Puedo editar textos e imágenes yo mismo?": "foreign_ui_label",
"Transparente sobre o planejamento, o processo e a gestão.": "foreign_ui_label",
"Ab 2.250 €": "foreign_ui_label",
"Boutique en ligne": "foreign_ui_label",
"Sales-ready mit skalierbarem Stack": "foreign_ui_label",
},
"fr": {
"Erstes Produktionsprojekt erfolgreich abgeschlossen.": "weak_marketing_copy",
"Von Kickoff bis zum Launch mit einem klaren Umfang.": "foreign_ui_label",
"Demande d'admission initiale": "weak_marketing_copy",
"Entretien d'accueil": "weak_marketing_copy",
"Vraag over diensten": "foreign_ui_label",
"Konkrete erste Schätzung": "foreign_ui_label",
"Ansatz, der zu Ihrem Budget passt": "foreign_ui_label",
**build_system_rewrite_candidates(("weeks_2_4",)),
"Bereit, mit der Business-Website zu starten?": "foreign_ui_label",
},
"it": {
"Planificación clara": "foreign_ui_label",
"Mehrsprachiger Rollout-Plan": "foreign_ui_label",
"Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
},
"ru": {
"Base prête pour le SEO": "foreign_ui_label",
"Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
},
}
KNOWN_REPLACEMENTS = {
"Starter intake": {
"nl": "Plan startergesprek",
"en": "Book starter call",
"de": "Starter-Gespräch planen",
"fr": "Planifier lentretien de départ",
"es": "Reservar llamada inicial",
"it": "Prenota una chiamata iniziale",
"pt": "Agendar chamada inicial",
"ru": "Запланировать стартовый звонок",
},
"Business intake": {
"nl": "Plan zakelijk gesprek",
"en": "Book business call",
"de": "Beratungsgespräch planen",
"fr": "Planifier lentretien commercial",
"es": "Reservar llamada comercial",
"it": "Prenota una chiamata commerciale",
"pt": "Agendar chamada comercial",
"ru": "Запланировать деловой звонок",
},
"Plan Starter intake": {
"nl": "Plan startergesprek",
"en": "Book starter call",
"de": "Starter-Gespräch planen",
"fr": "Planifier lentretien de départ",
"es": "Reservar llamada inicial",
"it": "Prenota una chiamata iniziale",
"pt": "Agendar chamada inicial",
"ru": "Запланировать стартовый звонок",
},
"Plan Business intake": {
"nl": "Plan zakelijk gesprek",
"en": "Book business call",
"de": "Beratungsgespräch planen",
"fr": "Planifier lentretien commercial",
"es": "Reservar llamada comercial",
"it": "Prenota una chiamata commerciale",
"pt": "Agendar chamada comercial",
"ru": "Запланировать деловой звонок",
},
"Mostrar los servicios": {
"es": "Mostrar los servicios",
"it": "Vedi servizi",
"pt": "Ver serviços",
"ru": "Показать услуги",
},
"Correo electrónico": {"pt": "E-mail", "ru": "Электронная почта"},
'Unternehmen" è tedesco, non olandese. La traduzione spagnola di "Unternehmen" è "empresa".': {
"pt": "Empresa",
"ru": "Компания",
},
'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco (non in olandese), la traduzione in spagnolo è: "Consulta inicial sin compromiso".': {
"it": "Senza impegno",
"pt": "Sem compromisso",
"ru": "Без обязательств",
"es": "Consulta inicial sin compromiso",
},
}
def _contains_fragment(text: str, fragment: str) -> bool:
if re.fullmatch(r"[\wÀ-ÿ-]+", fragment, flags=re.UNICODE):
pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(fragment)}(?![\wÀ-ÿ-])", re.UNICODE)
return bool(pattern.search(text))
return fragment in text
def validate_patterns(locale_code: str, field_path: str, normalized: str):
issues = []
for fragment in GLOBAL_BAD_PATTERNS:
if _contains_fragment(normalized, fragment):
issues.append(
make_issue(
"known_bad_pattern",
field_path,
fragment,
KNOWN_REPLACEMENTS.get(fragment, {}).get(locale_code, ""),
)
)
for fragment in LOCALE_FORBIDDEN.get(locale_code, ()):
if _contains_fragment(normalized, fragment):
issues.append(
make_issue(
"wrong_language_fragment",
field_path,
fragment,
KNOWN_REPLACEMENTS.get(fragment, {}).get(locale_code, ""),
)
)
if normalized in GENERIC_BADGE_LABELS and not is_canonical_system_string(locale_code, normalized):
issues.append(
make_issue(
"generic_badge_label",
field_path,
normalized,
system_string_replacement(locale_code, normalized),
)
)
for fragment, issue_type in GLOBAL_REWRITE_CANDIDATES.items():
if _contains_fragment(normalized, fragment):
if is_canonical_system_string(locale_code, fragment):
continue
issues.append(
make_issue(
issue_type,
field_path,
fragment,
system_string_replacement(locale_code, fragment),
)
)
for fragment, issue_type in LOCALE_REWRITE_CANDIDATES.get(locale_code, {}).items():
if _contains_fragment(normalized, fragment):
issues.append(
make_issue(
issue_type,
field_path,
fragment,
system_string_replacement(locale_code, fragment),
)
)
return issues