Add multilingual audit CI pipeline + extract mandelblog_content_guard
This commit is contained in:
269
mandelblog_content_guard/validators/rules/patterns.py
Normal file
269
mandelblog_content_guard/validators/rules/patterns.py
Normal file
@@ -0,0 +1,269 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from ...types import make_issue
|
||||
from ...system_strings import (
|
||||
build_system_rewrite_candidates,
|
||||
is_canonical_system_string,
|
||||
system_string_replacement,
|
||||
)
|
||||
|
||||
# Fragments that validate_patterns() reports as "known_bad_pattern" for every
# locale. Order matters: issues are emitted in this order.
GLOBAL_BAD_PATTERNS = (
    # Translator commentary that ended up in the content itself.
    "The Spanish translation",
    "The Spanish translation of",
    "As the input",
    "The input",
    "Poiché l'input",
    'Unternehmen" è tedesco',
    # UI labels / call-to-action texts.
    "Support anzeigen",
    "Starter intake",
    "Business intake",
    "Plan Starter intake",
    "Plan Business intake",
    "Plan de admisión",
    # The literal word "None" appearing in the text.
    "None",
)
|
||||
|
||||
# Per-locale fragments that validate_patterns() reports as
# "wrong_language_fragment". Keys are locale codes; a locale without an entry
# simply has no forbidden fragments.
LOCALE_FORBIDDEN = {
    "nl": (
        "Starter intake",
        "Business intake",
        "Poiché",
        "Correo electrónico",
        "Mostrar los servicios",
        "Plan de admisión",
    ),
    "en": (
        "Starter intake",
        "Business intake",
        "Poiché",
        "Correo electrónico",
        "Mostrar los servicios",
        "Questions fréquemment posées",
        "Plan de admisión",
    ),
    "de": (
        "Starter intake",
        "Business intake",
        "Poiché",
        "Correo electrónico",
        "Mostrar los servicios",
        "Questions fréquemment posées",
        "Plan de admisión",
    ),
    "fr": (
        "Starter intake",
        "Business intake",
        "Poiché",
        "Correo electrónico",
        "Mostrar los servicios",
        "Plan de admisión",
        "Support anzeigen",
    ),
    "es": (
        "Poiché",
        'Unternehmen" è tedesco',
        "Support anzeigen",
        "Questions fréquemment posées",
    ),
    "it": (
        "Poiché l'input",
        "Consulta inicial sin compromiso",
        "Mostrar los servicios",
        "Questions fréquentes",
        "Plan de admisión",
        "Correo electrónico",
    ),
    "pt": (
        "Poiché l'input",
        "Consulta inicial sin compromiso",
        "Mostrar los servicios",
        "Correo electrónico",
        'Unternehmen" è tedesco',
        "Questions fréquemment posées",
    ),
    "ru": (
        "Poiché l'input",
        "Consulta inicial sin compromiso",
        "Correo electrónico",
        'Unternehmen" è tedesco',
        "Mostrar los servicios",
    ),
}
|
||||
|
||||
# Literal placeholder strings. Not referenced in this part of the module;
# presumably consumed by other validators -- confirm before removing.
PLACEHOLDER_VALUES = {
    "None",
    "-",
    "N/A",
    "null",
}
|
||||
|
||||
# Whole-value labels that validate_patterns() reports as "generic_badge_label"
# unless the value is the canonical system string for the locale. Matching is
# exact set membership (case-sensitive), not substring search.
GENERIC_BADGE_LABELS = {
    "New",
    "Popular",
    "PLAN",
    "PIANO",
    "SERVICES",
}
|
||||
|
||||
# Fragment -> issue type mapping checked for every locale. The whole mapping
# is produced by build_system_rewrite_candidates() from the system-string keys
# listed below; validate_patterns() skips a hit when the fragment is the
# canonical system string for the locale being validated.
GLOBAL_REWRITE_CANDIDATES = {
    **build_system_rewrite_candidates(
        (
            "days_label",
            "average_delivery",
            "response_time",
            "without_commitment",
            "transparent_label",
            "weeks_1_2",
            "customer_reviews",
            "editable_label",
            "core_pages_label",
            "detailed_page_structure",
            "business_process_cta",
            "multilingual_rollout",
            "customization_integrations",
            "transparent_investment",
        )
    ),
}
|
||||
|
||||
# locale code -> {fragment: issue type}. validate_patterns() reports each
# fragment found in a value of that locale under the mapped issue type.
# Entry order is preserved because it determines the order of reported issues
# (and, for "fr", which value wins if the spread overlaps a literal key).
LOCALE_REWRITE_CANDIDATES = {
    "en": {
        "Service packages (from) Transparent starting points.": "foreign_ui_label",
        "Frequently Asked Questions Transparent about planning, approach, and management.": "foreign_ui_label",
        "Transparent investment": "foreign_ui_label",
    },
    "de": {
        "New": "weak_marketing_copy",
        "Intakegespräch": "weak_marketing_copy",
        "SEO-ready basis": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
        "Continuous Verbesserung": "foreign_ui_label",
        "Was du bekommst": "weak_marketing_copy",
        "Einführungsmeeting": "weak_marketing_copy",
        "Starter Website": "weak_marketing_copy",
        "Business Website": "weak_marketing_copy",
        "Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.": "foreign_ui_label",
    },
    "es": {
        "Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "pt": {
        "Siti web e negozi online": "mixed_locale_heading",
        "Caso de cliente en directo": "weak_marketing_copy",
        "El primer proyecto de producción finalizado con éxito.": "weak_marketing_copy",
        "Más sobre el proceso": "foreign_ui_label",
        "Modifiez simplement vous-même.": "foreign_ui_label",
        "Opciones de la tienda web": "foreign_ui_label",
        "Planes de soporte": "foreign_ui_label",
        "Multilingüe": "foreign_ui_label",
        "Unsere Serviços": "mixed_locale_heading",
        "Elija el camino": "mixed_locale_heading",
        "Début en direct": "foreign_ui_label",
        "Demande d'admission initiale": "foreign_ui_label",
        "Site Web d'Entreprise": "foreign_ui_label",
        "Hablar sobre el proceso empresarial": "foreign_ui_label",
        "Mise en place de boutique en ligne": "foreign_ui_label",
        "Maintenance & gestion": "foreign_ui_label",
        "Afficher le plan de soutien": "foreign_ui_label",
        "Introducción multilingüe": "foreign_ui_label",
        "Forfaits de services (à partir de)": "mixed_locale_heading",
        "Kundenschätzung": "foreign_ui_label",
        "Gestisca lei stesso il contenuto": "foreign_ui_label",
        "Optimizado para móviles": "foreign_ui_label",
        "Schnell online mit einer starken Basis": "weak_marketing_copy",
        "La entrada \"Unterstützung oder Erweiterung\"": "foreign_ui_label",
        "Suivi + corrections": "foreign_ui_label",
        "Mejoras mensuales": "foreign_ui_label",
        "¿A qué velocidad puede comenzar?": "foreign_ui_label",
        "¿Puedo editar textos e imágenes yo mismo?": "foreign_ui_label",
        "Transparente sobre o planejamento, o processo e a gestão.": "foreign_ui_label",
        "Ab 2.250 €": "foreign_ui_label",
        "Boutique en ligne": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
    },
    "fr": {
        "Erstes Produktionsprojekt erfolgreich abgeschlossen.": "weak_marketing_copy",
        "Von Kickoff bis zum Launch mit einem klaren Umfang.": "foreign_ui_label",
        "Demande d'admission initiale": "weak_marketing_copy",
        "Entretien d'accueil": "weak_marketing_copy",
        "Vraag over diensten": "foreign_ui_label",
        "Konkrete erste Schätzung": "foreign_ui_label",
        "Ansatz, der zu Ihrem Budget passt": "foreign_ui_label",
        # System-string variants for "weeks_2_4" are spliced in at this
        # position, between the literal entries.
        **build_system_rewrite_candidates(("weeks_2_4",)),
        "Bereit, mit der Business-Website zu starten?": "foreign_ui_label",
    },
    "it": {
        "Planificación clara": "foreign_ui_label",
        "Mehrsprachiger Rollout-Plan": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "ru": {
        "Base prête pour le SEO": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
}
|
||||
|
||||
# fragment -> {locale code: suggested replacement}. validate_patterns() looks
# a detected fragment up here by exact key and falls back to "" when either
# the fragment or the locale is missing.
#
# NOTE(review): the last two keys are whole sentences, while the detection
# fragments in GLOBAL_BAD_PATTERNS / LOCALE_FORBIDDEN are shorter prefixes of
# them, so the exact-key lookup in validate_patterns() never reaches these two
# entries. Presumably another consumer looks them up by full field value --
# confirm.
KNOWN_REPLACEMENTS = {
    "Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        "fr": "Planifier l’entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l’entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    # The "Plan ..." variants carry the same suggestions as the bare labels.
    "Plan Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        "fr": "Planifier l’entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Plan Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l’entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    "Mostrar los servicios": {
        "es": "Mostrar los servicios",
        "it": "Vedi servizi",
        "pt": "Ver serviços",
        "ru": "Показать услуги",
    },
    "Correo electrónico": {
        "pt": "E-mail",
        "ru": "Электронная почта",
    },
    'Unternehmen" è tedesco, non olandese. La traduzione spagnola di "Unternehmen" è "empresa".': {
        "pt": "Empresa",
        "ru": "Компания",
    },
    'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco (non in olandese), la traduzione in spagnolo è: "Consulta inicial sin compromiso".': {
        "it": "Senza impegno",
        "pt": "Sem compromisso",
        "ru": "Без обязательств",
        "es": "Consulta inicial sin compromiso",
    },
}
|
||||
|
||||
|
||||
def _contains_fragment(text: str, fragment: str) -> bool:
|
||||
if re.fullmatch(r"[\wÀ-ÿ-]+", fragment, flags=re.UNICODE):
|
||||
pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(fragment)}(?![\wÀ-ÿ-])", re.UNICODE)
|
||||
return bool(pattern.search(text))
|
||||
return fragment in text
|
||||
|
||||
|
||||
def validate_patterns(locale_code: str, field_path: str, normalized: str):
    """Collect pattern-based content issues for one normalized field value.

    Five checks run in a fixed order and the returned list keeps that order:

    1. ``GLOBAL_BAD_PATTERNS`` fragments -> ``"known_bad_pattern"``.
    2. ``LOCALE_FORBIDDEN[locale_code]`` fragments ->
       ``"wrong_language_fragment"``.
    3. The whole value is a ``GENERIC_BADGE_LABELS`` entry and is not the
       canonical system string for the locale -> ``"generic_badge_label"``.
    4. ``GLOBAL_REWRITE_CANDIDATES`` fragments, reported under their mapped
       issue type unless the fragment is the canonical system string for the
       locale.
    5. ``LOCALE_REWRITE_CANDIDATES[locale_code]`` fragments, reported under
       their mapped issue type.

    The same fragment may be reported by more than one check.

    Args:
        locale_code: Locale of the value; unknown locales simply have no
            locale-specific fragments.
        field_path: Passed through to ``make_issue`` to identify the field.
        normalized: The field text to inspect.

    Returns:
        A list of the objects produced by ``make_issue``, possibly empty.
    """
    collected = []

    def record(issue_type, piece, suggestion):
        collected.append(make_issue(issue_type, field_path, piece, suggestion))

    def known_suggestion(piece):
        # Keyed by the exact fragment, then by locale; "" when either is missing.
        return KNOWN_REPLACEMENTS.get(piece, {}).get(locale_code, "")

    # 1. Fragments that are wrong in every locale.
    for piece in GLOBAL_BAD_PATTERNS:
        if not _contains_fragment(normalized, piece):
            continue
        record("known_bad_pattern", piece, known_suggestion(piece))

    # 2. Fragments that must not appear in this particular locale.
    for piece in LOCALE_FORBIDDEN.get(locale_code, ()):
        if not _contains_fragment(normalized, piece):
            continue
        record("wrong_language_fragment", piece, known_suggestion(piece))

    # 3. Whole-value match against generic badge labels.
    is_generic_badge = normalized in GENERIC_BADGE_LABELS
    if is_generic_badge and not is_canonical_system_string(locale_code, normalized):
        record(
            "generic_badge_label",
            normalized,
            system_string_replacement(locale_code, normalized),
        )

    # 4. System-string variants that should be rewritten, except the variant
    #    that is canonical for this locale.
    for piece, issue_type in GLOBAL_REWRITE_CANDIDATES.items():
        if not _contains_fragment(normalized, piece):
            continue
        if is_canonical_system_string(locale_code, piece):
            continue
        record(issue_type, piece, system_string_replacement(locale_code, piece))

    # 5. Locale-specific rewrite candidates.
    # NOTE(review): unlike check 4 there is no is_canonical_system_string()
    # skip here, although the "fr" table also splices in system-string
    # candidates -- confirm whether that asymmetry is intended.
    for piece, issue_type in LOCALE_REWRITE_CANDIDATES.get(locale_code, {}).items():
        if not _contains_fragment(normalized, piece):
            continue
        record(issue_type, piece, system_string_replacement(locale_code, piece))

    return collected
|
||||
Reference in New Issue
Block a user