Files
mandelstudio/mandelblog_content_guard/validators/rules/patterns.py

270 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from ...types import make_issue
from ...system_strings import (
build_system_rewrite_candidates,
is_canonical_system_string,
system_string_replacement,
)
# Text fragments that validate_patterns() reports as "known_bad_pattern"
# regardless of locale. Tuple order is the order in which issues are emitted.
# The entries look like leaked translation-tool commentary and untranslated
# call-to-action labels -- presumably collected from real content; confirm
# with the content team before pruning.
# NOTE(review): some entries are substrings of others ("The Spanish
# translation" / "The Spanish translation of", "Starter intake" /
# "Plan Starter intake"), so one offending text yields several issues.
GLOBAL_BAD_PATTERNS = (
    "The Spanish translation",
    "The Spanish translation of",
    "As the input",
    "The input",
    "Poiché l'input",
    'Unternehmen" è tedesco',
    "Support anzeigen",
    "Starter intake",
    "Business intake",
    "Plan Starter intake",
    "Plan Business intake",
    "Plan de admisión",
    # Single-token entry: matched only as a standalone token, not inside
    # a longer word.
    "None",
)
# Per-locale fragments that validate_patterns() reports as
# "wrong_language_fragment". Keys are the locale codes passed to
# validate_patterns(); a locale without an entry gets no checks from this
# table. Fragments that also appear in GLOBAL_BAD_PATTERNS are reported twice,
# once per issue type, because the two tables are scanned independently.
# NOTE(review): "it" lists "Questions fréquentes" where the other locales list
# "Questions fréquemment posées" -- confirm whether that difference is intended.
LOCALE_FORBIDDEN = {
    "nl": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Plan de admisión"),
    "en": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión"),
    "de": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión"),
    "fr": ("Starter intake", "Business intake", "Poiché", "Correo electrónico", "Mostrar los servicios", "Plan de admisión", "Support anzeigen"),
    "es": ("Poiché", 'Unternehmen" è tedesco', "Support anzeigen", "Questions fréquemment posées"),
    "it": ("Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios", "Questions fréquentes", "Plan de admisión", "Correo electrónico"),
    "pt": ("Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios", "Correo electrónico", 'Unternehmen" è tedesco', "Questions fréquemment posées"),
    "ru": ("Poiché l'input", "Consulta inicial sin compromiso", "Correo electrónico", 'Unternehmen" è tedesco', "Mostrar los servicios"),
}
# Literal values that stand in for "no content".
# NOTE(review): not referenced anywhere in the visible part of this module;
# presumably imported by other validators -- confirm before removing.
PLACEHOLDER_VALUES = {"None", "-", "N/A", "null"}
# Badge texts considered too generic. validate_patterns() compares the WHOLE
# normalized value against this set (exact, case-sensitive membership) and
# reports "generic_badge_label" unless the value is a canonical system string
# for the locale being validated.
GENERIC_BADGE_LABELS = {
    "New",
    "Popular",
    "PLAN",
    "PIANO",
    "SERVICES",
}
# Rewrite candidates checked for every locale. validate_patterns() iterates
# this mapping as (fragment, issue_type) pairs and skips a fragment when it is
# the canonical system string for the locale being validated.
# The content comes entirely from build_system_rewrite_candidates(), called
# with the system-string keys below.
# NOTE(review): presumably the helper expands each key into its known
# translations mapped to an issue type -- confirm in system_strings.
GLOBAL_REWRITE_CANDIDATES = {
    **build_system_rewrite_candidates(
        (
            "days_label",
            "average_delivery",
            "response_time",
            "without_commitment",
            "transparent_label",
            "weeks_1_2",
            "customer_reviews",
            "editable_label",
            "core_pages_label",
            "detailed_page_structure",
            "business_process_cta",
            "multilingual_rollout",
            "customization_integrations",
            "transparent_investment",
        )
    ),
}
# Per-locale rewrite candidates: locale code -> {fragment: issue type}.
# validate_patterns() reports every fragment found in the text under the
# mapped issue type ("foreign_ui_label", "weak_marketing_copy" or
# "mixed_locale_heading") and asks system_string_replacement() for the
# suggested text. Unlike the global candidates, these entries are NOT skipped
# when the fragment is a canonical system string for the locale.
# Locales without an entry here (e.g. "nl") get no locale-specific candidates.
LOCALE_REWRITE_CANDIDATES = {
    "en": {
        "Service packages (from) Transparent starting points.": "foreign_ui_label",
        "Frequently Asked Questions Transparent about planning, approach, and management.": "foreign_ui_label",
        "Transparent investment": "foreign_ui_label",
    },
    "de": {
        "New": "weak_marketing_copy",
        "Intakegespräch": "weak_marketing_copy",
        "SEO-ready basis": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
        "Continuous Verbesserung": "foreign_ui_label",
        "Was du bekommst": "weak_marketing_copy",
        "Einführungsmeeting": "weak_marketing_copy",
        "Starter Website": "weak_marketing_copy",
        "Business Website": "weak_marketing_copy",
        "Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.": "foreign_ui_label",
    },
    "es": {
        "Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "pt": {
        "Siti web e negozi online": "mixed_locale_heading",
        "Caso de cliente en directo": "weak_marketing_copy",
        "El primer proyecto de producción finalizado con éxito.": "weak_marketing_copy",
        "Más sobre el proceso": "foreign_ui_label",
        "Modifiez simplement vous-même.": "foreign_ui_label",
        "Opciones de la tienda web": "foreign_ui_label",
        "Planes de soporte": "foreign_ui_label",
        "Multilingüe": "foreign_ui_label",
        "Unsere Serviços": "mixed_locale_heading",
        "Elija el camino": "mixed_locale_heading",
        "Début en direct": "foreign_ui_label",
        "Demande d'admission initiale": "foreign_ui_label",
        "Site Web d'Entreprise": "foreign_ui_label",
        "Hablar sobre el proceso empresarial": "foreign_ui_label",
        "Mise en place de boutique en ligne": "foreign_ui_label",
        "Maintenance & gestion": "foreign_ui_label",
        "Afficher le plan de soutien": "foreign_ui_label",
        "Introducción multilingüe": "foreign_ui_label",
        "Forfaits de services (à partir de)": "mixed_locale_heading",
        "Kundenschätzung": "foreign_ui_label",
        "Gestisca lei stesso il contenuto": "foreign_ui_label",
        "Optimizado para móviles": "foreign_ui_label",
        "Schnell online mit einer starken Basis": "weak_marketing_copy",
        "La entrada \"Unterstützung oder Erweiterung\"": "foreign_ui_label",
        "Suivi + corrections": "foreign_ui_label",
        "Mejoras mensuales": "foreign_ui_label",
        "¿A qué velocidad puede comenzar?": "foreign_ui_label",
        "¿Puedo editar textos e imágenes yo mismo?": "foreign_ui_label",
        "Transparente sobre o planejamento, o processo e a gestão.": "foreign_ui_label",
        "Ab 2.250 €": "foreign_ui_label",
        "Boutique en ligne": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
    },
    "fr": {
        "Erstes Produktionsprojekt erfolgreich abgeschlossen.": "weak_marketing_copy",
        "Von Kickoff bis zum Launch mit einem klaren Umfang.": "foreign_ui_label",
        "Demande d'admission initiale": "weak_marketing_copy",
        "Entretien d'accueil": "weak_marketing_copy",
        "Vraag over diensten": "foreign_ui_label",
        "Konkrete erste Schätzung": "foreign_ui_label",
        "Ansatz, der zu Ihrem Budget passt": "foreign_ui_label",
        # Helper-generated entries are merged in at this position; dict
        # order is the order in which issues are reported.
        **build_system_rewrite_candidates(("weeks_2_4",)),
        "Bereit, mit der Business-Website zu starten?": "foreign_ui_label",
    },
    "it": {
        "Planificación clara": "foreign_ui_label",
        "Mehrsprachiger Rollout-Plan": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "ru": {
        "Base prête pour le SEO": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
}
# Suggested replacement text per offending fragment:
#     fragment -> {locale code: replacement}
# validate_patterns() looks a fragment up here and falls back to "" when
# either the fragment or the locale is missing, so partial locale coverage is
# fine.
# NOTE(review): the two long sentence keys at the end do not equal any of the
# short fragments validate_patterns() scans for, so that function never hits
# them; presumably another consumer matches on the full text -- confirm.
KNOWN_REPLACEMENTS = {
    "Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        # French elides "le entretien" to "l'entretien"; the apostrophe was
        # missing from the suggestion shown to editors.
        "fr": "Planifier l'entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l'entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    # The "Plan ..." variants deliberately mirror the entries above so the
    # longer label gets the same suggestion.
    "Plan Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        "fr": "Planifier l'entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Plan Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l'entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    "Mostrar los servicios": {
        "es": "Mostrar los servicios",
        "it": "Vedi servizi",
        "pt": "Ver serviços",
        "ru": "Показать услуги",
    },
    "Correo electrónico": {"pt": "E-mail", "ru": "Электронная почта"},
    'Unternehmen" è tedesco, non olandese. La traduzione spagnola di "Unternehmen" è "empresa".': {
        "pt": "Empresa",
        "ru": "Компания",
    },
    'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco (non in olandese), la traduzione in spagnolo è: "Consulta inicial sin compromiso".': {
        "it": "Senza impegno",
        "pt": "Sem compromisso",
        "ru": "Без обязательств",
        "es": "Consulta inicial sin compromiso",
    },
}
def _contains_fragment(text: str, fragment: str) -> bool:
if re.fullmatch(r"[\wÀ-ÿ-]+", fragment, flags=re.UNICODE):
pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(fragment)}(?![\wÀ-ÿ-])", re.UNICODE)
return bool(pattern.search(text))
return fragment in text
def validate_patterns(locale_code: str, field_path: str, normalized: str):
    """Collect pattern-based content issues for one field value.

    Checks run in a fixed order and every hit is appended, so the same text
    can produce several issues:

    1. ``GLOBAL_BAD_PATTERNS``       -> ``"known_bad_pattern"``
    2. ``LOCALE_FORBIDDEN[locale]``  -> ``"wrong_language_fragment"``
    3. ``GENERIC_BADGE_LABELS``      -> ``"generic_badge_label"`` (whole-value
       match, skipped for canonical system strings)
    4. ``GLOBAL_REWRITE_CANDIDATES`` -> the mapped issue type (skipped for
       canonical system strings)
    5. ``LOCALE_REWRITE_CANDIDATES[locale]`` -> the mapped issue type

    Args:
        locale_code: Locale of the content; selects the per-locale tables and
            the replacement suggestions.
        field_path: Passed through to ``make_issue`` unchanged.
        normalized: The text to inspect.

    Returns:
        A list of the objects produced by ``make_issue``, possibly empty.
    """
    found = []

    # Steps 1 and 2 differ only in the issue type and the fragment source;
    # both take their suggestion from KNOWN_REPLACEMENTS ("" when unknown).
    fragment_passes = (
        ("known_bad_pattern", GLOBAL_BAD_PATTERNS),
        ("wrong_language_fragment", LOCALE_FORBIDDEN.get(locale_code, ())),
    )
    for issue_code, fragments in fragment_passes:
        for piece in fragments:
            if not _contains_fragment(normalized, piece):
                continue
            suggestion = KNOWN_REPLACEMENTS.get(piece, {}).get(locale_code, "")
            found.append(make_issue(issue_code, field_path, piece, suggestion))

    # Step 3: the whole value must equal a generic badge label.
    is_generic_badge = normalized in GENERIC_BADGE_LABELS
    if is_generic_badge and not is_canonical_system_string(locale_code, normalized):
        found.append(
            make_issue(
                "generic_badge_label",
                field_path,
                normalized,
                system_string_replacement(locale_code, normalized),
            )
        )

    # Step 4: global candidates, unless the fragment is already the canonical
    # wording for this locale.
    for piece, issue_code in GLOBAL_REWRITE_CANDIDATES.items():
        if not _contains_fragment(normalized, piece):
            continue
        if is_canonical_system_string(locale_code, piece):
            continue
        found.append(
            make_issue(
                issue_code,
                field_path,
                piece,
                system_string_replacement(locale_code, piece),
            )
        )

    # Step 5: locale-specific candidates; no canonical-string exemption here.
    locale_candidates = LOCALE_REWRITE_CANDIDATES.get(locale_code, {})
    found.extend(
        make_issue(
            issue_code,
            field_path,
            piece,
            system_string_replacement(locale_code, piece),
        )
        for piece, issue_code in locale_candidates.items()
        if _contains_fragment(normalized, piece)
    )

    return found