Add multilingual audit CI pipeline + extract mandelblog_content_guard
This commit is contained in:
15
mandelblog_content_guard/normalizers/__init__.py
Normal file
15
mandelblog_content_guard/normalizers/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from .de import normalize_de_text
|
||||
from .en import normalize_en_text
|
||||
from .es import normalize_es_text
|
||||
from .it import normalize_it_text
|
||||
from .nl import normalize_nl_text
|
||||
from .ru import normalize_ru_text
|
||||
|
||||
# Names re-exported by this package: the normalizer function imported from
# each locale module above.
__all__ = [
    "normalize_de_text",
    "normalize_en_text",
    "normalize_es_text",
    "normalize_it_text",
    "normalize_nl_text",
    "normalize_ru_text",
]
|
||||
58
mandelblog_content_guard/normalizers/de.py
Normal file
58
mandelblog_content_guard/normalizers/de.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-text corrections for German content.  A key only applies when the
# complete input string equals it; the value is then returned as-is.
DE_LINE_REPLACEMENTS = {
    "Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.":
        "Häufig gestellte Fragen Klarheit über Planung, Vorgehensweise und Management.",
    "Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.":
        "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
    "Einführungsmeeting planen Dienstleistungen anzeigen Verbindlich und klar Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.":
        "Erstgespräch planen · Dienstleistungen anzeigen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
    "Steuern 0,00 € Korb ansehen Kasse Kontakt KONTAKT Lass uns dein Projekt konkret machen Einführungsmeeting planen Dienstleistungen anzeigen So können Sie Kontakt aufnehmen Wählen Sie die Route, die zu Ihrer Frage passt.":
        "Steuern 0,00 € Korb ansehen Kasse Kontakt KONTAKT Lassen Sie uns Ihr Projekt konkret machen Erstgespräch planen Dienstleistungen anzeigen So können Sie Kontakt aufnehmen Wählen Sie den Weg, der zu Ihrer Frage passt.",
    "Steuern 0,00 € Korb ansehen Kasse Starter Website PLAN Starter Website Plan Starter-Gespräch planen Alle Dienstleistungen anzeigen Was du bekommst Startseite + Kernseiten Professionelle Basis, die sofort Vertrauen schafft.":
        "Steuern 0,00 € Korb ansehen Kasse Starter-Website PLAN Starter-Website Starter-Gespräch planen Alle Dienstleistungen anzeigen Was Sie erhalten Startseite + Kernseiten Professionelle Basis, die sofort Vertrauen schafft.",
    "Steuern 0,00 € Korb ansehen Kasse Business Website PLAN Business Website Plan Beratungsgespräch planen Alle Dienstleistungen anzeigen Was du bekommst Detailliertes Seitenlayout Mehr Platz für Dienstleistungen, Fälle und Lead-Flows.":
        "Steuern 0,00 € Korb ansehen Kasse Business-Website PLAN Business-Website Beratungsgespräch planen Alle Dienstleistungen anzeigen Was Sie erhalten Detailliertes Seitenlayout Mehr Platz für Dienstleistungen, Referenzen und Lead-Flows.",
}
|
||||
|
||||
# Substring-level corrections for German content (source -> replacement).
# Insertion order is preserved exactly as in the original definition.
DE_PHRASE_REPLACEMENTS = {
    # Single words and untranslated fragments.
    "New": "Neu",
    "Einführungsmeeting": "Erstgespräch",
    "Intakegespräch": "Erstgespräch",
    "SEO-ready basis": "SEO-optimierte Basis",
    "Sales-ready mit skalierbarem Stack": "Verkaufsbereit mit skalierbarer Architektur",
    "Continuous Verbesserung": "Kontinuierliche Verbesserung",
    "Was du bekommst": "Was Sie erhalten",
    # Product names.
    "Starter Website": "Starter-Website",
    "Business Website": "Business-Website",
    # NOTE(review): source and replacement are identical here, so this entry
    # currently changes nothing.
    "Support & Wachstum": "Support & Wachstum",
    # Longer sentences and taglines.
    "Lass uns dein Projekt konkret machen": "Lassen Sie uns Ihr Projekt konkret machen",
    "Wählen Sie die Route, die zu Ihrer Frage passt.": "Wählen Sie den Weg, der zu Ihrer Frage passt.",
    "Verbindlich und klar": "Unverbindliches Gespräch mit klarem Angebot",
    "Unverbindliches Gespräch, klares Angebot": "Unverbindliches Gespräch mit klarem Angebot",
}
|
||||
|
||||
|
||||
def _apply_boundary_replacements(text: str, replacements: dict[str, str]) -> str:
|
||||
cleaned = text
|
||||
phrase_replacements = {}
|
||||
token_replacements = {}
|
||||
for source, target in replacements.items():
|
||||
if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
|
||||
token_replacements[source] = target
|
||||
else:
|
||||
phrase_replacements[source] = target
|
||||
|
||||
for source, target in sorted(phrase_replacements.items(), key=lambda item: len(item[0]), reverse=True):
|
||||
cleaned = cleaned.replace(source, target)
|
||||
|
||||
for source, target in sorted(token_replacements.items(), key=lambda item: len(item[0]), reverse=True):
|
||||
pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
|
||||
cleaned = pattern.sub(target, cleaned)
|
||||
return cleaned
|
||||
|
||||
|
||||
def normalize_de_text(text: str, field_path: str = "") -> str:
    """Return *text* with the German copy corrections applied.

    If the complete text equals a key of ``DE_LINE_REPLACEMENTS`` the mapped
    value is returned directly.  Otherwise the entries of
    ``DE_PHRASE_REPLACEMENTS`` are applied through
    ``_apply_boundary_replacements``.  Whitespace is not collapsed or
    stripped by this function.

    Args:
        text: The content string to normalize.
        field_path: Accepted for interface compatibility; not used here.

    Returns:
        The corrected string.
    """
    # Direct dictionary lookup instead of comparing against every key in turn.
    if text in DE_LINE_REPLACEMENTS:
        return DE_LINE_REPLACEMENTS[text]
    return _apply_boundary_replacements(text, DE_PHRASE_REPLACEMENTS)
|
||||
28
mandelblog_content_guard/normalizers/en.py
Normal file
28
mandelblog_content_guard/normalizers/en.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-text corrections for English content.  A key only applies when the
# complete input string equals it; the value is then returned as-is.
EN_LINE_REPLACEMENTS = {
    "Service packages (from) Transparent starting points.":
        "Service packages (from) Clear starting points.",
    "Frequently Asked Questions Transparent about planning, approach, and management.":
        "Frequently Asked Questions Clear guidance on planning, approach, and management.",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Book business call Ready to start with Business Website?":
        "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Book business call Ready to start with Business Website?",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Book starter call Ready to start with Starter Website?":
        "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Book starter call Ready to start with Starter Website?",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Request support plan Ready to start with Support & Growth?":
        "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Request support plan Ready to start with Support & Growth?",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Start webshop project Ready to start with Webshop?":
        "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Start webshop project Ready to start with Webshop?",
}
|
||||
|
||||
# Substring-level corrections for English content (source -> replacement).
EN_PHRASE_REPLACEMENTS = {
    "Transparent investment":
        "Transparent pricing",
    "Transparent about planning, approach, and management.":
        "Clear guidance on planning, approach, and management.",
    "Transparent starting points.":
        "Clear starting points.",
}
|
||||
|
||||
|
||||
def normalize_en_text(text: str, field_path: str = "") -> str:
    """Return *text* with the English copy corrections applied.

    A text that exactly equals a key of ``EN_LINE_REPLACEMENTS`` is swapped
    for its mapped value and returned immediately.  Otherwise every entry of
    ``EN_PHRASE_REPLACEMENTS`` is applied as a plain substring replacement,
    longest source first, and the result has whitespace runs collapsed to a
    single space with leading/trailing whitespace removed.

    Args:
        text: The content string to normalize.
        field_path: Accepted for interface compatibility; not used here.

    Returns:
        The corrected string.
    """
    try:
        return EN_LINE_REPLACEMENTS[text]
    except KeyError:
        pass  # not a known whole-text case; continue with phrase fixes

    result = text
    for phrase in sorted(EN_PHRASE_REPLACEMENTS, key=len, reverse=True):
        result = result.replace(phrase, EN_PHRASE_REPLACEMENTS[phrase])

    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()
|
||||
31
mandelblog_content_guard/normalizers/es.py
Normal file
31
mandelblog_content_guard/normalizers/es.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-text corrections for Spanish content.  A key only applies when the
# complete input string equals it; the value is then returned as-is.
ES_LINE_REPLACEMENTS = {
    "Transparente sobre la planificación, el proceso y la gestión.":
        "Transparencia sobre la planificación, el proceso y la gestión.",
    "<p>Transparente sobre la planificación, el proceso y la gestión.</p>":
        "<p>Transparencia sobre la planificación, el proceso y la gestión.</p>",
    "Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.":
        "Preguntas frecuentes Transparencia sobre la planificación, el proceso y la gestión.",
    "Preguntas frecuentes Transparenteee sobre la planificación, el proceso y la gestión.":
        "Preguntas frecuentes Transparencia sobre la planificación, el proceso y la gestión.",
    "Planificar la reunión inicial Mostrar los proyectos Unverbindliches Gespräch, klares Angebot Construimos sitios web y tiendas online rápidas que tu equipo puede gestionar sin complicaciones.":
        "Planificar la reunión inicial · Mostrar los proyectos · Conversación sin compromiso con propuesta clara. Construimos sitios web y tiendas online rápidas que tu equipo puede gestionar sin complicaciones.",
}
|
||||
|
||||
# Substring-level corrections for Spanish content (source -> replacement).
ES_PHRASE_REPLACEMENTS = {
    # Misspelled / untranslated variants of "Transparente".
    "Transparenteee": "Transparente",
    "Transparent": "Transparente",
    # German tagline found in Spanish content.
    "Unverbindliches Gespräch, klares Angebot": "Conversación sin compromiso con propuesta clara",
}
|
||||
|
||||
|
||||
def normalize_es_text(text: str, field_path: str = "") -> str:
    """Return *text* with the Spanish copy corrections applied.

    A text that exactly equals a key of ``ES_LINE_REPLACEMENTS`` is swapped
    for its mapped value and returned immediately.  Otherwise the entries of
    ``ES_PHRASE_REPLACEMENTS`` are applied longest source first:

    * a source made up only of word characters, ``À-ÿ`` and hyphens is
      replaced only where it is not directly adjacent to another such
      character (so ``"Transparent"`` leaves ``"Transparente"`` alone);
    * any other source is replaced wherever it occurs as a substring.

    Finally whitespace runs are collapsed to one space and the result is
    stripped.

    Args:
        text: The content string to normalize.
        field_path: Accepted for interface compatibility; not used here.

    Returns:
        The corrected string.
    """
    if text in ES_LINE_REPLACEMENTS:
        return ES_LINE_REPLACEMENTS[text]

    cleaned = text
    for source, target in sorted(ES_PHRASE_REPLACEMENTS.items(), key=lambda item: len(item[0]), reverse=True):
        if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
            pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
            # A callable replacement inserts the target verbatim; a plain
            # string would have backslashes and group references in it
            # interpreted by re.
            cleaned = pattern.sub(lambda _match, literal=target: literal, cleaned)
        else:
            cleaned = cleaned.replace(source, target)
    return re.sub(r"\s+", " ", cleaned).strip()
|
||||
24
mandelblog_content_guard/normalizers/it.py
Normal file
24
mandelblog_content_guard/normalizers/it.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-text corrections for Italian content.  A key only applies when the
# complete input string equals it; the value is then returned as-is.
IT_LINE_REPLACEMENTS = {
    "Richiedi un piano di supporto Mostra i progetti Unverbindliches Gespräch, klares Angebot Realizziamo siti web e negozi online veloci che il tuo team può gestire in autonomia.":
        "Richiedi un piano di supporto · Mostra i progetti · Colloquio senza impegno con proposta chiara. Realizziamo siti web e negozi online veloci che il tuo team può gestire in autonomia.",
    "Dopo il colloquio iniziale Obiettivi chiari e tappe Planificación clara Transparente Investition Nome * Email * Azienda * Dettagli del progetto Richiedi un piano di supporto Pronto a iniziare con supporto e crescita?":
        "Dopo il colloquio iniziale Obiettivi chiari e tappe Pianificazione chiara Investimento trasparente Nome * Email * Azienda * Dettagli del progetto Richiedi un piano di supporto Pronto a iniziare con supporto e crescita?",
    "Mehrsprachiger Rollout-Plan Anpassung & Integrationen Integrazioni API, flussi di lavoro specifici e blocchi personalizzati adattati alla sua azienda.":
        "Piano di lancio multilingue Personalizzazioni e integrazioni Integrazioni API, flussi di lavoro specifici e blocchi personalizzati adattati alla sua azienda.",
}
|
||||
|
||||
# Substring-level corrections for Italian content (source -> replacement).
IT_PHRASE_REPLACEMENTS = {
    # Spanish fragment found in Italian content.
    "Planificación clara": "Pianificazione chiara",
    # German tagline found in Italian content.
    "Unverbindliches Gespräch, klares Angebot": "Colloquio senza impegno con proposta chiara",
}
|
||||
|
||||
|
||||
def normalize_it_text(text: str, field_path: str = "") -> str:
    """Return *text* with the Italian copy corrections applied.

    A text that exactly equals a key of ``IT_LINE_REPLACEMENTS`` is swapped
    for its mapped value and returned immediately.  Otherwise every entry of
    ``IT_PHRASE_REPLACEMENTS`` is applied as a plain substring replacement,
    longest source first, and the result has whitespace runs collapsed to a
    single space with leading/trailing whitespace removed.

    Args:
        text: The content string to normalize.
        field_path: Accepted for interface compatibility; not used here.

    Returns:
        The corrected string.
    """
    try:
        return IT_LINE_REPLACEMENTS[text]
    except KeyError:
        pass  # not a known whole-text case; continue with phrase fixes

    result = text
    for phrase in sorted(IT_PHRASE_REPLACEMENTS, key=len, reverse=True):
        result = result.replace(phrase, IT_PHRASE_REPLACEMENTS[phrase])

    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()
|
||||
15
mandelblog_content_guard/normalizers/nl.py
Normal file
15
mandelblog_content_guard/normalizers/nl.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Substring-level corrections for Dutch content (source -> replacement).
# NOTE(review): the only entry maps "PLAN" to itself, so it currently changes
# nothing.
NL_PHRASE_REPLACEMENTS = {"PLAN": "PLAN"}
|
||||
|
||||
|
||||
def normalize_nl_text(text: str, field_path: str = "") -> str:
    """Return *text* with the Dutch copy corrections applied.

    Every entry of ``NL_PHRASE_REPLACEMENTS`` is applied as a plain substring
    replacement in the mapping's own order; afterwards whitespace runs are
    collapsed to a single space and leading/trailing whitespace is removed.

    Args:
        text: The content string to normalize.
        field_path: Accepted for interface compatibility; not used here.

    Returns:
        The corrected string.
    """
    result = text
    for phrase in NL_PHRASE_REPLACEMENTS:
        result = result.replace(phrase, NL_PHRASE_REPLACEMENTS[phrase])

    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()
|
||||
24
mandelblog_content_guard/normalizers/ru.py
Normal file
24
mandelblog_content_guard/normalizers/ru.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-text corrections for Russian content.  A key only applies when the
# complete input string equals it; the value is then returned as-is.
RU_LINE_REPLACEMENTS = {
    "План многоязычного запуска Anpassung & Integrationen Интеграции API, специфические рабочие процессы и индивидуальные блоки, адаптированные под вашу компанию.":
        "План многоязычного запуска Настройка и интеграции Интеграции API, специфические рабочие процессы и индивидуальные блоки, адаптированные под вашу компанию.",
    "Запланировать звонок по бизнес-сайту Detailliertes Seitenlayout Разделы, ориентированные на конверсию Base prête pour le SEO Boutique en ligne Для проектов с товарами, оплатой и дальнейшим развитием e-commerce.":
        "Запланировать звонок по бизнес-сайту Детальная структура страниц Разделы, ориентированные на конверсию Основа, готовая для SEO Интернет-магазин Для проектов с товарами, оплатой и дальнейшим развитием e-commerce.",
    "Связаться с нами Посмотреть проекты Unverbindliches Gespräch, klares Angebot Мы создаём быстрые сайты и интернет-магазины, которыми ваша команда может управлять самостоятельно.":
        "Связаться с нами · Посмотреть проекты · Без обязательств, понятное предложение. Мы создаём быстрые сайты и интернет-магазины, которыми ваша команда может управлять самостоятельно.",
}
|
||||
|
||||
# Substring-level corrections for Russian content (source -> replacement).
RU_PHRASE_REPLACEMENTS = {
    # French fragment found in Russian content.
    "Base prête pour le SEO": "Основа, готовая для SEO",
    # German tagline found in Russian content.
    "Unverbindliches Gespräch, klares Angebot": "Без обязательств, понятное предложение",
}
|
||||
|
||||
|
||||
def normalize_ru_text(text: str, field_path: str = "") -> str:
    """Return *text* with the Russian copy corrections applied.

    A text that exactly equals a key of ``RU_LINE_REPLACEMENTS`` is swapped
    for its mapped value and returned immediately.  Otherwise every entry of
    ``RU_PHRASE_REPLACEMENTS`` is applied as a plain substring replacement,
    longest source first, and the result has whitespace runs collapsed to a
    single space with leading/trailing whitespace removed.

    Args:
        text: The content string to normalize.
        field_path: Accepted for interface compatibility; not used here.

    Returns:
        The corrected string.
    """
    try:
        return RU_LINE_REPLACEMENTS[text]
    except KeyError:
        pass  # not a known whole-text case; continue with phrase fixes

    result = text
    for phrase in sorted(RU_PHRASE_REPLACEMENTS, key=len, reverse=True):
        result = result.replace(phrase, RU_PHRASE_REPLACEMENTS[phrase])

    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()
|
||||
Reference in New Issue
Block a user