Add multilingual audit CI pipeline + extract mandelblog_content_guard
This commit is contained in:
44
Jenkinsfile
vendored
44
Jenkinsfile
vendored
@@ -9,6 +9,10 @@ pipeline {
|
||||
environment {
|
||||
PYENVPIPELINE_VIRTUALENV = '1'
|
||||
GIT_SSH_COMMAND = 'ssh -o StrictHostKeyChecking=accept-new'
|
||||
STAGING_AUDIT_HOST = 'root@49.12.204.96'
|
||||
STAGING_AUDIT_PROJECT_DIR = '/home/www-mandelstudio/mandelstudio'
|
||||
STAGING_AUDIT_MANAGE = '/var/lib/virtualenv/mandelstudio/bin/manage.py'
|
||||
STAGING_AUDIT_SSH_CREDENTIALS_ID = 'staging-root-ssh'
|
||||
}
|
||||
|
||||
stages {
|
||||
@@ -74,7 +78,7 @@ pipeline {
|
||||
steps {
|
||||
sh '''
|
||||
. .venv/bin/activate
|
||||
python -m compileall -q setup.py mandelstudio
|
||||
python -m compileall -q setup.py mandelstudio mandelblog_content_guard
|
||||
'''
|
||||
}
|
||||
post {
|
||||
@@ -86,6 +90,40 @@ pipeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Deploy Staging') {
|
||||
steps {
|
||||
echo 'Triggering staging deploy for mandelstudio after successful CI build.'
|
||||
build job: 'deploy-project-stg',
|
||||
wait: true,
|
||||
propagate: true,
|
||||
parameters: [string(name: 'PROJECT_NAME', value: 'mandelstudio')]
|
||||
}
|
||||
}
|
||||
stage('Post-Deploy Multilingual Audit') {
|
||||
options {
|
||||
timeout(time: 10, unit: 'MINUTES')
|
||||
}
|
||||
steps {
|
||||
sh 'mkdir -p artifacts'
|
||||
withCredentials([sshUserPrivateKey(credentialsId: env.STAGING_AUDIT_SSH_CREDENTIALS_ID, keyFileVariable: 'STAGING_SSH_KEYFILE')]) {
|
||||
sh './scripts/run_remote_multilingual_audit.sh'
|
||||
}
|
||||
script {
|
||||
int status = sh(script: 'python3 scripts/multilingual_audit_ci.py --json artifacts/multilingual-audit.json', returnStatus: true)
|
||||
if (status == 2) {
|
||||
error('Block-level multilingual issues detected or audit execution failed.')
|
||||
}
|
||||
if (status == 1) {
|
||||
unstable('Warn-level multilingual issues detected.')
|
||||
}
|
||||
}
|
||||
}
|
||||
post {
|
||||
always {
|
||||
archiveArtifacts artifacts: 'artifacts/multilingual-audit.json', onlyIfSuccessful: false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
post {
|
||||
always {
|
||||
@@ -97,10 +135,6 @@ pipeline {
|
||||
. .venv/bin/activate
|
||||
pip install coverage
|
||||
'''
|
||||
echo 'Triggering staging deploy for mandelstudio after successful CI build.'
|
||||
build job: 'deploy-project-stg',
|
||||
wait: false,
|
||||
parameters: [string(name: 'PROJECT_NAME', value: 'mandelstudio')]
|
||||
}
|
||||
failure {
|
||||
emailext subject: "JENKINS-NOTIFICATION: ${currentBuild.currentResult}: Job '${env.JOB_NAME} #${env.BUILD_NUMBER}'",
|
||||
|
||||
62
Jenkinsfile.multilingual-nightly
Normal file
62
Jenkinsfile.multilingual-nightly
Normal file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env groovy
|
||||
|
||||
// Nightly multilingual audit pipeline.
// Checks out master manually, runs the remote audit script against the
// staging host, then evaluates the resulting JSON report (compared with the
// report kept from the previous run) and maps the exit code to a build result.
pipeline {
  agent { label 'external_pool' }
  triggers {
    // Once per night during the 02:00 hour; 'H' lets Jenkins pick the minute.
    cron('H 2 * * *')
  }
  options {
    disableConcurrentBuilds()
    // The checkout is done by hand in the 'Checkout' stage below.
    skipDefaultCheckout(true)
  }
  environment {
    STAGING_AUDIT_HOST = 'root@49.12.204.96'
    STAGING_AUDIT_PROJECT_DIR = '/home/www-mandelstudio/mandelstudio'
    STAGING_AUDIT_MANAGE = '/var/lib/virtualenv/mandelstudio/bin/manage.py'
    STAGING_AUDIT_SSH_CREDENTIALS_ID = 'staging-root-ssh'
  }
  stages {
    stage('Checkout') {
      steps {
        withCredentials([sshUserPrivateKey(credentialsId: 'gitea-ssh', keyFileVariable: 'GIT_KEYFILE')]) {
          // Reuse an existing clone when present, otherwise clone fresh; either
          // way end up on a detached checkout of origin/master.
          sh '''
            export GIT_SSH_COMMAND="ssh -i $GIT_KEYFILE -o StrictHostKeyChecking=accept-new"
            if [ -d .git ]; then
              git remote set-url origin ssh://git@git.mandelblog.com:2222/salt/mandelstudio.git
              git fetch --tags --force --progress origin +refs/heads/master:refs/remotes/origin/master
            else
              git clone ssh://git@git.mandelblog.com:2222/salt/mandelstudio.git .
              git fetch --tags --force --progress origin +refs/heads/master:refs/remotes/origin/master
            fi
            git checkout -f refs/remotes/origin/master
          '''
        }
      }
    }
    stage('Nightly Multilingual Audit') {
      options {
        timeout(time: 10, unit: 'MINUTES')
      }
      steps {
        // Keep the report left in the workspace by the previous run so the CI
        // script can compare against it; a missing report is not an error.
        sh 'mkdir -p artifacts && [ -f artifacts/multilingual-audit.json ] && cp artifacts/multilingual-audit.json artifacts/previous-multilingual-audit.json || true'
        withCredentials([sshUserPrivateKey(credentialsId: env.STAGING_AUDIT_SSH_CREDENTIALS_ID, keyFileVariable: 'STAGING_SSH_KEYFILE')]) {
          sh './scripts/run_remote_multilingual_audit.sh'
        }
        script {
          // Exit code contract used here: 2 -> fail the build, 1 -> mark unstable,
          // anything else -> leave the build result untouched.
          int status = sh(script: 'python3 scripts/multilingual_audit_ci.py --json artifacts/multilingual-audit.json --previous-json artifacts/previous-multilingual-audit.json', returnStatus: true)
          if (status == 2) {
            error('Block-level multilingual issues detected or audit execution failed.')
          }
          if (status == 1) {
            unstable('Warn-level multilingual issues detected.')
          }
        }
      }
      post {
        always {
          // allowEmptyArchive: when the audit fails before any report exists
          // (e.g. first run, SSH failure) archiving must not raise its own
          // "no artifacts found" error and bury the real cause of the failure.
          archiveArtifacts artifacts: 'artifacts/multilingual-audit.json,artifacts/previous-multilingual-audit.json', onlyIfSuccessful: false, allowEmptyArchive: true
        }
      }
    }
  }
}
|
||||
1
mandelblog_content_guard/__init__.py
Normal file
1
mandelblog_content_guard/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
default_app_config = "mandelblog_content_guard.apps.MandelblogContentGuardConfig"
|
||||
25
mandelblog_content_guard/agents/__init__.py
Normal file
25
mandelblog_content_guard/agents/__init__.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from .de import GermanAgent
|
||||
from .en import EnglishAgent
|
||||
from .es import SpanishAgent
|
||||
from .fr import FrenchAgent
|
||||
from .it import ItalianAgent
|
||||
from .nl import DutchAgent
|
||||
from .pt import PortugueseAgent
|
||||
from .ru import RussianAgent
|
||||
|
||||
# Maps a locale code to the agent class that handles it. Codes that are not
# listed here are served by BaseLanguageAgent (see get_language_agent).
AGENT_REGISTRY = {
    "nl": DutchAgent,
    "en": EnglishAgent,
    "de": GermanAgent,
    "fr": FrenchAgent,
    "es": SpanishAgent,
    "it": ItalianAgent,
    "pt": PortugueseAgent,
    "ru": RussianAgent,
}
|
||||
|
||||
|
||||
def get_language_agent(locale_code: str) -> BaseLanguageAgent:
    """Return a fresh agent instance for ``locale_code``.

    Locale codes without an entry in ``AGENT_REGISTRY`` get a plain
    ``BaseLanguageAgent``.
    """
    try:
        chosen = AGENT_REGISTRY[locale_code]
    except KeyError:
        chosen = BaseLanguageAgent
    return chosen()
|
||||
187
mandelblog_content_guard/agents/base.py
Normal file
187
mandelblog_content_guard/agents/base.py
Normal file
@@ -0,0 +1,187 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from django.utils.module_loading import import_string
|
||||
|
||||
from ..settings import get_rewrite_backend
|
||||
|
||||
|
||||
class BaseLanguageAgent:
    """Locale-aware cleanup and rewrite helper for website copy.

    Subclasses override the class attributes below (and optionally
    ``post_cleanup_text``) to supply locale-specific vocabulary, CTA texts
    and normalisation. An optional rewrite backend, resolved from the path
    returned by ``get_rewrite_backend()``, is called at the end of
    ``rewrite`` when configured.
    """

    # Locale code reported to the backend and used in the prompt.
    locale = "nl"
    # Tone / formality hints; only used when building the backend prompt.
    tone = "business"
    preferred_formality = "neutral"
    # keyword (matched against lowercased text) -> canonical CTA text.
    cta_defaults: dict[str, str] = {}
    # source text -> replacement, applied to every cleaned string.
    vocabulary_map: dict[str, str] = {}
    # field-path token -> {source text -> replacement}; a mapping is applied
    # when its token occurs in the lowercased field path.
    contextual_vocabulary_map: dict[str, dict[str, str]] = {}
    # (pattern, template) pairs; when a pattern matches the whole text, the
    # text is replaced by the template formatted with the named groups.
    cleanup_patterns: tuple[tuple[re.Pattern[str], str], ...] = (
        (
            re.compile(
                r"""^.*?\bis\s+(?:German|Spanish|French|Italian|Portuguese|Dutch),\s+not\s+Dutch.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\btranslation\s+from\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\btraducid[oa]\s+al\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^.*?\bперевод\s+с\s+.*?(?::\s*|\"\.\s*)(?P<quote>.+?)\"?\.?\s*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
        (
            re.compile(
                r"""^\s*La\s+entrada\s+\"?(?P<quote>.+?)\"?\s+está\s+en\s+alemán.*$""",
                re.IGNORECASE,
            ),
            "{quote}",
        ),
    )

    def __init__(self) -> None:
        """Resolve the optional rewrite backend once per instance."""
        self.backend = self._load_backend()

    def _load_backend(self):
        """Import and return the configured rewrite backend, or ``None``.

        The dotted path comes from ``get_rewrite_backend()``; a falsy path
        means no backend is configured.
        """
        backend_path = get_rewrite_backend()
        if not backend_path:
            return None
        return import_string(backend_path)

    def backend_prompt(self, field_path: str, text: str) -> str:
        """Build the instruction prompt handed to the rewrite backend."""
        return (
            f"Rewrite the following {self.locale} website copy for a small-business "
            f"website in a natural, professional, sales-driven tone. Preserve meaning, "
            f"remove translation artifacts, keep it concise, and do not add commentary.\n"
            f"Field: {field_path}\n"
            f"Locale: {self.locale}\n"
            f"Tone: {self.tone}\n"
            f"Formality: {self.preferred_formality}\n"
            f"Text: {text}"
        )

    def _contextual_replacements(self, field_path: str) -> dict[str, str]:
        """Merge every contextual mapping whose token occurs in ``field_path``.

        Matching is done on the lowercased path; later mappings override
        earlier ones for the same source text.
        """
        lowered = field_path.lower()
        replacements: dict[str, str] = {}
        for token, mapping in self.contextual_vocabulary_map.items():
            if token in lowered:
                replacements.update(mapping)
        return replacements

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Hook for locale-specific normalisation; the base class is a no-op."""
        return text

    def _apply_replacements(self, text: str, replacements: dict[str, str]) -> str:
        """Apply ``replacements`` (source -> target) to ``text``.

        Sources consisting of a single word-like token are replaced only where
        they stand alone (not embedded in a longer word or hyphenated form);
        all other sources are replaced as plain substrings. Within each group
        longer sources are applied first so they win over their own prefixes.
        Empty sources are ignored.
        """
        cleaned = text
        phrase_replacements = {}
        token_replacements = {}
        for source, target in replacements.items():
            if not source:
                continue
            if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
                token_replacements[source] = target
            else:
                phrase_replacements[source] = target

        for source, target in sorted(phrase_replacements.items(), key=lambda item: len(item[0]), reverse=True):
            cleaned = cleaned.replace(source, target)

        for source, target in sorted(token_replacements.items(), key=lambda item: len(item[0]), reverse=True):
            pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
            # Use a callable so the target is inserted literally. A plain
            # string would be parsed as a regex replacement template, turning
            # backslash sequences in vocabulary text into escapes/group refs.
            cleaned = pattern.sub(lambda _match: target, cleaned)
        return cleaned

    def cleanup_text(self, text: str, field_path: str = "") -> str:
        """Strip translation artefacts and apply vocabulary to ``text``.

        Steps, in order: trim; apply each matching cleanup pattern; apply the
        general vocabulary; apply the contextual vocabulary for ``field_path``;
        run ``post_cleanup_text``; collapse whitespace runs to single spaces.
        """
        cleaned = text.strip()
        for pattern, replacement in self.cleanup_patterns:
            match = pattern.match(cleaned)
            if not match:
                continue
            cleaned = replacement.format(**match.groupdict()).strip()
        cleaned = self._apply_replacements(cleaned, self.vocabulary_map)
        cleaned = self._apply_replacements(cleaned, self._contextual_replacements(field_path))
        cleaned = self.post_cleanup_text(cleaned, field_path=field_path)
        return re.sub(r"\s+", " ", cleaned).strip()

    def normalize_cta(self, text: str, field_path: str = "") -> str:
        """Return the canonical CTA text for ``text`` when a keyword matches.

        The text is cleaned first; the first ``cta_defaults`` keyword found in
        the lowercased result decides the replacement. Without a match the
        cleaned text is returned.
        """
        normalized = self.cleanup_text(text, field_path=field_path)
        lowered = normalized.lower()
        for keyword, replacement in self.cta_defaults.items():
            if keyword in lowered:
                return replacement
        return normalized

    def rewrite(self, text: str, field_path: str = "", issues: list[Any] | None = None) -> str:
        """Clean ``text`` and, when a backend is configured, let it rewrite it.

        CTA-like fields (path contains ``cta``, ``button``, ``link_text`` or
        ``submit``) are normalised to the canonical CTA texts. The backend
        result is used only when it is a non-blank string.
        """
        cleaned = self.cleanup_text(text, field_path=field_path)
        lowered_path = field_path.lower()
        if any(token in lowered_path for token in ("cta", "button", "link_text", "submit")):
            cleaned = self.normalize_cta(cleaned, field_path=field_path)
        elif issues and any(
            issue.issue_type in {"generic_badge_label", "foreign_ui_label", "weak_marketing_copy", "mixed_locale_heading"}
            for issue in issues
        ):
            # Second cleanup pass over the already cleaned text for these
            # issue types; kept exactly as in the original flow.
            cleaned = self.cleanup_text(cleaned, field_path=field_path)
        if self.backend:
            rewritten = self.backend(
                locale=self.locale,
                field_path=field_path,
                text=cleaned,
                prompt=self.backend_prompt(field_path, cleaned),
            )
            if isinstance(rewritten, str) and rewritten.strip():
                cleaned = rewritten.strip()
        return cleaned

    def process_block(self, block_data: Any, field_path: str = "", issue_map: dict[str, list[Any]] | None = None):
        """Recursively clean or rewrite every string inside ``block_data``.

        Dicts and lists are walked and rebuilt; child paths are formed as
        ``parent.key`` and ``parent[index]``. Strings with recorded issues, or
        whose path contains ``cta``, ``button``, ``label``, ``placeholder`` or
        ``help_text``, go through ``rewrite``; other strings only get
        ``cleanup_text`` (without field-path context). Any other value is
        returned untouched.

        Returns:
            A ``(new_value, changed)`` tuple; ``changed`` is true when any
            string in the structure differs from its input.
        """
        issue_map = issue_map or {}
        if isinstance(block_data, dict):
            changed = False
            output = {}
            for key, value in block_data.items():
                child_path = f"{field_path}.{key}" if field_path else str(key)
                new_value, child_changed = self.process_block(value, child_path, issue_map)
                output[key] = new_value
                changed = changed or child_changed
            return output, changed
        if isinstance(block_data, list):
            changed = False
            output = []
            for index, value in enumerate(block_data):
                child_path = f"{field_path}[{index}]"
                new_value, child_changed = self.process_block(value, child_path, issue_map)
                output.append(new_value)
                changed = changed or child_changed
            return output, changed
        if isinstance(block_data, str):
            issues = issue_map.get(field_path, [])
            # NOTE(review): this token check is case-sensitive, whereas
            # rewrite() lowercases the path first - confirm that is intended.
            needs_rewrite = bool(issues) or any(
                token in field_path for token in ("cta", "button", "label", "placeholder", "help_text")
            )
            if not needs_rewrite:
                cleaned = self.cleanup_text(block_data)
                return cleaned, cleaned != block_data
            rewritten = self.rewrite(block_data, field_path=field_path, issues=issues)
            return rewritten, rewritten != block_data
        return block_data, False

    def build_issue_map(self, issues: list[Any]) -> dict[str, list[Any]]:
        """Group ``issues`` by their ``field_path``; issues without one are dropped."""
        issue_map: dict[str, list[Any]] = defaultdict(list)
        for issue in issues:
            if issue.field_path:
                issue_map[issue.field_path].append(issue)
        return issue_map
|
||||
23
mandelblog_content_guard/agents/de.py
Normal file
23
mandelblog_content_guard/agents/de.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..normalizers import normalize_de_text
|
||||
from ..system_strings import build_system_vocabulary
|
||||
|
||||
|
||||
class GermanAgent(BaseLanguageAgent):
    """German ("de") agent: formal address, German CTA texts and normalizer."""

    locale = "de"
    tone = "professional and trustworthy"
    preferred_formality = "formal Sie"

    # Keyword found in the lowercased CTA text -> canonical German CTA.
    cta_defaults = dict(
        starter="Starter-Gespräch planen",
        business="Beratungsgespräch planen",
        support="Support anfragen",
        service="Dienstleistungen anzeigen",
        project="Projekt starten",
        kontakt="Einführungsgespräch planen",
    )
    # Copy of the shared system vocabulary restricted to the listed keys.
    vocabulary_map = dict(build_system_vocabulary("de", ("transparent_investment",)))

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Delegate the final normalisation step to ``normalize_de_text``."""
        normalized = normalize_de_text(text, field_path=field_path)
        return normalized
|
||||
34
mandelblog_content_guard/agents/en.py
Normal file
34
mandelblog_content_guard/agents/en.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..normalizers import normalize_en_text
|
||||
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
|
||||
|
||||
|
||||
class EnglishAgent(BaseLanguageAgent):
    """English ("en") agent: neutral tone, English CTA texts and normalizer."""

    locale = "en"
    tone = "business-friendly and direct"
    preferred_formality = "neutral"

    # Keyword found in the lowercased CTA text -> canonical English CTA.
    cta_defaults = dict(
        starter="Book starter call",
        business="Book business call",
        support="View support",
        service="View services",
        project="Start your project",
        quote="Request a quote",
        contact="Book intro call",
    )
    # Copy of the shared system vocabulary restricted to the listed keys.
    vocabulary_map = dict(
        build_system_vocabulary("en", ("plan_badge", "services_badge", "transparent_label", "transparent_investment"))
    )
    _system_contextual = build_contextual_system_vocabulary("en", ("plan_badge", "services_badge", "transparent_label"))
    # Field-path token -> replacements applied only for matching paths.
    # Written out entry by entry: a comprehension cannot see class-level names.
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "title": dict(_system_contextual.get("title", {})),
        "heading": dict(_system_contextual.get("heading", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Delegate the final normalisation step to ``normalize_en_text``."""
        normalized = normalize_en_text(text, field_path=field_path)
        return normalized
|
||||
43
mandelblog_content_guard/agents/es.py
Normal file
43
mandelblog_content_guard/agents/es.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..normalizers import normalize_es_text
|
||||
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
|
||||
|
||||
|
||||
class SpanishAgent(BaseLanguageAgent):
    """Spanish ("es") agent: formal tone, Spanish CTA texts and normalizer."""

    locale = "es"
    tone = "clear and business-focused"
    preferred_formality = "formal"

    # Keyword found in the lowercased CTA text -> canonical Spanish CTA.
    cta_defaults = dict(
        starter="Reservar llamada inicial",
        business="Reservar llamada comercial",
        support="Solicitar soporte",
        service="Mostrar los servicios",
        project="Inicia tu proyecto",
        quote="Solicitar propuesta",
        contact="Planificar la reunión inicial",
    )
    # Copy of the shared system vocabulary restricted to the listed keys.
    vocabulary_map = dict(
        build_system_vocabulary(
            "es",
            (
                "plan_badge",
                "response_time",
                "without_commitment",
                "transparent_label",
                "transparent_investment",
            ),
        )
    )
    _system_contextual = build_contextual_system_vocabulary("es", ("plan_badge", "transparent_label"))
    # Field-path token -> replacements applied only for matching paths.
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "title": dict(_system_contextual.get("title", {})),
        "heading": dict(_system_contextual.get("heading", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Delegate the final normalisation step to ``normalize_es_text``."""
        normalized = normalize_es_text(text, field_path=field_path)
        return normalized
|
||||
66
mandelblog_content_guard/agents/fr.py
Normal file
66
mandelblog_content_guard/agents/fr.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
|
||||
|
||||
|
||||
class FrenchAgent(BaseLanguageAgent):
    """French ("fr") agent: formal tone, French CTA texts and vocabulary.

    Does not override ``post_cleanup_text``; all French-specific handling is
    expressed through the lookup tables below.
    """

    locale = "fr"
    tone = "professional and commercial"
    preferred_formality = "formal"
    # Keyword found in the lowercased CTA text -> canonical French CTA.
    cta_defaults = {
        "starter": "Planifier l’entretien de départ",
        "business": "Planifier l’entretien commercial",
        "support": "Voir le support",
        "service": "Afficher les services",
        "project": "Lancez votre projet",
        "devis": "Demander un devis",
        "contact": "Planifier l’échange",
    }
    # Shared system vocabulary plus hand-maintained fixes: English/German/Dutch
    # leftovers and awkward French phrasings mapped to the preferred French copy.
    vocabulary_map = {
        **build_system_vocabulary("fr"),
        "SERVICES": "PRESTATIONS",
        "New": "Nouveau",
        "Popular": "Populaire",
        "Erstes Produktionsprojekt erfolgreich abgeschlossen.": "Premier projet de production livré avec succès.",
        "Von Kickoff bis zum Launch mit einem klaren Umfang.": "Du cadrage au lancement avec un périmètre clair.",
        "Demande d'admission initiale": "Planifier un échange initial",
        "Geschäftsprozess besprechen": "Échanger sur votre processus métier",
        "Entretien d'accueil": "Entretien initial",
        "Vraag over diensten": "Question sur les services",
        "Konkrete erste Schätzung": "Première estimation concrète",
        "Ansatz, der zu Ihrem Budget passt": "Approche adaptée à votre budget",
        "Detailliertes Seitenlayout": "Structure détaillée des pages",
        "Investition": "investissement",
        "Unverbindliches Gespräch, klares Angebot": "Sans engagement, offre claire",
        "Bereit, mit der Business-Website zu starten?": "Prêt à démarrer votre site d’entreprise ?",
        "Planifier un échange business": "Planifier un échange commercial",
        "Aucune carte bancaire requise": "Sans engagement",
    }
    _system_contextual = build_contextual_system_vocabulary("fr")
    # Field-path token -> replacements applied only when the token occurs in
    # the lowercased field path; entries here extend the shared system ones.
    contextual_vocabulary_map = {
        "badge": {
            **_system_contextual.get("badge", {}),
            "Popular": "Le plus demandé",
        },
        "label": {
            **_system_contextual.get("label", {}),
            "Popular": "Le plus demandé",
        },
        "metric": {
            **_system_contextual.get("metric", {}),
        },
        "stat": {
            **_system_contextual.get("stat", {}),
        },
        "title": {
            **_system_contextual.get("title", {}),
            "SERVICES": "PRESTATIONS",
        },
        "heading": {
            **_system_contextual.get("heading", {}),
            "SERVICES": "PRESTATIONS",
        },
        "rendered": {
            **_system_contextual.get("rendered", {}),
            "SERVICES": "PRESTATIONS",
        },
    }
|
||||
42
mandelblog_content_guard/agents/it.py
Normal file
42
mandelblog_content_guard/agents/it.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..normalizers import normalize_it_text
|
||||
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
|
||||
|
||||
|
||||
class ItalianAgent(BaseLanguageAgent):
    """Italian ("it") agent: polite tone, Italian CTA texts and normalizer."""

    locale = "it"
    tone = "professional and approachable"
    preferred_formality = "polite"

    # Keyword found in the lowercased CTA text -> canonical Italian CTA.
    cta_defaults = dict(
        starter="Prenota una call iniziale",
        business="Pianifica la call business",
        support="Richiedi supporto",
        service="Mostra i servizi",
        project="Avvia il tuo progetto",
        quote="Richiedi una proposta",
        contact="Pianifica la riunione introduttiva",
    )
    # Copy of the shared system vocabulary restricted to the listed keys.
    vocabulary_map = dict(
        build_system_vocabulary(
            "it",
            (
                "weeks_1_2",
                "without_commitment",
                "transparent_label",
                "transparent_investment",
                "customization_integrations",
                "multilingual_rollout",
            ),
        )
    )
    _system_contextual = build_contextual_system_vocabulary("it", ("transparent_label",))
    # Field-path token -> replacements applied only for matching paths.
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Delegate the final normalisation step to ``normalize_it_text``."""
        normalized = normalize_it_text(text, field_path=field_path)
        return normalized
|
||||
20
mandelblog_content_guard/agents/nl.py
Normal file
20
mandelblog_content_guard/agents/nl.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..normalizers import normalize_nl_text
|
||||
|
||||
|
||||
class DutchAgent(BaseLanguageAgent):
    """Dutch ("nl") agent: Dutch CTA texts and normalizer."""

    locale = "nl"
    tone = "zakelijk en duidelijk"
    preferred_formality = "je/jij professioneel"

    # Keyword found in the lowercased CTA text -> canonical Dutch CTA.
    cta_defaults = dict(
        starter="Plan startergesprek",
        business="Plan zakelijk gesprek",
        support="Bekijk support",
        service="Bekijk diensten",
        project="Start jouw project",
        contact="Plan kennismaking",
        offerte="Vraag voorstel aan",
    )

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Delegate the final normalisation step to ``normalize_nl_text``."""
        normalized = normalize_nl_text(text, field_path=field_path)
        return normalized
|
||||
111
mandelblog_content_guard/agents/pt.py
Normal file
111
mandelblog_content_guard/agents/pt.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
|
||||
|
||||
|
||||
class PortugueseAgent(BaseLanguageAgent):
    """Portuguese ("pt") agent: Portuguese CTA texts and vocabulary.

    Does not override ``post_cleanup_text``; all Portuguese-specific handling
    is expressed through the lookup tables below.
    """

    locale = "pt"
    tone = "business-focused and practical"
    preferred_formality = "neutral"
    # Keyword found in the lowercased CTA text -> canonical Portuguese CTA.
    cta_defaults = {
        "starter": "Agendar chamada inicial",
        "business": "Agendar chamada comercial",
        "support": "Ver suporte",
        "service": "Ver serviços",
        "project": "Iniciar o seu projeto",
        "proposta": "Pedir proposta",
        "contact": "Agendar reunião introdutória",
    }
    # Shared system vocabulary plus hand-maintained fixes. Keys are fragments in
    # other languages (or mixed-language runs of rendered text) mapped to the
    # Portuguese copy; several long keys contain shorter keys, which works
    # because longer sources are replaced first.
    vocabulary_map = {
        **build_system_vocabulary("pt"),
        "SERVICES": "SERVIÇOS",
        "New": "Novo",
        "Popular": "Em destaque",
        "Siti web e negozi online": "Sites e lojas online",
        "Siti web e negozi online che sono rapidamente online e facili da gestire": "Sites e lojas online que ficam no ar rapidamente e são fáceis de gerir",
        "Caso de cliente en directo": "Caso real de cliente",
        "El primer proyecto de producción finalizado con éxito.": "O primeiro projeto de produção foi concluído com sucesso.",
        "Más sobre el proceso": "Mais sobre o processo",
        "Modifiez simplement vous-même.": "Edite facilmente por conta própria.",
        "Opciones de la tienda web Mantenimiento y soporte Suporte mensal opcional para atualizações e estabilidade.": "Opções da loja online Manutenção e suporte Suporte mensal opcional para atualizações e estabilidade.",
        "Opciones de la tienda web": "Opções da loja online",
        "Planes de soporte": "Planos de suporte",
        "Multilingüe": "Multilingue",
        "Suivi + corrections": "Acompanhamento e correções",
        "Mejoras mensuales": "Melhorias mensais",
        "¿A qué velocidad puede comenzar?": "Com que rapidez podem começar?",
        "¿Puedo editar textos e imágenes yo mismo?": "Posso editar textos e imagens por conta própria?",
        "Einzelhandelsunternehmer": "Comerciante",
        "lifestyle": "estilo de vida",
        "À partir de 3 750 €": "A partir de 3.750 €",
        "Transparente sobre o planejamento, o processo e a gestão.": "Clareza sobre o planeamento, o processo e a gestão.",
        "Einzelhandelsinhaber Petite boutique en ligne Forfaits de services (à partir de) Pontos de partida transparentes.": "Comerciantes Pequena loja online Pacotes de serviço (a partir de) Pontos de partida claros.",
        "Unsere Serviços": "Os nossos serviços",
        "Unsere Serviços: vom schnellen Start bis zu skalierbarem Wachstum": "Os nossos serviços: do lançamento rápido ao crescimento escalável",
        "Elija el camino": "Escolha o caminho certo",
        "Elija el camino que corresponda a su fase: sitio de inicio, sitio empresarial, tienda en línea o soporte continuo.": "Escolha o caminho certo para a sua fase: site inicial, site empresarial, loja online ou suporte contínuo.",
        "Début en direct": "Lançamento rápido",
        "Demande d'admission initiale": "Agendar conversa inicial",
        "Site Web d'Entreprise": "Site empresarial",
        "Hablar sobre el proceso empresarial": "Falar sobre o processo do negócio",
        "Mise en place de boutique en ligne": "Implementação de loja online",
        "Maintenance & gestion": "Manutenção e gestão",
        "Afficher le plan de soutien": "Ver suporte",
        "Introducción multilingüe": "Lançamento multilingue",
        "Forfaits de services (à partir de)": "Pacotes de serviço (a partir de)",
        "Schnell online mit einer starken Basis": "Rápido online com uma base sólida",
        "Startseite + Kernseiten": "Página inicial + páginas essenciais",
        "Optimizado para móviles": "Otimizado para mobile",
        "Gestisca lei stesso il contenuto": "Gerir o conteúdo com autonomia",
        "Detailliertes Seitenlayout": "Estrutura detalhada das páginas",
        "Unverbindliches Gespräch, klares Angebot": "Sem compromisso, proposta clara",
        "Mehr Struktur und Konversion": "Mais estrutura e foco em conversão",
        "Sections axées sur la conversion": "Secções orientadas para conversão",
        "Base prête pour le SEO": "Base pronta para SEO",
        "Katalog + Kasse": "Catálogo + checkout",
        "Zahlungen und Auftragsfluss": "Pagamentos e fluxo de encomendas",
        "Wachstumsbereite Grundlage": "Base pronta para crescimento",
        "Soporte y crecimiento": "Suporte e crescimento",
        "Amélioration continue": "Melhoria contínua",
        "Desde 149 € al mes.": "Desde 149 € por mês.",
        "Ab 2.250 €": "A partir de 2.250 €",
        "Boutique en ligne": "Loja online",
        "Sales-ready mit skalierbarem Stack": "Preparada para vender com uma base escalável",
        "Agendar conversa sobre o serviço Ver resultados do projeto 1-2 Wochen Début en direct 4.9/5 Kundenschätzung 100% Bearbeitbar Visão geral dos serviços Cada serviço é projetado para melhorar a faturação, a confiança e a controlabilidade.": "Agendar conversa sobre o serviço Ver resultados do projeto 1 a 2 semanas Lançamento rápido 4.9/5 Avaliação dos clientes 100% Editável Visão geral dos serviços Cada serviço foi concebido para aumentar a faturação, reforçar a confiança e dar mais controlo à sua equipa.",
        "Site inicial Schnell online mit einer starken Basis A partir de 1.250 € Agendar chamada inicial Startseite + Kernseiten Optimizado para móviles Gestisca lei stesso il contenuto Recomendado Site Web d'Entreprise Mehr Struktur und Konversion Ab 2.250 € Agendar chamada comercial Detailliertes Seitenlayout Sections axées sur la conversion Base prête pour le SEO Boutique en ligne Sales-ready mit skalierbarem Stack À partir de 3 750 € Iniciar o processo da loja online Katalog + Kasse Zahlungen und Auftragsfluss Wachstumsbereite Grundlage Soporte y crecimiento Amélioration continue Desde 149 € al mes.": "Site inicial Rápido online com uma base sólida A partir de 1.250 € Agendar chamada inicial Página inicial + páginas essenciais Otimizado para mobile Gerir o conteúdo com autonomia Recomendado Site empresarial Mais estrutura e foco em conversão A partir de 2.250 € Agendar chamada comercial Estrutura detalhada das páginas Secções orientadas para conversão Base pronta para SEO Loja online Preparada para vender com uma base escalável A partir de 3.750 € Iniciar o processo da loja online Catálogo + checkout Pagamentos e fluxo de encomendas Base pronta para crescimento Suporte e crescimento Melhoria contínua Desde 149 € por mês.",
        "Perguntas frequentes Transparente sobre o planejamento, o processo e a gestão.": "Perguntas frequentes Clareza sobre o planeamento, o processo e a gestão.",
        'Ver serviços New La entrada "Unterstützung oder Erweiterung" está en alemán, no en neerlandés.': "Ver serviços Novo Suporte ou expansão",
        "Unterstützung oder Erweiterung": "Suporte ou expansão",
        'La entrada "Unterstützung oder Erweiterung"': "Suporte ou expansão",
        'La entrada "Unterstützung oder Erweiterung" está en alemán, no en neerlandés. Traducido al francés, es: "Suporte ou expansão".': "Suporte ou expansão",
        "Sem cartão de crédito": "Sem compromisso",
    }
    _system_contextual = build_contextual_system_vocabulary("pt")
    # Field-path token -> replacements applied only when the token occurs in
    # the lowercased field path.
    # NOTE(review): "title" and "heading" do not merge the shared system
    # entries, unlike the other tokens - confirm this is intended.
    contextual_vocabulary_map = {
        "badge": {
            **_system_contextual.get("badge", {}),
            "Popular": "Escolha frequente",
        },
        "label": {
            **_system_contextual.get("label", {}),
            "Popular": "Escolha frequente",
        },
        "metric": {
            **_system_contextual.get("metric", {}),
        },
        "stat": {
            **_system_contextual.get("stat", {}),
        },
        "title": {
            "SERVICES": "SERVIÇOS",
            "Popular": "Em destaque",
        },
        "heading": {
            "SERVICES": "SERVIÇOS",
            "Popular": "Em destaque",
        },
        "rendered": {
            **_system_contextual.get("rendered", {}),
            "SERVICES": "SERVIÇOS",
            "Popular": "Em destaque",
        },
    }
|
||||
39
mandelblog_content_guard/agents/ru.py
Normal file
39
mandelblog_content_guard/agents/ru.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from .base import BaseLanguageAgent
|
||||
from ..normalizers import normalize_ru_text
|
||||
from ..system_strings import build_contextual_system_vocabulary, build_system_vocabulary
|
||||
|
||||
|
||||
class RussianAgent(BaseLanguageAgent):
    """Language agent configuration for Russian (``ru``) content."""

    locale = "ru"
    tone = "professional and confident"
    preferred_formality = "neutral polite"

    # Flat vocabulary built by the shared system-strings helper. The tuple is
    # passed through as the helper's second argument; its exact meaning is
    # defined in ``system_strings`` (not visible here).
    vocabulary_map = dict(
        build_system_vocabulary(
            "ru",
            (
                "customization_integrations",
                "detailed_page_structure",
                "without_commitment",
            ),
        )
    )

    # Per-context vocabulary from the shared helper; each context below gets
    # its own shallow copy so later mutation cannot leak back into the source.
    _system_contextual = build_contextual_system_vocabulary("ru", ("plan_badge", "transparent_label"))
    contextual_vocabulary_map = {
        "badge": dict(_system_contextual.get("badge", {})),
        "label": dict(_system_contextual.get("label", {})),
        "metric": dict(_system_contextual.get("metric", {})),
        "stat": dict(_system_contextual.get("stat", {})),
        "rendered": dict(_system_contextual.get("rendered", {})),
    }

    # Default call-to-action copy keyed by CTA kind.
    cta_defaults = {
        "starter": "Запланировать стартовую консультацию",
        "business": "Обсудить бизнес-проект",
        "support": "Посмотреть поддержку",
        "service": "Посмотреть услуги",
        "project": "Запустить свой проект",
        "contact": "Отправить запрос",
        "quote": "Получить предложение",
    }

    def post_cleanup_text(self, text: str, field_path: str = "") -> str:
        """Return *text* after the Russian normalizer has been applied."""
        return normalize_ru_text(text, field_path=field_path)
|
||||
16
mandelblog_content_guard/ai.py
Normal file
16
mandelblog_content_guard/ai.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from .agents import get_language_agent
|
||||
from .validators.multilingual import validate_ai_text_or_raise
|
||||
|
||||
|
||||
def guard_ai_output(locale_code: str, field_path: str, value: str) -> str:
    """Validate AI-produced text and hand it back unchanged.

    The check is delegated to ``validate_ai_text_or_raise``; anything it
    raises propagates to the caller. On success *value* is returned as-is.
    """
    validate_ai_text_or_raise(locale_code, field_path, value)
    return value
|
||||
|
||||
|
||||
def rewrite_ai_output(locale_code: str, field_path: str, value: str) -> str:
    """Rewrite *value* with the locale's agent, validate the result, return it.

    Validation runs on the rewritten text (not the input); anything raised by
    ``validate_ai_text_or_raise`` propagates to the caller.
    """
    candidate = get_language_agent(locale_code).rewrite(value, field_path=field_path)
    validate_ai_text_or_raise(locale_code, field_path, candidate)
    return candidate
|
||||
10
mandelblog_content_guard/apps.py
Normal file
10
mandelblog_content_guard/apps.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class MandelblogContentGuardConfig(AppConfig):
    """Django application configuration for ``mandelblog_content_guard``."""

    name = "mandelblog_content_guard"
    verbose_name = "MandelBlog Content Guard"
    default_auto_field = "django.db.models.BigAutoField"

    def ready(self):
        """Import the package's ``signals`` module when the app is ready."""
        # Imported purely for its import-time side effects (presumably
        # receiver registration -- confirm in signals.py).
        from . import signals  # noqa: F401
|
||||
3
mandelblog_content_guard/extractors/__init__.py
Normal file
3
mandelblog_content_guard/extractors/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .visible_text import VisibleTextExtractor, extract_visible_rendered_text, normalize_text
|
||||
|
||||
__all__ = ["VisibleTextExtractor", "extract_visible_rendered_text", "normalize_text"]
|
||||
85
mandelblog_content_guard/extractors/visible_text.py
Normal file
85
mandelblog_content_guard/extractors/visible_text.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
from html.parser import HTMLParser
|
||||
|
||||
# Elements whose text content is collected as visible copy.
VISIBLE_TEXT_TAGS = {"h1", "h2", "h3", "h4", "h5", "h6", "p", "button", "a", "label", "li"}
# Elements whose whole subtree is skipped.
IGNORED_TAGS = {"script", "style", "noscript", "template"}
# HTML void elements: they never receive an end tag and cannot contain text,
# so they must not take part in the open-element bookkeeping.
VOID_TAGS = frozenset(
    {"area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"}
)


def html_unescape(value: str) -> str:
    """Return *value* with HTML character references decoded."""
    return html.unescape(value)


def normalize_text(value: str) -> str:
    """Decode HTML entities, collapse whitespace runs to one space, and strip."""
    return re.sub(r"\s+", " ", html_unescape(value)).strip()


class VisibleTextExtractor(HTMLParser):
    """Collect the text of visible-text elements from an HTML document.

    Text is gathered only while inside at least one element from
    ``VISIBLE_TEXT_TAGS`` that is neither hidden itself nor nested in a hidden
    or ignored element. Every closing visible element flushes the gathered
    chunks as one entry of ``lines``.
    """

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.ignored_depth = 0
        self.hidden_stack: list[bool] = []
        self.visible_tag_stack: list[str] = []
        self.current_chunks: list[str] = []
        self.lines: list[str] = []
        # One (tag name, counted-as-visible) entry per open non-void element,
        # kept parallel to ``hidden_stack`` so end tags unwind exactly what
        # their start tag pushed.
        self._open_tags: list[tuple[str, bool]] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Record an opening tag and whether it starts a visible text region."""
        lowered = tag.lower()
        if lowered in IGNORED_TAGS:
            self.ignored_depth += 1
            return
        if lowered in VOID_TAGS:
            # No end tag will ever arrive; pushing here would leave a stale
            # entry (and e.g. <img aria-hidden="true"> would hide the rest of
            # the page).
            return
        attrs_dict = {key.lower(): (value or "") for key, value in attrs}
        self.hidden_stack.append(self._is_hidden(attrs_dict))
        counts_as_visible = (
            lowered in VISIBLE_TEXT_TAGS and not self.ignored_depth and not any(self.hidden_stack)
        )
        if counts_as_visible:
            self.visible_tag_stack.append(lowered)
        self._open_tags.append((lowered, counts_as_visible))

    def handle_endtag(self, tag: str) -> None:
        """Close the nearest matching open element, flushing finished lines."""
        lowered = tag.lower()
        if lowered in IGNORED_TAGS:
            if self.ignored_depth:
                self.ignored_depth -= 1
            return
        if lowered in VOID_TAGS:
            # Mirrors handle_starttag (also reached for "<br/>" via
            # handle_startendtag).
            return
        for index in range(len(self._open_tags) - 1, -1, -1):
            if self._open_tags[index][0] == lowered:
                break
        else:
            # Stray end tag with no matching open element: ignore it rather
            # than popping an unrelated entry.
            return
        # Unwind the match and any unclosed descendants above it.
        while len(self._open_tags) > index:
            _, was_visible = self._open_tags.pop()
            self.hidden_stack.pop()
            if was_visible:
                self.visible_tag_stack.pop()
                self._flush_line()

    def handle_data(self, data: str) -> None:
        """Buffer normalized text when inside a visible, non-hidden region."""
        if self.ignored_depth or any(self.hidden_stack) or not self.visible_tag_stack:
            return
        normalized = normalize_text(data)
        if normalized:
            self.current_chunks.append(normalized)

    def handle_comment(self, data: str) -> None:
        """Ignore HTML comments."""
        return

    def close(self) -> None:
        """Finish parsing and flush any text still buffered."""
        super().close()
        self._flush_line()

    def _flush_line(self) -> None:
        """Join buffered chunks into one line and append it to ``lines``."""
        if not self.current_chunks:
            return
        line = normalize_text(" ".join(self.current_chunks))
        if line:
            self.lines.append(line)
        self.current_chunks = []

    @staticmethod
    def _is_hidden(attrs: dict[str, str]) -> bool:
        """Return True for ``hidden``, ``aria-hidden="true"`` or an inline hiding style."""
        if "hidden" in attrs:
            return True
        if attrs.get("aria-hidden", "").lower() == "true":
            return True
        # Spaces are removed so "display: none" and "display:none" both match.
        style = attrs.get("style", "").replace(" ", "").lower()
        return "display:none" in style or "visibility:hidden" in style


def extract_visible_rendered_text(body: str) -> str:
    """Return the visible text of *body*, one extracted line per ``\\n``."""
    parser = VisibleTextExtractor()
    parser.feed(body)
    parser.close()
    return "\n".join(parser.lines)
|
||||
95
mandelblog_content_guard/hooks.py
Normal file
95
mandelblog_content_guard/hooks.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from django.contrib import messages
|
||||
from django.http import HttpResponseRedirect
|
||||
from wagtail import hooks
|
||||
|
||||
from .types import format_issue, split_issues
|
||||
from .validators.multilingual import validate_page, validate_posted_snippet, validate_snippet_instance
|
||||
|
||||
|
||||
def _flash_issues(request, level, prefix: str, issues):
    """Flash up to six formatted issues, plus a count of any left out.

    Each message is added to *request* at *level* and starts with *prefix*.
    """
    shown = issues[:6]
    for item in shown:
        messages.add_message(request, level, f"{prefix}: {format_issue(item)}")
    omitted = len(issues) - len(shown)
    if omitted > 0:
        messages.add_message(request, level, f"{prefix}: {omitted} more issue(s) not shown.")
|
||||
|
||||
|
||||
@hooks.register("before_publish_page")
def prevent_corrupt_multilingual_publish(request, page):
    """Validate *page* before publishing; redirect back when issues block.

    Warnings are always flashed. When blocking issues exist they are flashed
    as errors and a redirect to the current request path is returned;
    otherwise the hook returns ``None``.
    """
    blocking, warnings = split_issues(validate_page(page))
    if warnings:
        _flash_issues(request, messages.WARNING, "Content guard warning", warnings)
    if blocking:
        _flash_issues(request, messages.ERROR, "Publishing blocked", blocking)
        return HttpResponseRedirect(request.path)
    return None
|
||||
|
||||
|
||||
@hooks.register("after_edit_page")
def warn_on_corrupt_multilingual_draft(request, page):
    """Flash every issue found on an edited page as a draft warning.

    Blocking issues are reported first, then plain warnings; both use the
    WARNING level because nothing is prevented at this point.
    """
    blocking, warnings = split_issues(validate_page(page))
    for group in (blocking, warnings):
        if group:
            _flash_issues(request, messages.WARNING, "Draft warning", group)
|
||||
|
||||
|
||||
def _snippet_locale_code(instance, request) -> str:
|
||||
posted_locale = request.POST.get("locale") if request.method == "POST" else None
|
||||
if posted_locale:
|
||||
return posted_locale
|
||||
locale = getattr(instance, "locale", None)
|
||||
if locale is not None and getattr(locale, "language_code", None):
|
||||
return locale.language_code
|
||||
return "nl"
|
||||
|
||||
|
||||
def _validate_snippet_request(request, instance):
    """Validate posted snippet data; redirect back when issues block the save.

    Non-POST requests are not validated. Warnings are flashed; blocking issues
    are flashed as errors and answered with a redirect to the current path.
    Returns ``None`` whenever the request may proceed.
    """
    if request.method != "POST":
        return None
    locale_code = _snippet_locale_code(instance, request)
    blocking, warnings = split_issues(validate_posted_snippet(locale_code, request.POST.dict()))
    if warnings:
        _flash_issues(request, messages.WARNING, "Snippet warning", warnings)
    if blocking:
        _flash_issues(request, messages.ERROR, "Snippet save blocked", blocking)
        return HttpResponseRedirect(request.path)
    return None
|
||||
|
||||
|
||||
@hooks.register("before_create_snippet")
def prevent_corrupt_snippet_create(request, model):
    """Validate posted data for a snippet that is about to be created.

    A throwaway instance of *model* is built so the locale can be resolved the
    same way as for edits. When the request names a locale (query string
    first, then POST data) and the model has a ``locale_id`` attribute, the
    matching ``Locale`` is attached to that instance.

    Returns whatever ``_validate_snippet_request`` returns.
    """
    instance = model()
    requested_locale = request.GET.get("locale") or request.POST.get("locale")
    if requested_locale and hasattr(instance, "locale_id"):
        from wagtail.models import Locale

        # The value comes straight from the request, so it may not match any
        # Locale. Look it up without raising: an unknown value must not turn
        # the create view into a server error.
        locale = Locale.objects.filter(language_code=requested_locale).first()
        if locale is not None:
            instance.locale = locale
    return _validate_snippet_request(request, instance)
|
||||
|
||||
|
||||
@hooks.register("before_edit_snippet")
def prevent_corrupt_snippet_edit(request, instance):
    """Validate posted data for an existing snippet before it is saved."""
    outcome = _validate_snippet_request(request, instance)
    return outcome
|
||||
|
||||
|
||||
def _warn_saved_snippet(request, instance):
    """Flash integrity warnings for an already-saved snippet instance.

    Blocking issues are reported first, then plain warnings; both are shown at
    WARNING level.
    """
    blocking, warnings = split_issues(validate_snippet_instance(instance))
    for group in (blocking, warnings):
        if group:
            _flash_issues(request, messages.WARNING, "Snippet integrity warning", group)
|
||||
|
||||
|
||||
@hooks.register("after_create_snippet")
def warn_on_saved_snippet_create(request, instance):
    """Report integrity warnings for a snippet that was just created."""
    _warn_saved_snippet(request=request, instance=instance)
|
||||
|
||||
|
||||
@hooks.register("after_edit_snippet")
def warn_on_saved_snippet_edit(request, instance):
    """Report integrity warnings for a snippet that was just edited."""
    _warn_saved_snippet(request=request, instance=instance)
|
||||
0
mandelblog_content_guard/management/__init__.py
Normal file
0
mandelblog_content_guard/management/__init__.py
Normal file
163
mandelblog_content_guard/management/commands/audit_locales.py
Normal file
163
mandelblog_content_guard/management/commands/audit_locales.py
Normal file
@@ -0,0 +1,163 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections import defaultdict
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from ...settings import audit_default_locales
|
||||
from ...validators.multilingual import audit_locales
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that runs a multilingual audit and reports the result."""

    help = "Audit all public locale pages for multilingual integrity issues."

    def add_arguments(self, parser):
        """Register the command-line options."""
        # Repeatable options that accumulate into a list: (flag, dest, help).
        repeatable = (
            (
                "--locale",
                "locales",
                "Limit the audit to one or more locale codes. Repeat the flag for multiple locales.",
            ),
            (
                "--url",
                "urls",
                "Limit the audit to one or more public page URLs. Repeat the flag for multiple URLs.",
            ),
        )
        for flag, dest, help_text in repeatable:
            parser.add_argument(flag, action="append", dest=dest, help=help_text)

        # Plain on/off switches: (flag, help).
        switches = (
            ("--fix", "Apply known safe replacements and republish changed content."),
            ("--rewrite", "Rewrite flagged content through the locale agent system."),
            ("--dry-run", "Preview rewrite changes without saving content."),
        )
        for flag, help_text in switches:
            parser.add_argument(flag, action="store_true", help=help_text)

        parser.add_argument(
            "--format",
            choices=["text", "json"],
            default="text",
            help="Output format.",
        )

    def handle(self, *args, **options):
        """Run the audit and write a JSON or text report to stdout."""
        locale_codes = options["locales"] or audit_default_locales()
        run = audit_locales(
            locale_codes,
            fix=options["fix"],
            rewrite=options["rewrite"],
            dry_run=options["dry_run"],
            url_filters=options["urls"],
        )

        issues_by_locale = defaultdict(list)
        for issue in run.issues.all().order_by("locale_code", "url", "field_path"):
            issues_by_locale[issue.locale_code].append(issue)
        compact_by_locale = {
            locale_code: self._compact_issues(issues) for locale_code, issues in issues_by_locale.items()
        }

        if options["format"] == "json":
            payload = {
                "run_id": run.pk,
                "total_urls_checked": run.total_urls_checked,
                "issues_found": run.issues_found,
                "summary": run.summary,
                # Only the audited locales are reported, each with a list
                # (possibly empty).
                "issues": {
                    locale_code: compact_by_locale.get(locale_code, []) for locale_code in locale_codes
                },
            }
            self.stdout.write(json.dumps(payload, indent=2, ensure_ascii=False))
            return

        self._write_text_report(run, locale_codes, compact_by_locale, options)

    @staticmethod
    def _compact_issues(issues):
        """Merge issues sharing url, type, bad value and replacement into one entry.

        Field paths and sources are unioned and returned sorted; counts are
        summed. All other fields come from the first issue of each group.
        """
        merged = {}
        for issue in issues:
            extra = issue.extra or {}
            key = (issue.url, issue.issue_type, issue.bad_value, issue.replacement)
            if key in merged:
                entry = merged[key]
                if issue.field_path:
                    entry["field_paths"].add(issue.field_path)
                if extra.get("source"):
                    entry["sources"].add(extra["source"])
                entry["count"] += extra.get("count", 1)
                continue
            merged[key] = {
                "url": issue.url,
                "title": issue.title,
                "severity": issue.severity,
                "issue_type": issue.issue_type,
                "field_paths": {issue.field_path} if issue.field_path else set(),
                "bad_value": issue.bad_value,
                "replacement": issue.replacement,
                "fixed": issue.fixed,
                "sources": {extra["source"]} if extra.get("source") else set(),
                "count": extra.get("count", 1),
            }
        return [
            {
                **entry,
                "field_paths": sorted(entry["field_paths"]),
                "sources": sorted(entry["sources"]),
            }
            for entry in merged.values()
        ]

    def _write_text_report(self, run, locale_codes, compact_by_locale, options):
        """Write the human-readable report: per-locale sections, snippets, totals."""
        write = self.stdout.write
        for locale_code in locale_codes:
            locale_summary = run.summary.get(locale_code, {})
            write(f"Locale: {locale_code}")
            write(f"URLs checked: {locale_summary.get('total_urls_checked', 0)}")
            write(f"Issues found: {locale_summary.get('issues_found', 0)}")
            write(f"Severity: {locale_summary.get('by_severity', {})}")
            if options["fix"]:
                write(f"Issues auto-fixed: {locale_summary.get('issues_fixed', 0)}")
            if options["rewrite"]:
                write(f"Rewrite mode: {'dry-run' if options['dry_run'] else 'apply'}")

            entries = compact_by_locale.get(locale_code, [])
            for issue in entries:
                target = issue["url"] or issue["title"] or "object"
                write(f"- {target} -> {issue['issue_type']}: {issue['bad_value']}")
                if issue.get("replacement"):
                    write(f" after: {issue['replacement']}")
                if issue.get("field_paths"):
                    # At most five field paths are listed per entry.
                    write(f" fields: {', '.join(issue['field_paths'][:5])}")
                if issue.get("sources"):
                    write(f" sources: {', '.join(issue['sources'])}")
                if issue.get("count"):
                    write(f" count: {issue['count']}")
            if not entries:
                write("- no issues found")
            write("")

        snippet_summary = run.summary.get("snippets") or {}
        if snippet_summary:
            write("Snippet issues:")
            for model_name, count in snippet_summary.items():
                write(f"- {model_name}: {count}")

        write(
            self.style.SUCCESS(
                f"Audit run {run.pk} completed. Total URLs checked: {run.total_urls_checked}. Issues found: {run.issues_found}."
            )
        )
|
||||
19
mandelblog_content_guard/mixins.py
Normal file
19
mandelblog_content_guard/mixins.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from django.core.exceptions import ValidationError
|
||||
|
||||
|
||||
class MultilingualValidationMixin:
    """Opt-in mixin for project models that want explicit clean()-time enforcement."""

    def clean(self):
        """Run the parent ``clean()`` (if any), then reject blocking issues.

        Raises:
            ValidationError: when ``validate_snippet_instance`` reports at
                least one issue whose ``blocks`` attribute is truthy. The
                formatted issues are attached as non-field errors.
        """
        # Local imports, as in the rest of this method, keep module import
        # time free of the validator dependencies.
        from django.core.exceptions import NON_FIELD_ERRORS

        from .types import format_issue
        from .validators.multilingual import validate_snippet_instance

        super_clean = getattr(super(), "clean", None)
        if callable(super_clean):
            super_clean()
        blocking = [format_issue(issue) for issue in validate_snippet_instance(self) if issue.blocks]
        if blocking:
            # Keyed by NON_FIELD_ERRORS rather than a made-up field name: a
            # ModelForm raises ValueError ("has no field named ...") when a
            # model-level ValidationError names a field the form lacks.
            raise ValidationError({NON_FIELD_ERRORS: blocking})
|
||||
15
mandelblog_content_guard/normalizers/__init__.py
Normal file
15
mandelblog_content_guard/normalizers/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from .de import normalize_de_text
|
||||
from .en import normalize_en_text
|
||||
from .es import normalize_es_text
|
||||
from .it import normalize_it_text
|
||||
from .nl import normalize_nl_text
|
||||
from .ru import normalize_ru_text
|
||||
|
||||
__all__ = [
|
||||
"normalize_de_text",
|
||||
"normalize_en_text",
|
||||
"normalize_es_text",
|
||||
"normalize_it_text",
|
||||
"normalize_nl_text",
|
||||
"normalize_ru_text",
|
||||
]
|
||||
58
mandelblog_content_guard/normalizers/de.py
Normal file
58
mandelblog_content_guard/normalizers/de.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
DE_LINE_REPLACEMENTS = {
|
||||
"Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.": "Häufig gestellte Fragen Klarheit über Planung, Vorgehensweise und Management.",
|
||||
"Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.": "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
|
||||
"Einführungsmeeting planen Dienstleistungen anzeigen Verbindlich und klar Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.": "Erstgespräch planen · Dienstleistungen anzeigen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
|
||||
"Steuern 0,00 € Korb ansehen Kasse Kontakt KONTAKT Lass uns dein Projekt konkret machen Einführungsmeeting planen Dienstleistungen anzeigen So können Sie Kontakt aufnehmen Wählen Sie die Route, die zu Ihrer Frage passt.": "Steuern 0,00 € Korb ansehen Kasse Kontakt KONTAKT Lassen Sie uns Ihr Projekt konkret machen Erstgespräch planen Dienstleistungen anzeigen So können Sie Kontakt aufnehmen Wählen Sie den Weg, der zu Ihrer Frage passt.",
|
||||
"Steuern 0,00 € Korb ansehen Kasse Starter Website PLAN Starter Website Plan Starter-Gespräch planen Alle Dienstleistungen anzeigen Was du bekommst Startseite + Kernseiten Professionelle Basis, die sofort Vertrauen schafft.": "Steuern 0,00 € Korb ansehen Kasse Starter-Website PLAN Starter-Website Starter-Gespräch planen Alle Dienstleistungen anzeigen Was Sie erhalten Startseite + Kernseiten Professionelle Basis, die sofort Vertrauen schafft.",
|
||||
"Steuern 0,00 € Korb ansehen Kasse Business Website PLAN Business Website Plan Beratungsgespräch planen Alle Dienstleistungen anzeigen Was du bekommst Detailliertes Seitenlayout Mehr Platz für Dienstleistungen, Fälle und Lead-Flows.": "Steuern 0,00 € Korb ansehen Kasse Business-Website PLAN Business-Website Beratungsgespräch planen Alle Dienstleistungen anzeigen Was Sie erhalten Detailliertes Seitenlayout Mehr Platz für Dienstleistungen, Referenzen und Lead-Flows.",
|
||||
}
|
||||
|
||||
DE_PHRASE_REPLACEMENTS = {
|
||||
"New": "Neu",
|
||||
"Einführungsmeeting": "Erstgespräch",
|
||||
"Intakegespräch": "Erstgespräch",
|
||||
"SEO-ready basis": "SEO-optimierte Basis",
|
||||
"Sales-ready mit skalierbarem Stack": "Verkaufsbereit mit skalierbarer Architektur",
|
||||
"Continuous Verbesserung": "Kontinuierliche Verbesserung",
|
||||
"Was du bekommst": "Was Sie erhalten",
|
||||
"Starter Website": "Starter-Website",
|
||||
"Business Website": "Business-Website",
|
||||
"Support & Wachstum": "Support & Wachstum",
|
||||
"Lass uns dein Projekt konkret machen": "Lassen Sie uns Ihr Projekt konkret machen",
|
||||
"Wählen Sie die Route, die zu Ihrer Frage passt.": "Wählen Sie den Weg, der zu Ihrer Frage passt.",
|
||||
"Verbindlich und klar": "Unverbindliches Gespräch mit klarem Angebot",
|
||||
"Unverbindliches Gespräch, klares Angebot": "Unverbindliches Gespräch mit klarem Angebot",
|
||||
}
|
||||
|
||||
|
||||
def _apply_boundary_replacements(text: str, replacements: dict[str, str]) -> str:
|
||||
cleaned = text
|
||||
phrase_replacements = {}
|
||||
token_replacements = {}
|
||||
for source, target in replacements.items():
|
||||
if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE):
|
||||
token_replacements[source] = target
|
||||
else:
|
||||
phrase_replacements[source] = target
|
||||
|
||||
for source, target in sorted(phrase_replacements.items(), key=lambda item: len(item[0]), reverse=True):
|
||||
cleaned = cleaned.replace(source, target)
|
||||
|
||||
for source, target in sorted(token_replacements.items(), key=lambda item: len(item[0]), reverse=True):
|
||||
pattern = re.compile(rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])", re.UNICODE)
|
||||
cleaned = pattern.sub(target, cleaned)
|
||||
return cleaned
|
||||
|
||||
|
||||
def normalize_de_text(text: str, field_path: str = "") -> str:
|
||||
cleaned = text
|
||||
for source, target in DE_LINE_REPLACEMENTS.items():
|
||||
if cleaned == source:
|
||||
return target
|
||||
cleaned = _apply_boundary_replacements(cleaned, DE_PHRASE_REPLACEMENTS)
|
||||
return cleaned
|
||||
28
mandelblog_content_guard/normalizers/en.py
Normal file
28
mandelblog_content_guard/normalizers/en.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-line fixes: applied only when the input equals a key exactly.
EN_LINE_REPLACEMENTS = {
    "Service packages (from) Transparent starting points.": "Service packages (from) Clear starting points.",
    "Frequently Asked Questions Transparent about planning, approach, and management.": "Frequently Asked Questions Clear guidance on planning, approach, and management.",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Book business call Ready to start with Business Website?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Book business call Ready to start with Business Website?",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Book starter call Ready to start with Starter Website?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Book starter call Ready to start with Starter Website?",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Request support plan Ready to start with Support & Growth?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Request support plan Ready to start with Support & Growth?",
    "After your intake Clear scope and steps Clear planning Transparent investment Name * E-mail * Company * Project details Start webshop project Ready to start with Webshop?": "After your intake Clear scope and steps Clear planning Transparent pricing Name * E-mail * Company * Project details Start webshop project Ready to start with Webshop?",
}

# Substring fixes applied to every text that has no whole-line match.
EN_PHRASE_REPLACEMENTS = {
    "Transparent investment": "Transparent pricing",
    "Transparent about planning, approach, and management.": "Clear guidance on planning, approach, and management.",
    "Transparent starting points.": "Clear starting points.",
}


def normalize_en_text(text: str, field_path: str = "") -> str:
    """Return the English-normalized form of *text*.

    An exact whole-line match is returned as-is. Otherwise phrase replacements
    run longest source first, then whitespace runs are collapsed to a single
    space and the ends are stripped. ``field_path`` is accepted but not used.
    """
    exact = EN_LINE_REPLACEMENTS.get(text)
    if exact is not None:
        return exact
    cleaned = text
    for phrase in sorted(EN_PHRASE_REPLACEMENTS, key=len, reverse=True):
        cleaned = cleaned.replace(phrase, EN_PHRASE_REPLACEMENTS[phrase])
    return re.sub(r"\s+", " ", cleaned).strip()
|
||||
31
mandelblog_content_guard/normalizers/es.py
Normal file
31
mandelblog_content_guard/normalizers/es.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-line fixes: applied only when the input equals a key exactly.
ES_LINE_REPLACEMENTS = {
    "Transparente sobre la planificación, el proceso y la gestión.": "Transparencia sobre la planificación, el proceso y la gestión.",
    "<p>Transparente sobre la planificación, el proceso y la gestión.</p>": "<p>Transparencia sobre la planificación, el proceso y la gestión.</p>",
    "Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.": "Preguntas frecuentes Transparencia sobre la planificación, el proceso y la gestión.",
    "Preguntas frecuentes Transparenteee sobre la planificación, el proceso y la gestión.": "Preguntas frecuentes Transparencia sobre la planificación, el proceso y la gestión.",
    "Planificar la reunión inicial Mostrar los proyectos Unverbindliches Gespräch, klares Angebot Construimos sitios web y tiendas online rápidas que tu equipo puede gestionar sin complicaciones.": "Planificar la reunión inicial · Mostrar los proyectos · Conversación sin compromiso con propuesta clara. Construimos sitios web y tiendas online rápidas que tu equipo puede gestionar sin complicaciones.",
}

# Substring/word fixes applied to every text that has no whole-line match.
ES_PHRASE_REPLACEMENTS = {
    "Transparenteee": "Transparente",
    "Transparent": "Transparente",
    "Unverbindliches Gespräch, klares Angebot": "Conversación sin compromiso con propuesta clara",
}


def normalize_es_text(text: str, field_path: str = "") -> str:
    """Return the Spanish-normalized form of *text*.

    An exact whole-line match is returned as-is. Otherwise replacements run
    longest source first: single-token sources are replaced only at token
    boundaries (so "Transparent" never matches inside "Transparente"), other
    sources as plain substrings. Finally whitespace runs are collapsed and
    the ends stripped. ``field_path`` is accepted but not used.
    """
    if text in ES_LINE_REPLACEMENTS:
        return ES_LINE_REPLACEMENTS[text]
    cleaned = text
    for source in sorted(ES_PHRASE_REPLACEMENTS, key=len, reverse=True):
        target = ES_PHRASE_REPLACEMENTS[source]
        if re.fullmatch(r"[\wÀ-ÿ-]+", source, flags=re.UNICODE) is None:
            cleaned = cleaned.replace(source, target)
            continue
        bounded = rf"(?<![\wÀ-ÿ-]){re.escape(source)}(?![\wÀ-ÿ-])"
        cleaned = re.sub(bounded, target, cleaned, flags=re.UNICODE)
    return re.sub(r"\s+", " ", cleaned).strip()
|
||||
24
mandelblog_content_guard/normalizers/it.py
Normal file
24
mandelblog_content_guard/normalizers/it.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-line fixes: applied only when the input equals a key exactly.
IT_LINE_REPLACEMENTS = {
    "Richiedi un piano di supporto Mostra i progetti Unverbindliches Gespräch, klares Angebot Realizziamo siti web e negozi online veloci che il tuo team può gestire in autonomia.": "Richiedi un piano di supporto · Mostra i progetti · Colloquio senza impegno con proposta chiara. Realizziamo siti web e negozi online veloci che il tuo team può gestire in autonomia.",
    "Dopo il colloquio iniziale Obiettivi chiari e tappe Planificación clara Transparente Investition Nome * Email * Azienda * Dettagli del progetto Richiedi un piano di supporto Pronto a iniziare con supporto e crescita?": "Dopo il colloquio iniziale Obiettivi chiari e tappe Pianificazione chiara Investimento trasparente Nome * Email * Azienda * Dettagli del progetto Richiedi un piano di supporto Pronto a iniziare con supporto e crescita?",
    "Mehrsprachiger Rollout-Plan Anpassung & Integrationen Integrazioni API, flussi di lavoro specifici e blocchi personalizzati adattati alla sua azienda.": "Piano di lancio multilingue Personalizzazioni e integrazioni Integrazioni API, flussi di lavoro specifici e blocchi personalizzati adattati alla sua azienda.",
}

# Substring fixes applied to every text that has no whole-line match.
IT_PHRASE_REPLACEMENTS = {
    "Planificación clara": "Pianificazione chiara",
    "Unverbindliches Gespräch, klares Angebot": "Colloquio senza impegno con proposta chiara",
}


def normalize_it_text(text: str, field_path: str = "") -> str:
    """Return the Italian-normalized form of *text*.

    An exact whole-line match is returned as-is. Otherwise phrase replacements
    run longest source first, then whitespace runs are collapsed to a single
    space and the ends are stripped. ``field_path`` is accepted but not used.
    """
    exact = IT_LINE_REPLACEMENTS.get(text)
    if exact is not None:
        return exact
    cleaned = text
    for phrase in sorted(IT_PHRASE_REPLACEMENTS, key=len, reverse=True):
        cleaned = cleaned.replace(phrase, IT_PHRASE_REPLACEMENTS[phrase])
    return re.sub(r"\s+", " ", cleaned).strip()
|
||||
15
mandelblog_content_guard/normalizers/nl.py
Normal file
15
mandelblog_content_guard/normalizers/nl.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Substring fixes for Dutch; the single entry currently maps "PLAN" to itself.
NL_PHRASE_REPLACEMENTS = {
    "PLAN": "PLAN",
}


def normalize_nl_text(text: str, field_path: str = "") -> str:
    """Return the Dutch-normalized form of *text*.

    Phrase replacements are applied in dictionary order, then whitespace runs
    are collapsed to one space and the ends stripped. ``field_path`` is
    accepted but not used.
    """
    cleaned = text
    for phrase in NL_PHRASE_REPLACEMENTS:
        cleaned = cleaned.replace(phrase, NL_PHRASE_REPLACEMENTS[phrase])
    collapsed = re.sub(r"\s+", " ", cleaned)
    return collapsed.strip()
|
||||
24
mandelblog_content_guard/normalizers/ru.py
Normal file
24
mandelblog_content_guard/normalizers/ru.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Whole-line fixes: applied only when the input equals a key exactly.
RU_LINE_REPLACEMENTS = {
    "План многоязычного запуска Anpassung & Integrationen Интеграции API, специфические рабочие процессы и индивидуальные блоки, адаптированные под вашу компанию.": "План многоязычного запуска Настройка и интеграции Интеграции API, специфические рабочие процессы и индивидуальные блоки, адаптированные под вашу компанию.",
    "Запланировать звонок по бизнес-сайту Detailliertes Seitenlayout Разделы, ориентированные на конверсию Base prête pour le SEO Boutique en ligne Для проектов с товарами, оплатой и дальнейшим развитием e-commerce.": "Запланировать звонок по бизнес-сайту Детальная структура страниц Разделы, ориентированные на конверсию Основа, готовая для SEO Интернет-магазин Для проектов с товарами, оплатой и дальнейшим развитием e-commerce.",
    "Связаться с нами Посмотреть проекты Unverbindliches Gespräch, klares Angebot Мы создаём быстрые сайты и интернет-магазины, которыми ваша команда может управлять самостоятельно.": "Связаться с нами · Посмотреть проекты · Без обязательств, понятное предложение. Мы создаём быстрые сайты и интернет-магазины, которыми ваша команда может управлять самостоятельно.",
}

# Substring fixes applied to every text that has no whole-line match.
RU_PHRASE_REPLACEMENTS = {
    "Base prête pour le SEO": "Основа, готовая для SEO",
    "Unverbindliches Gespräch, klares Angebot": "Без обязательств, понятное предложение",
}


def normalize_ru_text(text: str, field_path: str = "") -> str:
    """Return the Russian-normalized form of *text*.

    An exact whole-line match is returned as-is. Otherwise phrase replacements
    run longest source first, then whitespace runs are collapsed to a single
    space and the ends are stripped. ``field_path`` is accepted but not used.
    """
    exact = RU_LINE_REPLACEMENTS.get(text)
    if exact is not None:
        return exact
    cleaned = text
    for phrase in sorted(RU_PHRASE_REPLACEMENTS, key=len, reverse=True):
        cleaned = cleaned.replace(phrase, RU_PHRASE_REPLACEMENTS[phrase])
    return re.sub(r"\s+", " ", cleaned).strip()
|
||||
79
mandelblog_content_guard/settings.py
Normal file
79
mandelblog_content_guard/settings.py
Normal file
@@ -0,0 +1,79 @@
|
||||
from __future__ import annotations
|
||||
|
||||
"""
|
||||
Reusable configuration helpers for mandelblog_content_guard.
|
||||
|
||||
Supported Django settings:
|
||||
- CONTENT_GUARD_STRICT: bool
|
||||
- CONTENT_GUARD_BLOCK_MEDIUM: bool
|
||||
- CONTENT_GUARD_LOCALES: list[str]
|
||||
- CONTENT_GUARD_REWRITE_ENABLED: bool
|
||||
- CONTENT_GUARD_REWRITE_BACKEND: dotted path | None
|
||||
"""
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
DEFAULT_LOCALES = ["nl", "en", "de", "fr", "es", "it", "pt", "ru"]
|
||||
|
||||
SEVERITY = {
|
||||
"CRITICAL": "block",
|
||||
"HIGH": "block",
|
||||
"MEDIUM": "warn",
|
||||
"LOW": "log",
|
||||
}
|
||||
|
||||
ISSUE_LEVELS = {
|
||||
"known_bad_pattern": "CRITICAL",
|
||||
"wrong_language_fragment": "CRITICAL",
|
||||
"rendered_bad_pattern": "CRITICAL",
|
||||
"rendered_wrong_language": "CRITICAL",
|
||||
"render_status": "CRITICAL",
|
||||
"language_heuristic": "CRITICAL",
|
||||
"cta_language_mismatch": "HIGH",
|
||||
"form_language_mismatch": "HIGH",
|
||||
"empty_form_copy": "HIGH",
|
||||
"placeholder_value": "HIGH",
|
||||
"rewrite_candidate": "MEDIUM",
|
||||
"weak_marketing_copy": "MEDIUM",
|
||||
"foreign_ui_label": "MEDIUM",
|
||||
"generic_badge_label": "MEDIUM",
|
||||
"mixed_locale_heading": "MEDIUM",
|
||||
"cta_tone_check": "MEDIUM",
|
||||
}
|
||||
|
||||
|
||||
def strict_mode_enabled() -> bool:
    """Return the ``CONTENT_GUARD_STRICT`` Django setting (``True`` when unset)."""
    strict = getattr(settings, "CONTENT_GUARD_STRICT", True)
    return strict
|
||||
|
||||
|
||||
def block_medium_enabled() -> bool:
    """Return the ``CONTENT_GUARD_BLOCK_MEDIUM`` Django setting (``False`` when unset)."""
    block_medium = getattr(settings, "CONTENT_GUARD_BLOCK_MEDIUM", False)
    return block_medium
|
||||
|
||||
|
||||
def audit_default_locales() -> list[str]:
    """Return a fresh list of the locales to audit.

    Reads ``CONTENT_GUARD_LOCALES`` from Django settings and falls back to
    ``DEFAULT_LOCALES``; the result is always a new list, so callers may
    mutate it freely.
    """
    configured = getattr(settings, "CONTENT_GUARD_LOCALES", DEFAULT_LOCALES)
    return [*configured]
|
||||
|
||||
|
||||
def rewrite_enabled() -> bool:
    """Return the ``CONTENT_GUARD_REWRITE_ENABLED`` Django setting (``True`` when unset)."""
    enabled = getattr(settings, "CONTENT_GUARD_REWRITE_ENABLED", True)
    return enabled
|
||||
|
||||
|
||||
def get_rewrite_backend() -> str | None:
    """Return the ``CONTENT_GUARD_REWRITE_BACKEND`` Django setting, or ``None`` when unset."""
    backend = getattr(settings, "CONTENT_GUARD_REWRITE_BACKEND", None)
    return backend
|
||||
|
||||
|
||||
def classify_issue(issue_type: str) -> str:
    """Return the level registered for ``issue_type`` in ``ISSUE_LEVELS``.

    Issue types that are not registered are classified as ``"LOW"``.
    """
    try:
        return ISSUE_LEVELS[issue_type]
    except KeyError:
        return "LOW"
|
||||
|
||||
|
||||
def severity_for_issue(issue_type: str) -> str:
    """Return the ``SEVERITY`` action mapped to the level of ``issue_type``."""
    level = classify_issue(issue_type)
    return SEVERITY[level]
|
||||
|
||||
|
||||
def should_block_issue(issue_type: str) -> bool:
    """Decide whether an issue of ``issue_type`` must block.

    ``CRITICAL`` and ``HIGH`` issues always block. ``MEDIUM`` issues block only
    when both ``block_medium_enabled()`` and ``strict_mode_enabled()`` are
    truthy. Every other level never blocks.
    """
    level = classify_issue(issue_type)
    if level == "MEDIUM":
        return block_medium_enabled() and strict_mode_enabled()
    return level in {"CRITICAL", "HIGH"}
|
||||
26
mandelblog_content_guard/signals.py
Normal file
26
mandelblog_content_guard/signals.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
from django.db.models.signals import pre_save
|
||||
from django.dispatch import receiver
|
||||
from wagtail.models import Page
|
||||
from wagtail.snippets.models import get_snippet_models
|
||||
|
||||
from .validators.multilingual import validate_instance_or_raise
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def _snippet_models():
    """Return the models reported by ``get_snippet_models()`` as a tuple.

    The result is memoised by ``lru_cache``, so the lookup runs once and later
    calls reuse the same tuple.
    """
    registered = get_snippet_models()
    return tuple(registered)
|
||||
|
||||
|
||||
def _is_snippet_instance(instance) -> bool:
    """Return ``True`` when the exact class of ``instance`` is a cached snippet model.

    Only the instance's own class is compared; subclasses of a snippet model
    do not match.
    """
    return instance.__class__ in _snippet_models()
|
||||
|
||||
|
||||
@receiver(pre_save)
def enforce_multilingual_integrity(sender, instance, **kwargs):
    """Validate multilingual content before a page or snippet is saved.

    The receiver is connected to ``pre_save`` without a sender, so it is
    invoked for every model save; only ``Page`` instances and instances of
    snippet models are passed to ``validate_instance_or_raise``, whose
    exception (if any) propagates and aborts the save.

    Raw saves (``raw=True``, i.e. fixture loading) are skipped: in that mode
    the instance is saved exactly as presented and related rows may not exist
    yet, while validation resolves ``page.specific`` and may query the
    database.
    """
    if kwargs.get("raw"):
        return
    if isinstance(instance, Page) or _is_snippet_instance(instance):
        validate_instance_or_raise(instance)
|
||||
368
mandelblog_content_guard/system_strings.py
Normal file
368
mandelblog_content_guard/system_strings.py
Normal file
@@ -0,0 +1,368 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
SYSTEM_STRING_SPECS = {
|
||||
"plan_badge": {
|
||||
"sources": ("PLAN",),
|
||||
"issue_type": "generic_badge_label",
|
||||
"translations": {
|
||||
"en": "Package",
|
||||
"fr": "FORFAIT",
|
||||
"es": "Paquete",
|
||||
"ru": "Пакет",
|
||||
},
|
||||
"canonical_by_locale": {
|
||||
"de": ("PLAN",),
|
||||
"nl": ("PLAN",),
|
||||
"it": ("PIANO",),
|
||||
},
|
||||
"contexts": {
|
||||
"en": {
|
||||
"badge": "Package",
|
||||
"label": "Package",
|
||||
"title": "Package",
|
||||
"heading": "Package",
|
||||
"rendered": "Package",
|
||||
},
|
||||
"fr": {
|
||||
"badge": "FORFAIT",
|
||||
"label": "FORFAIT",
|
||||
"title": "FORFAIT",
|
||||
"heading": "FORFAIT",
|
||||
"rendered": "FORFAIT",
|
||||
},
|
||||
"es": {
|
||||
"badge": "Paquete",
|
||||
"label": "Paquete",
|
||||
"title": "Paquete",
|
||||
"heading": "Paquete",
|
||||
"rendered": "Paquete",
|
||||
},
|
||||
"ru": {
|
||||
"badge": "Пакет",
|
||||
"label": "Пакет",
|
||||
"title": "Пакет",
|
||||
"heading": "Пакет",
|
||||
"rendered": "Пакет",
|
||||
},
|
||||
},
|
||||
},
|
||||
"services_badge": {
|
||||
"sources": ("SERVICES",),
|
||||
"issue_type": "generic_badge_label",
|
||||
"translations": {
|
||||
"en": "Services",
|
||||
"fr": "PRESTATIONS",
|
||||
"pt": "SERVIÇOS",
|
||||
},
|
||||
"contexts": {
|
||||
"en": {
|
||||
"badge": "Services",
|
||||
"label": "Services",
|
||||
"title": "Services",
|
||||
"heading": "Services",
|
||||
"rendered": "Services",
|
||||
},
|
||||
"fr": {
|
||||
"badge": "PRESTATIONS",
|
||||
"label": "PRESTATIONS",
|
||||
"title": "PRESTATIONS",
|
||||
"heading": "PRESTATIONS",
|
||||
"rendered": "PRESTATIONS",
|
||||
},
|
||||
"pt": {
|
||||
"badge": "SERVIÇOS",
|
||||
"label": "SERVIÇOS",
|
||||
"title": "SERVIÇOS",
|
||||
"heading": "SERVIÇOS",
|
||||
"rendered": "SERVIÇOS",
|
||||
},
|
||||
},
|
||||
},
|
||||
"response_time": {
|
||||
"sources": ("Reaktionszeit",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"en": "Response time",
|
||||
"fr": "Temps de réponse",
|
||||
"es": "Tiempo de respuesta",
|
||||
"it": "Tempo di risposta",
|
||||
"ru": "Время ответа",
|
||||
},
|
||||
},
|
||||
"average_delivery": {
|
||||
"sources": ("Durchschnittliche Lieferung",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"en": "Average delivery time",
|
||||
"fr": "Délai moyen de livraison",
|
||||
"es": "Plazo medio de entrega",
|
||||
"it": "Tempo medio di consegna",
|
||||
"ru": "Средний срок запуска",
|
||||
},
|
||||
},
|
||||
"without_commitment": {
|
||||
"sources": ("Unverbindlich",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"en": "No obligation",
|
||||
"fr": "Sans engagement",
|
||||
"es": "Sin compromiso",
|
||||
"it": "Senza impegno",
|
||||
"pt": "Sem compromisso",
|
||||
"ru": "Без обязательств",
|
||||
},
|
||||
},
|
||||
"transparent_label": {
|
||||
"sources": ("Transparent",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"en": "Clear",
|
||||
"fr": "Clair",
|
||||
"es": "Transparente",
|
||||
"it": "Chiaro",
|
||||
"pt": "Transparente",
|
||||
"ru": "Прозрачно",
|
||||
},
|
||||
"contexts": {
|
||||
"en": {
|
||||
"badge": "Clear",
|
||||
"label": "Clear",
|
||||
"metric": "Clear",
|
||||
"stat": "Clear",
|
||||
"rendered": "Clear",
|
||||
},
|
||||
"fr": {
|
||||
"badge": "Clair",
|
||||
"label": "Clair",
|
||||
"metric": "Clair",
|
||||
"stat": "Clair",
|
||||
"rendered": "Clair",
|
||||
},
|
||||
"es": {
|
||||
"badge": "Transparente",
|
||||
"label": "Transparente",
|
||||
"metric": "Transparente",
|
||||
"stat": "Transparente",
|
||||
"rendered": "Transparente",
|
||||
},
|
||||
"it": {
|
||||
"badge": "Chiaro",
|
||||
"label": "Chiaro",
|
||||
"metric": "Chiaro",
|
||||
"stat": "Chiaro",
|
||||
"rendered": "Chiaro",
|
||||
},
|
||||
"pt": {
|
||||
"badge": "Clara",
|
||||
"label": "Clara",
|
||||
"metric": "Investimento claro",
|
||||
"stat": "Investimento claro",
|
||||
"rendered": "Investimento claro",
|
||||
},
|
||||
"ru": {
|
||||
"badge": "Прозрачно",
|
||||
"label": "Прозрачно",
|
||||
"metric": "Прозрачно",
|
||||
"stat": "Прозрачно",
|
||||
"rendered": "Прозрачно",
|
||||
},
|
||||
},
|
||||
},
|
||||
"weeks_1_2": {
|
||||
"sources": ("1-2 Wochen",),
|
||||
"issue_type": "weak_marketing_copy",
|
||||
"translations": {
|
||||
"fr": "1 à 2 semaines",
|
||||
"es": "1-2 semanas",
|
||||
"it": "1-2 settimane",
|
||||
"pt": "1 a 2 semanas",
|
||||
},
|
||||
"contexts": {
|
||||
"fr": {
|
||||
"metric": "1 à 2 semaines",
|
||||
"stat": "1 à 2 semaines",
|
||||
},
|
||||
"es": {
|
||||
"metric": "1-2 semanas",
|
||||
"stat": "1-2 semanas",
|
||||
},
|
||||
"it": {
|
||||
"metric": "1-2 settimane",
|
||||
"stat": "1-2 settimane",
|
||||
},
|
||||
"pt": {
|
||||
"metric": "1 a 2 semanas",
|
||||
"stat": "1 a 2 semanas",
|
||||
},
|
||||
},
|
||||
},
|
||||
"weeks_2_4": {
|
||||
"sources": ("2-4 Wochen",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"fr": "2 à 4 semaines",
|
||||
},
|
||||
"contexts": {
|
||||
"fr": {
|
||||
"metric": "2 à 4 semaines",
|
||||
"stat": "2 à 4 semaines",
|
||||
},
|
||||
},
|
||||
},
|
||||
"days_label": {
|
||||
"sources": ("Tages",),
|
||||
"issue_type": "weak_marketing_copy",
|
||||
"translations": {
|
||||
"fr": "jours",
|
||||
"pt": "dias",
|
||||
},
|
||||
},
|
||||
"customer_reviews": {
|
||||
"sources": ("Kundenschätzung",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"en": "Customer rating",
|
||||
"fr": "Avis clients",
|
||||
"es": "Valoración de clientes",
|
||||
"it": "Valutazione clienti",
|
||||
"pt": "Avaliação dos clientes",
|
||||
"ru": "Оценка клиентов",
|
||||
},
|
||||
},
|
||||
"editable_label": {
|
||||
"sources": ("Bearbeitbar",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"en": "Editable",
|
||||
"fr": "Modifiable",
|
||||
"es": "Editable",
|
||||
"it": "Modificabile",
|
||||
"pt": "Editável",
|
||||
"ru": "Редактируемо",
|
||||
},
|
||||
},
|
||||
"core_pages_label": {
|
||||
"sources": ("Startseite + Kernseiten",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"pt": "Página inicial + páginas essenciais",
|
||||
},
|
||||
},
|
||||
"detailed_page_structure": {
|
||||
"sources": ("Detailliertes Seitenlayout",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"fr": "Structure détaillée des pages",
|
||||
"es": "Estructura detallada de páginas",
|
||||
"it": "Struttura dettagliata delle pagine",
|
||||
"pt": "Estrutura detalhada das páginas",
|
||||
"ru": "Детальная структура страниц",
|
||||
},
|
||||
},
|
||||
"business_process_cta": {
|
||||
"sources": ("Geschäftsprozess besprechen",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"fr": "Échanger sur votre processus métier",
|
||||
"es": "Hablar sobre el proceso del negocio",
|
||||
"pt": "Falar sobre o processo do negócio",
|
||||
},
|
||||
},
|
||||
"multilingual_rollout": {
|
||||
"sources": ("Mehrsprachige Einführung", "Mehrsprachiger Rollout-Plan"),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"fr": "Déploiement multilingue",
|
||||
"it": "Lancio multilingue",
|
||||
"ru": "Многоязычный запуск",
|
||||
},
|
||||
},
|
||||
"customization_integrations": {
|
||||
"sources": ("Anpassung & Integrationen",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"fr": "Personnalisation & intégrations",
|
||||
"es": "Personalización e integraciones",
|
||||
"it": "Personalizzazioni e integrazioni",
|
||||
"pt": "Personalização e integrações",
|
||||
"ru": "Настройка и интеграции",
|
||||
},
|
||||
},
|
||||
"transparent_investment": {
|
||||
"sources": ("Transparente Investition",),
|
||||
"issue_type": "foreign_ui_label",
|
||||
"translations": {
|
||||
"de": "Transparente Investition",
|
||||
"en": "Transparent pricing",
|
||||
"fr": "Investissement transparent",
|
||||
"es": "Inversión transparente",
|
||||
"it": "Investimento trasparente",
|
||||
"pt": "Investimento transparente",
|
||||
"ru": "Прозрачный бюджет",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def build_system_vocabulary(locale_code: str, keys: Iterable[str] | None = None) -> dict[str, str]:
    """Map every source string to its translation for ``locale_code``.

    Args:
        locale_code: Locale whose ``translations`` entry is looked up.
        keys: Spec keys to include. A falsy value (``None`` or an empty
            collection) selects every spec in ``SYSTEM_STRING_SPECS``.

    Returns:
        ``{source: translation}`` for each selected spec that has a non-empty
        translation for the locale; specs without one are skipped.

    Raises:
        KeyError: If ``keys`` names a spec that does not exist.
    """
    wanted = tuple(keys or SYSTEM_STRING_SPECS.keys())
    vocabulary: dict[str, str] = {}
    for spec in (SYSTEM_STRING_SPECS[name] for name in wanted):
        translation = spec.get("translations", {}).get(locale_code)
        if translation:
            vocabulary.update(dict.fromkeys(spec["sources"], translation))
    return vocabulary
|
||||
|
||||
|
||||
def build_contextual_system_vocabulary(locale_code: str, keys: Iterable[str] | None = None) -> dict[str, dict[str, str]]:
    """Group context-specific replacements for ``locale_code`` by context name.

    Args:
        locale_code: Locale whose ``contexts`` entry is looked up.
        keys: Spec keys to include. A falsy value (``None`` or an empty
            collection) selects every spec in ``SYSTEM_STRING_SPECS``.

    Returns:
        ``{context_name: {source: replacement}}``. Specs without contexts for
        the locale contribute nothing.

    Raises:
        KeyError: If ``keys`` names a spec that does not exist.
    """
    contextual: dict[str, dict[str, str]] = {}
    selected_keys = tuple(keys or SYSTEM_STRING_SPECS.keys())
    for key in selected_keys:
        spec = SYSTEM_STRING_SPECS[key]
        locale_contexts = spec.get("contexts", {}).get(locale_code, {})
        if not locale_contexts:
            continue
        for context_name, replacement in locale_contexts.items():
            bucket = contextual.setdefault(context_name, {})
            # Register every source spelling, matching build_system_vocabulary
            # and build_system_rewrite_candidates; mapping only the first
            # source would silently drop aliases of multi-source specs.
            for source in spec["sources"]:
                bucket[source] = replacement
    return contextual
|
||||
|
||||
|
||||
def build_system_rewrite_candidates(keys: Iterable[str] | None = None) -> dict[str, str]:
    """Map every source string of the selected specs to the spec's issue type.

    A falsy ``keys`` (``None`` or an empty collection) selects every spec in
    ``SYSTEM_STRING_SPECS``; an unknown key raises ``KeyError``.
    """
    wanted = tuple(keys or SYSTEM_STRING_SPECS.keys())
    return {
        source: spec["issue_type"]
        for spec in (SYSTEM_STRING_SPECS[name] for name in wanted)
        for source in spec["sources"]
    }
|
||||
|
||||
|
||||
def all_system_sources() -> set[str]:
    """Return the set of every source string declared in ``SYSTEM_STRING_SPECS``."""
    return {
        source
        for spec in SYSTEM_STRING_SPECS.values()
        for source in spec["sources"]
    }
|
||||
|
||||
|
||||
def is_canonical_system_string(locale_code: str, source: str) -> bool:
    """Tell whether ``source`` is an acceptable spelling for ``locale_code``.

    The checks run in this order:

    1. ``source`` is listed under ``canonical_by_locale`` for the locale in
       any spec.
    2. For locale ``"de"``, ``source`` is any declared system source.
    3. Otherwise, the locale's replacement for ``source`` is non-empty and
       equal to ``source`` itself.
    """
    declared_canonical = any(
        source in spec.get("canonical_by_locale", {}).get(locale_code, ())
        for spec in SYSTEM_STRING_SPECS.values()
    )
    if declared_canonical:
        return True
    if locale_code == "de":
        return source in all_system_sources()
    translated = system_string_replacement(locale_code, source)
    return bool(translated) and translated == source
|
||||
|
||||
|
||||
def system_string_replacement(locale_code: str, source: str) -> str:
    """Return the translation of ``source`` for ``locale_code``.

    Only the first spec that lists ``source`` is consulted; if that spec has
    no translation for the locale, or no spec lists ``source`` at all, the
    empty string is returned.
    """
    matches = (
        spec.get("translations", {}).get(locale_code, "")
        for spec in SYSTEM_STRING_SPECS.values()
        if source in spec["sources"]
    )
    return next(matches, "")
|
||||
56
mandelblog_content_guard/tests.py
Normal file
56
mandelblog_content_guard/tests.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from django.test import SimpleTestCase
|
||||
|
||||
from mandelblog_content_guard.agents import get_language_agent
|
||||
from mandelblog_content_guard.extractors.visible_text import extract_visible_rendered_text
|
||||
from mandelblog_content_guard.system_strings import build_system_rewrite_candidates, build_system_vocabulary
|
||||
from mandelblog_content_guard.validators.multilingual import validate_text_nodes
|
||||
|
||||
|
||||
class PackageLevelContentGuardTests(SimpleTestCase):
    """Package-level smoke tests for the content guard helpers."""

    def test_system_string_replacement_catalog(self):
        """The system-string catalog exposes translations and issue types."""
        french = build_system_vocabulary("fr")
        portuguese = build_system_vocabulary("pt")
        candidates = build_system_rewrite_candidates()

        self.assertEqual(french["PLAN"], "FORFAIT")
        self.assertEqual(portuguese["Unverbindlich"], "Sem compromisso")
        self.assertEqual(candidates["PLAN"], "generic_badge_label")

    def test_canonical_source_suppression(self):
        """Strings that are canonical for a locale are not reported as bad values."""
        nl_issues = validate_text_nodes("nl", [("body.badge", "PLAN")])
        it_issues = validate_text_nodes("it", [("body.badge", "PIANO")])

        self.assertNotIn("PLAN", [issue.bad_value for issue in nl_issues])
        self.assertNotIn("PIANO", [issue.bad_value for issue in it_issues])

    def test_visible_text_extraction(self):
        """Scripts, styles and aria-hidden nodes are excluded from extracted text."""
        html = """
        <html><body>
        <script>var x = 1;</script>
        <style>.hidden{display:none}</style>
        <h1>Visible heading</h1>
        <p aria-hidden="true">Invisible text</p>
        <a href="#">Visible link</a>
        </body></html>
        """
        extracted = extract_visible_rendered_text(html)

        for expected in ("Visible heading", "Visible link"):
            self.assertIn(expected, extracted)
        for unexpected in ("Invisible text", "var x"):
            self.assertNotIn(unexpected, extracted)

    def test_locale_normalizers(self):
        """Language agents rewrite known strings for their locale."""
        de_agent = get_language_agent("de")
        en_agent = get_language_agent("en")

        rewritten_heading = de_agent.rewrite("Was du bekommst", "body.heading")
        rewritten_badge = en_agent.rewrite("PLAN", "body.badge")

        self.assertEqual(rewritten_heading, "Was Sie erhalten")
        self.assertEqual(rewritten_badge, "Package")

    def test_audit_json_contract_shape(self):
        """The audit payload shape survives a JSON round trip."""
        en_summary = {
            "total_urls_checked": 1,
            "issues_found": 0,
            "issues_fixed": 0,
            "remaining_issues": 0,
            "by_severity": {"block": 0, "warn": 0, "log": 0},
        }
        payload = {
            "run_id": 1,
            "summary": {"en": en_summary},
            "issues": {"en": []},
        }

        parsed = json.loads(json.dumps(payload))

        self.assertEqual(sorted(parsed), ["issues", "run_id", "summary"])
        self.assertIn("by_severity", parsed["summary"]["en"])
|
||||
65
mandelblog_content_guard/types.py
Normal file
65
mandelblog_content_guard/types.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
from typing import Any
|
||||
|
||||
from .settings import classify_issue, severity_for_issue, should_block_issue
|
||||
|
||||
|
||||
@dataclass
class AuditIssue:
    """A single finding produced by the content guard."""

    severity: str  # action label stored with the issue, e.g. "block"
    issue_type: str  # key used to classify the issue
    field_path: str  # path of the field the value came from
    bad_value: str  # offending text
    replacement: str = ""  # suggested replacement; empty when there is none
    extra: dict[str, Any] | None = None  # optional free-form metadata

    @property
    def level(self) -> str:
        """Level that ``classify_issue`` assigns to this issue's type."""
        return classify_issue(self.issue_type)

    @property
    def blocks(self) -> bool:
        """Whether the issue blocks: by its own severity or by its issue type."""
        if self.severity == "block":
            return True
        return should_block_issue(self.issue_type)

    def asdict(self) -> dict[str, Any]:
        """Return the fields as a dict, with ``extra`` defaulted to ``{}`` and ``level`` added."""
        # Inside a method body the bare name ``asdict`` resolves to the
        # module-level ``dataclasses.asdict``, not to this method.
        payload = asdict(self)
        payload.update(extra=payload.get("extra") or {}, level=self.level)
        return payload
|
||||
|
||||
|
||||
def make_issue(issue_type: str, field_path: str, bad_value: str, replacement: str = "", extra: dict[str, Any] | None = None) -> AuditIssue:
    """Build an ``AuditIssue`` whose severity is derived from ``issue_type``.

    A falsy ``extra`` is replaced by a new empty dict; a non-empty ``extra``
    is stored as the same object that was passed in.
    """
    details = extra if extra else {}
    severity = severity_for_issue(issue_type)
    return AuditIssue(
        severity=severity,
        issue_type=issue_type,
        field_path=field_path,
        bad_value=bad_value,
        replacement=replacement,
        extra=details,
    )
|
||||
|
||||
|
||||
def dedupe_issues(issues: list[AuditIssue]) -> list[AuditIssue]:
    """Drop repeated issues while keeping the first occurrence and the input order.

    Two issues are duplicates when their severity, issue type, field path and
    bad value are all equal.
    """
    first_seen: dict[tuple, AuditIssue] = {}
    for issue in issues:
        identity = (issue.severity, issue.issue_type, issue.field_path, issue.bad_value)
        # setdefault keeps the earliest issue stored under each identity.
        first_seen.setdefault(identity, issue)
    return list(first_seen.values())
|
||||
|
||||
|
||||
def split_issues(issues: list[AuditIssue]) -> tuple[list[AuditIssue], list[AuditIssue]]:
    """Partition ``issues`` into ``(blocking, non_blocking)`` by their ``blocks`` flag.

    Relative order is preserved within each of the two lists.
    """
    blocking: list[AuditIssue] = []
    warnings: list[AuditIssue] = []
    for issue in issues:
        bucket = blocking if issue.blocks else warnings
        bucket.append(issue)
    return blocking, warnings
|
||||
|
||||
|
||||
def format_issue(issue: AuditIssue) -> str:
    """Render ``issue`` as ``[LEVEL] field_path: bad_value``.

    `` -> replacement`` is appended when the issue carries a replacement.
    """
    parts = [f"[{issue.level}] {issue.field_path}: {issue.bad_value}"]
    if issue.replacement:
        parts.append(f" -> {issue.replacement}")
    return "".join(parts)
|
||||
|
||||
0
mandelblog_content_guard/validators/__init__.py
Normal file
0
mandelblog_content_guard/validators/__init__.py
Normal file
452
mandelblog_content_guard/validators/multilingual.py
Normal file
452
mandelblog_content_guard/validators/multilingual.py
Normal file
@@ -0,0 +1,452 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from collections import Counter
|
||||
from typing import Any
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.utils import timezone
|
||||
from wagtail.models import Page, Site
|
||||
from wagtail.snippets.models import get_snippet_models
|
||||
|
||||
from ..agents import get_language_agent
|
||||
from ..extractors.visible_text import extract_visible_rendered_text, normalize_text
|
||||
from ..settings import audit_default_locales, rewrite_enabled
|
||||
from ..types import dedupe_issues, format_issue, make_issue
|
||||
from .rules.cta import validate_cta
|
||||
from .rules.forms import validate_form_copy
|
||||
from .rules.language import detect_language_mismatch
|
||||
from .rules.patterns import (
|
||||
GLOBAL_BAD_PATTERNS,
|
||||
KNOWN_REPLACEMENTS,
|
||||
LOCALE_FORBIDDEN,
|
||||
validate_patterns,
|
||||
)
|
||||
from mandelstudio.models import LocaleAuditIssue, LocaleAuditRun
|
||||
|
||||
logger = logging.getLogger("mandelstudio.multilingual")
|
||||
|
||||
|
||||
def expected_locale(instance: Any) -> str:
    """Return the language code of ``instance.locale``.

    Falls back to ``"nl"`` when the instance has no locale or the locale has
    no (non-empty) ``language_code``.
    """
    locale = getattr(instance, "locale", None)
    language_code = None if locale is None else getattr(locale, "language_code", None)
    return language_code if language_code else "nl"
|
||||
|
||||
|
||||
def iter_text_nodes(value: Any, path: str = ""):
    """Yield ``(path, text)`` for every string nested inside ``value``.

    Strings are yielded directly. Objects exposing ``raw_data`` are walked
    through a list copy of that attribute, under the same path. List items get
    an ``[index]`` suffix and dict values a ``.key`` suffix (just ``key`` at
    the root). ``None`` and any other type yield nothing.
    """
    if value is None:
        return
    if isinstance(value, str):
        yield path, value
    elif hasattr(value, "raw_data"):
        yield from iter_text_nodes(list(value.raw_data), path)
    elif isinstance(value, list):
        for position, element in enumerate(value):
            yield from iter_text_nodes(element, f"{path}[{position}]")
    elif isinstance(value, dict):
        for name, element in value.items():
            yield from iter_text_nodes(element, f"{path}.{name}" if path else str(name))
|
||||
|
||||
|
||||
def extract_instance_text(instance: Any) -> list[tuple[str, str]]:
    """Collect ``(field_path, text)`` pairs from the text-bearing fields of ``instance``.

    ``title``, ``seo_title`` and ``search_description`` are included when they
    hold a non-blank string. ``body``, ``content``, ``footer`` and
    ``mini_footer`` are walked recursively with ``iter_text_nodes`` whenever
    the attribute exists.
    """
    collected: list[tuple[str, str]] = []

    for name in ("title", "seo_title", "search_description"):
        text = getattr(instance, name, None)
        if not isinstance(text, str):
            continue
        if text.strip():
            collected.append((name, text))

    for name in ("body", "content", "footer", "mini_footer"):
        if hasattr(instance, name):
            collected.extend(iter_text_nodes(getattr(instance, name), name))

    return collected
|
||||
|
||||
|
||||
def validate_text_nodes(locale_code: str, nodes: list[tuple[str, str]]):
    """Run every text rule over ``nodes`` and return the de-duplicated issues.

    Each node's text is normalised first; nodes that normalise to an empty
    value are skipped. The pattern, CTA and form-copy rules run on every
    remaining node; the language-mismatch heuristic runs only on text of at
    least 80 characters.
    """
    rule_checks = (validate_patterns, validate_cta, validate_form_copy)
    collected = []
    for field_path, raw_text in nodes:
        text = normalize_text(raw_text)
        if not text:
            continue
        for check in rule_checks:
            collected.extend(check(locale_code, field_path, text))
        if len(text) < 80:
            continue
        mismatch = detect_language_mismatch(locale_code, text)
        if mismatch:
            collected.append(make_issue("language_heuristic", field_path, mismatch["message"]))
    return dedupe_issues(collected)
|
||||
|
||||
|
||||
REWRITE_REVIEW_TYPES = {
|
||||
"known_bad_pattern",
|
||||
"wrong_language_fragment",
|
||||
"rendered_bad_pattern",
|
||||
"rendered_wrong_language",
|
||||
"rewrite_candidate",
|
||||
"weak_marketing_copy",
|
||||
"foreign_ui_label",
|
||||
"generic_badge_label",
|
||||
"mixed_locale_heading",
|
||||
"cta_language_mismatch",
|
||||
}
|
||||
|
||||
|
||||
def validate_page(page: Page):
    """Validate the text of ``page.specific`` against the locale expected for ``page``."""
    locale_code = expected_locale(page)
    nodes = extract_instance_text(page.specific)
    return validate_text_nodes(locale_code, nodes)
|
||||
|
||||
|
||||
def validate_snippet_instance(instance: Any):
    """Validate the text fields of ``instance`` against its expected locale."""
    locale_code = expected_locale(instance)
    nodes = extract_instance_text(instance)
    return validate_text_nodes(locale_code, nodes)
|
||||
|
||||
|
||||
def validate_posted_snippet(locale_code: str, payload: dict[str, Any]):
    """Validate the top-level string values of a posted ``payload``.

    Non-string values are ignored; each remaining key is used as the field
    path of its value.
    """
    text_fields = [pair for pair in payload.items() if isinstance(pair[1], str)]
    return validate_text_nodes(locale_code, text_fields)
|
||||
|
||||
|
||||
def _replace_known_strings(value: Any, locale_code: str):
|
||||
changes = []
|
||||
if isinstance(value, str):
|
||||
new = value
|
||||
for bad, replacements in KNOWN_REPLACEMENTS.items():
|
||||
replacement = replacements.get(locale_code)
|
||||
if replacement and bad in new:
|
||||
new = new.replace(bad, replacement)
|
||||
changes.append({"bad": bad, "replacement": replacement})
|
||||
return new, changes, new != value
|
||||
if isinstance(value, list):
|
||||
out = []
|
||||
changed = False
|
||||
for item in value:
|
||||
new_item, item_changes, item_changed = _replace_known_strings(item, locale_code)
|
||||
out.append(new_item)
|
||||
changes.extend(item_changes)
|
||||
changed = changed or item_changed
|
||||
return out, changes, changed
|
||||
if isinstance(value, dict):
|
||||
out = {}
|
||||
changed = False
|
||||
for key, item in value.items():
|
||||
new_item, item_changes, item_changed = _replace_known_strings(item, locale_code)
|
||||
out[key] = new_item
|
||||
changes.extend(item_changes)
|
||||
changed = changed or item_changed
|
||||
return out, changes, changed
|
||||
return value, changes, False
|
||||
|
||||
|
||||
def apply_known_replacements(instance: Any, locale_code: str):
    """Apply known replacements to the text fields of ``instance`` and persist them.

    ``title``, ``seo_title`` and ``search_description`` are processed when they
    are strings; ``body``, ``content``, ``footer`` and ``mini_footer`` are
    processed when present, either through a list copy of their ``raw_data``
    or as plain strings.

    When anything changed, a ``Page`` gets a new revision (published if the
    page is live) and any other object is saved. Nothing is written when no
    replacement applied.

    Returns:
        A list of ``{"field", "bad", "replacement"}`` records, empty when
        nothing changed.
    """
    recorded = []

    def _apply(field_name, current):
        # Replace inside ``current`` and write the result back only on change.
        updated, found, changed = _replace_known_strings(current, locale_code)
        if changed:
            setattr(instance, field_name, updated)
            recorded.extend({"field": field_name, **change} for change in found)

    for field_name in ("title", "seo_title", "search_description"):
        current = getattr(instance, field_name, None)
        if isinstance(current, str):
            _apply(field_name, current)

    for field_name in ("body", "content", "footer", "mini_footer"):
        if not hasattr(instance, field_name):
            continue
        current = getattr(instance, field_name)
        if hasattr(current, "raw_data"):
            _apply(field_name, list(current.raw_data))
        elif isinstance(current, str):
            _apply(field_name, current)

    if not recorded:
        return []

    if isinstance(instance, Page):
        revision = instance.save_revision()
        if instance.live:
            revision.publish()
    else:
        instance.save()
    return recorded
|
||||
|
||||
|
||||
def rewrite_with_agent(instance: Any, locale_code: str, issues, *, dry_run: bool = False):
    """Let the locale's language agent rewrite the text fields of ``instance``.

    Returns an empty list immediately when rewriting is disabled. Otherwise
    the plain text fields are rewritten with ``agent.rewrite`` and fields that
    expose ``raw_data`` with ``agent.process_block``. Rewritten values are
    always assigned to the instance; they are persisted (page revision,
    published if the page is live, or ``save()`` for other objects) only when
    something changed and ``dry_run`` is false.

    Returns:
        A list of change records. Text changes carry ``before``/``after``;
        ``raw_data`` changes only carry the field name. Every record has
        ``"method": "agent"``.
    """
    if not rewrite_enabled():
        return []

    agent = get_language_agent(locale_code)
    issue_map = agent.build_issue_map(issues)
    changes = []

    def _rewrite_text(field_name, original):
        # Rewrite one plain-text field, recording the change if it differs.
        rewritten = agent.rewrite(original, field_path=field_name, issues=issue_map.get(field_name, []))
        if rewritten != original:
            setattr(instance, field_name, rewritten)
            changes.append({"field": field_name, "before": original, "after": rewritten, "method": "agent"})

    for field_name in ("title", "seo_title", "search_description"):
        original = getattr(instance, field_name, None)
        if isinstance(original, str):
            _rewrite_text(field_name, original)

    for field_name in ("body", "content", "footer", "mini_footer"):
        if not hasattr(instance, field_name):
            continue
        original = getattr(instance, field_name)
        if hasattr(original, "raw_data"):
            rewritten, changed = agent.process_block(list(original.raw_data), field_name, issue_map)
            if changed:
                setattr(instance, field_name, rewritten)
                changes.append({"field": field_name, "method": "agent"})
        elif isinstance(original, str):
            _rewrite_text(field_name, original)

    if dry_run or not changes:
        return changes

    if isinstance(instance, Page):
        revision = instance.save_revision()
        if instance.live:
            revision.publish()
    else:
        instance.save()
    return changes
|
||||
|
||||
|
||||
def enumerate_public_pages(locale_codes: list[str] | None = None, url_filters: list[str] | None = None):
    """Return the live, public pages to audit, grouped by locale code.

    Args:
        locale_codes: Locales to enumerate; a falsy value selects
            ``audit_default_locales()``.
        url_filters: When non-empty, only pages whose ``url`` is one of these
            values are kept.

    Returns:
        ``{locale_code: [page, ...]}`` with specific page instances ordered by
        tree path. Pages without a URL are skipped, as are pages outside the
        locale's root: the translation of the first site's root page for that
        locale, or the site root itself when no translation exists.
    """
    site = Site.objects.order_by("id").first()
    site_root = getattr(site, "root_page", None)
    allowed_urls = set(url_filters or [])

    pages_by_locale = {}
    for locale_code in (locale_codes or audit_default_locales()):
        root_path = None
        if site_root is not None:
            locale_root = (
                Page.objects.filter(
                    translation_key=site_root.translation_key,
                    locale__language_code=locale_code,
                )
                .specific()
                .first()
            ) or site_root
            root_path = getattr(locale_root, "path", None)

        candidates = (
            Page.objects.filter(locale__language_code=locale_code)
            .live()
            .public()
            .specific()
            .order_by("path")
        )

        selected = []
        for page in candidates:
            page_url = getattr(page, "url", None)
            if (
                page_url
                and (not root_path or page.path.startswith(root_path))
                and (not allowed_urls or page_url in allowed_urls)
            ):
                selected.append(page)
        pages_by_locale[locale_code] = selected
    return pages_by_locale
|
||||
|
||||
|
||||
def fetch_rendered_text(page: Page):
    """Fetch the rendered page over HTTP and return ``(status, visible_text)``.

    Relative page URLs are prefixed with the ``root_url`` of the page's site,
    falling back to the first site by id.

    Returns:
        A ``(status_code, text)`` tuple. Besides real HTTP status codes, two
        synthetic codes are used:

        * ``598`` - the page has no URL or no usable site ``root_url``; the
          text is a short explanation.
        * ``599`` - the request failed at the network level; the text is
          derived from the error message.

    The function does not raise for HTTP or network failures, so a single
    unreachable page cannot abort an audit run.
    """
    page_url = getattr(page, "url", None)
    if not page_url:
        return 598, "missing page URL"

    if str(page_url).startswith("http"):
        full_url = page_url
    else:
        try:
            site = page.get_site()
        except Site.DoesNotExist:
            site = None
        site = site or Site.objects.order_by("id").first()
        if site is None or not getattr(site, "root_url", None):
            return 598, "missing site root_url"
        full_url = f"{site.root_url}{page_url}"

    request = Request(full_url, headers={"User-Agent": "mandelstudio-audit/1.0"})
    try:
        with urlopen(request, timeout=30) as response:
            status = response.getcode()
            body = response.read().decode("utf-8", errors="replace")
    except HTTPError as exc:
        # Error responses still carry a body worth auditing.
        status = exc.code
        body = exc.read().decode("utf-8", errors="replace")
    except OSError as exc:
        # URLError is an OSError subclass, so connection failures are still
        # handled here. Catching OSError additionally covers socket timeouts
        # and connection resets raised while reading the response body, which
        # are not wrapped in URLError.
        status = 599
        body = str(exc)

    return status, extract_visible_rendered_text(body)
|
||||
|
||||
|
||||
def iter_rendered_lines(rendered_text: str) -> list[str]:
    """Split rendered text into normalised, non-empty chunks.

    The text is split after sentence punctuation (``.``, ``!``, ``?``)
    followed by whitespace, and on runs of two or more whitespace characters.
    Each chunk is passed through ``normalize_text``; chunks that normalise to
    an empty value are dropped.
    """
    chunks = re.split(r"(?<=[\.\!\?])\s+|\s{2,}", rendered_text)
    return [line for line in map(normalize_text, chunks) if line]
|
||||
|
||||
|
||||
def validate_rendered_output(locale_code: str, rendered_text: str, status_code: int):
    """Validate the visible text of a rendered page and return de-duplicated issues.

    The checks are:

    * a ``render_status`` issue when ``status_code`` is not 200;
    * the pattern rules on every rendered line, where each resulting issue has
      its ``bad_value`` replaced by the whole line and receives a ``count`` of
      how many issues share its ``(issue_type, line)`` pair;
    * a ``rendered_bad_pattern`` issue for each global bad fragment found in
      the text and a ``rendered_wrong_language`` issue for each fragment
      forbidden for the locale, both with a fixed ``count`` of 1.
    """
    found = []
    if status_code != 200:
        found.append(make_issue("render_status", "rendered", str(status_code)))

    occurrences = Counter()
    for line in iter_rendered_lines(rendered_text):
        line_issues = validate_patterns(locale_code, "rendered", line)
        for line_issue in line_issues:
            # Report the whole rendered line rather than the matched fragment.
            line_issue.bad_value = line
            line_issue.extra = {**(line_issue.extra or {}), "source": "rendered"}
            occurrences[(line_issue.issue_type, line_issue.bad_value)] += 1
        found.extend(line_issues)

    for recorded in found:
        if recorded.extra is None:
            continue
        recorded.extra["count"] = occurrences.get((recorded.issue_type, recorded.bad_value), 1)

    fragment_checks = (
        ("rendered_bad_pattern", GLOBAL_BAD_PATTERNS),
        ("rendered_wrong_language", LOCALE_FORBIDDEN.get(locale_code, ())),
    )
    for issue_type, fragments in fragment_checks:
        for fragment in fragments:
            if fragment not in rendered_text:
                continue
            suggestion = KNOWN_REPLACEMENTS.get(fragment, {}).get(locale_code, "")
            fragment_issue = make_issue(issue_type, "rendered", fragment, suggestion)
            fragment_issue.extra = {"source": "rendered", "count": 1}
            found.append(fragment_issue)

    return dedupe_issues(found)
|
||||
|
||||
|
||||
def annotate_rewrite_previews(locale_code: str, issues):
    """Attach agent-generated rewrite suggestions to eligible issues.

    Only issues whose type is in ``REWRITE_REVIEW_TYPES`` and that have no
    replacement yet are sent to the locale's language agent. When the agent
    returns a non-empty text that differs from ``bad_value``, it becomes the
    issue's ``replacement`` and the issue is flagged with
    ``extra["review_candidate"] = True``.

    The issues are modified in place and the same collection is returned.
    """
    rewriter = get_language_agent(locale_code)
    for entry in issues:
        if entry.issue_type not in REWRITE_REVIEW_TYPES or entry.replacement:
            continue
        suggestion = rewriter.rewrite(entry.bad_value, field_path=entry.field_path, issues=[entry])
        if not suggestion or suggestion == entry.bad_value:
            continue
        entry.replacement = suggestion
        entry.extra = {**(entry.extra or {}), "review_candidate": True}
    return issues
|
||||
|
||||
|
||||
def validate_instance_or_raise(instance: Any):
    """Validate a page or snippet and raise if any issue is blocking.

    ``Page`` instances go through ``validate_page``; everything else goes
    through ``validate_snippet_instance``.

    Returns:
        All issues found, when none of them has a truthy ``blocks`` attribute.

    Raises:
        ValidationError: keyed by ``"content_guard"`` and holding one
            formatted message per blocking issue.
    """
    if isinstance(instance, Page):
        issues = validate_page(instance)
    else:
        issues = validate_snippet_instance(instance)

    blockers = list(filter(lambda issue: issue.blocks, issues))
    if blockers:
        raise ValidationError({"content_guard": list(map(format_issue, blockers))})
    return issues
|
||||
|
||||
|
||||
def validate_ai_text_or_raise(locale_code: str, field_path: str, value: str):
    """Validate a single text value and raise if any issue is blocking.

    The value is wrapped as the only ``(field_path, value)`` node and passed
    to ``validate_text_nodes`` for ``locale_code``.

    Returns:
        All issues found, when none of them has a truthy ``blocks`` attribute.

    Raises:
        ValidationError: keyed by ``"content_guard"`` and holding one
            formatted message per blocking issue.
    """
    issues = validate_text_nodes(locale_code, [(field_path, value)])
    blockers = list(filter(lambda issue: issue.blocks, issues))
    if blockers:
        raise ValidationError({"content_guard": list(map(format_issue, blockers))})
    return issues
|
||||
|
||||
|
||||
def record_issues(run: LocaleAuditRun, locale_code: str, obj: Any, issues, *, fixed: bool = False) -> None:
    """Persist one ``LocaleAuditIssue`` row per issue for the given object.

    Args:
        run: Audit run the rows belong to.
        locale_code: Locale the object was validated against.
        obj: Audited object; ``pk``, ``url`` and ``title`` are read with
            ``getattr`` so objects lacking them are tolerated.
        issues: Issues to store.
        fixed: Value written to every row's ``fixed`` column.
    """
    for found in issues:
        row = dict(
            run=run,
            locale_code=locale_code,
            object_id=getattr(obj, "pk", None),
            object_type=obj.__class__.__name__,
            url=getattr(obj, "url", "") or "",
            # Falls back to str(obj) when there is no title; cut to 255 chars.
            title=getattr(obj, "title", str(obj))[:255],
            severity=found.severity,
            issue_type=found.issue_type,
            field_path=found.field_path,
            bad_value=found.bad_value,
            replacement=found.replacement,
            fixed=fixed,
            extra=found.extra or {},
        )
        LocaleAuditIssue.objects.create(**row)
|
||||
|
||||
|
||||
def audit_locales(locale_codes: list[str], fix: bool = False, rewrite: bool = False, dry_run: bool = False, url_filters: list[str] | None = None) -> LocaleAuditRun:
    """Audit public pages and snippets for the given locales and store the result.

    Args:
        locale_codes: Locales to audit.
        fix: Apply known replacements to pages that have issues.
        rewrite: Annotate issues with rewrite previews and run the rewrite
            agent on pages and snippets that have issues.
        dry_run: Passed to the rewrite agent; when true, content is not
            re-validated after a rewrite.
        url_filters: Optional filters forwarded to ``enumerate_public_pages``.

    Returns:
        The saved ``LocaleAuditRun`` with totals, the per-locale summary and
        the snippet summary (stored under ``summary["snippets"]``).
    """
    run = LocaleAuditRun.objects.create(locale_codes=locale_codes, fix_enabled=fix or rewrite)
    pages_by_locale = enumerate_public_pages(locale_codes, url_filters=url_filters)

    def collect(target, locale_code):
        """Fetch the rendered text and merge model and rendered-output issues."""
        status_code, rendered = fetch_rendered_text(target)
        return dedupe_issues(validate_page(target) + validate_rendered_output(locale_code, rendered, status_code))

    summary: dict[str, Any] = {}
    checked_total = 0
    issue_total = 0
    flagged_pages = 0

    for locale_code, pages in pages_by_locale.items():
        severity_counts = {"block": 0, "warn": 0, "log": 0}
        locale_summary = {
            "total_urls_checked": len(pages),
            "issues_found": 0,
            "issues_fixed": 0,
            "remaining_issues": 0,
            "by_severity": severity_counts,
        }
        for page in pages:
            checked_total += 1
            issues = collect(page, locale_code)
            if rewrite:
                issues = annotate_rewrite_previews(locale_code, issues)
            initial_issue_count = len(issues)

            if issues and fix and apply_known_replacements(page.specific, locale_code):
                # NOTE(review): every pre-fix issue is stored with fixed=True
                # here, including any that the re-validation below still finds.
                record_issues(run, locale_code, page, issues, fixed=True)
                issues = collect(page.specific, locale_code)
                if rewrite:
                    issues = annotate_rewrite_previews(locale_code, issues)

            if issues and rewrite and rewrite_with_agent(page.specific, locale_code, issues, dry_run=dry_run):
                record_issues(run, locale_code, page, issues, fixed=not dry_run)
                if not dry_run:
                    issues = annotate_rewrite_previews(locale_code, collect(page.specific, locale_code))

            if issues:
                flagged_pages += 1
                record_issues(run, locale_code, page, issues)

            remaining = len(issues)
            locale_summary["issues_found"] += initial_issue_count
            locale_summary["issues_fixed"] += initial_issue_count - remaining
            locale_summary["remaining_issues"] += remaining
            for issue in issues:
                severity_counts[issue.severity] = severity_counts.get(issue.severity, 0) + 1
            issue_total += initial_issue_count
        summary[locale_code] = locale_summary

    snippet_summary = {}
    for model in get_snippet_models():
        model_issue_count = 0
        for instance in model.objects.all():
            issues = validate_snippet_instance(instance)
            if rewrite:
                issues = annotate_rewrite_previews(expected_locale(instance), issues)
                if issues:
                    changed = rewrite_with_agent(instance, expected_locale(instance), issues, dry_run=dry_run)
                    if changed and not dry_run:
                        issues = validate_snippet_instance(instance)
            if issues:
                model_issue_count += len(issues)
                record_issues(run, expected_locale(instance), instance, issues)
        if model_issue_count:
            snippet_summary[model.__name__] = model_issue_count
            issue_total += model_issue_count
    summary["snippets"] = snippet_summary

    run.total_urls_checked = checked_total
    run.issues_found = issue_total
    run.pages_with_issues = flagged_pages
    run.summary = summary
    run.finished_at = timezone.now()
    run.save(update_fields=["total_urls_checked", "issues_found", "pages_with_issues", "summary", "finished_at"])
    logger.info("Completed multilingual audit run %s", run.pk)
    return run
|
||||
146
mandelblog_content_guard/validators/rules/cta.py
Normal file
146
mandelblog_content_guard/validators/rules/cta.py
Normal file
@@ -0,0 +1,146 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from ...types import make_issue
|
||||
|
||||
# Per-locale regular expressions; a call-to-action text is accepted by
# validate_cta when at least one pattern of its locale matches it.
CTA_RULES = {
    "nl": (
        r"^Plan ", r"^Bekijk ", r"^Vraag ", r"^Bespreek ", r"^Contact$",
        r"^Start ", r"^Meer ", r"^Verstuur ", r"^Neem ",
    ),
    "en": (
        r"^Book ", r"^View ", r"^Schedule ", r"^Start ", r"^Talk ", r"^Discuss ",
        r"^Contact$", r"^Explore ", r"^Learn ", r"^Request ", r"^Send ",
    ),
    "de": (
        r"^Plan", r"^Mehr", r"^Support", r"^Start", r"^Kontakt", r"^Gespr",
        r"^Kostenlose", r"^Anfrage", r"^Projekte", r"^Verein", r"^Besprech",
        r"^Anzeig", r"^Ansehen", r"^Technisch", r"^Unterst", r"^Unsere",
        r"^Service", r"^Dienstleistungen", r"^Erstgespräch", r"^Einführ",
        r"^Anpassung", r"^Ansichts", r"^Prozess", r"^Pakete", r"^Demo",
        r"^Alle ", r"^Ein ", r"^Webshop",
    ),
    "fr": (
        r"^Planifier", r"^Voir", r"^Découvrir", r"^Demander", r"^Lancer",
        r"^Démarrer", r"^Contacter", r"^Contact$", r"^Parler", r"^Lancez",
        r"^Prendre", r"^Envoyer", r"^Afficher",
    ),
    "es": (
        r"^Reservar", r"^Ver", r"^Solicitar", r"^Inicia", r"^Hablar",
        r"^Descubrir", r"^Contactar", r"^Planificar", r"^Programe",
        r"^Concertar", r"^Enviar", r"^Mostrar", r"^Comenta",
    ),
    "it": (
        r"^Prenota", r"^Vedi", r"^Avvia", r"^Richiedi", r"^Contatta",
        r"^Contatto$", r"^Scopri", r"^Pianifica", r"^Invia", r"^Mostra",
        r"^Parla", r"^Parliamo",
    ),
    "pt": (
        r"^Agendar", r"^Ver", r"^Iniciar", r"^Pedir", r"^Contactar", r"^Falar",
        r"^Explorar", r"^Marcar", r"^Solicitar", r"^Enviar", r"^Mostrar",
    ),
    "ru": (
        r"^Заплан", r"^Посмотр", r"^Запуст", r"^Связ", r"^Подробнее",
        r"^Показать", r"^Отправ", r"^Получ", r"^Запрос",
    ),
}
|
||||
|
||||
# Field names (last dotted segment of a field path) that validate_cta checks.
CTA_FIELDS = {"cta_text", "primary_cta_text", "secondary_cta_text", "submit_button_text"}
|
||||
|
||||
|
||||
def validate_cta(locale_code: str, field_path: str, normalized: str):
    """Check that a call-to-action text matches a known pattern for its locale.

    Only fields whose last dotted path segment is in ``CTA_FIELDS`` are
    checked. Returns an empty list when the field is not a CTA field or when
    any ``CTA_RULES`` pattern for ``locale_code`` matches; otherwise a list
    with a single ``cta_language_mismatch`` issue.
    """
    if field_path.rpartition(".")[2] not in CTA_FIELDS:
        return []
    # NOTE(review): a locale without an entry in CTA_RULES has no patterns,
    # so every CTA text for it is reported - confirm this is intended.
    for pattern in CTA_RULES.get(locale_code, ()):
        if re.search(pattern, normalized):
            return []
    return [make_issue("cta_language_mismatch", field_path, normalized)]
|
||||
21
mandelblog_content_guard/validators/rules/forms.py
Normal file
21
mandelblog_content_guard/validators/rules/forms.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from ...types import make_issue
|
||||
from .patterns import PLACEHOLDER_VALUES
|
||||
from .language import detect_language_mismatch
|
||||
|
||||
# Field names (last dotted segment of a field path) treated as form copy.
FORM_FIELDS = {"help_text", "label", "placeholder"}
|
||||
|
||||
|
||||
def validate_form_copy(locale_code: str, field_path: str, normalized: str):
    """Validate form copy (label, placeholder, help text) for one field.

    Fields whose last dotted path segment is not in ``FORM_FIELDS`` are
    ignored. For the others the result may contain an ``empty_form_copy``
    issue (empty text or a placeholder value) and a ``form_language_mismatch``
    issue whose bad value is the message from ``detect_language_mismatch``.
    """
    if field_path.rpartition(".")[2] not in FORM_FIELDS:
        return []

    found = []
    if normalized == "" or normalized in PLACEHOLDER_VALUES:
        found.append(make_issue("empty_form_copy", field_path, normalized))

    mismatch = detect_language_mismatch(locale_code, normalized)
    if mismatch:
        found.append(make_issue("form_language_mismatch", field_path, mismatch["message"]))
    return found
|
||||
|
||||
43
mandelblog_content_guard/validators/rules/language.py
Normal file
43
mandelblog_content_guard/validators/rules/language.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
# Ten frequent function words per locale; detect_language_mismatch counts
# how many tokens of a text fall into each set.
STOPWORDS = {
    "nl": set("de het een en voor met van je wij niet".split()),
    "en": set("the and for with your you from that this not".split()),
    "de": set("der die das und mit für nicht eine ist sie".split()),
    "fr": set("le la les et avec pour vous une pas des".split()),
    "es": set("el la los las con para una que del por".split()),
    "it": set("il la con per una che del non gli dei".split()),
    "pt": set("o a os as com para uma que não dos".split()),
    "ru": set("и в на с для что это как по не".split()),
}
|
||||
|
||||
|
||||
def _tokenize(text: str) -> list[str]:
    """Return the lowercase word tokens of ``text`` with markup tags removed.

    Anything that looks like ``<...>`` is replaced by a space first; tokens
    are runs of word characters, Cyrillic letters and apostrophes.
    """
    without_tags = re.sub(r"<[^>]+>", " ", text)
    lowered = without_tags.lower()
    return re.findall(r"[\w\u0400-\u04FF']+", lowered)
|
||||
|
||||
|
||||
def detect_language_mismatch(locale_code: str, text: str):
    """Guess from stopword counts whether ``text`` is in another language.

    Texts with fewer than 12 tokens are never judged. Each locale in
    ``STOPWORDS`` is scored by the number of tokens found in its stopword set.

    Returns:
        ``None`` when the expected locale scores at least as high as every
        other locale or when the evidence is too weak. Otherwise a dict with
        ``"severity"`` and ``"message"``:

        * ``"block"`` - top score is at least 6 and at least 4 above the
          expected locale's score;
        * ``"warn"`` - the expected locale scored 0 and the top score is at
          least 5.
    """
    tokens = _tokenize(text)
    if len(tokens) < 12:
        return None

    scores = {code: sum(token in words for token in tokens) for code, words in STOPWORDS.items()}
    expected = scores.get(locale_code, 0)
    # On ties, max() keeps the first locale in STOPWORDS order.
    foreign_locale = max(scores, key=scores.get)
    foreign_score = scores[foreign_locale]
    if foreign_locale == locale_code or expected >= foreign_score:
        return None

    message = f"expected={locale_code}, detected={foreign_locale}, score={foreign_score}, expected_score={expected}"
    if foreign_score >= 6 and foreign_score >= expected + 4:
        return {"severity": "block", "message": message}
    if expected == 0 and foreign_score >= 5:
        return {"severity": "warn", "message": message}
    return None
|
||||
269
mandelblog_content_guard/validators/rules/patterns.py
Normal file
269
mandelblog_content_guard/validators/rules/patterns.py
Normal file
@@ -0,0 +1,269 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
from ...types import make_issue
|
||||
from ...system_strings import (
|
||||
build_system_rewrite_candidates,
|
||||
is_canonical_system_string,
|
||||
system_string_replacement,
|
||||
)
|
||||
|
||||
# Fragments reported as bad content for every locale; validate_patterns
# checks them in this order.
GLOBAL_BAD_PATTERNS = (
    "The Spanish translation", "The Spanish translation of",
    "As the input", "The input", "Poiché l'input",
    'Unternehmen" è tedesco',
    "Support anzeigen",
    "Starter intake", "Business intake",
    "Plan Starter intake", "Plan Business intake",
    "Plan de admisión",
    "None",
)
|
||||
|
||||
# Fragments that validate_patterns reports as wrong-language content when
# they occur in text of the keyed locale.
LOCALE_FORBIDDEN = {
    "nl": (
        "Starter intake", "Business intake", "Poiché", "Correo electrónico",
        "Mostrar los servicios", "Plan de admisión",
    ),
    "en": (
        "Starter intake", "Business intake", "Poiché", "Correo electrónico",
        "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión",
    ),
    "de": (
        "Starter intake", "Business intake", "Poiché", "Correo electrónico",
        "Mostrar los servicios", "Questions fréquemment posées", "Plan de admisión",
    ),
    "fr": (
        "Starter intake", "Business intake", "Poiché", "Correo electrónico",
        "Mostrar los servicios", "Plan de admisión", "Support anzeigen",
    ),
    "es": (
        "Poiché", 'Unternehmen" è tedesco', "Support anzeigen",
        "Questions fréquemment posées",
    ),
    "it": (
        "Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios",
        "Questions fréquentes", "Plan de admisión", "Correo electrónico",
    ),
    "pt": (
        "Poiché l'input", "Consulta inicial sin compromiso", "Mostrar los servicios",
        "Correo electrónico", 'Unternehmen" è tedesco', "Questions fréquemment posées",
    ),
    "ru": (
        "Poiché l'input", "Consulta inicial sin compromiso", "Correo electrónico",
        'Unternehmen" è tedesco', "Mostrar los servicios",
    ),
}
|
||||
|
||||
# Literal texts that stand in for a missing value.
PLACEHOLDER_VALUES = {"-", "N/A", "None", "null"}
|
||||
|
||||
# Exact texts that validate_patterns reports as generic badge labels unless
# they are the canonical system string for the locale.
GENERIC_BADGE_LABELS = {"New", "Popular", "PLAN", "PIANO", "SERVICES"}
|
||||
|
||||
# Fragment -> issue type mapping checked for every locale, built from the
# system-string keys below by build_system_rewrite_candidates.
GLOBAL_REWRITE_CANDIDATES = dict(
    build_system_rewrite_candidates(
        (
            "days_label",
            "average_delivery",
            "response_time",
            "without_commitment",
            "transparent_label",
            "weeks_1_2",
            "customer_reviews",
            "editable_label",
            "core_pages_label",
            "detailed_page_structure",
            "business_process_cta",
            "multilingual_rollout",
            "customization_integrations",
            "transparent_investment",
        )
    )
)
|
||||
|
||||
# Per-locale fragment -> issue type mapping; validate_patterns reports a
# fragment with the mapped issue type when it occurs in text of that locale.
LOCALE_REWRITE_CANDIDATES = {
    "en": {
        "Service packages (from) Transparent starting points.": "foreign_ui_label",
        "Frequently Asked Questions Transparent about planning, approach, and management.": "foreign_ui_label",
        "Transparent investment": "foreign_ui_label",
    },
    "de": {
        "New": "weak_marketing_copy",
        "Intakegespräch": "weak_marketing_copy",
        "SEO-ready basis": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
        "Continuous Verbesserung": "foreign_ui_label",
        "Was du bekommst": "weak_marketing_copy",
        "Einführungsmeeting": "weak_marketing_copy",
        "Starter Website": "weak_marketing_copy",
        "Business Website": "weak_marketing_copy",
        "Häufig gestellte Fragen Transparent über Planung, Vorgehensweise und Management.": "foreign_ui_label",
    },
    "es": {
        "Preguntas frecuentes Transparente sobre la planificación, el proceso y la gestión.": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "pt": {
        "Siti web e negozi online": "mixed_locale_heading",
        "Caso de cliente en directo": "weak_marketing_copy",
        "El primer proyecto de producción finalizado con éxito.": "weak_marketing_copy",
        "Más sobre el proceso": "foreign_ui_label",
        "Modifiez simplement vous-même.": "foreign_ui_label",
        "Opciones de la tienda web": "foreign_ui_label",
        "Planes de soporte": "foreign_ui_label",
        "Multilingüe": "foreign_ui_label",
        "Unsere Serviços": "mixed_locale_heading",
        "Elija el camino": "mixed_locale_heading",
        "Début en direct": "foreign_ui_label",
        "Demande d'admission initiale": "foreign_ui_label",
        "Site Web d'Entreprise": "foreign_ui_label",
        "Hablar sobre el proceso empresarial": "foreign_ui_label",
        "Mise en place de boutique en ligne": "foreign_ui_label",
        "Maintenance & gestion": "foreign_ui_label",
        "Afficher le plan de soutien": "foreign_ui_label",
        "Introducción multilingüe": "foreign_ui_label",
        "Forfaits de services (à partir de)": "mixed_locale_heading",
        "Kundenschätzung": "foreign_ui_label",
        "Gestisca lei stesso il contenuto": "foreign_ui_label",
        "Optimizado para móviles": "foreign_ui_label",
        "Schnell online mit einer starken Basis": "weak_marketing_copy",
        "La entrada \"Unterstützung oder Erweiterung\"": "foreign_ui_label",
        "Suivi + corrections": "foreign_ui_label",
        "Mejoras mensuales": "foreign_ui_label",
        "¿A qué velocidad puede comenzar?": "foreign_ui_label",
        "¿Puedo editar textos e imágenes yo mismo?": "foreign_ui_label",
        "Transparente sobre o planejamento, o processo e a gestão.": "foreign_ui_label",
        "Ab 2.250 €": "foreign_ui_label",
        "Boutique en ligne": "foreign_ui_label",
        "Sales-ready mit skalierbarem Stack": "foreign_ui_label",
    },
    "fr": {
        "Erstes Produktionsprojekt erfolgreich abgeschlossen.": "weak_marketing_copy",
        "Von Kickoff bis zum Launch mit einem klaren Umfang.": "foreign_ui_label",
        "Demande d'admission initiale": "weak_marketing_copy",
        "Entretien d'accueil": "weak_marketing_copy",
        "Vraag over diensten": "foreign_ui_label",
        "Konkrete erste Schätzung": "foreign_ui_label",
        "Ansatz, der zu Ihrem Budget passt": "foreign_ui_label",
        # Entries generated from a system-string key keep their position
        # between the literal entries.
        **build_system_rewrite_candidates(("weeks_2_4",)),
        "Bereit, mit der Business-Website zu starten?": "foreign_ui_label",
    },
    "it": {
        "Planificación clara": "foreign_ui_label",
        "Mehrsprachiger Rollout-Plan": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
    "ru": {
        "Base prête pour le SEO": "foreign_ui_label",
        "Unverbindliches Gespräch, klares Angebot": "foreign_ui_label",
    },
}
|
||||
|
||||
# Bad fragment -> {locale code -> replacement text}. validate_patterns looks
# a fragment up here and uses "" when the fragment or locale has no entry.
KNOWN_REPLACEMENTS = {
    "Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        "fr": "Planifier l’entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l’entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    "Plan Starter intake": {
        "nl": "Plan startergesprek",
        "en": "Book starter call",
        "de": "Starter-Gespräch planen",
        "fr": "Planifier l’entretien de départ",
        "es": "Reservar llamada inicial",
        "it": "Prenota una chiamata iniziale",
        "pt": "Agendar chamada inicial",
        "ru": "Запланировать стартовый звонок",
    },
    "Plan Business intake": {
        "nl": "Plan zakelijk gesprek",
        "en": "Book business call",
        "de": "Beratungsgespräch planen",
        "fr": "Planifier l’entretien commercial",
        "es": "Reservar llamada comercial",
        "it": "Prenota una chiamata commerciale",
        "pt": "Agendar chamada comercial",
        "ru": "Запланировать деловой звонок",
    },
    "Mostrar los servicios": {
        "es": "Mostrar los servicios",
        "it": "Vedi servizi",
        "pt": "Ver serviços",
        "ru": "Показать услуги",
    },
    "Correo electrónico": {"pt": "E-mail", "ru": "Электронная почта"},
    'Unternehmen" è tedesco, non olandese. La traduzione spagnola di "Unternehmen" è "empresa".': {
        "pt": "Empresa",
        "ru": "Компания",
    },
    'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco (non in olandese), la traduzione in spagnolo è: "Consulta inicial sin compromiso".': {
        "it": "Senza impegno",
        "pt": "Sem compromisso",
        "ru": "Без обязательств",
        "es": "Consulta inicial sin compromiso",
    },
}
|
||||
|
||||
|
||||
def _contains_fragment(text: str, fragment: str) -> bool:
    """Tell whether ``fragment`` occurs in ``text``.

    A fragment made only of word characters, ``À``-``ÿ`` and hyphens must
    appear as a whole token, i.e. not directly next to another such
    character. Any other fragment is matched as a plain substring.
    """
    word_chars = r"[\wÀ-ÿ-]"
    if re.fullmatch(rf"{word_chars}+", fragment, flags=re.UNICODE) is None:
        return fragment in text
    bounded = rf"(?<!{word_chars}){re.escape(fragment)}(?!{word_chars})"
    return re.search(bounded, text, flags=re.UNICODE) is not None
|
||||
|
||||
|
||||
def validate_patterns(locale_code: str, field_path: str, normalized: str):
    """Check one normalized text against the fragment tables.

    Issues are produced in this order:

    1. ``known_bad_pattern`` for each matching ``GLOBAL_BAD_PATTERNS`` entry;
    2. ``wrong_language_fragment`` for each matching ``LOCALE_FORBIDDEN``
       entry of the locale;
    3. ``generic_badge_label`` when the whole text is a generic badge label
       and not the canonical system string for the locale;
    4. the mapped issue type for each matching ``GLOBAL_REWRITE_CANDIDATES``
       fragment that is not canonical for the locale;
    5. the mapped issue type for each matching ``LOCALE_REWRITE_CANDIDATES``
       fragment of the locale.

    Returns the list of issues (possibly empty).
    """

    def known_replacement(fragment):
        """Replacement text for ``fragment`` in this locale, or ``""``."""
        return KNOWN_REPLACEMENTS.get(fragment, {}).get(locale_code, "")

    found = [
        make_issue("known_bad_pattern", field_path, fragment, known_replacement(fragment))
        for fragment in GLOBAL_BAD_PATTERNS
        if _contains_fragment(normalized, fragment)
    ]
    found += [
        make_issue("wrong_language_fragment", field_path, fragment, known_replacement(fragment))
        for fragment in LOCALE_FORBIDDEN.get(locale_code, ())
        if _contains_fragment(normalized, fragment)
    ]

    if normalized in GENERIC_BADGE_LABELS and not is_canonical_system_string(locale_code, normalized):
        found.append(
            make_issue(
                "generic_badge_label",
                field_path,
                normalized,
                system_string_replacement(locale_code, normalized),
            )
        )

    found += [
        make_issue(issue_type, field_path, fragment, system_string_replacement(locale_code, fragment))
        for fragment, issue_type in GLOBAL_REWRITE_CANDIDATES.items()
        # A fragment that is already the canonical wording is not reported.
        if _contains_fragment(normalized, fragment) and not is_canonical_system_string(locale_code, fragment)
    ]
    found += [
        make_issue(issue_type, field_path, fragment, system_string_replacement(locale_code, fragment))
        for fragment, issue_type in LOCALE_REWRITE_CANDIDATES.get(locale_code, {}).items()
        if _contains_fragment(normalized, fragment)
    ]
    return found
|
||||
7
mandelstudio/apps.py
Normal file
7
mandelstudio/apps.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class MandelstudioConfig(AppConfig):
    """Django application configuration for the ``mandelstudio`` app."""

    name = "mandelstudio"
    verbose_name = "Mandelstudio"
    # Primary-key field type used for models that do not declare one.
    default_auto_field = "django.db.models.BigAutoField"
|
||||
1
mandelstudio/content_guard/__init__.py
Normal file
1
mandelstudio/content_guard/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/__init__.py
Normal file
1
mandelstudio/content_guard/agents/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/base.py
Normal file
1
mandelstudio/content_guard/agents/base.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.base import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/de.py
Normal file
1
mandelstudio/content_guard/agents/de.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.de import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/en.py
Normal file
1
mandelstudio/content_guard/agents/en.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.en import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/es.py
Normal file
1
mandelstudio/content_guard/agents/es.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.es import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/fr.py
Normal file
1
mandelstudio/content_guard/agents/fr.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.fr import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/it.py
Normal file
1
mandelstudio/content_guard/agents/it.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.it import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/nl.py
Normal file
1
mandelstudio/content_guard/agents/nl.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.nl import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/pt.py
Normal file
1
mandelstudio/content_guard/agents/pt.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.pt import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/agents/ru.py
Normal file
1
mandelstudio/content_guard/agents/ru.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.agents.ru import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/ai.py
Normal file
1
mandelstudio/content_guard/ai.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.ai import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/hooks.py
Normal file
1
mandelstudio/content_guard/hooks.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.hooks import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/mixins.py
Normal file
1
mandelstudio/content_guard/mixins.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.mixins import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/normalizers/__init__.py
Normal file
1
mandelstudio/content_guard/normalizers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.normalizers import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/normalizers/de.py
Normal file
1
mandelstudio/content_guard/normalizers/de.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.normalizers.de import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/normalizers/en.py
Normal file
1
mandelstudio/content_guard/normalizers/en.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.normalizers.en import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/normalizers/es.py
Normal file
1
mandelstudio/content_guard/normalizers/es.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.normalizers.es import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/normalizers/it.py
Normal file
1
mandelstudio/content_guard/normalizers/it.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.normalizers.it import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/normalizers/nl.py
Normal file
1
mandelstudio/content_guard/normalizers/nl.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.normalizers.nl import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/normalizers/ru.py
Normal file
1
mandelstudio/content_guard/normalizers/ru.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.normalizers.ru import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/settings.py
Normal file
1
mandelstudio/content_guard/settings.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.settings import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/signals.py
Normal file
1
mandelstudio/content_guard/signals.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.signals import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/system_strings.py
Normal file
1
mandelstudio/content_guard/system_strings.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.system_strings import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/types.py
Normal file
1
mandelstudio/content_guard/types.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.types import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/validators/__init__.py
Normal file
1
mandelstudio/content_guard/validators/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.validators import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/validators/multilingual.py
Normal file
1
mandelstudio/content_guard/validators/multilingual.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.validators.multilingual import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/validators/rules/__init__.py
Normal file
1
mandelstudio/content_guard/validators/rules/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.validators.rules import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/validators/rules/cta.py
Normal file
1
mandelstudio/content_guard/validators/rules/cta.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.validators.rules.cta import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/validators/rules/forms.py
Normal file
1
mandelstudio/content_guard/validators/rules/forms.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.validators.rules.forms import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/validators/rules/language.py
Normal file
1
mandelstudio/content_guard/validators/rules/language.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.validators.rules.language import * # noqa: F401,F403
|
||||
1
mandelstudio/content_guard/validators/rules/patterns.py
Normal file
1
mandelstudio/content_guard/validators/rules/patterns.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.validators.rules.patterns import * # noqa: F401,F403
|
||||
@@ -1,5 +1,11 @@
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory two levels above this file; it is put at the front of sys.path
# (once) so that modules located there can be imported.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
|
||||
def _patch_legacy_django_translation_aliases():
|
||||
|
||||
0
mandelstudio/management/__init__.py
Normal file
0
mandelstudio/management/__init__.py
Normal file
0
mandelstudio/management/commands/__init__.py
Normal file
0
mandelstudio/management/commands/__init__.py
Normal file
1
mandelstudio/management/commands/audit_locales.py
Normal file
1
mandelstudio/management/commands/audit_locales.py
Normal file
@@ -0,0 +1 @@
|
||||
from mandelblog_content_guard.management.commands.audit_locales import Command # noqa: F401
|
||||
101
mandelstudio/models.py
Normal file
101
mandelstudio/models.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import uuid
|
||||
|
||||
from django.db import models
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from wagtail.admin.panels import FieldPanel
|
||||
from wagtail.blocks import RichTextBlock
|
||||
from wagtail.contrib.settings.models import BaseSiteSetting
|
||||
from wagtail.fields import StreamField
|
||||
from wagtail.models import Locale, Site, TranslatableMixin
|
||||
from wagtail.snippets.models import register_snippet
|
||||
|
||||
from ocyan.plugin.wagtail.block_plugin import get_extra_ocyan_settings_blocks
|
||||
from ocyan.plugin.wagtail.blocks import (
|
||||
AboutUsBlock,
|
||||
HeadedPagelistBlock,
|
||||
HeadedRichTextBlock,
|
||||
)
|
||||
|
||||
from mandelblog_content_guard.mixins import MultilingualValidationMixin
|
||||
|
||||
|
||||
@register_snippet
class LocalizedFooterContent(MultilingualValidationMixin, TranslatableMixin, models.Model):
    """Footer content for one site in one locale, registered as a snippet.

    A unique constraint allows at most one record per ``(site, locale)``
    pair. ``footer`` holds the main footer blocks and ``mini_footer`` holds
    rich-text blocks only.
    """

    title = models.CharField(max_length=120, default="Footer content")
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
        related_name="localized_footer_contents",
    )
    # related_name="+" means no reverse accessor is created on Locale.
    locale = models.ForeignKey(Locale, on_delete=models.PROTECT, related_name="+")
    translation_key = models.UUIDField(default=uuid.uuid4, editable=False)

    # Built-in block types plus whatever get_extra_ocyan_settings_blocks() adds.
    footer = StreamField(
        [
            ("about_us", AboutUsBlock()),
            ("text", HeadedRichTextBlock()),
            ("page_list", HeadedPagelistBlock()),
        ]
        + get_extra_ocyan_settings_blocks(),
        default=list,
        use_json_field=True,
    )
    mini_footer = StreamField(
        [("text", RichTextBlock())],
        default=list,
        use_json_field=True,
    )

    # Admin edit form layout.
    panels = [
        FieldPanel("title"),
        FieldPanel("site"),
        FieldPanel("locale"),
        FieldPanel("footer"),
        FieldPanel("mini_footer"),
    ]

    class Meta(TranslatableMixin.Meta):
        verbose_name = _("Localized footer content")
        verbose_name_plural = _("Localized footer contents")
        constraints = [
            models.UniqueConstraint(
                fields=["site", "locale"],
                name="unique_localized_footer_per_site_locale",
            ),
        ]

    def __str__(self):
        """Return ``"<site hostname> [<language code>]"``."""
        return "{} [{}]".format(self.site.hostname, self.locale.language_code)
|
||||
|
||||
|
||||
class LocaleAuditRun(models.Model):
    """Record of one locale audit run together with its aggregate counters."""

    # Filled in automatically when the row is first saved.
    started_at = models.DateTimeField(auto_now_add=True)
    # Optional; stays NULL until a value is stored.
    finished_at = models.DateTimeField(null=True, blank=True)
    # JSON list, empty by default.
    locale_codes = models.JSONField(default=list, blank=True)
    fix_enabled = models.BooleanField(default=False)
    total_urls_checked = models.PositiveIntegerField(default=0)
    issues_found = models.PositiveIntegerField(default=0)
    pages_with_issues = models.PositiveIntegerField(default=0)
    # JSON object, empty by default.
    summary = models.JSONField(default=dict, blank=True)

    class Meta:
        # Most recently started runs first.
        ordering = ["-started_at"]

    def __str__(self):
        """Return a short label built from the primary key and the run's counters.

        Only plain local fields are read, so this is safe on unsaved
        instances and never triggers a database query.
        """
        return (
            f"Locale audit run #{self.pk} "
            f"({self.issues_found} issues, {self.total_urls_checked} URLs checked)"
        )
|
||||
|
||||
|
||||
class LocaleAuditIssue(models.Model):
    """A single issue recorded for a :class:`LocaleAuditRun`."""

    # Deleting the run deletes its issues; reachable from the run as ``run.issues``.
    run = models.ForeignKey(
        LocaleAuditRun, related_name="issues", on_delete=models.CASCADE
    )
    locale_code = models.CharField(max_length=12)
    # Optional identifier and type name of the object the issue refers to.
    object_id = models.PositiveIntegerField(null=True, blank=True)
    object_type = models.CharField(max_length=128, blank=True)
    url = models.TextField(blank=True)
    title = models.CharField(max_length=255, blank=True)
    severity = models.CharField(max_length=16)
    issue_type = models.CharField(max_length=64)
    field_path = models.CharField(max_length=512, blank=True)
    bad_value = models.TextField(blank=True)
    replacement = models.TextField(blank=True)
    fixed = models.BooleanField(default=False)
    # Free-form JSON object, empty by default.
    extra = models.JSONField(default=dict, blank=True)

    class Meta:
        ordering = ["locale_code", "url", "field_path"]

    def __str__(self):
        """Return severity, issue type, locale and the best available location.

        The related ``run`` is deliberately not accessed so that rendering
        the label never issues a database query.
        """
        where = self.url or self.field_path or "unknown location"
        return f"[{self.severity}] {self.issue_type} ({self.locale_code}): {where}"
|
||||
@@ -9,6 +9,7 @@ https://docs.djangoproject.com/en/2.0/ref/settings/
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from configtype.jsonconfig import setup_search_paths
|
||||
|
||||
@@ -20,7 +21,10 @@ setup_search_paths("/etc/ocyan/", str(_project_app_path))
|
||||
|
||||
from ocyan.main.settings import * # pylint:disable=W0401,W0614
|
||||
|
||||
INSTALLED_APPS = ["mandelstudio"] + INSTALLED_APPS
|
||||
INSTALLED_APPS = [
|
||||
"mandelblog_content_guard.apps.MandelblogContentGuardConfig",
|
||||
"mandelstudio",
|
||||
] + INSTALLED_APPS
|
||||
|
||||
# Enable request language negotiation.
|
||||
if "django.middleware.locale.LocaleMiddleware" not in MIDDLEWARE:
|
||||
@@ -64,3 +68,14 @@ ACTIVE_VERTICAL = "agency"
|
||||
# Wagtail content internationalization in admin
|
||||
WAGTAIL_I18N_ENABLED = True
|
||||
WAGTAIL_CONTENT_LANGUAGES = LANGUAGES
|
||||
|
||||
# Content guard configuration.
CONTENT_GUARD_STRICT = True
CONTENT_GUARD_BLOCK_MEDIUM = False
# One entry per configured language: the code half of each LANGUAGES pair.
CONTENT_GUARD_LOCALES = [lang_code for lang_code, _name in LANGUAGES]
CONTENT_GUARD_REWRITE_ENABLED = True
CONTENT_GUARD_REWRITE_BACKEND = None

# Overrides applied only when "test" is one of the command-line arguments.
if "test" in sys.argv:
    # Copy any MIGRATION_MODULES already defined in this module and point the
    # ``template_engine`` app at the test migration package.
    MIGRATION_MODULES = {
        **globals().get("MIGRATION_MODULES", {}),
        "template_engine": "mandelstudio.test_migrations.template_engine",
    }
    TEST_RUNNER = "django.test.runner.DiscoverRunner"
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0001_initial").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0002_templateenginesitesettings").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0003_templateenginesitesettings_nav_items").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0004_alter_basehomepage_body_alter_basestandardpage_body").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0005_templateenginesitesettings_header_variant_and_more").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0006_templateenginesitesettings_footer_dynamic_fields").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0007_templateenginesitesettings_header_cta_fields").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0008_templateenginesitesettings_footer_bottom_links_and_more").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0009_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0010_enginepage_and_more").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0011_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0012_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0013_engineblockpreset").Migration
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0014_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration
|
||||
@@ -0,0 +1,21 @@
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
def _ensure_navitem_table(apps, schema_editor):
    """Create the ``TemplateEngineNavItem`` table when the database lacks it.

    Returns without doing anything if ``apps`` has no such model, or if a
    table named like the model's ``db_table`` is already reported by the
    connection's introspection.
    """
    try:
        nav_item_model = apps.get_model("template_engine", "TemplateEngineNavItem")
    except LookupError:
        # The model is not known to this app registry: nothing to create.
        return

    present_tables = set(schema_editor.connection.introspection.table_names())
    if nav_item_model._meta.db_table in present_tables:
        return
    schema_editor.create_model(nav_item_model)
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Migration whose single operation runs ``_ensure_navitem_table``.

    Reversing the migration does nothing (``RunPython.noop``).
    """

    dependencies = [
        (
            "template_engine",
            "0014_alter_basehomepage_body_alter_basestandardpage_body_and_more",
        ),
    ]

    operations = [
        migrations.RunPython(
            code=_ensure_navitem_table,
            reverse_code=migrations.RunPython.noop,
        ),
    ]
|
||||
@@ -0,0 +1,2 @@
|
||||
from importlib import import_module
|
||||
Migration = import_module("ocyan.plugin.template_engine.engine.migrations.0016_alter_basehomepage_body_alter_basestandardpage_body_and_more").Migration
|
||||
1
mandelstudio/tests/__init__.py
Normal file
1
mandelstudio/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
181
mandelstudio/tests/test_content_guard.py
Normal file
181
mandelstudio/tests/test_content_guard.py
Normal file
@@ -0,0 +1,181 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from io import StringIO
|
||||
from unittest import mock
|
||||
|
||||
from django.core.management import call_command
|
||||
from django.test import SimpleTestCase, override_settings
|
||||
|
||||
from mandelblog_content_guard.agents import get_language_agent
|
||||
from mandelblog_content_guard.ai import rewrite_ai_output
|
||||
from mandelblog_content_guard.system_strings import build_system_rewrite_candidates, build_system_vocabulary
|
||||
from mandelblog_content_guard.types import split_issues
|
||||
from mandelblog_content_guard.validators.multilingual import extract_visible_rendered_text, validate_text_nodes
|
||||
|
||||
|
||||
class ContentGuardRuleTests(SimpleTestCase):
    """Checks for text-node validation, language agents, AI rewrite and system strings."""

    @staticmethod
    def _has_issue_type(issues, issue_type):
        # True when at least one issue carries exactly this ``issue_type``.
        return any(entry.issue_type == issue_type for entry in issues)

    def test_mixed_language_detection_blocks(self):
        # A hero text mixing other languages, validated for "pt", must block.
        nodes = [("body.hero_text", 'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco')]
        blocking, _warnings = split_issues(validate_text_nodes("pt", nodes))
        self.assertTrue(blocking)
        self.assertTrue(self._has_issue_type(blocking, "known_bad_pattern"))

    def test_cta_mismatch_detection_blocks(self):
        found = validate_text_nodes("en", [("body.cta_text", "Plan kennismaking")])
        blocking, _warnings = split_issues(found)
        self.assertTrue(self._has_issue_type(blocking, "cta_language_mismatch"))

    def test_form_validation_blocks_wrong_language(self):
        found = validate_text_nodes("ru", [("body.form.label", "Correo electrónico")])
        blocking, _warnings = split_issues(found)
        # Either of the two issue types is accepted.
        self.assertTrue(
            any(
                entry.issue_type in {"known_bad_pattern", "form_language_mismatch"}
                for entry in blocking
            )
        )

    @override_settings(CONTENT_GUARD_BLOCK_MEDIUM=True)
    def test_medium_can_be_blocked_in_strict_mode(self):
        nodes = [
            ("body.summary", "le la les et avec pour vous une pas des extra words to trigger heuristic"),
        ]
        blocking, _warnings = split_issues(validate_text_nodes("en", nodes))
        self.assertTrue(self._has_issue_type(blocking, "language_heuristic"))

    def test_language_agent_registry(self):
        pt_agent = get_language_agent("pt")
        self.assertEqual(pt_agent.locale, "pt")
        self.assertEqual(pt_agent.normalize_cta("contact"), "Agendar reunião introdutória")

    def test_agent_rewrite_cleans_explanation_artifact(self):
        ru_agent = get_language_agent("ru")
        noisy = 'Soporte y crecimiento" is Spanish, not Dutch. The translation from Spanish to Russian is: "Поддержка и рост".'
        self.assertEqual(ru_agent.rewrite(noisy, "body.headline"), "Поддержка и рост")

    def test_portuguese_agent_contextual_badge_rewrite(self):
        pt_agent = get_language_agent("pt")
        cases = (
            ("SERVICES", "body.cards[0].badge", "SERVIÇOS"),
            ("Transparent", "body.metrics[0].label", "Investimento claro"),
        )
        for source, field_path, expected in cases:
            self.assertEqual(pt_agent.rewrite(source, field_path), expected)

    def test_french_agent_contextual_badge_rewrite(self):
        fr_agent = get_language_agent("fr")
        cases = (
            ("PLAN", "body.cards[0].badge", "FORFAIT"),
            ("Transparent", "body.cards[0].label", "Clair"),
        )
        for source, field_path, expected in cases:
            self.assertEqual(fr_agent.rewrite(source, field_path), expected)

    def test_german_agent_normalizes_non_system_copy(self):
        de_agent = get_language_agent("de")
        cases = (
            ("New", "body.cards[0].badge", "Neu"),
            ("Intakegespräch", "body.stats[0].label", "Erstgespräch"),
            ("Was du bekommst", "body.heading", "Was Sie erhalten"),
            (
                "Sales-ready mit skalierbarem Stack",
                "body.cards[0].text",
                "Verkaufsbereit mit skalierbarer Architektur",
            ),
            (
                "Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
                "rendered",
                "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
            ),
        )
        # Cases run in order; the first mismatch fails the test.
        for source, field_path, expected in cases:
            self.assertEqual(de_agent.rewrite(source, field_path), expected)

    def test_rewrite_ai_output_validates_result(self):
        raw_output = 'Sem cartão de crédito" is not Dutch; . The translation from German to Spanish is: "Sem cartão de crédito".'
        result = rewrite_ai_output("pt", "body.cta_text", raw_output)
        self.assertEqual(result, "Sem compromisso")

    def test_portuguese_rewrite_candidates_are_detected(self):
        nodes = [
            ("body.hero_text", "Siti web e negozi online che sono rapidamente online e facili da gestire"),
        ]
        found = validate_text_nodes("pt", nodes)
        self.assertTrue(self._has_issue_type(found, "mixed_locale_heading"))

    def test_french_foreign_ui_label_is_detected(self):
        nodes = [("body.metric_label", "Durchschnittliche Lieferung")]
        found = validate_text_nodes("fr", nodes)
        self.assertTrue(self._has_issue_type(found, "foreign_ui_label"))

    def test_de_canonical_system_strings_are_not_rewrite_candidates(self):
        nodes = [("body.metric_label", "Durchschnittliche Lieferung"), ("body.badge", "PLAN")]
        found = validate_text_nodes("de", nodes)
        # Neither value may be reported as a bad value for "de".
        for canonical in ("Durchschnittliche Lieferung", "PLAN"):
            self.assertFalse(any(entry.bad_value == canonical for entry in found))

    def test_extract_visible_rendered_text_ignores_hidden_script_and_style(self):
        # The markup is kept flush-left so the literal's content is unchanged.
        markup = """
<html><body>
<style>.x{color:red}</style>
<script>var foo = 'bar';</script>
<h1>Visible title</h1>
<p style="display:none">Hidden copy</p>
<div aria-hidden="true"><p>Also hidden</p></div>
<a href="#">Visible link</a>
<noscript>Nope</noscript>
</body></html>
"""
        visible = extract_visible_rendered_text(markup)
        for expected in ("Visible title", "Visible link"):
            self.assertIn(expected, visible)
        for unexpected in ("Hidden copy", "Also hidden", "var foo"):
            self.assertNotIn(unexpected, visible)

    def test_system_strings_are_centralized_for_fr_and_pt(self):
        vocabulary_cases = (
            ("fr", "PLAN", "FORFAIT"),
            ("fr", "Reaktionszeit", "Temps de réponse"),
            ("pt", "Transparent", "Transparente"),
            ("fr", "Transparente Investition", "Investissement transparent"),
            ("pt", "Transparente Investition", "Investimento transparente"),
        )
        # The vocabulary is rebuilt for every lookup, one call per case.
        for locale, source, expected in vocabulary_cases:
            self.assertEqual(build_system_vocabulary(locale)[source], expected)
        self.assertEqual(
            build_system_rewrite_candidates()["Durchschnittliche Lieferung"],
            "foreign_ui_label",
        )
|
||||
|
||||
|
||||
class AuditLocalesCommandTests(SimpleTestCase):
    """Checks for the ``audit_locales`` management command with the audit function patched out."""

    # Dotted path of the function replaced by a mock in every test.
    _AUDIT_TARGET = "mandelblog_content_guard.management.commands.audit_locales.audit_locales"

    @staticmethod
    def _invoke(*cli_args):
        # Run the command with the given arguments and return what it wrote to stdout.
        buffer = StringIO()
        call_command("audit_locales", *cli_args, stdout=buffer)
        return buffer.getvalue()

    def test_json_output(self):
        # A single blocking issue attached to the fake run.
        issue = mock.Mock()
        issue.url = "/en/contact/"
        issue.title = "Contact"
        issue.severity = "block"
        issue.issue_type = "wrong_language_fragment"
        issue.field_path = "body.form.label"
        issue.bad_value = "Correo electrónico"
        issue.replacement = "Email"
        issue.fixed = False

        run = mock.Mock(
            pk=12,
            total_urls_checked=2,
            issues_found=1,
            summary={"en": {"total_urls_checked": 2, "issues_found": 1, "by_severity": {"block": 1}}},
        )
        run.issues.all.return_value.order_by.return_value = [issue]

        with mock.patch(self._AUDIT_TARGET) as audit_locales_mock:
            audit_locales_mock.return_value = run
            rendered = self._invoke("--locale", "en", "--format=json").strip()

        payload = json.loads(rendered)
        self.assertEqual(payload["run_id"], 12)
        self.assertEqual(payload["issues"]["en"][0]["bad_value"], "Correo electrónico")

    def test_rewrite_flags_are_forwarded(self):
        run = mock.Mock(
            pk=13,
            total_urls_checked=1,
            issues_found=0,
            summary={"pt": {"total_urls_checked": 1, "issues_found": 0, "issues_fixed": 0, "by_severity": {"block": 0, "warn": 0, "log": 0}}},
        )
        run.issues.all.return_value.order_by.return_value = []

        with mock.patch(self._AUDIT_TARGET) as audit_locales_mock:
            audit_locales_mock.return_value = run
            self._invoke("--locale", "pt", "--rewrite", "--dry-run")
            # The command must pass the parsed flags straight through.
            audit_locales_mock.assert_called_once_with(["pt"], fix=False, rewrite=True, dry_run=True)
|
||||
0
mandelstudio/validators/__init__.py
Normal file
0
mandelstudio/validators/__init__.py
Normal file
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user