# Listing metadata from the source viewer: 182 lines, 8.7 KiB, Python.
from __future__ import annotations
|
|
|
|
import json
|
|
from io import StringIO
|
|
from unittest import mock
|
|
|
|
from django.core.management import call_command
|
|
from django.test import SimpleTestCase, override_settings
|
|
|
|
from mandelblog_content_guard.agents import get_language_agent
|
|
from mandelblog_content_guard.ai import rewrite_ai_output
|
|
from mandelblog_content_guard.system_strings import build_system_rewrite_candidates, build_system_vocabulary
|
|
from mandelblog_content_guard.types import split_issues
|
|
from mandelblog_content_guard.validators.multilingual import extract_visible_rendered_text, validate_text_nodes
|
|
|
|
|
|
class ContentGuardRuleTests(SimpleTestCase):
    """Checks for the text-node validators, language agents and system strings."""

    @staticmethod
    def _issue_types(issues):
        """Return the ``issue_type`` of each issue, preserving order."""
        return [issue.issue_type for issue in issues]

    @staticmethod
    def _bad_values(issues):
        """Return the ``bad_value`` of each issue, preserving order."""
        return [issue.bad_value for issue in issues]

    def _assert_rewrites(self, agent, cases):
        """Assert ``agent.rewrite(text, path) == expected`` for each case, in order."""
        for text, field_path, expected in cases:
            self.assertEqual(agent.rewrite(text, field_path), expected)

    def test_mixed_language_detection_blocks(self):
        """A "pt" hero text with foreign explanation text yields a blocking known_bad_pattern."""
        nodes = [("body.hero_text", 'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco')]
        blocking, _ = split_issues(validate_text_nodes("pt", nodes))
        self.assertTrue(blocking)
        self.assertIn("known_bad_pattern", self._issue_types(blocking))

    def test_cta_mismatch_detection_blocks(self):
        """An "en" CTA node with the text below yields a blocking cta_language_mismatch."""
        found = validate_text_nodes("en", [("body.cta_text", "Plan kennismaking")])
        blocking, _ = split_issues(found)
        self.assertIn("cta_language_mismatch", self._issue_types(blocking))

    def test_form_validation_blocks_wrong_language(self):
        """A "ru" form label with the text below yields one of two accepted blocking types."""
        accepted = {"known_bad_pattern", "form_language_mismatch"}
        found = validate_text_nodes("ru", [("body.form.label", "Correo electrónico")])
        blocking, _ = split_issues(found)
        matching = [issue for issue in blocking if issue.issue_type in accepted]
        self.assertTrue(matching)

    @override_settings(CONTENT_GUARD_BLOCK_MEDIUM=True)
    def test_medium_can_be_blocked_in_strict_mode(self):
        """With CONTENT_GUARD_BLOCK_MEDIUM=True a language_heuristic issue is in the blocking set."""
        nodes = [
            ("body.summary", "le la les et avec pour vous une pas des extra words to trigger heuristic"),
        ]
        blocking, _ = split_issues(validate_text_nodes("en", nodes))
        self.assertIn("language_heuristic", self._issue_types(blocking))

    def test_language_agent_registry(self):
        """The "pt" agent reports its locale and normalizes the "contact" CTA."""
        pt_agent = get_language_agent("pt")
        self.assertEqual(pt_agent.locale, "pt")
        self.assertEqual(pt_agent.normalize_cta("contact"), "Agendar reunião introdutória")

    def test_agent_rewrite_cleans_explanation_artifact(self):
        """The "ru" agent reduces an explanation-style string to the quoted translation."""
        noisy = 'Soporte y crecimiento" is Spanish, not Dutch. The translation from Spanish to Russian is: "Поддержка и рост".'
        cleaned = get_language_agent("ru").rewrite(noisy, "body.headline")
        self.assertEqual(cleaned, "Поддержка и рост")

    def test_portuguese_agent_contextual_badge_rewrite(self):
        """The "pt" agent rewrites a badge and a metric label to the expected strings."""
        self._assert_rewrites(
            get_language_agent("pt"),
            (
                ("SERVICES", "body.cards[0].badge", "SERVIÇOS"),
                ("Transparent", "body.metrics[0].label", "Investimento claro"),
            ),
        )

    def test_french_agent_contextual_badge_rewrite(self):
        """The "fr" agent rewrites a badge and a card label to the expected strings."""
        self._assert_rewrites(
            get_language_agent("fr"),
            (
                ("PLAN", "body.cards[0].badge", "FORFAIT"),
                ("Transparent", "body.cards[0].label", "Clair"),
            ),
        )

    def test_german_agent_normalizes_non_system_copy(self):
        """The "de" agent rewrites short labels, a heading, card text and a rendered blob."""
        self._assert_rewrites(
            get_language_agent("de"),
            (
                ("New", "body.cards[0].badge", "Neu"),
                ("Intakegespräch", "body.stats[0].label", "Erstgespräch"),
                ("Was du bekommst", "body.heading", "Was Sie erhalten"),
                (
                    "Sales-ready mit skalierbarem Stack",
                    "body.cards[0].text",
                    "Verkaufsbereit mit skalierbarer Architektur",
                ),
                (
                    "Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
                    "rendered",
                    "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
                ),
            ),
        )

    def test_rewrite_ai_output_validates_result(self):
        """rewrite_ai_output returns "Sem compromisso" for the noisy "pt" CTA input below."""
        noisy = 'Sem cartão de crédito" is not Dutch; . The translation from German to Spanish is: "Sem cartão de crédito".'
        result = rewrite_ai_output("pt", "body.cta_text", noisy)
        self.assertEqual(result, "Sem compromisso")

    def test_portuguese_rewrite_candidates_are_detected(self):
        """A "pt" hero text with the sentence below is reported as mixed_locale_heading."""
        nodes = [
            ("body.hero_text", "Siti web e negozi online che sono rapidamente online e facili da gestire"),
        ]
        found = validate_text_nodes("pt", nodes)
        self.assertIn("mixed_locale_heading", self._issue_types(found))

    def test_french_foreign_ui_label_is_detected(self):
        """A "fr" metric label with the text below is reported as foreign_ui_label."""
        nodes = [("body.metric_label", "Durchschnittliche Lieferung")]
        found = validate_text_nodes("fr", nodes)
        self.assertIn("foreign_ui_label", self._issue_types(found))

    def test_de_canonical_system_strings_are_not_rewrite_candidates(self):
        """For "de", neither of the two strings below appears as an issue's bad_value."""
        nodes = [
            ("body.metric_label", "Durchschnittliche Lieferung"),
            ("body.badge", "PLAN"),
        ]
        found = validate_text_nodes("de", nodes)
        self.assertNotIn("Durchschnittliche Lieferung", self._bad_values(found))
        self.assertNotIn("PLAN", self._bad_values(found))

    def test_extract_visible_rendered_text_ignores_hidden_script_and_style(self):
        """Visible heading/link text is extracted; hidden, aria-hidden and script text is not."""
        # NOTE(review): the <noscript> content ("Nope") is part of the fixture but
        # is not asserted on below - confirm whether that omission is intentional.
        markup = """
        <html><body>
        <style>.x{color:red}</style>
        <script>var foo = 'bar';</script>
        <h1>Visible title</h1>
        <p style="display:none">Hidden copy</p>
        <div aria-hidden="true"><p>Also hidden</p></div>
        <a href="#">Visible link</a>
        <noscript>Nope</noscript>
        </body></html>
        """
        visible_text = extract_visible_rendered_text(markup)
        for expected in ("Visible title", "Visible link"):
            self.assertIn(expected, visible_text)
        for unexpected in ("Hidden copy", "Also hidden", "var foo"):
            self.assertNotIn(unexpected, visible_text)

    def test_system_strings_are_centralized_for_fr_and_pt(self):
        """Spot-check "fr"/"pt" vocabulary entries and one rewrite-candidate classification."""
        vocabulary_expectations = (
            ("fr", "PLAN", "FORFAIT"),
            ("fr", "Reaktionszeit", "Temps de réponse"),
            ("pt", "Transparent", "Transparente"),
            ("fr", "Transparente Investition", "Investissement transparent"),
            ("pt", "Transparente Investition", "Investimento transparente"),
        )
        # The vocabulary is rebuilt for every lookup, one call per assertion.
        for locale, source, expected in vocabulary_expectations:
            self.assertEqual(build_system_vocabulary(locale)[source], expected)
        candidates = build_system_rewrite_candidates()
        self.assertEqual(candidates["Durchschnittliche Lieferung"], "foreign_ui_label")
|
|
|
|
|
|
class AuditLocalesCommandTests(SimpleTestCase):
    """Runs the ``audit_locales`` management command with its audit function patched."""

    @staticmethod
    def _fake_run(pk, total_urls_checked, issues_found, summary, issues):
        """Build a mock run whose ``issues.all().order_by(...)`` returns ``issues``."""
        fake = mock.Mock(
            pk=pk,
            total_urls_checked=total_urls_checked,
            issues_found=issues_found,
            summary=summary,
        )
        fake.issues.all.return_value.order_by.return_value = issues
        return fake

    @mock.patch("mandelblog_content_guard.management.commands.audit_locales.audit_locales")
    def test_json_output(self, audit_locales_mock):
        """``--format=json`` output parses as JSON and carries the run id and issue data."""
        flagged = mock.Mock(
            url="/en/contact/",
            title="Contact",
            severity="block",
            issue_type="wrong_language_fragment",
            field_path="body.form.label",
            bad_value="Correo electrónico",
            replacement="Email",
            fixed=False,
        )
        audit_locales_mock.return_value = self._fake_run(
            pk=12,
            total_urls_checked=2,
            issues_found=1,
            summary={"en": {"total_urls_checked": 2, "issues_found": 1, "by_severity": {"block": 1}}},
            issues=[flagged],
        )

        buffer = StringIO()
        call_command("audit_locales", "--locale", "en", "--format=json", stdout=buffer)
        payload = json.loads(buffer.getvalue().strip())

        self.assertEqual(payload["run_id"], 12)
        self.assertEqual(payload["issues"]["en"][0]["bad_value"], "Correo electrónico")

    @mock.patch("mandelblog_content_guard.management.commands.audit_locales.audit_locales")
    def test_rewrite_flags_are_forwarded(self, audit_locales_mock):
        """``--rewrite --dry-run`` reach the audit function as keyword arguments."""
        audit_locales_mock.return_value = self._fake_run(
            pk=13,
            total_urls_checked=1,
            issues_found=0,
            summary={"pt": {"total_urls_checked": 1, "issues_found": 0, "issues_fixed": 0, "by_severity": {"block": 0, "warn": 0, "log": 0}}},
            issues=[],
        )

        buffer = StringIO()
        call_command("audit_locales", "--locale", "pt", "--rewrite", "--dry-run", stdout=buffer)

        audit_locales_mock.assert_called_once_with(["pt"], fix=False, rewrite=True, dry_run=True)
|