# Source file: mandelstudio/mandelstudio/tests/test_content_guard.py
# (182 lines, 8.7 KiB, Python)

from __future__ import annotations
import json
from io import StringIO
from unittest import mock
from django.core.management import call_command
from django.test import SimpleTestCase, override_settings
from mandelblog_content_guard.agents import get_language_agent
from mandelblog_content_guard.ai import rewrite_ai_output
from mandelblog_content_guard.system_strings import build_system_rewrite_candidates, build_system_vocabulary
from mandelblog_content_guard.types import split_issues
from mandelblog_content_guard.validators.multilingual import extract_visible_rendered_text, validate_text_nodes
class ContentGuardRuleTests(SimpleTestCase):
    """Rule-level tests for the content guard validators, language agents and system vocabularies."""

    def test_mixed_language_detection_blocks(self):
        """A Portuguese node containing an explanation artifact yields a blocking ``known_bad_pattern``."""
        nodes = [("body.hero_text", 'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco')]
        found = validate_text_nodes("pt", nodes)
        blockers, _ = split_issues(found)
        self.assertTrue(blockers)
        self.assertTrue(any(item.issue_type == "known_bad_pattern" for item in blockers))

    def test_cta_mismatch_detection_blocks(self):
        """A CTA in another language on an English page yields a blocking ``cta_language_mismatch``."""
        found = validate_text_nodes("en", [("body.cta_text", "Plan kennismaking")])
        blockers, _ = split_issues(found)
        self.assertTrue(any(item.issue_type == "cta_language_mismatch" for item in blockers))

    def test_form_validation_blocks_wrong_language(self):
        """A Spanish form label on a Russian page blocks under either accepted issue type."""
        accepted_types = {"known_bad_pattern", "form_language_mismatch"}
        found = validate_text_nodes("ru", [("body.form.label", "Correo electrónico")])
        blockers, _ = split_issues(found)
        self.assertTrue(any(item.issue_type in accepted_types for item in blockers))

    @override_settings(CONTENT_GUARD_BLOCK_MEDIUM=True)
    def test_medium_can_be_blocked_in_strict_mode(self):
        """With ``CONTENT_GUARD_BLOCK_MEDIUM`` enabled, a ``language_heuristic`` issue is blocking."""
        sample = "le la les et avec pour vous une pas des extra words to trigger heuristic"
        found = validate_text_nodes("en", [("body.summary", sample)])
        blockers, _ = split_issues(found)
        self.assertTrue(any(item.issue_type == "language_heuristic" for item in blockers))

    def test_language_agent_registry(self):
        """The agent looked up for ``pt`` reports that locale and normalizes the ``contact`` CTA."""
        pt_agent = get_language_agent("pt")
        self.assertEqual(pt_agent.locale, "pt")
        normalized_cta = pt_agent.normalize_cta("contact")
        self.assertEqual(normalized_cta, "Agendar reunião introdutória")

    def test_agent_rewrite_cleans_explanation_artifact(self):
        """The Russian agent is expected to reduce an explanation-style string to the quoted translation."""
        ru_agent = get_language_agent("ru")
        artifact = 'Soporte y crecimiento" is Spanish, not Dutch. The translation from Spanish to Russian is: "Поддержка и рост".'
        cleaned = ru_agent.rewrite(artifact, "body.headline")
        self.assertEqual(cleaned, "Поддержка и рост")

    def test_portuguese_agent_contextual_badge_rewrite(self):
        """The Portuguese agent rewrites a badge and a metric label to the expected copy."""
        pt_agent = get_language_agent("pt")
        badge = pt_agent.rewrite("SERVICES", "body.cards[0].badge")
        self.assertEqual(badge, "SERVIÇOS")
        metric_label = pt_agent.rewrite("Transparent", "body.metrics[0].label")
        self.assertEqual(metric_label, "Investimento claro")

    def test_french_agent_contextual_badge_rewrite(self):
        """The French agent rewrites a badge and a card label to the expected copy."""
        fr_agent = get_language_agent("fr")
        badge = fr_agent.rewrite("PLAN", "body.cards[0].badge")
        self.assertEqual(badge, "FORFAIT")
        card_label = fr_agent.rewrite("Transparent", "body.cards[0].label")
        self.assertEqual(card_label, "Clair")

    def test_german_agent_normalizes_non_system_copy(self):
        """The German agent rewrites each sample (text, field path) to the expected German copy."""
        de_agent = get_language_agent("de")
        # (input text, field path, expected rewrite); checked in order, stopping at the first failure.
        cases = (
            ("New", "body.cards[0].badge", "Neu"),
            ("Intakegespräch", "body.stats[0].label", "Erstgespräch"),
            ("Was du bekommst", "body.heading", "Was Sie erhalten"),
            (
                "Sales-ready mit skalierbarem Stack",
                "body.cards[0].text",
                "Verkaufsbereit mit skalierbarer Architektur",
            ),
            (
                "Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
                "rendered",
                "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
            ),
        )
        for source_text, field_path, expected in cases:
            self.assertEqual(de_agent.rewrite(source_text, field_path), expected)

    def test_rewrite_ai_output_validates_result(self):
        """``rewrite_ai_output`` for a Portuguese CTA artifact is expected to return ``Sem compromisso``."""
        raw_output = 'Sem cartão de crédito" is not Dutch; . The translation from German to Spanish is: "Sem cartão de crédito".'
        result = rewrite_ai_output("pt", "body.cta_text", raw_output)
        self.assertEqual(result, "Sem compromisso")

    def test_portuguese_rewrite_candidates_are_detected(self):
        """Italian hero text on a Portuguese page is reported as ``mixed_locale_heading``."""
        hero_text = "Siti web e negozi online che sono rapidamente online e facili da gestire"
        found = validate_text_nodes("pt", [("body.hero_text", hero_text)])
        self.assertTrue(any(item.issue_type == "mixed_locale_heading" for item in found))

    def test_french_foreign_ui_label_is_detected(self):
        """A German metric label on a French page is reported as ``foreign_ui_label``."""
        nodes = [("body.metric_label", "Durchschnittliche Lieferung")]
        found = validate_text_nodes("fr", nodes)
        self.assertTrue(any(item.issue_type == "foreign_ui_label" for item in found))

    def test_de_canonical_system_strings_are_not_rewrite_candidates(self):
        """On a German page, neither sample value may appear as the ``bad_value`` of any issue."""
        nodes = [("body.metric_label", "Durchschnittliche Lieferung"), ("body.badge", "PLAN")]
        found = validate_text_nodes("de", nodes)
        for canonical in ("Durchschnittliche Lieferung", "PLAN"):
            self.assertFalse(any(item.bad_value == canonical for item in found))

    def test_extract_visible_rendered_text_ignores_hidden_script_and_style(self):
        """Only visible text survives extraction; style, script and hidden elements are dropped."""
        # The markup is kept flush-left so the literal matches the fixture exactly.
        markup = """
<html><body>
<style>.x{color:red}</style>
<script>var foo = 'bar';</script>
<h1>Visible title</h1>
<p style="display:none">Hidden copy</p>
<div aria-hidden="true"><p>Also hidden</p></div>
<a href="#">Visible link</a>
<noscript>Nope</noscript>
</body></html>
"""
        visible_text = extract_visible_rendered_text(markup)
        for expected_fragment in ("Visible title", "Visible link"):
            self.assertIn(expected_fragment, visible_text)
        for hidden_fragment in ("Hidden copy", "Also hidden", "var foo"):
            self.assertNotIn(hidden_fragment, visible_text)

    def test_system_strings_are_centralized_for_fr_and_pt(self):
        """The fr/pt system vocabularies and the rewrite-candidate map contain the expected entries."""
        # (locale, source label, expected localized label); the vocabulary is rebuilt for every lookup.
        vocabulary_cases = (
            ("fr", "PLAN", "FORFAIT"),
            ("fr", "Reaktionszeit", "Temps de réponse"),
            ("pt", "Transparent", "Transparente"),
            ("fr", "Transparente Investition", "Investissement transparent"),
            ("pt", "Transparente Investition", "Investimento transparente"),
        )
        for locale, source_label, localized in vocabulary_cases:
            self.assertEqual(build_system_vocabulary(locale)[source_label], localized)
        candidates = build_system_rewrite_candidates()
        self.assertEqual(candidates["Durchschnittliche Lieferung"], "foreign_ui_label")
class AuditLocalesCommandTests(SimpleTestCase):
    """Tests for the ``audit_locales`` management command with the audit function mocked out."""

    # Dotted path of the audit function as looked up by the management command module.
    _AUDIT_TARGET = "mandelblog_content_guard.management.commands.audit_locales.audit_locales"

    @staticmethod
    def _fake_run(issue_list, **attributes):
        """Return a mock run with the given attributes whose ``issues.all().order_by(...)`` is ``issue_list``."""
        fake = mock.Mock(**attributes)
        fake.issues.all.return_value.order_by.return_value = issue_list
        return fake

    @mock.patch(_AUDIT_TARGET)
    def test_json_output(self, audit_locales_mock):
        """``--format=json`` prints a JSON payload carrying the run id and the per-locale issues."""
        issue_fields = {
            "url": "/en/contact/",
            "title": "Contact",
            "severity": "block",
            "issue_type": "wrong_language_fragment",
            "field_path": "body.form.label",
            "bad_value": "Correo electrónico",
            "replacement": "Email",
            "fixed": False,
        }
        audit_locales_mock.return_value = self._fake_run(
            [mock.Mock(**issue_fields)],
            pk=12,
            total_urls_checked=2,
            issues_found=1,
            summary={"en": {"total_urls_checked": 2, "issues_found": 1, "by_severity": {"block": 1}}},
        )

        captured = StringIO()
        call_command("audit_locales", "--locale", "en", "--format=json", stdout=captured)

        payload = json.loads(captured.getvalue().strip())
        self.assertEqual(payload["run_id"], 12)
        self.assertEqual(payload["issues"]["en"][0]["bad_value"], "Correo electrónico")

    @mock.patch(_AUDIT_TARGET)
    def test_rewrite_flags_are_forwarded(self, audit_locales_mock):
        """``--rewrite --dry-run`` reaches the audit function as ``rewrite=True, dry_run=True``."""
        audit_locales_mock.return_value = self._fake_run(
            [],
            pk=13,
            total_urls_checked=1,
            issues_found=0,
            summary={"pt": {"total_urls_checked": 1, "issues_found": 0, "issues_fixed": 0, "by_severity": {"block": 0, "warn": 0, "log": 0}}},
        )

        # The command output is captured only to keep it off the test runner's stdout.
        sink = StringIO()
        call_command("audit_locales", "--locale", "pt", "--rewrite", "--dry-run", stdout=sink)

        audit_locales_mock.assert_called_once_with(["pt"], fix=False, rewrite=True, dry_run=True)