Files
mandelstudio/mandelstudio/tests/test_content_guard.py

259 lines
9.7 KiB
Python

from __future__ import annotations
import json
from io import StringIO
from unittest import mock
from django.core.management import call_command
from django.test import SimpleTestCase, override_settings
from mandelblog_content_guard.agents import get_language_agent
from mandelblog_content_guard.ai import rewrite_ai_output
from mandelblog_content_guard.system_strings import (
build_system_rewrite_candidates,
build_system_vocabulary,
)
from mandelblog_content_guard.types import split_issues
from mandelblog_content_guard.validators.multilingual import (
extract_visible_rendered_text,
validate_text_nodes,
)
class ContentGuardRuleTests(SimpleTestCase):
    """Checks for the content-guard validators, language agents and system strings."""

    def test_mixed_language_detection_blocks(self):
        """The mixed-language hero text must yield a blocking ``known_bad_pattern``."""
        nodes = [
            (
                "body.hero_text",
                'Poiché l\'input "Unverbindliche Erstberatung" è in tedesco',
            )
        ]
        found = validate_text_nodes("pt", nodes)
        blockers, _ = split_issues(found)
        # At least one blocking issue must exist at all ...
        self.assertTrue(blockers)
        # ... and at least one of them must be the known-bad-pattern kind.
        known_bad = [
            item for item in blockers if item.issue_type == "known_bad_pattern"
        ]
        self.assertTrue(known_bad)

    def test_cta_mismatch_detection_blocks(self):
        """The sample CTA text under locale ``en`` must block as a CTA mismatch."""
        found = validate_text_nodes("en", [("body.cta_text", "Plan kennismaking")])
        blockers, _ = split_issues(found)
        mismatches = [
            item for item in blockers if item.issue_type == "cta_language_mismatch"
        ]
        self.assertTrue(mismatches)

    def test_form_validation_blocks_wrong_language(self):
        """The sample form label under locale ``ru`` must produce a blocking issue."""
        accepted_types = {"known_bad_pattern", "form_language_mismatch"}
        found = validate_text_nodes("ru", [("body.form.label", "Correo electrónico")])
        blockers, _ = split_issues(found)
        relevant = [item for item in blockers if item.issue_type in accepted_types]
        self.assertTrue(relevant)

    @override_settings(CONTENT_GUARD_BLOCK_MEDIUM=True)
    def test_medium_can_be_blocked_in_strict_mode(self):
        """With ``CONTENT_GUARD_BLOCK_MEDIUM`` on, ``language_heuristic`` is blocking."""
        nodes = [
            (
                "body.summary",
                "le la les et avec pour vous une pas des extra words to trigger heuristic",
            )
        ]
        blockers, _ = split_issues(validate_text_nodes("en", nodes))
        heuristic_hits = [
            item for item in blockers if item.issue_type == "language_heuristic"
        ]
        self.assertTrue(heuristic_hits)

    def test_language_agent_registry(self):
        """The registry returns a ``pt`` agent that normalizes the contact CTA."""
        pt_agent = get_language_agent("pt")
        self.assertEqual(pt_agent.locale, "pt")
        self.assertEqual(
            pt_agent.normalize_cta("contact"), "Agendar reunião introdutória"
        )

    def test_agent_rewrite_cleans_explanation_artifact(self):
        """The ``ru`` agent reduces the explanation sample to the translated phrase."""
        noisy = 'Soporte y crecimiento" is Spanish, not Dutch. The translation from Spanish to Russian is: "Поддержка и рост".'
        ru_agent = get_language_agent("ru")
        cleaned = ru_agent.rewrite(noisy, "body.headline")
        self.assertEqual(cleaned, "Поддержка и рост")

    def test_portuguese_agent_contextual_badge_rewrite(self):
        """The ``pt`` agent rewrites the sample badge and metric label."""
        pt_agent = get_language_agent("pt")
        cases = (
            ("SERVICES", "body.cards[0].badge", "SERVIÇOS"),
            ("Transparent", "body.metrics[0].label", "Investimento claro"),
        )
        for raw, field_path, expected in cases:
            self.assertEqual(pt_agent.rewrite(raw, field_path), expected)

    def test_french_agent_contextual_badge_rewrite(self):
        """The ``fr`` agent rewrites the sample badge and card label."""
        fr_agent = get_language_agent("fr")
        cases = (
            ("PLAN", "body.cards[0].badge", "FORFAIT"),
            ("Transparent", "body.cards[0].label", "Clair"),
        )
        for raw, field_path, expected in cases:
            self.assertEqual(fr_agent.rewrite(raw, field_path), expected)

    def test_german_agent_normalizes_non_system_copy(self):
        """The ``de`` agent rewrites each sample to its expected German wording."""
        de_agent = get_language_agent("de")
        # (input text, field path, expected rewrite) -- checked in this order.
        cases = (
            ("New", "body.cards[0].badge", "Neu"),
            ("Intakegespräch", "body.stats[0].label", "Erstgespräch"),
            ("Was du bekommst", "body.heading", "Was Sie erhalten"),
            (
                "Sales-ready mit skalierbarem Stack",
                "body.cards[0].text",
                "Verkaufsbereit mit skalierbarer Architektur",
            ),
            (
                "Einführungsmeeting planen Projekte anzeigen Unverbindliches Gespräch, klares Angebot Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
                "rendered",
                "Erstgespräch planen · Projekte ansehen · Unverbindliches Gespräch mit klarem Angebot. Wir entwickeln schnelle Websites und Webshops, die Ihr Team selbst pflegen kann.",
            ),
        )
        for raw, field_path, expected in cases:
            self.assertEqual(de_agent.rewrite(raw, field_path), expected)

    def test_rewrite_ai_output_validates_result(self):
        """``rewrite_ai_output`` maps the noisy ``pt`` CTA sample to "Sem compromisso"."""
        noisy = 'Sem cartão de crédito" is not Dutch; . The translation from German to Spanish is: "Sem cartão de crédito".'
        self.assertEqual(
            rewrite_ai_output("pt", "body.cta_text", noisy), "Sem compromisso"
        )

    def test_portuguese_rewrite_candidates_are_detected(self):
        """The sample hero text under ``pt`` is reported as ``mixed_locale_heading``."""
        nodes = [
            (
                "body.hero_text",
                "Siti web e negozi online che sono rapidamente online e facili da gestire",
            )
        ]
        found = validate_text_nodes("pt", nodes)
        headings = [
            item for item in found if item.issue_type == "mixed_locale_heading"
        ]
        self.assertTrue(headings)

    def test_french_foreign_ui_label_is_detected(self):
        """The sample metric label under ``fr`` is reported as ``foreign_ui_label``."""
        found = validate_text_nodes(
            "fr",
            [("body.metric_label", "Durchschnittliche Lieferung")],
        )
        foreign = [item for item in found if item.issue_type == "foreign_ui_label"]
        self.assertTrue(foreign)

    def test_de_canonical_system_strings_are_not_rewrite_candidates(self):
        """Under ``de`` neither sample value may appear as an issue's ``bad_value``."""
        found = validate_text_nodes(
            "de",
            [
                ("body.metric_label", "Durchschnittliche Lieferung"),
                ("body.badge", "PLAN"),
            ],
        )
        for canonical in ("Durchschnittliche Lieferung", "PLAN"):
            flagged = [item for item in found if item.bad_value == canonical]
            self.assertFalse(flagged)

    def test_extract_visible_rendered_text_ignores_hidden_script_and_style(self):
        """Only visible text survives; hidden, aria-hidden and script text is dropped."""
        # Adjacent literals; the resulting string starts and ends with a newline.
        markup = (
            "\n"
            "<html><body>\n"
            "<style>.x{color:red}</style>\n"
            "<script>var foo = 'bar';</script>\n"
            "<h1>Visible title</h1>\n"
            '<p style="display:none">Hidden copy</p>\n'
            '<div aria-hidden="true"><p>Also hidden</p></div>\n'
            '<a href="#">Visible link</a>\n'
            "<noscript>Nope</noscript>\n"
            "</body></html>\n"
        )
        extracted = extract_visible_rendered_text(markup)
        for expected_fragment in ("Visible title", "Visible link"):
            self.assertIn(expected_fragment, extracted)
        for unwanted_fragment in ("Hidden copy", "Also hidden", "var foo"):
            self.assertNotIn(unwanted_fragment, extracted)

    def test_system_strings_are_centralized_for_fr_and_pt(self):
        """System vocabulary and rewrite candidates expose the expected entries."""
        # (locale, source string, expected translation) -- checked in this order.
        vocabulary_cases = (
            ("fr", "PLAN", "FORFAIT"),
            ("fr", "Reaktionszeit", "Temps de réponse"),
            ("pt", "Transparent", "Transparente"),
            ("fr", "Transparente Investition", "Investissement transparent"),
            ("pt", "Transparente Investition", "Investimento transparente"),
        )
        for locale, source, expected in vocabulary_cases:
            self.assertEqual(build_system_vocabulary(locale)[source], expected)
        candidates = build_system_rewrite_candidates()
        self.assertEqual(
            candidates["Durchschnittliche Lieferung"],
            "foreign_ui_label",
        )
class AuditLocalesCommandTests(SimpleTestCase):
    """Checks for the ``audit_locales`` management command with the audit patched."""

    # Dotted path of the function replaced by a mock in every test below.
    _AUDIT_TARGET = (
        "mandelblog_content_guard.management.commands.audit_locales.audit_locales"
    )

    @staticmethod
    def _stub_run(issues, **attrs):
        """Return a mock run with ``attrs`` set and ``issues`` behind the issue query."""
        stub = mock.Mock(**attrs)
        stub.issues.all.return_value.order_by.return_value = issues
        return stub

    @mock.patch(_AUDIT_TARGET)
    def test_json_output(self, audit_locales_mock):
        """``--format=json`` output parses and carries the run id and the issue."""
        reported_issue = mock.Mock(
            url="/en/contact/",
            title="Contact",
            severity="block",
            issue_type="wrong_language_fragment",
            field_path="body.form.label",
            bad_value="Correo electrónico",
            replacement="Email",
            fixed=False,
        )
        audit_locales_mock.return_value = self._stub_run(
            [reported_issue],
            pk=12,
            total_urls_checked=2,
            issues_found=1,
            summary={
                "en": {
                    "total_urls_checked": 2,
                    "issues_found": 1,
                    "by_severity": {"block": 1},
                }
            },
        )

        buffer = StringIO()
        call_command("audit_locales", "--locale", "en", "--format=json", stdout=buffer)

        payload = json.loads(buffer.getvalue().strip())
        self.assertEqual(payload["run_id"], 12)
        first_en_issue = payload["issues"]["en"][0]
        self.assertEqual(first_en_issue["bad_value"], "Correo electrónico")

    @mock.patch(_AUDIT_TARGET)
    def test_rewrite_flags_are_forwarded(self, audit_locales_mock):
        """``--rewrite --dry-run`` reaches the audit function as keyword flags."""
        audit_locales_mock.return_value = self._stub_run(
            [],
            pk=13,
            total_urls_checked=1,
            issues_found=0,
            summary={
                "pt": {
                    "total_urls_checked": 1,
                    "issues_found": 0,
                    "issues_fixed": 0,
                    "by_severity": {"block": 0, "warn": 0, "log": 0},
                }
            },
        )

        buffer = StringIO()
        call_command(
            "audit_locales", "--locale", "pt", "--rewrite", "--dry-run", stdout=buffer
        )

        audit_locales_mock.assert_called_once_with(
            ["pt"], fix=False, rewrite=True, dry_run=True
        )