Add multilingual audit CI pipeline + extract mandelblog_content_guard
This commit is contained in:
15
mandelblog_content_guard/normalizers/nl.py
Normal file
15
mandelblog_content_guard/normalizers/nl.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Literal phrase substitutions: every occurrence of a key in the input text is
# replaced by the corresponding value. The only entry at present maps "PLAN"
# to itself, so applying the table currently leaves the text unchanged.
NL_PHRASE_REPLACEMENTS: dict[str, str] = {"PLAN": "PLAN"}
|
||||
|
||||
|
||||
def normalize_nl_text(text: str, field_path: str = "") -> str:
    """Return *text* with phrase replacements applied and whitespace normalized.

    Each key of ``NL_PHRASE_REPLACEMENTS`` is substituted by its value, one
    entry after another in the mapping's iteration order, so a later entry
    sees the output of earlier ones. Afterwards every run of whitespace is
    collapsed to a single space and leading/trailing whitespace is removed.

    Args:
        text: The string to normalize.
        field_path: Accepted but not used by this function.
            NOTE(review): presumably kept for signature parity with other
            normalizers -- confirm against callers.

    Returns:
        The normalized string.
    """
    result = text
    for phrase in NL_PHRASE_REPLACEMENTS:
        replacement = NL_PHRASE_REPLACEMENTS[phrase]
        result = result.replace(phrase, replacement)

    # Collapse whitespace runs first, then trim what is left at the edges.
    collapsed = re.sub(r"\s+", " ", result)
    return collapsed.strip()
|
||||
Reference in New Issue
Block a user