diff --git a/app/utils/arabic.py b/app/utils/arabic.py
new file mode 100644
index 0000000..dde196d
--- /dev/null
+++ b/app/utils/arabic.py
@@ -0,0 +1,110 @@
+"""
+Arabic text normalization utilities.
+
+Used both in the extraction pipeline (post-processing) and in the API
+(query parameter normalization) to ensure consistent matching against
+the deduplicated Neo4j graph.
+"""
+import re
+import unicodedata
+
+
+# ── Diacritical marks to strip ──────────────────────────────────────────────
+ARABIC_DIACRITICS = re.compile(
+    r"[\u0610-\u061A"   # Honorific signs and small high/low marks (U+0610–U+061A)
+    r"\u064B-\u065F"    # Tashkeel (harakat) and the other combining marks up to U+065F
+    r"\u0670"           # Superscript (dagger) Alef
+    r"\u06D6-\u06DC"    # Small high Quranic annotation marks
+    r"\u06DF-\u06E4"    # More Quranic marks; U+06DD–U+06DE fall outside and are kept
+    r"\u06E7-\u06E8"    # More Quranic marks; U+06E5–U+06E6 fall outside and are kept
+    r"\u06EA-\u06ED"    # More Quranic marks; U+06E9 falls outside and is kept
+    r"]"
+)
+
+# ── Character normalization map ─────────────────────────────────────────────
+CHAR_MAP = {  # source character → replacement ("" deletes the character)
+    "أ": "ا", "إ": "ا", "آ": "ا", "ٱ": "ا",  # Hamza / Madda / Wasla Alef forms → bare Alef
+    "ؤ": "و",                                      # Waw with Hamza → Waw
+    "ئ": "ي",                                      # Ya with Hamza → Ya
+    "ى": "ي",                                      # Alef Maksura → Ya
+    "ة": "ه",                                      # Ta Marbuta → Ha
+    "ٰ": "",                                        # Dagger Alef → removed (U+0670 is also in ARABIC_DIACRITICS)
+}
+
+# ── Honorific patterns to strip from names ──────────────────────────────────
+HONORIFICS = [  # regex fragments, OR-ed together into HONORIFIC_PATTERN below
+    r"صلى\s*الله\s*عليه\s*وسلم",
+    r"عليه\s*السلام",
+    r"رضي\s*الله\s*عنه(ما|ا|م)?",  # two-letter suffix first: alternation is ordered, not longest-match
+    r"رحمه\s*الله",
+    r"تعالى",
+    r"عز\s*وجل",
+    r"ﷺ",  # single-character ligature form
+    r"﷽",  # single-character ligature form
+]  # the compiled pattern also swallows the whitespace on both sides of a match
+HONORIFIC_PATTERN = re.compile(r"\s*(" + "|".join(HONORIFICS) + r")\s*", re.UNICODE)
+
+
+def strip_diacritics(text: str) -> str:
+    """Return *text* with every character matched by ARABIC_DIACRITICS deleted."""
+    return re.sub(ARABIC_DIACRITICS, "", text)
+
+
+def normalize_chars(text: str) -> str:
+    """Apply every CHAR_MAP substitution (Alef forms, Ta Marbuta, ...) to *text*."""
+    # One translate() pass stands in for a replace() per entry; no CHAR_MAP
+    # value is itself a key, so the chained replaces could never interact.
+    return text.translate(str.maketrans(CHAR_MAP))
+
+
+def strip_honorifics(text: str) -> str:
+    """Replace each HONORIFIC_PATTERN match with one space, then trim both ends."""
+    return re.sub(HONORIFIC_PATTERN, " ", text).strip()
+
+
+def collapse_whitespace(text: str) -> str:
+    """Drop zero-width/bidi control chars, then squeeze whitespace runs to one space."""
+    visible = re.sub(r"[\u200B-\u200F\u202A-\u202E\uFEFF]", "", text)
+    return " ".join(visible.split())
+
+
+def normalize_arabic(text: str) -> str:
+    """
+    Normalize Arabic text for matching; honorifics are left in place.
+
+    Falsy input (empty string or None) yields "". Otherwise three passes
+    run in order: strip_diacritics, normalize_chars, collapse_whitespace.
+    Call normalize_name instead when honorific phrases must also go.
+    """
+    if text:
+        bare = strip_diacritics(text)
+        return collapse_whitespace(normalize_chars(bare))
+    return ""
+
+
+def normalize_name(text: str) -> str:
+    """
+    Normalize an Arabic name for matching against the graph.
+
+    Diacritics go first so vocalized honorifics still match HONORIFIC_PATTERN;
+    honorifics go before normalize_chars, which would rewrite their letters.
+    Falsy input (empty string or None) yields "".
+    """
+    if not text:
+        return ""
+    text = strip_honorifics(strip_diacritics(text))
+    return collapse_whitespace(normalize_chars(text))
+
+
+def normalize_query(text: str) -> str:
+    """
+    Normalize a search query parameter for matching against graph data.
+
+    Runs the same passes as normalize_arabic, in the same order:
+    strip_diacritics, normalize_chars, collapse_whitespace.
+    Unlike normalize_name, honorific phrases are left in the query.
+    Falsy input (empty string or None) yields "".
+    """
+    if not text:
+        return ""
+    return collapse_whitespace(normalize_chars(strip_diacritics(text)))