diff --git a/app/models/schemas.py b/app/models/schemas.py
index c5e4e7c..5447914 100644
--- a/app/models/schemas.py
+++ b/app/models/schemas.py
@@ -1,14 +1,23 @@
+"""
+Pydantic response models for the Hadith Scholar API.
+
+v2.0 changes:
+ - Fields that Neo4j/PG can return as null are Optional (default None) or plain str defaulting to ""; the plain-str fields still fail validation on an explicit null.
+ - Added PaginationMeta / PaginatedResponse for paginated list endpoints.
+ - All existing model_config / json_schema_extra examples preserved.
+"""
from pydantic import BaseModel, Field
from typing import Optional
+from datetime import datetime
-# ── Common ─────────────────────────────────────────────────────────────────
+# ── Pagination (NEW in v2.0) ───────────────────────────────────────────────
class PaginationMeta(BaseModel):
- total: int
- page: int
- per_page: int
- pages: int
+ total: int = Field(description="Total matching items")
+ page: int = Field(description="Current page (1-indexed)")
+ per_page: int = Field(description="Items per page")
+ pages: int = Field(description="Total pages")
model_config = {
"json_schema_extra": {
@@ -26,12 +35,12 @@ class PaginatedResponse(BaseModel):
class HadithSummary(BaseModel):
id: str = Field(description="Unique hadith UUID")
- collection: str = Field(description="Collection name in English")
- hadith_number: int = Field(description="Hadith number within collection")
+ collection: Optional[str] = Field(None, description="Collection name in English")
+ hadith_number: Optional[int] = Field(None, description="Hadith number within collection")
grade: Optional[str] = Field(None, description="Grading: Sahih, Hasan, Da'if, etc.")
- arabic_text: Optional[str] = Field(None, description="Full Arabic text (may be truncated in list views)")
- matn_text: Optional[str] = Field(None, description="Body text only (without isnad)")
- sanad_text: Optional[str] = Field(None, description="Chain of narration text only")
+ arabic_text: Optional[str] = Field(None, description="Arabic text (truncated in lists)")
+ sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only")
+ matn_text: Optional[str] = Field(None, description="Matn (body) text only")
model_config = {
"json_schema_extra": {
@@ -40,36 +49,28 @@ class HadithSummary(BaseModel):
"collection": "Sahih Bukhari",
"hadith_number": 1,
"grade": "Sahih",
- "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ...",
- "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ وَإِنَّمَا لِكُلِّ امْرِئٍ مَا نَوَى...",
- "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ قَالَ حَدَّثَنَا يَحْيَى بْنُ سَعِيدٍ الأَنْصَارِيُّ"
+ "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ...",
+ "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ...",
+ "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ...",
}]
}
}
class TopicTag(BaseModel):
- topic_arabic: str = Field(description="Topic name in Arabic, e.g. الصلاة")
- topic_english: str = Field(description="Topic name in English, e.g. Prayer")
- category: str = Field(description="Broad Islamic category: عقيدة، فقه، سيرة، أخلاق، تفسير")
+ """Topic tag attached to a hadith: Arabic and English labels plus a category."""
+ topic_arabic: str = Field("", description="Topic name in Arabic")
+ topic_english: str = Field("", description="Topic name in English")
+ category: str = Field("", description="Topic category (فقه, عقيدة, سيرة, etc.)")
model_config = {
"json_schema_extra": {
"examples": [{
"topic_arabic": "النية",
"topic_english": "Intention",
"category": "فقه"
}]
}
}
class NarratorInChain(BaseModel):
- order: int = Field(description="Position in chain: 1=closest to compiler, last=closest to Prophet ﷺ")
- name_arabic: str = Field(description="Narrator's Arabic name as it appears in the hadith text")
- name_transliterated: Optional[str] = Field(None, description="Latin transliteration of the name")
- entity_type: Optional[str] = Field(None, description="PERSON, KUNYA (أبو/أم), NISBA (attributional), or TITLE (رسول الله)")
- transmission_verb: Optional[str] = Field(None, description="Exact Arabic transmission verb: حدثنا، أخبرنا، عن، سمعت")
+ order: Optional[int] = Field(None, description="Position in chain (1 = compiler-end)")
+ name_arabic: str = Field(description="Narrator Arabic name")
+ name_transliterated: str = Field("", description="Latin transliteration")
+ entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
+ transmission_verb: Optional[str] = Field(None, description="حدثنا, أخبرنا, عن, سمعت, etc.")
+ generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.")
+ reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.")
model_config = {
"json_schema_extra": {
@@ -78,7 +79,9 @@ class NarratorInChain(BaseModel):
"name_arabic": "الْحُمَيْدِيُّ",
"name_transliterated": "al-Humaydi",
"entity_type": "NISBA",
- "transmission_verb": "حَدَّثَنَا"
+ "transmission_verb": "حَدَّثَنَا",
+ "generation": "تابع التابعين",
+ "reliability_grade": "ثقة",
}]
}
}
@@ -86,11 +89,14 @@ class NarratorInChain(BaseModel):
class HadithDetail(BaseModel):
id: str = Field(description="Unique hadith UUID")
- collection: str = Field(description="Collection English name")
- hadith_number: int = Field(description="Number within collection")
- grade: Optional[str] = Field(None, description="Hadith grade")
- arabic_text: Optional[str] = Field(None, description="Complete Arabic text")
- sanad_text: Optional[str] = Field(None, description="Isnad (chain) text only")
+ collection: Optional[str] = Field(None, description="Collection name")
+ hadith_number: Optional[int] = Field(None, description="Hadith number")
+ book_number: Optional[int] = Field(None, description="Book number within collection")
+ grade: Optional[str] = Field(None, description="Grading")
+ arabic_text: Optional[str] = Field(None, description="Full Arabic text")
+ english_text: Optional[str] = Field(None, description="English translation")
+ urdu_text: Optional[str] = Field(None, description="Urdu translation")
+ sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only")
matn_text: Optional[str] = Field(None, description="Matn (body) text only")
narrator_chain: list[NarratorInChain] = Field(default_factory=list, description="Ordered narrator chain from Neo4j graph")
topics: list[TopicTag] = Field(default_factory=list, description="Topic tags for searchability")
@@ -121,10 +127,10 @@ class HadithDetail(BaseModel):
class NarratorSummary(BaseModel):
name_arabic: str = Field(description="Primary Arabic name")
- name_transliterated: Optional[str] = Field(None, description="Latin transliteration")
- entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE")
+ name_transliterated: str = Field("", description="Latin transliteration")
+ entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين")
- reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، صدوق، ضعيف، متروك")
+ reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade: ثقة، صدوق، ضعيف")
hadith_count: int = Field(0, description="Number of hadiths this narrator appears in")
model_config = {
@@ -142,6 +148,7 @@ class NarratorSummary(BaseModel):
class NameForm(BaseModel):
+ """Alternative name forms for a narrator (kunya, nisba, laqab, etc.)."""
name: str = Field(description="Alternative name form")
type: str = Field(description="Name type: PERSON, KUNYA, NISBA, TITLE")
@@ -165,9 +172,10 @@ class PlaceRelation(BaseModel):
class NarratorProfile(BaseModel):
+ """Complete narrator profile — the mobile app profile page."""
name_arabic: str = Field(description="Primary Arabic name")
- name_transliterated: Optional[str] = Field(None, description="Latin transliteration")
- entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE")
+ name_transliterated: str = Field("", description="Latin transliteration")
+ entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
full_nasab: Optional[str] = Field(None, description="Full lineage: فلان بن فلان بن فلان")
kunya: Optional[str] = Field(None, description="أبو/أم name (e.g. أبو هريرة)")
nisba: Optional[str] = Field(None, description="Attributional name (e.g. البخاري، المدني، الزهري)")
@@ -186,6 +194,8 @@ class NarratorProfile(BaseModel):
hadiths: list[HadithSummary] = Field(default_factory=list, description="Sample hadiths narrated (max 50)")
teachers: list[NarratorSummary] = Field(default_factory=list, description="Known teachers / شيوخ")
students: list[NarratorSummary] = Field(default_factory=list, description="Known students / تلاميذ")
+ name_forms: list[NameForm] = Field(default_factory=list, description="Alternative name forms")
+ family: Optional[FamilyInfo] = Field(None, description="Family info if known")
places: list[PlaceRelation] = Field(default_factory=list, description="Associated places (born, lived, died, traveled)")
tribes: list[str] = Field(default_factory=list, description="Tribal affiliations (e.g. قريش، دوس، الأنصار)")
bio_verified: bool = Field(False, description="Whether biography has been manually verified against classical sources")
@@ -208,12 +218,11 @@ class NarratorProfile(BaseModel):
"birth_year_ce": None,
"death_year_ce": 676,
"biography_summary_arabic": "أبو هريرة الدوسي، صحابي جليل، أكثر الصحابة رواية للحديث النبوي. أسلم عام خيبر ولازم النبي ﷺ.",
- "biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith. He accepted Islam during Khaybar and remained close to the Prophet ﷺ.",
+ "biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith.",
"total_hadiths_narrated_approx": 5374,
"hadith_count": 142,
- "hadiths": [],
- "teachers": [{"name_arabic": "رسول الله ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": None, "reliability_grade": None, "hadith_count": 0}],
- "students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 89}],
+ "teachers": [{"name_arabic": "النبي ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": "نبي", "reliability_grade": None, "hadith_count": 0}],
+ "students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 0}],
"places": [{"place": "المدينة", "relation": "LIVED_IN"}],
"tribes": ["دوس"],
"bio_verified": False,
@@ -222,27 +231,27 @@ class NarratorProfile(BaseModel):
}
-# ── Isnad Chain ────────────────────────────────────────────────────────────
+# ── Isnad Chain (D3-ready) ─────────────────────────────────────────────────
class IsnadNode(BaseModel):
+ """Narrator node of an isnad graph; every field except name_arabic may be null."""
name_arabic: str = Field(description="Narrator Arabic name")
+ # Rows are passed in as IsnadNode(**row) straight from Neo4j, where an unset property
+ # comes back as null, so these two stay Optional instead of becoming str = "".
name_transliterated: Optional[str] = Field(None, description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE")
- generation: Optional[str] = Field(None, description="طبقة")
- reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade")
+ generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.")
+ reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.")
class IsnadLink(BaseModel):
+ """Directed edge of the isnad graph: `source` heard the hadith from `target`."""
- source: str = Field(description="name_arabic of narrator who received the hadith")
- target: str = Field(description="name_arabic of narrator they received it from")
- transmission_verb: Optional[str] = Field(None, description="Exact verb: حدثنا، أخبرنا، عن، سمعت، أنبأنا")
+ source: str = Field(description="name_arabic of narrator who heard")
+ target: str = Field(description="name_arabic of narrator who transmitted")
+ transmission_verb: Optional[str] = Field(None, description="حدثنا, عن, أخبرنا, etc.")
class IsnadChain(BaseModel):
- hadith_id: str = Field(description="UUID of the hadith")
- collection: str = Field(description="Collection name")
- hadith_number: int = Field(description="Hadith number")
- nodes: list[IsnadNode] = Field(default_factory=list, description="Narrator nodes for graph visualization")
+ hadith_id: str = Field(description="Hadith UUID")
+ collection: Optional[str] = Field(None, description="Collection name")
+ hadith_number: Optional[int] = Field(None, description="Hadith number")
+ nodes: list[IsnadNode] = Field(default_factory=list, description="Narrators in the chain")
links: list[IsnadLink] = Field(default_factory=list, description="Directed edges: source heard from target")
model_config = {
@@ -271,12 +280,12 @@ class IsnadChain(BaseModel):
class NarratorInteraction(BaseModel):
narrator_a: str = Field(description="First narrator Arabic name")
- narrator_a_transliterated: Optional[str] = Field(None, description="First narrator transliteration")
+ narrator_a_transliterated: str = Field("", description="First narrator transliteration")
narrator_b: str = Field(description="Second narrator Arabic name")
- narrator_b_transliterated: Optional[str] = Field(None, description="Second narrator transliteration")
- relationship_type: str = Field(description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF")
+ narrator_b_transliterated: str = Field("", description="Second narrator transliteration")
+ relationship_type: str = Field("", description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF")
shared_hadith_count: int = Field(0, description="Number of hadiths connecting them")
- hadith_ids: list[str] = Field(default_factory=list, description="IDs of connecting hadiths (max 20)")
+ hadith_ids: list[str] = Field(default_factory=list, description="IDs of shared hadiths (max 20)")
model_config = {
"json_schema_extra": {
@@ -295,7 +304,7 @@ class NarratorInteraction(BaseModel):
class NarratorConnection(BaseModel):
narrator: str = Field(description="Connected narrator Arabic name")
- narrator_transliterated: Optional[str] = Field(None, description="Transliteration")
+ narrator_transliterated: str = Field("", description="Transliteration")
connection_type: str = Field(description="Relationship type")
direction: str = Field(description="'incoming' (they → this) or 'outgoing' (this → them)")
@@ -306,12 +315,26 @@ class NarratorNetwork(BaseModel):
total_connections: int = 0
+class PathNode(BaseModel):
+ """Element of WhoMetWhoResult.path; presumably one narrator along the path — confirm against the endpoint that builds it."""
+ name_arabic: str
+ name_transliterated: str = ""
+ generation: Optional[str] = None
+
+
+class WhoMetWhoResult(BaseModel):
+ """
+ Pairs narrator_a and narrator_b with a list of PathNode steps, an optional
+ path_length and the relationship types involved.
+
+ NOTE(review): how path_length relates to len(path) is not visible here — confirm.
+ """
+ narrator_a: str
+ narrator_b: str
+ path: list[PathNode] = Field(default_factory=list)
+ path_length: Optional[int] = None
+ relationship_types: list[str] = Field(default_factory=list)
+
+
# ── Search ─────────────────────────────────────────────────────────────────
class SemanticSearchResult(BaseModel):
hadith: HadithSummary = Field(description="Matching hadith")
score: float = Field(description="Cosine similarity score (0-1, higher = more relevant)")
- collection: Optional[str] = Field(None, description="Collection name")
+ collection: str = Field("", description="Collection name")
model_config = {
"json_schema_extra": {
@@ -332,8 +355,8 @@ class SemanticSearchResult(BaseModel):
class FullTextSearchResult(BaseModel):
hadith: HadithSummary = Field(description="Matching hadith")
- score: float = Field(description="Elasticsearch relevance score (higher = more relevant)")
- highlights: list[str] = Field(default_factory=list, description="Text fragments with highlighted matches")
+ score: float = Field(description="Elasticsearch relevance score")
+ highlights: list[str] = Field(default_factory=list, description="Text fragments with highlighted matches")
model_config = {
"json_schema_extra": {
@@ -346,7 +369,27 @@ class FullTextSearchResult(BaseModel):
"arabic_text": "..."
},
"score": 12.45,
- "highlights": ["...عن الصلاة في المسجد الحرام..."]
+ "highlights": ["...عن الصلاة في المسجد..."]
}]
}
}
+
+
+class CombinedSearchResult(BaseModel):
+ """Search hit for one hadith carrying optional semantic and full-text scores, a combined score, and the source label."""
+ hadith: HadithSummary
+ semantic_score: Optional[float] = None
+ fulltext_score: Optional[float] = None
+ combined_score: float = 0.0
+ source: str = Field(description="semantic, fulltext, or both")
+
+
+# ── Stats ──────────────────────────────────────────────────────────────────
+
+class SystemStats(BaseModel):
+ """
+ Optional integer counters, each defaulting to None.
+
+ NOTE(review): field names suggest per-store record counts (PostgreSQL, Neo4j,
+ Qdrant, Elasticsearch) — confirm against the endpoint that fills them.
+ """
+ hadiths_pg: Optional[int] = None
+ narrators_neo4j: Optional[int] = None
+ places_neo4j: Optional[int] = None
+ tribes_neo4j: Optional[int] = None
+ relationships_neo4j: Optional[int] = None
+ embeddings_qdrant: Optional[int] = None
+ documents_es: Optional[int] = None
diff --git a/app/routers/chains.py b/app/routers/chains.py
index 7d37d46..e9bb58d 100644
--- a/app/routers/chains.py
+++ b/app/routers/chains.py
@@ -1,27 +1,33 @@
"""
-Isnad chain endpoints — chain visualization data for hadith detail views.
+Isnad chain endpoints — chain visualization data (D3-ready nodes + links).
"""
-from fastapi import APIRouter, Query, HTTPException
+from fastapi import APIRouter, Query, Path, HTTPException
from app.services.database import db
-from app.models.schemas import IsnadChain, IsnadNode, IsnadLink
+from app.utils.arabic import normalize_name
+from app.models.schemas import (
+ IsnadChain, IsnadNode, IsnadLink,
+ PaginatedResponse, PaginationMeta,
+)
router = APIRouter(prefix="/chains", tags=["Isnad Chains"])
-@router.get("/hadith/{hadith_id}", response_model=IsnadChain,
- summary="Get isnad chain for a hadith",
- description="Returns the complete isnad (chain of narration) as a graph structure "
- "with nodes (narrators) and links (transmission relationships). "
- "Ready for visualization with D3.js, vis.js, Cytoscape.js, or any graph library. "
- "Each node includes narrator metadata (generation, reliability); "
- "each link includes the transmission verb (حدثنا، عن، أخبرنا).")
-async def get_isnad_chain(hadith_id: str):
+def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
+ """Build PaginationMeta; pages is total / per_page rounded up, with a minimum of 1 so an empty result still reports one page."""
+ pages = max(1, (total + per_page - 1) // per_page)
+ return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages)
+
+
+# ── Chain for a single hadith ──────────────────────────────────────────────
+
+@router.get("/hadith/{hadith_id}", response_model=IsnadChain)
+async def get_isnad_chain(
+ hadith_id: str = Path(..., description="Hadith UUID"),
+):
"""
- Get the full isnad chain for a hadith as a graph (nodes + links)
- ready for visualization (D3.js, vis.js, etc.).
+ Get the isnad chain for a hadith as a directed graph (nodes + links).
+ Returns D3-compatible format for frontend visualization.
"""
- # Get hadith info
hadith = db.neo4j_query_one("""
MATCH (h:Hadith {id: $hid})
RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number
@@ -30,108 +36,113 @@ async def get_isnad_chain(hadith_id: str):
if not hadith:
raise HTTPException(status_code=404, detail="Hadith not found in graph")
- # Get chain nodes
- nodes = db.neo4j_query("""
- MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
+ # Narrator nodes in the chain
+ nodes_rows = db.neo4j_query("""
+ MATCH (n:Narrator)-[a:APPEARS_IN]->(h:Hadith {id: $hid})
RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
n.generation AS generation,
- n.reliability_grade AS reliability_grade,
- r.chain_order AS chain_order
- ORDER BY r.chain_order
+ n.reliability_grade AS reliability_grade
+ ORDER BY a.chain_order
""", {"hid": hadith_id})
- # Get chain links (NARRATED_FROM within this hadith's narrators)
- links = db.neo4j_query("""
- MATCH (a:Narrator)-[r1:APPEARS_IN]->(h:Hadith {id: $hid})
- MATCH (b:Narrator)-[r2:APPEARS_IN]->(h)
- MATCH (a)-[nf:NARRATED_FROM]->(b)
+ # Transmission links — NARRATED_FROM edges store hadith_ids as array
+ links_rows = db.neo4j_query("""
+ MATCH (a:Narrator)-[nf:NARRATED_FROM]->(b:Narrator)
WHERE $hid IN nf.hadith_ids
RETURN a.name_arabic AS source,
b.name_arabic AS target,
nf.transmission_verb AS transmission_verb
+ ORDER BY a.name_arabic
""", {"hid": hadith_id})
- # If no NARRATED_FROM edges with hadith_id, fall back to chain order
- if not links and len(nodes) > 1:
- sorted_nodes = sorted(nodes, key=lambda n: n.get("chain_order") or 999)
- links = []
- for i in range(len(sorted_nodes) - 1):
- links.append({
- "source": sorted_nodes[i]["name_arabic"],
- "target": sorted_nodes[i + 1]["name_arabic"],
- "transmission_verb": None,
- })
-
return IsnadChain(
- hadith_id=str(hadith["id"]),
- collection=hadith["collection"] or "",
- hadith_number=hadith["hadith_number"] or 0,
- nodes=[IsnadNode(**n) for n in nodes],
- links=[IsnadLink(**l) for l in links],
+ hadith_id=hadith_id,
+ collection=hadith.get("collection"),
+ hadith_number=hadith.get("hadith_number"),
+ nodes=[IsnadNode(**r) for r in nodes_rows],
+ links=[IsnadLink(**r) for r in links_rows],
)
-@router.get("/narrator/{name_arabic}", response_model=list[IsnadChain],
- summary="Get all chains for a narrator",
- description="Returns all isnad chains that include a specific narrator. "
- "Useful for visualizing how a narrator connects to the Prophet ﷺ "
- "through different transmission paths. "
- "Example: `/chains/narrator/الزهري`")
-async def get_narrator_chains(
- name_arabic: str,
- limit: int = Query(10, ge=1, le=50, description="Maximum chains to return"),
+# ── All chains containing a narrator (paginated) ──────────────────────────
+
+@router.get("/narrator/{name_arabic}", response_model=PaginatedResponse)
+async def chains_by_narrator(
+ name_arabic: str = Path(..., description="Narrator Arabic name"),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(10, ge=1, le=50),
):
"""
- Get all isnad chains that include a specific narrator.
- Useful for seeing how a narrator connects to the Prophet ﷺ.
+ All isnad chains containing a narrator.
+ Useful for seeing how a narrator connects to the Prophet ﷺ across collections.
+
+ Matches narrators whose name_arabic contains the normalized query, pages over
+ the distinct hadiths they appear in (ordered by hadith id), and returns a
+ PaginatedResponse whose data holds one IsnadChain per hadith on the page.
"""
+ # NOTE(review): $name is normalized, but n.name_arabic is compared as stored —
+ # confirm stored names are normalized the same way, otherwise matches are missed.
+ q_norm = normalize_name(name_arabic)
+ skip = (page - 1) * per_page
+
+ total = db.neo4j_count("""
+ MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
+ WHERE toLower(n.name_arabic) CONTAINS toLower($name)
+ RETURN count(DISTINCT h) AS count
+ """, {"name": q_norm})
+
hadith_ids = db.neo4j_query("""
- MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
- RETURN h.id AS id
- LIMIT $limit
- """, {"name": name_arabic, "limit": limit})
+ MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
+ WHERE toLower(n.name_arabic) CONTAINS toLower($name)
+ RETURN DISTINCT h.id AS id
+ ORDER BY h.id
+ SKIP $skip LIMIT $limit
+ """, {"name": q_norm, "skip": skip, "limit": per_page})
+ # One get_isnad_chain call per hadith on the page; each call runs its own Neo4j queries.
chains = []
for row in hadith_ids:
chain = await get_isnad_chain(str(row["id"]))
chains.append(chain)
- return chains
+ return PaginatedResponse(
+ data=chains,
+ meta=_paginate(total, page, per_page),
+ )
-@router.get("/common-chains", response_model=list[dict],
- summary="Find shared chains between two narrators",
- description="Find hadiths where both narrators appear in the same isnad chain. "
- "Useful for verifying narrator relationships and finding corroborating chains. "
- "Example: `/chains/common-chains?narrator_a=الزهري&narrator_b=أنس بن مالك`")
+# ── Common chains between two narrators (paginated) ───────────────────────
+
+@router.get("/common", response_model=PaginatedResponse)
async def find_common_chains(
- narrator_a: str = Query(
- ..., description="First narrator (Arabic). Example: الزهري",
- examples=["الزهري"],
- ),
- narrator_b: str = Query(
- ..., description="Second narrator (Arabic). Example: أنس بن مالك",
- examples=["أنس بن مالك"],
- ),
- limit: int = Query(10, ge=1, le=50, description="Maximum results"),
+ narrator_a: str = Query(..., description="First narrator (Arabic)"),
+ narrator_b: str = Query(..., description="Second narrator (Arabic)"),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(10, ge=1, le=50),
):
- """
- Find hadiths where both narrators appear in the same chain.
- Useful for verifying narrator relationships.
- """
+ """
+ Find hadiths where both narrators appear in the same chain.
+
+ Each result row is one (hadith, narrator_a, narrator_b) combination, so a hadith
+ is listed once per pair of matching narrators; meta.total counts those rows.
+ """
+ a_norm = normalize_name(narrator_a)
+ b_norm = normalize_name(narrator_b)
+ skip = (page - 1) * per_page
+
+ # Count the same distinct rows the page query returns; counting distinct hadiths
+ # under-reports the total whenever several narrators match one of the names.
+ total = db.neo4j_count("""
+ MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator)
+ WHERE toLower(a.name_arabic) CONTAINS toLower($a)
+ AND toLower(b.name_arabic) CONTAINS toLower($b)
+ AND a <> b
+ WITH DISTINCT h.id AS hadith_id,
+ h.collection AS collection,
+ h.hadith_number AS hadith_number,
+ a.name_arabic AS narrator_a,
+ b.name_arabic AS narrator_b
+ RETURN count(*) AS count
+ """, {"a": a_norm, "b": b_norm})
+
rows = db.neo4j_query("""
MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator)
- WHERE a.name_arabic CONTAINS $name_a
- AND b.name_arabic CONTAINS $name_b
+ WHERE toLower(a.name_arabic) CONTAINS toLower($a)
+ AND toLower(b.name_arabic) CONTAINS toLower($b)
AND a <> b
- RETURN h.id AS hadith_id,
+ RETURN DISTINCT h.id AS hadith_id,
h.collection AS collection,
h.hadith_number AS hadith_number,
a.name_arabic AS narrator_a,
b.name_arabic AS narrator_b
- LIMIT $limit
- """, {"name_a": narrator_a, "name_b": narrator_b, "limit": limit})
+ ORDER BY collection, hadith_number, hadith_id, narrator_a, narrator_b
+ SKIP $skip LIMIT $limit
+ """, {"a": a_norm, "b": b_norm, "skip": skip, "limit": per_page})
- return [dict(r) for r in rows]
+ return PaginatedResponse(
+ data=[dict(r) for r in rows],
+ meta=_paginate(total, page, per_page),
+ )
diff --git a/app/routers/hadiths.py b/app/routers/hadiths.py
index 226baf7..2c0162a 100644
--- a/app/routers/hadiths.py
+++ b/app/routers/hadiths.py
@@ -1,10 +1,13 @@
"""
Hadith endpoints — details, listing, search by keyword/narrator/topic/place.
+All query parameters are Arabic-normalized for consistent matching.
+All list endpoints support pagination via page + per_page.
"""
-from fastapi import APIRouter, Query, HTTPException
+from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional
from app.services.database import db
+from app.utils.arabic import normalize_query
from app.models.schemas import (
HadithDetail, HadithSummary, NarratorInChain, TopicTag,
PaginatedResponse, PaginationMeta,
@@ -13,36 +16,44 @@ from app.models.schemas import (
router = APIRouter(prefix="/hadiths", tags=["Hadiths"])
-@router.get("/{hadith_id}", response_model=HadithDetail,
- summary="Get hadith by ID",
- description="Retrieve full hadith details including Arabic text, sanad/matn separation, "
- "ordered narrator chain from the knowledge graph, and topic tags.")
-async def get_hadith(hadith_id: str):
+def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
+ """Build PaginationMeta; pages is total / per_page rounded up, with a minimum of 1 so an empty result still reports one page."""
+ pages = max(1, (total + per_page - 1) // per_page)
+ return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages)
+
+
+# ── Single hadith by ID ────────────────────────────────────────────────────
+
+@router.get("/{hadith_id}", response_model=HadithDetail)
+async def get_hadith(hadith_id: str = Path(..., description="Hadith UUID")):
"""Get full hadith details by ID, including narrator chain and topics from Neo4j."""
# Base hadith from PostgreSQL
hadith = db.pg_query_one("""
SELECT h.id, c.name_english AS collection, h.hadith_number,
- h.grade, h.arabic_text, h.sanad, h.matn
+ h.book_number, h.grade, h.arabic_text, h.english_text,
+ h.urdu_text, h.sanad, h.matn
FROM hadiths h
JOIN collections c ON c.id = h.collection_id
- WHERE h.id = %s
+ WHERE h.id::text = %s
""", (hadith_id,))
if not hadith:
raise HTTPException(status_code=404, detail="Hadith not found")
- # Enrich with chain + topics from Neo4j
+ # Narrator chain from Neo4j
chain = db.neo4j_query("""
MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
+ n.generation AS generation,
+ n.reliability_grade AS reliability_grade,
r.chain_order AS order,
r.transmission_verb AS transmission_verb
ORDER BY r.chain_order
""", {"hid": hadith_id})
+ # Topics from Neo4j
topics = db.neo4j_query("""
MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic)
RETURN t.topic_arabic AS topic_arabic,
@@ -52,10 +63,13 @@ async def get_hadith(hadith_id: str):
return HadithDetail(
id=str(hadith["id"]),
- collection=hadith["collection"],
- hadith_number=hadith["hadith_number"],
- grade=hadith["grade"],
- arabic_text=hadith["arabic_text"],
+ collection=hadith.get("collection"),
+ hadith_number=hadith.get("hadith_number"),
+ book_number=hadith.get("book_number"),
+ grade=hadith.get("grade"),
+ arabic_text=hadith.get("arabic_text"),
+ english_text=hadith.get("english_text"),
+ urdu_text=hadith.get("urdu_text"),
sanad_text=hadith.get("sanad"),
matn_text=hadith.get("matn"),
narrator_chain=[NarratorInChain(**c) for c in chain],
@@ -63,183 +77,155 @@ async def get_hadith(hadith_id: str):
)
-@router.get("/collection/{collection_name}", response_model=PaginatedResponse,
- summary="List hadiths by collection",
- description="Paginated listing of hadiths in a specific collection. "
- "Collection names use partial matching (e.g. 'bukhari' matches 'Sahih Bukhari').")
+# ── By collection + number ─────────────────────────────────────────────────
+
+@router.get("/by-ref/{collection}/{number}", response_model=HadithDetail)
+async def get_hadith_by_reference(
+ collection: str = Path(..., description="Collection name, e.g. 'Sahih Bukhari'"),
+ number: int = Path(..., description="Hadith number within the collection"),
+):
+ """
+ Lookup hadith by collection name + hadith number.
+
+ The collection must equal collections.name_english exactly. The matching row's
+ id is then resolved through get_hadith. Raises HTTP 404 when no row matches.
+ """
+ row = db.pg_query_one("""
+ SELECT h.id FROM hadiths h
+ JOIN collections c ON c.id = h.collection_id
+ WHERE c.name_english = %s AND h.hadith_number = %s
+ """, (collection, number))
+ if not row:
+ raise HTTPException(status_code=404, detail=f"Hadith {collection} #{number} not found")
+ return await get_hadith(str(row["id"]))
+
+
+# ── List by collection (paginated) ─────────────────────────────────────────
+
+@router.get("/collection/{collection_name}", response_model=PaginatedResponse)
async def list_by_collection(
- collection_name: str,
+ collection_name: str = Path(..., description="Collection name"),
page: int = Query(1, ge=1, description="Page number"),
- per_page: int = Query(20, ge=1, le=100, description="Results per page"),
+ per_page: int = Query(20, ge=1, le=100, description="Items per page"),
):
"""List hadiths in a collection with pagination."""
+ # Exact match on collections.name_english; the previous ILIKE '%name%' partial
+ # match was dropped, so a fragment such as 'bukhari' no longer matches.
- offset = (page - 1) * per_page
-
- total_row = db.pg_query_one("""
- SELECT COUNT(*) AS total
- FROM hadiths h
+ total = db.pg_count("""
+ SELECT count(*) FROM hadiths h
JOIN collections c ON c.id = h.collection_id
- WHERE c.name_english ILIKE %s
- """, (f"%{collection_name}%",))
- total = total_row["total"] if total_row else 0
+ WHERE c.name_english = %s
+ """, (collection_name,))
+ offset = (page - 1) * per_page
rows = db.pg_query("""
SELECT h.id, c.name_english AS collection, h.hadith_number,
h.grade, LEFT(h.arabic_text, 300) AS arabic_text
FROM hadiths h
JOIN collections c ON c.id = h.collection_id
- WHERE c.name_english ILIKE %s
+ WHERE c.name_english = %s
ORDER BY h.hadith_number
LIMIT %s OFFSET %s
- """, (f"%{collection_name}%", per_page, offset))
+ """, (collection_name, per_page, offset))
- return PaginatedResponse(
- meta=PaginationMeta(
- total=total, page=page, per_page=per_page,
- pages=(total + per_page - 1) // per_page,
- ),
- data=[HadithSummary(
- id=str(r["id"]), collection=r["collection"],
- hadith_number=r["hadith_number"], grade=r["grade"],
- arabic_text=r["arabic_text"],
- ) for r in rows],
- )
+ # id is stringified; every other selected column is passed through under its own name.
+ data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
+ return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
-@router.get("/number/{collection_name}/{number}", response_model=HadithDetail)
-async def get_by_number(collection_name: str, number: int):
- """Get a hadith by collection name and number."""
- hadith = db.pg_query_one("""
- SELECT h.id
- FROM hadiths h
- JOIN collections c ON c.id = h.collection_id
- WHERE c.name_english ILIKE %s AND h.hadith_number = %s
- """, (f"%{collection_name}%", number))
+# ── Keyword search (paginated, normalized) ─────────────────────────────────
- if not hadith:
- raise HTTPException(status_code=404, detail=f"Hadith #{number} not found in {collection_name}")
-
- return await get_hadith(str(hadith["id"]))
-
-
-@router.get("/search/keyword", response_model=PaginatedResponse,
- summary="Search hadiths by Arabic keyword",
- description="Full-text keyword search across all hadith Arabic text. "
- "Supports both vocalized (مَكَّةَ) and unvocalized (مكة) Arabic.")
-async def search_by_keyword(
- q: str = Query(
- ..., min_length=2,
- description="Arabic keyword to search. Examples: صلاة (prayer), زكاة (zakat), صيام (fasting), حج (hajj), نية (intention)",
- examples=["صلاة", "الجنة", "رمضان"],
- ),
- collection: Optional[str] = Query(
- None,
- description="Filter by collection name. Examples: Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood",
- examples=["Sahih Bukhari"],
- ),
- grade: Optional[str] = Query(
- None,
- description="Filter by hadith grade. Examples: Sahih, Hasan, Da'if",
- examples=["Sahih"],
- ),
- page: int = Query(1, ge=1, description="Page number (1-indexed)"),
- per_page: int = Query(20, ge=1, le=100, description="Results per page (max 100)"),
+@router.get("/search/keyword", response_model=PaginatedResponse)
+async def search_keyword(
+ q: str = Query(..., min_length=2, description="Arabic keyword(s) — diacritics stripped automatically"),
+ collection: Optional[str] = Query(None, description="Filter by collection"),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(20, ge=1, le=100),
):
- """Search hadiths by Arabic keyword in text."""
- offset = (page - 1) * per_page
-
- conditions = ["h.arabic_text ILIKE %s"]
- params = [f"%{q}%"]
+ """Keyword search in Arabic hadith text. Query is normalized for consistent matching."""
+ q_norm = normalize_query(q)
+ where = "WHERE h.arabic_text ILIKE %s"
+ params: list = [f"%{q_norm}%"]
if collection:
- conditions.append("c.name_english ILIKE %s")
- params.append(f"%{collection}%")
- if grade:
- conditions.append("h.grade ILIKE %s")
- params.append(f"%{grade}%")
+ where += " AND c.name_english = %s"
+ params.append(collection)
- where = " AND ".join(conditions)
-
- total_row = db.pg_query_one(f"""
- SELECT COUNT(*) AS total
- FROM hadiths h
- JOIN collections c ON c.id = h.collection_id
- WHERE {where}
- """, tuple(params))
- total = total_row["total"] if total_row else 0
-
- params.extend([per_page, offset])
- rows = db.pg_query(f"""
- SELECT h.id, c.name_english AS collection, h.hadith_number,
- h.grade, LEFT(h.arabic_text, 300) AS arabic_text
- FROM hadiths h
- JOIN collections c ON c.id = h.collection_id
- WHERE {where}
- ORDER BY c.name_english, h.hadith_number
- LIMIT %s OFFSET %s
- """, tuple(params))
-
- return PaginatedResponse(
- meta=PaginationMeta(
- total=total, page=page, per_page=per_page,
- pages=(total + per_page - 1) // per_page,
- ),
- data=[HadithSummary(
- id=str(r["id"]), collection=r["collection"],
- hadith_number=r["hadith_number"], grade=r["grade"],
- arabic_text=r["arabic_text"],
- ) for r in rows],
+ total = db.pg_count(
+ f"SELECT count(*) FROM hadiths h JOIN collections c ON c.id = h.collection_id {where}",
+ tuple(params),
+ )
+ offset = (page - 1) * per_page
+ rows = db.pg_query(
+ f"SELECT h.id, c.name_english AS collection, h.hadith_number, "
+ f"h.grade, LEFT(h.arabic_text, 300) AS arabic_text "
+ f"FROM hadiths h JOIN collections c ON c.id = h.collection_id "
+ f"{where} ORDER BY c.name_english, h.hadith_number "
+ f"LIMIT %s OFFSET %s",
+ tuple(params + [per_page, offset]),
)
-
-@router.get("/search/topic/{topic}", response_model=list[HadithSummary])
-async def search_by_topic(topic: str, limit: int = Query(20, ge=1, le=100)):
- """Search hadiths by topic tag (from Neo4j)."""
- rows = db.neo4j_query("""
- CALL db.index.fulltext.queryNodes('hadith_arabic_text', $topic)
- YIELD node, score
- RETURN node.id AS id,
- node.collection AS collection,
- node.hadith_number AS hadith_number,
- node.grade AS grade,
- left(node.matn_text, 300) AS matn_text,
- score
- ORDER BY score DESC
- LIMIT $limit
- """, {"topic": topic, "limit": limit})
-
- return [HadithSummary(
- id=str(r["id"]), collection=r["collection"] or "",
- hadith_number=r["hadith_number"] or 0, grade=r["grade"],
- matn_text=r["matn_text"],
- ) for r in rows]
+ data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
+ return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
-@router.get("/search/narrator/{narrator_name}", response_model=list[HadithSummary],
- summary="Find hadiths by narrator",
- description="Find all hadiths where a specific narrator appears in the chain. "
- "Searches both Arabic name and transliteration. "
- "Example: `/hadiths/search/narrator/أبو هريرة`")
-async def search_by_narrator(
- narrator_name: str,
- limit: int = Query(50, ge=1, le=200, description="Maximum results"),
+# ── Search by topic (paginated, normalized) ────────────────────────────────
+
+@router.get("/search/topic", response_model=PaginatedResponse)
+async def search_by_topic(
+ q: str = Query(..., min_length=2, description="Topic keyword (Arabic or English)"),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(20, ge=1, le=100),
):
- """Find all hadiths narrated by a specific person."""
+ """Find hadiths by topic tag from the knowledge graph."""
+ q_norm = normalize_query(q)
+ skip = (page - 1) * per_page
+
+ total = db.neo4j_count("""
+ MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
+ WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
+ OR toLower(t.topic_english) CONTAINS toLower($q)
+ RETURN count(DISTINCT h) AS count
+ """, {"q": q_norm})
+
rows = db.neo4j_query("""
- MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith)
- WHERE n.name_arabic CONTAINS $name
- OR n.name_transliterated CONTAINS $name
- RETURN h.id AS id,
+ MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
+ WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
+ OR toLower(t.topic_english) CONTAINS toLower($q)
+ RETURN DISTINCT h.id AS id,
h.collection AS collection,
h.hadith_number AS hadith_number,
h.grade AS grade,
- left(h.matn_text, 300) AS matn_text
+ substring(h.arabic_text, 0, 300) AS arabic_text
ORDER BY h.collection, h.hadith_number
- LIMIT $limit
- """, {"name": narrator_name, "limit": limit})
+ SKIP $skip LIMIT $limit
+ """, {"q": q_norm, "skip": skip, "limit": per_page})
- return [HadithSummary(
- id=str(r["id"]), collection=r["collection"] or "",
- hadith_number=r["hadith_number"] or 0, grade=r["grade"],
- matn_text=r["matn_text"],
- ) for r in rows]
+ data = [HadithSummary(**r) for r in rows]
+ return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
+
+
+# ── Search by narrator (paginated, normalized) ─────────────────────────────
+
+@router.get("/search/narrator", response_model=PaginatedResponse)
+async def search_by_narrator(
+ q: str = Query(..., min_length=2, description="Narrator name (Arabic)"),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(20, ge=1, le=100),
+):
+ """Page through distinct hadiths linked by APPEARS_IN to any narrator whose Arabic name contains the normalized query."""
+ q_norm = normalize_query(q)
+ skip = (page - 1) * per_page
+
+ total = db.neo4j_count("""
+ MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
+ WHERE toLower(n.name_arabic) CONTAINS toLower($q)
+ RETURN count(DISTINCT h) AS count
+ """, {"q": q_norm})
+
+ rows = db.neo4j_query("""
+ MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
+ WHERE toLower(n.name_arabic) CONTAINS toLower($q)
+ RETURN DISTINCT h.id AS id,
+ h.collection AS collection,
+ h.hadith_number AS hadith_number,
+ h.grade AS grade,
+ substring(h.arabic_text, 0, 300) AS arabic_text
+ ORDER BY h.collection, h.hadith_number
+ SKIP $skip LIMIT $limit
+ """, {"q": q_norm, "skip": skip, "limit": per_page})
+
+ data = [HadithSummary(**r) for r in rows]
+ return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
diff --git a/app/routers/narrators.py b/app/routers/narrators.py
index 7a61dc4..9f42a93 100644
--- a/app/routers/narrators.py
+++ b/app/routers/narrators.py
@@ -1,317 +1,436 @@
"""
-Narrator endpoints — profiles, teacher/student network, relationships, who met who.
+Narrator endpoints — search, profiles, network queries.
+All queries normalize Arabic input to match post-dedup graph data.
"""
-from fastapi import APIRouter, Query, HTTPException
+from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional
from app.services.database import db
+from app.utils.arabic import normalize_query, normalize_name
from app.models.schemas import (
- NarratorProfile, NarratorSummary, HadithSummary,
- NarratorInteraction, PlaceRelation,
- PaginatedResponse, PaginationMeta,
+ NarratorSummary, NarratorProfile, NarratorInteraction,
+ NarratorConnection, NarratorNetwork,
+ WhoMetWhoResult, PathNode, PlaceRelation, NameForm, FamilyInfo,
+ HadithSummary, PaginatedResponse, PaginationMeta,
)
router = APIRouter(prefix="/narrators", tags=["Narrators"])
-@router.get("/search", response_model=list[NarratorSummary],
- summary="Search narrators by name",
- description="Full-text search across narrator names in both Arabic and Latin transliteration. "
- "Uses Neo4j full-text index for fast matching.")
+def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
+ pages = max(1, (total + per_page - 1) // per_page)  # ceiling division; floor of 1 so an empty result still reports one page
+ return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages)
+
+
+# ── Search narrators by name (paginated, normalized) ───────────────────────
+
+@router.get("/search", response_model=PaginatedResponse)
async def search_narrators(
- q: str = Query(
- ..., min_length=2,
- description="Narrator name in Arabic or transliteration. Examples: أبو هريرة, الزهري, Anas, Bukhari",
- examples=["أبو هريرة", "الزهري", "Anas ibn Malik"],
- ),
- limit: int = Query(20, ge=1, le=100, description="Maximum results to return"),
+ q: str = Query(..., min_length=2, description="Narrator name (Arabic). Diacritics stripped automatically."),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(20, ge=1, le=100),
):
- """Search narrators by name (Arabic or transliterated)."""
+ """
+ Search narrators by Arabic name. Input is normalized to match
+ the deduplicated graph (diacritics stripped, characters unified).
+ """
+ q_norm = normalize_query(q)
+ skip = (page - 1) * per_page
+
+ total = db.neo4j_count("""
+ MATCH (n:Narrator)
+ WHERE toLower(n.name_arabic) CONTAINS toLower($q)
+ RETURN count(n) AS count
+ """, {"q": q_norm})
+
rows = db.neo4j_query("""
- CALL db.index.fulltext.queryNodes('narrator_names', $query)
- YIELD node, score
- WITH node AS n, score
+ MATCH (n:Narrator)
+ WHERE toLower(n.name_arabic) CONTAINS toLower($q)
OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
n.generation AS generation,
n.reliability_grade AS reliability_grade,
- count(h) AS hadith_count,
- score
- ORDER BY score DESC
- LIMIT $limit
- """, {"query": q, "limit": limit})
+ count(DISTINCT h) AS hadith_count
+ ORDER BY hadith_count DESC
+ SKIP $skip LIMIT $limit
+ """, {"q": q_norm, "skip": skip, "limit": per_page})
- return [NarratorSummary(**r) for r in rows]
+ data = [NarratorSummary(**r) for r in rows]
+ return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
-@router.get("/profile/{name_arabic}", response_model=NarratorProfile,
- summary="Get full narrator profile",
- description="Complete narrator profile for the mobile app. Includes biography from classical "
- "scholarship (Tahdhib al-Kamal, Taqrib al-Tahdhib), teacher/student network, "
- "hadiths narrated, places, and tribal affiliations. "
- "Example: `/narrators/profile/أبو هريرة`")
-async def get_narrator_profile(name_arabic: str):
+# ── Full narrator profile ──────────────────────────────────────────────────
+
+@router.get("/profile/{name_arabic}", response_model=NarratorProfile)
+async def get_narrator_profile(
+ name_arabic: str = Path(..., description="Narrator Arabic name (exact or close match)"),
+):
"""
- Full narrator profile — biography, hadiths, teachers, students,
- places, tribes. Powers the mobile app profile page.
+ Complete narrator profile — biography, hadiths, teachers, students, places, tribes.
+ This is the mobile app profile page query.
"""
- # Basic info
+ q_norm = normalize_name(name_arabic)
+
+ # Find the narrator node — exact first, then CONTAINS fallback
narrator = db.neo4j_query_one("""
- MATCH (n:Narrator {name_arabic: $name})
- RETURN n.name_arabic AS name_arabic,
- n.name_transliterated AS name_transliterated,
- n.entity_type AS entity_type,
- n.full_nasab AS full_nasab,
- n.kunya AS kunya,
- n.nisba AS nisba,
- n.laqab AS laqab,
- n.generation AS generation,
- n.reliability_grade AS reliability_grade,
- n.reliability_detail AS reliability_detail,
- n.birth_year_hijri AS birth_year_hijri,
- n.death_year_hijri AS death_year_hijri,
- n.birth_year_ce AS birth_year_ce,
- n.death_year_ce AS death_year_ce,
- n.biography_summary_arabic AS biography_summary_arabic,
- n.biography_summary_english AS biography_summary_english,
- n.total_hadiths_narrated_approx AS total_hadiths_narrated_approx,
- n.bio_verified AS bio_verified
- """, {"name": name_arabic})
+ MATCH (n:Narrator)
+ WHERE toLower(n.name_arabic) = toLower($q)
+ RETURN n
+ """, {"q": q_norm})
if not narrator:
- raise HTTPException(status_code=404, detail="Narrator not found")
+ narrator = db.neo4j_query_one("""
+ MATCH (n:Narrator)
+ WHERE toLower(n.name_arabic) CONTAINS toLower($q)
+ RETURN n
+ """, {"q": q_norm})
- # Hadiths
- hadiths = db.neo4j_query("""
+ if not narrator:
+ raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}")
+
+ n = narrator.get("n", {})
+ actual_name = n.get("name_arabic", q_norm)
+
+ # Hadith count + collections
+ stats = db.neo4j_query_one("""
+ MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
+ RETURN count(DISTINCT h) AS hadith_count,
+ collect(DISTINCT h.collection) AS collections
+ """, {"name": actual_name}) or {}
+
+ # Teachers: narrator NARRATED_FROM teacher + teacher TEACHER_OF narrator
+ teachers_nf = db.neo4j_query("""
+ MATCH (n:Narrator {name_arabic: $name})-[:NARRATED_FROM]->(t:Narrator)
+ OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith)
+ RETURN t.name_arabic AS name_arabic,
+ t.name_transliterated AS name_transliterated,
+ t.entity_type AS entity_type,
+ t.generation AS generation,
+ t.reliability_grade AS reliability_grade,
+ count(DISTINCT h) AS hadith_count
+ """, {"name": actual_name})
+
+ teachers_to = db.neo4j_query("""
+ MATCH (t:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name})
+ OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith)
+ RETURN t.name_arabic AS name_arabic,
+ t.name_transliterated AS name_transliterated,
+ t.entity_type AS entity_type,
+ t.generation AS generation,
+ t.reliability_grade AS reliability_grade,
+ count(DISTINCT h) AS hadith_count
+ """, {"name": actual_name})
+
+ # Deduplicate teachers
+ seen_teachers = set()
+ teachers = []
+ for r in teachers_nf + teachers_to:
+ if r["name_arabic"] not in seen_teachers:
+ seen_teachers.add(r["name_arabic"])
+ teachers.append(NarratorSummary(**r))
+
+ # Students: student NARRATED_FROM narrator + narrator TEACHER_OF student
+ students_nf = db.neo4j_query("""
+ MATCH (s:Narrator)-[:NARRATED_FROM]->(n:Narrator {name_arabic: $name})
+ OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith)
+ RETURN s.name_arabic AS name_arabic,
+ s.name_transliterated AS name_transliterated,
+ s.entity_type AS entity_type,
+ s.generation AS generation,
+ s.reliability_grade AS reliability_grade,
+ count(DISTINCT h) AS hadith_count
+ """, {"name": actual_name})
+
+ students_to = db.neo4j_query("""
+ MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(s:Narrator)
+ OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith)
+ RETURN s.name_arabic AS name_arabic,
+ s.name_transliterated AS name_transliterated,
+ s.entity_type AS entity_type,
+ s.generation AS generation,
+ s.reliability_grade AS reliability_grade,
+ count(DISTINCT h) AS hadith_count
+ """, {"name": actual_name})
+
+ seen_students = set()
+ students = []
+ for r in students_nf + students_to:
+ if r["name_arabic"] not in seen_students:
+ seen_students.add(r["name_arabic"])
+ students.append(NarratorSummary(**r))
+
+ # Places
+ places_rows = db.neo4j_query("""
+ MATCH (n:Narrator {name_arabic: $name})-[r]->(p:Place)
+ WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
+ RETURN p.name_arabic AS place, type(r) AS relation
+ """, {"name": actual_name})
+
+ # Tribes
+ tribe_rows = db.neo4j_query("""
+ MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe)
+ RETURN t.name_arabic AS name
+ """, {"name": actual_name})
+
+ # Name forms (alternative names via RELATED_TO)
+ name_form_rows = db.neo4j_query("""
+ MATCH (n:Narrator {name_arabic: $name})-[:RELATED_TO]-(alt:Narrator)
+ WHERE alt.name_arabic <> $name
+ RETURN alt.name_arabic AS name, alt.entity_type AS type
+ """, {"name": actual_name})
+
+ # Family info
+ family_row = db.neo4j_query_one("""
+ MATCH (n:Narrator {name_arabic: $name})
+ RETURN n.father AS father, n.mother AS mother,
+ n.spouse AS spouse, n.children AS children
+ """, {"name": actual_name})
+ family = None
+ if family_row and any(family_row.get(k) for k in ["father", "mother", "spouse", "children"]):
+ family = FamilyInfo(
+ father=family_row.get("father"),
+ mother=family_row.get("mother"),
+ spouse=family_row.get("spouse"),
+ children=family_row.get("children") or [],
+ )
+
+ # Sample hadiths
+ hadith_rows = db.neo4j_query("""
MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
RETURN h.id AS id,
h.collection AS collection,
h.hadith_number AS hadith_number,
h.grade AS grade,
- left(h.matn_text, 200) AS matn_text
+ substring(h.arabic_text, 0, 300) AS arabic_text
ORDER BY h.collection, h.hadith_number
LIMIT 50
- """, {"name": name_arabic})
-
- # Teachers (who taught this narrator)
- teachers = db.neo4j_query("""
- MATCH (teacher:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name})
- OPTIONAL MATCH (teacher)-[:APPEARS_IN]->(h:Hadith)
- RETURN teacher.name_arabic AS name_arabic,
- teacher.name_transliterated AS name_transliterated,
- teacher.entity_type AS entity_type,
- teacher.generation AS generation,
- teacher.reliability_grade AS reliability_grade,
- count(h) AS hadith_count
- """, {"name": name_arabic})
-
- # Students (who this narrator taught)
- students = db.neo4j_query("""
- MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(student:Narrator)
- OPTIONAL MATCH (student)-[:APPEARS_IN]->(h:Hadith)
- RETURN student.name_arabic AS name_arabic,
- student.name_transliterated AS name_transliterated,
- student.entity_type AS entity_type,
- student.generation AS generation,
- student.reliability_grade AS reliability_grade,
- count(h) AS hadith_count
- """, {"name": name_arabic})
-
- # Places
- places = db.neo4j_query("""
- MATCH (n:Narrator {name_arabic: $name})-[r:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place)
- RETURN p.name_arabic AS place, type(r) AS relation
- """, {"name": name_arabic})
-
- # Tribes
- tribes_rows = db.neo4j_query("""
- MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe)
- RETURN t.name_arabic AS tribe
- """, {"name": name_arabic})
+ """, {"name": actual_name})
return NarratorProfile(
- **narrator,
- hadith_count=len(hadiths),
- hadiths=[HadithSummary(
- id=str(h["id"]), collection=h["collection"] or "",
- hadith_number=h["hadith_number"] or 0, grade=h["grade"],
- matn_text=h["matn_text"],
- ) for h in hadiths],
- teachers=[NarratorSummary(**t) for t in teachers],
- students=[NarratorSummary(**s) for s in students],
- places=[PlaceRelation(**p) for p in places],
- tribes=[t["tribe"] for t in tribes_rows],
+ name_arabic=n.get("name_arabic", actual_name),
+ name_transliterated=n.get("name_transliterated", ""),
+ entity_type=n.get("entity_type", ""),
+ full_nasab=n.get("full_nasab"),
+ kunya=n.get("kunya"),
+ nisba=n.get("nisba"),
+ laqab=n.get("laqab"),
+ generation=n.get("generation"),
+ reliability_grade=n.get("reliability_grade"),
+ reliability_detail=n.get("reliability_detail"),
+ birth_year_hijri=n.get("birth_year_hijri"),
+ death_year_hijri=n.get("death_year_hijri"),
+ birth_year_ce=n.get("birth_year_ce"),
+ death_year_ce=n.get("death_year_ce"),
+ biography_summary_arabic=n.get("biography_summary_arabic"),
+ biography_summary_english=n.get("biography_summary_english"),
+ total_hadiths_narrated_approx=n.get("total_hadiths_narrated_approx"),
+ hadith_count=stats.get("hadith_count", 0),
+ hadiths=[HadithSummary(**r) for r in hadith_rows],
+ teachers=teachers,
+ students=students,
+ name_forms=[NameForm(**r) for r in name_form_rows],
+ family=family,
+ places=[PlaceRelation(**r) for r in places_rows],
+ tribes=[r["name"] for r in tribe_rows],
+ bio_verified=n.get("bio_verified", False),
)
-@router.get("/by-generation/{generation}", response_model=list[NarratorSummary])
+# ── Narrators by generation (paginated, normalized) ────────────────────────
+
+@router.get("/by-generation/{generation}", response_model=PaginatedResponse)
async def narrators_by_generation(
- generation: str,
- limit: int = Query(50, ge=1, le=200),
+ generation: str = Path(..., description="Generation: صحابي, تابعي, تابع التابعين, نبي"),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(20, ge=1, le=100),
):
- """List narrators by generation (صحابي, تابعي, etc.)."""
+ """List narrators by generation (e.g. Companions, Successors)."""
+ q_norm = normalize_query(generation)
+ skip = (page - 1) * per_page
+
+ total = db.neo4j_count("""
+ MATCH (n:Narrator)
+ WHERE toLower(n.generation) CONTAINS toLower($gen)
+ RETURN count(n) AS count
+ """, {"gen": q_norm})
+
rows = db.neo4j_query("""
MATCH (n:Narrator)
- WHERE n.generation CONTAINS $gen
+ WHERE toLower(n.generation) CONTAINS toLower($gen)
OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
n.generation AS generation,
n.reliability_grade AS reliability_grade,
- count(h) AS hadith_count
+ count(DISTINCT h) AS hadith_count
ORDER BY hadith_count DESC
- LIMIT $limit
- """, {"gen": generation, "limit": limit})
+ SKIP $skip LIMIT $limit
+ """, {"gen": q_norm, "skip": skip, "limit": per_page})
- return [NarratorSummary(**r) for r in rows]
+ data = [NarratorSummary(**r) for r in rows]
+ return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
-@router.get("/by-place/{place_name}", response_model=list[NarratorSummary])
+# ── Narrators by place (paginated, normalized) ─────────────────────────────
+
+@router.get("/by-place/{place_name}", response_model=PaginatedResponse)
async def narrators_by_place(
- place_name: str,
- limit: int = Query(50, ge=1, le=200),
+ place_name: str = Path(..., description="Place name in Arabic (e.g. مكة)"),
+ page: int = Query(1, ge=1),
+ per_page: int = Query(50, ge=1, le=100),
):
- """Find narrators associated with a place."""
+ """
+ Narrators associated with a place (born, lived, died, traveled).
+ Input is normalized — مكة المكرمة matches مكه المكرمه.
+ """
+ q_norm = normalize_query(place_name)
+ skip = (page - 1) * per_page
+
+ total = db.neo4j_count("""
+ MATCH (n:Narrator)-[r]->(p:Place)
+ WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
+ AND toLower(p.name_arabic) CONTAINS toLower($place)
+ RETURN count(DISTINCT n) AS count
+ """, {"place": q_norm})
+
rows = db.neo4j_query("""
- MATCH (n:Narrator)-[:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place)
- WHERE p.name_arabic CONTAINS $place
+ MATCH (n:Narrator)-[r]->(p:Place)
+ WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
+ AND toLower(p.name_arabic) CONTAINS toLower($place)
OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
RETURN DISTINCT n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
n.generation AS generation,
n.reliability_grade AS reliability_grade,
- count(h) AS hadith_count
+ count(DISTINCT h) AS hadith_count
ORDER BY hadith_count DESC
- LIMIT $limit
- """, {"place": place_name, "limit": limit})
+ SKIP $skip LIMIT $limit
+ """, {"place": q_norm, "skip": skip, "limit": per_page})
- return [NarratorSummary(**r) for r in rows]
+ data = [NarratorSummary(**r) for r in rows]
+ return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
-@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction],
- summary="Get all narrator interactions",
- description="Lists all relationships for a narrator: who they narrated from, "
- "who narrated from them, their teachers, and their students. "
- "Each interaction includes shared hadith count. "
- "Example: `/narrators/interactions/الزهري`")
-async def get_interactions(
- name_arabic: str,
- limit: int = Query(50, ge=1, le=200, description="Maximum interactions to return"),
+# ── Narrator interactions ──────────────────────────────────────────────────
+
+@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction])
+async def narrator_interactions(
+ name_arabic: str = Path(..., description="Narrator Arabic name"),
+ limit: int = Query(50, ge=1, le=200),
):
- """
- Get all interactions of a narrator — who they narrated from,
- who narrated from them, teachers, students.
- """
+ """All direct relationships for a narrator — who they narrated from/to."""
+ q_norm = normalize_name(name_arabic)
+
rows = db.neo4j_query("""
- MATCH (n:Narrator {name_arabic: $name})
- OPTIONAL MATCH (n)-[r1:NARRATED_FROM]->(other1:Narrator)
- WITH n, collect(DISTINCT {
- narrator_b: other1.name_arabic,
- narrator_b_trans: other1.name_transliterated,
- type: 'NARRATED_FROM',
- hadith_ids: r1.hadith_ids
- }) AS outgoing
- OPTIONAL MATCH (other2:Narrator)-[r2:NARRATED_FROM]->(n)
- WITH n, outgoing, collect(DISTINCT {
- narrator_b: other2.name_arabic,
- narrator_b_trans: other2.name_transliterated,
- type: 'HEARD_BY',
- hadith_ids: r2.hadith_ids
- }) AS incoming
- OPTIONAL MATCH (teacher:Narrator)-[r3:TEACHER_OF]->(n)
- WITH n, outgoing, incoming, collect(DISTINCT {
- narrator_b: teacher.name_arabic,
- narrator_b_trans: teacher.name_transliterated,
- type: 'TEACHER_OF',
- hadith_ids: []
- }) AS teacher_rels
- OPTIONAL MATCH (n)-[r4:TEACHER_OF]->(student:Narrator)
- WITH n, outgoing, incoming, teacher_rels, collect(DISTINCT {
- narrator_b: student.name_arabic,
- narrator_b_trans: student.name_transliterated,
- type: 'STUDENT_OF',
- hadith_ids: []
- }) AS student_rels
- RETURN n.name_arabic AS narrator_a,
- n.name_transliterated AS narrator_a_trans,
- outgoing + incoming + teacher_rels + student_rels AS interactions
- """, {"name": name_arabic})
-
- if not rows:
- raise HTTPException(status_code=404, detail="Narrator not found")
-
- result = []
- row = rows[0]
- for interaction in row["interactions"]:
- if not interaction.get("narrator_b"):
- continue
- hadith_ids = interaction.get("hadith_ids") or []
- result.append(NarratorInteraction(
- narrator_a=row["narrator_a"],
- narrator_a_transliterated=row.get("narrator_a_trans") or "",
- narrator_b=interaction["narrator_b"],
- narrator_b_transliterated=interaction.get("narrator_b_trans") or "",
- relationship_type=interaction["type"],
- shared_hadith_count=len(hadith_ids),
- hadith_ids=[str(h) for h in hadith_ids[:20]],
- ))
-
- return result[:limit]
-
-
-@router.get("/who-met-who", response_model=list[NarratorInteraction],
- summary="Check if two narrators are connected",
- description="Finds the shortest path between two narrators in the knowledge graph. "
- "Reveals whether they had a direct or indirect relationship through "
- "narration chains, teacher/student bonds, or shared connections. "
- "Example: `/narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`")
-async def who_met_who(
- narrator_a: str = Query(
- ..., description="First narrator name (Arabic). Example: الزهري",
- examples=["الزهري", "أبو هريرة"],
- ),
- narrator_b: str = Query(
- ..., description="Second narrator name (Arabic). Example: أنس بن مالك",
- examples=["أنس بن مالك", "عمر بن الخطاب"],
- ),
-):
- """
- Check if two narrators had a relationship — did they meet,
- narrate from each other, or share a teacher/student bond?
- """
- rows = db.neo4j_query("""
- MATCH (a:Narrator), (b:Narrator)
- WHERE a.name_arabic CONTAINS $name_a
- AND b.name_arabic CONTAINS $name_b
- OPTIONAL MATCH path = shortestPath((a)-[*..6]-(b))
- WITH a, b, path,
- [r IN relationships(path) | {
- type: type(r),
- from: startNode(r).name_arabic,
- from_trans: startNode(r).name_transliterated,
- to: endNode(r).name_arabic,
- to_trans: endNode(r).name_transliterated
- }] AS rels
+ MATCH (a:Narrator)-[r]-(b:Narrator)
+ WHERE toLower(a.name_arabic) CONTAINS toLower($name)
+ AND type(r) IN ['NARRATED_FROM', 'TEACHER_OF']
+ WITH a, b, type(r) AS rel_type
+ OPTIONAL MATCH (a)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b)
RETURN a.name_arabic AS narrator_a,
- a.name_transliterated AS narrator_a_trans,
+ a.name_transliterated AS narrator_a_transliterated,
b.name_arabic AS narrator_b,
- b.name_transliterated AS narrator_b_trans,
- length(path) AS distance,
- rels
- """, {"name_a": narrator_a, "name_b": narrator_b})
+ b.name_transliterated AS narrator_b_transliterated,
+ rel_type AS relationship_type,
+ count(DISTINCT h) AS shared_hadith_count,
+ collect(DISTINCT h.id)[..20] AS hadith_ids
+ ORDER BY shared_hadith_count DESC
+ LIMIT $limit
+ """, {"name": q_norm, "limit": limit})
- if not rows or rows[0].get("distance") is None:
- return []
+ return [NarratorInteraction(**r) for r in rows]
- row = rows[0]
- return [NarratorInteraction(
- narrator_a=rel["from"],
- narrator_a_transliterated=rel.get("from_trans") or "",
- narrator_b=rel["to"],
- narrator_b_transliterated=rel.get("to_trans") or "",
- relationship_type=rel["type"],
- ) for rel in (row.get("rels") or [])]
+
+# ── Narrator network (graph visualization) ─────────────────────────────────
+
+@router.get("/network/{name_arabic}", response_model=NarratorNetwork)
+async def narrator_network(
+    name_arabic: str = Path(..., description="Narrator Arabic name"),
+    limit: int = Query(50, ge=1, le=200),
+):
+    """
+    Get one narrator (substring match on the normalized name; 404 if none) and up to
+    `limit` of its NARRATED_FROM / TEACHER_OF edges. Useful for network visualization.
+    """
+    q_norm = normalize_name(name_arabic)
+
+    # Center narrator
+    center_row = db.neo4j_query_one("""
+        MATCH (n:Narrator)
+        WHERE toLower(n.name_arabic) CONTAINS toLower($name)
+        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
+        RETURN n.name_arabic AS name_arabic,
+               n.name_transliterated AS name_transliterated,
+               n.entity_type AS entity_type,
+               n.generation AS generation,
+               n.reliability_grade AS reliability_grade,
+               count(DISTINCT h) AS hadith_count
+    """, {"name": q_norm})
+
+    if not center_row:
+        raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}")
+
+    # Connections: anchor on the resolved center's exact name so edges of other
+    # narrators that merely contain the same substring are not mixed in.
+    conn_rows = db.neo4j_query("""
+        MATCH (a:Narrator {name_arabic: $name})-[r]-(b:Narrator)
+        WHERE type(r) IN ['NARRATED_FROM', 'TEACHER_OF']
+        RETURN b.name_arabic AS narrator,
+               b.name_transliterated AS narrator_transliterated,
+               type(r) AS connection_type,
+               CASE WHEN startNode(r) = a THEN 'outgoing' ELSE 'incoming' END AS direction
+        LIMIT $limit
+    """, {"name": center_row["name_arabic"], "limit": limit})
+
+    return NarratorNetwork(
+        center=NarratorSummary(**center_row),
+        connections=[NarratorConnection(**r) for r in conn_rows],
+        total_connections=len(conn_rows),  # size of this (limit-capped) result, not a graph-wide count
+    )
+
+
+# ── Who met who (shortest path) ────────────────────────────────────────────
+
+@router.get("/who-met-who", response_model=WhoMetWhoResult)
+async def who_met_who(
+    narrator_a: str = Query(..., description="First narrator (Arabic)"),
+    narrator_b: str = Query(..., description="Second narrator (Arabic)"),
+):
+    """
+    Shortest path (at most 10 hops) between two distinct narrators, each matched by
+    substring; 404 if no row. Useful to see how a narrator connects to the Prophet ﷺ.
+    """
+    a_norm = normalize_name(narrator_a)
+    b_norm = normalize_name(narrator_b)
+
+    # `a <> b`: Neo4j's shortestPath raises when start and end are the same node.
+    row = db.neo4j_query_one("""
+        MATCH (a:Narrator), (b:Narrator)
+        WHERE toLower(a.name_arabic) CONTAINS toLower($a)
+          AND toLower(b.name_arabic) CONTAINS toLower($b) AND a <> b
+        WITH a, b LIMIT 1
+        MATCH path = shortestPath((a)-[*..10]-(b))
+        RETURN [n IN nodes(path) |
+                {name_arabic: n.name_arabic,
+                 name_transliterated: n.name_transliterated,
+                 generation: n.generation}] AS path_nodes,
+               [r IN relationships(path) | type(r)] AS rel_types,
+               length(path) AS path_length
+    """, {"a": a_norm, "b": b_norm})
+
+    if not row:
+        raise HTTPException(
+            status_code=404, detail=f"No path found between '{narrator_a}' and '{narrator_b}'"
+        )
+
+    return WhoMetWhoResult(
+        narrator_a=narrator_a,
+        narrator_b=narrator_b,
+        path=[PathNode(**n) for n in (row.get("path_nodes") or [])],
+        path_length=row.get("path_length"),
+        relationship_types=row.get("rel_types") or [],
+    )
diff --git a/app/routers/search.py b/app/routers/search.py
index 3155ab2..e168adb 100644
--- a/app/routers/search.py
+++ b/app/routers/search.py
@@ -6,12 +6,16 @@ from typing import Optional
from app.services.database import db
from app.config import get_settings
-from app.models.schemas import SemanticSearchResult, FullTextSearchResult, HadithSummary
+from app.utils.arabic import normalize_query
+from app.models.schemas import (
+ SemanticSearchResult, FullTextSearchResult, CombinedSearchResult,
+ HadithSummary,
+)
router = APIRouter(prefix="/search", tags=["Search"])
-async def get_embedding(text: str) -> list[float]:
+async def _get_embedding(text: str) -> list[float]:
"""Get embedding vector from TEI (BGE-M3)."""
settings = get_settings()
response = await db.http_client.post(
@@ -22,7 +26,6 @@ async def get_embedding(text: str) -> list[float]:
raise HTTPException(status_code=502, detail=f"TEI embedding failed: {response.text}")
embeddings = response.json()
- # TEI returns list of embeddings; we sent one input
if isinstance(embeddings, list) and len(embeddings) > 0:
if isinstance(embeddings[0], list):
return embeddings[0]
@@ -30,34 +33,25 @@ async def get_embedding(text: str) -> list[float]:
raise HTTPException(status_code=502, detail="Unexpected TEI response format")
-@router.get("/semantic", response_model=list[SemanticSearchResult],
- summary="Semantic search (find by meaning)",
- description="Search hadiths by meaning using BGE-M3 multilingual embeddings + Qdrant. "
- "Supports cross-language queries: search in English and find Arabic hadiths, or vice versa. "
- "Example: `what did the prophet say about fasting` → finds Arabic hadiths about صيام")
+# ── Semantic search ─────────────────────────────────────────────────────────
+
+@router.get("/semantic", response_model=list[SemanticSearchResult])
async def semantic_search(
- q: str = Query(
- ..., min_length=2,
- description="Search query in any language. The embedding model handles Arabic, English, and Urdu.",
- examples=["what is the reward of prayer", "أحاديث عن الصيام", "حكم الربا"],
- ),
- collection: Optional[str] = Query(
- None,
- description="Filter by collection name. Example: Sahih Bukhari",
- ),
- limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
+ q: str = Query(..., min_length=2, description="Search query (any language — Arabic, English, etc.)"),
+ collection: Optional[str] = Query(None, description="Filter by collection name"),
+ limit: int = Query(10, ge=1, le=50),
):
"""
Semantic search — find hadiths by meaning, not just keywords.
- Supports Arabic, English, and cross-language queries.
+ Supports cross-language queries (English query → Arabic results).
Uses BGE-M3 embeddings + Qdrant vector search.
"""
+ if not db.qdrant_available():
+ raise HTTPException(status_code=503, detail="Qdrant unavailable")
+
settings = get_settings()
+ query_vector = await _get_embedding(q)
- # Get query embedding from TEI
- query_vector = await get_embedding(q)
-
- # Build Qdrant filter if collection specified
query_filter = None
if collection:
from qdrant_client.models import Filter, FieldCondition, MatchValue
@@ -65,7 +59,6 @@ async def semantic_search(
must=[FieldCondition(key="collection", match=MatchValue(value=collection))]
)
- # Search Qdrant
results = db.qdrant.search(
collection_name=settings.qdrant_collection,
query_vector=query_vector,
@@ -80,8 +73,8 @@ async def semantic_search(
output.append(SemanticSearchResult(
hadith=HadithSummary(
id=str(payload.get("id", hit.id)),
- collection=payload.get("collection", ""),
- hadith_number=payload.get("hadith_number", 0),
+ collection=payload.get("collection"),
+ hadith_number=payload.get("hadith_number"),
grade=payload.get("grade"),
arabic_text=(payload.get("arabic_text") or "")[:300],
),
@@ -92,74 +85,53 @@ async def semantic_search(
return output
-@router.get("/fulltext", response_model=list[FullTextSearchResult],
- summary="Full-text Arabic search",
- description="Keyword search using Elasticsearch with Arabic morphological analysis (stemming, root extraction). "
- "Returns highlighted text fragments showing where matches occurred. "
- "Handles both vocalized (الصَّلاة) and unvocalized (الصلاة) Arabic.")
+# ── Full-text Arabic search ─────────────────────────────────────────────────
+
+@router.get("/fulltext", response_model=list[FullTextSearchResult])
async def fulltext_search(
- q: str = Query(
- ..., min_length=2,
- description="Arabic text search query. Examples: الصلاة (prayer), النكاح (marriage), الجهاد (jihad)",
- examples=["الصلاة", "صيام رمضان", "بيع وشراء"],
- ),
- collection: Optional[str] = Query(
- None,
- description="Filter by collection. Example: Sahih Muslim",
- ),
- limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
+ q: str = Query(..., min_length=2, description="Arabic text search query"),
+ collection: Optional[str] = Query(None, description="Filter by collection"),
+ limit: int = Query(10, ge=1, le=50),
):
"""
Full-text Arabic search using Elasticsearch.
- Supports Arabic morphological analysis.
+ Supports Arabic morphological analysis (root-based matching).
"""
+ if not db.es_available():
+ raise HTTPException(status_code=503, detail="Elasticsearch unavailable")
+
settings = get_settings()
- # Build ES query
- must = [
- {
- "multi_match": {
- "query": q,
- "fields": ["arabic_text^3", "arabic_normalized^2", "matn", "sanad"],
- "type": "best_fields",
- "analyzer": "arabic",
- }
- }
- ]
+ must = [{"multi_match": {
+ "query": q,
+ "fields": ["arabic_text^3", "english_text", "urdu_text"],
+ "type": "best_fields",
+ "analyzer": "arabic",
+ }}]
if collection:
- must.append({"match": {"collection_name": collection}})
+ must.append({"match": {"collection": collection}})
body = {
"query": {"bool": {"must": must}},
"highlight": {
- "fields": {
- "arabic_text": {"fragment_size": 200, "number_of_fragments": 2},
- "matn": {"fragment_size": 200, "number_of_fragments": 1},
- }
+ "fields": {"arabic_text": {"fragment_size": 200, "number_of_fragments": 3}},
},
"size": limit,
}
- try:
- response = db.es.search(index=settings.es_index, body=body)
- except Exception as e:
- # ES index might not exist yet
- raise HTTPException(status_code=503, detail=f"Elasticsearch error: {str(e)}")
+ resp = db.es.search(index=settings.es_index, body=body)
+ hits = resp.get("hits", {}).get("hits", [])
output = []
- for hit in response["hits"]["hits"]:
+ for hit in hits:
src = hit["_source"]
- highlights = []
- if "highlight" in hit:
- for field_highlights in hit["highlight"].values():
- highlights.extend(field_highlights)
-
+ highlights = hit.get("highlight", {}).get("arabic_text", [])
output.append(FullTextSearchResult(
hadith=HadithSummary(
id=str(src.get("id", hit["_id"])),
- collection=src.get("collection_name", ""),
- hadith_number=src.get("hadith_number", 0),
+ collection=src.get("collection"),
+ hadith_number=src.get("hadith_number"),
grade=src.get("grade"),
arabic_text=(src.get("arabic_text") or "")[:300],
),
@@ -170,38 +142,55 @@ async def fulltext_search(
return output
-@router.get("/combined", response_model=dict,
- summary="Combined search (semantic + full-text)",
- description="Runs both semantic and full-text search in parallel and returns merged results. "
- "Best for the mobile app search bar — gives both meaning-based and keyword-based results. "
- "Returns `{semantic: [...], fulltext: [...], query: '...'}`")
+# ── Combined search (semantic + fulltext) ───────────────────────────────────
+
+@router.get("/combined", response_model=list[CombinedSearchResult])
async def combined_search(
- q: str = Query(
- ..., min_length=2,
- description="Search query. Works with Arabic keywords or natural language in any language.",
- examples=["الصلاة في وقتها", "hadith about charity"],
- ),
- collection: Optional[str] = Query(None, description="Filter by collection name"),
- limit: int = Query(10, ge=1, le=20, description="Results per search type (max 20)"),
+    q: str = Query(..., min_length=2, description="Search query"),
+    collection: Optional[str] = Query(None),
+    limit: int = Query(10, ge=1, le=50),
+    semantic_weight: float = Query(0.6, ge=0, le=1, description="Weight for semantic score (0-1)"),
):
- """
- Combined search — runs both semantic and full-text in parallel,
- returns merged results. Best for the mobile app search bar.
- """
- import asyncio
+    """
+    Combined semantic + full-text search. Results merged and ranked by weighted score.
+
+    Each backend is queried only when it reports itself available. A failure
+    in one backend is logged and skipped so the other can still contribute.
+    Hits are keyed by hadith id: a hadith found by both backends gets the sum
+    of ``semantic_score * semantic_weight`` and
+    ``fulltext_score * (1 - semantic_weight)`` and is tagged ``source="both"``.
+
+    Returns:
+        At most ``limit`` results, highest ``combined_score`` first. The list
+        is empty when neither backend produced a hit.
+    """
+    import logging
+
+    log = logging.getLogger(__name__)
+    results_map: dict[str, CombinedSearchResult] = {}
- semantic_task = semantic_search(q=q, collection=collection, limit=limit)
- # Full-text only makes sense for Arabic queries
- fulltext_task = fulltext_search(q=q, collection=collection, limit=limit)
+
+    # Semantic
+    if db.qdrant_available():
+        try:
+            sem_results = await semantic_search(q=q, collection=collection, limit=limit)
+            for sr in sem_results:
+                hid = sr.hadith.id
+                results_map[hid] = CombinedSearchResult(
+                    hadith=sr.hadith,
+                    semantic_score=sr.score,
+                    combined_score=sr.score * semantic_weight,
+                    source="semantic",
+                )
+        except Exception:
+            # Best-effort: keep serving full-text hits, but leave a trace so
+            # a broken semantic backend does not go unnoticed.
+            log.warning("combined_search: semantic search failed", exc_info=True)
- semantic_results, fulltext_results = await asyncio.gather(
- semantic_task,
- fulltext_task,
- return_exceptions=True,
- )
+
+    # Full-text
+    if db.es_available():
+        try:
+            ft_results = await fulltext_search(q=q, collection=collection, limit=limit)
+            ft_weight = 1.0 - semantic_weight
+            for fr in ft_results:
+                hid = fr.hadith.id
+                # Squash the full-text score into [0, 1] so it is comparable
+                # with the semantic score. NOTE(review): 20.0 looks like a
+                # heuristic ceiling for ES relevance scores; confirm.
+                norm_score = min(fr.score / 20.0, 1.0)
+                if hid in results_map:
+                    existing = results_map[hid]
+                    existing.fulltext_score = norm_score
+                    existing.combined_score += norm_score * ft_weight
+                    existing.source = "both"
+                else:
+                    results_map[hid] = CombinedSearchResult(
+                        hadith=fr.hadith,
+                        fulltext_score=norm_score,
+                        combined_score=norm_score * ft_weight,
+                        source="fulltext",
+                    )
+        except Exception:
+            # Best-effort: semantic hits gathered above are still returned.
+            log.warning("combined_search: full-text search failed", exc_info=True)
- return {
- "semantic": semantic_results if not isinstance(semantic_results, Exception) else [],
- "fulltext": fulltext_results if not isinstance(fulltext_results, Exception) else [],
- "query": q,
- }
+
+    results = sorted(results_map.values(), key=lambda x: x.combined_score, reverse=True)
+    return results[:limit]
diff --git a/app/services/database.py b/app/services/database.py
index 1c5ad62..d2a33bc 100644
--- a/app/services/database.py
+++ b/app/services/database.py
@@ -1,6 +1,7 @@
"""
-Database connection manager — initializes and provides access to
-PostgreSQL, Neo4j, Qdrant, and Elasticsearch clients.
+Database connections — PostgreSQL, Neo4j, Qdrant, Elasticsearch, TEI.
+Resilient startup: each backend wrapped in try/except so the app
+starts even if some services are temporarily unavailable.
"""
import psycopg2
import psycopg2.pool
@@ -53,7 +54,7 @@ class Database:
except Exception as e:
print(f"⚠️ Neo4j failed: {e}")
- # Qdrant
+ # Qdrant (URL-based connection — matches the working k8s setup)
try:
self.qdrant = QdrantClient(
url=f"http://{settings.qdrant_host}:{settings.qdrant_port}",
@@ -92,13 +93,6 @@ class Database:
# ── PostgreSQL helpers ──
- def get_pg(self):
- conn = self.pg_pool.getconn()
- try:
- yield conn
- finally:
- self.pg_pool.putconn(conn)
-
def pg_query(self, query: str, params: tuple = None) -> list[dict]:
conn = self.pg_pool.getconn()
try:
@@ -112,9 +106,16 @@ class Database:
rows = self.pg_query(query, params)
return rows[0] if rows else None
+    def pg_count(self, query: str, params: tuple = None) -> int:
+        """Run a single-row count query on PostgreSQL and return its ``count`` value.
+
+        Returns 0 when the query yields no row or the row has no ``count`` key.
+        """
+        first = self.pg_query_one(query, params)
+        if not first:
+            return 0
+        return first.get("count", 0)
+
# ── Neo4j helpers ──
def neo4j_query(self, query: str, params: dict = None) -> list[dict]:
+ if not self.neo4j_driver:
+ return []
with self.neo4j_driver.session() as session:
result = session.run(query, params or {})
return [dict(record) for record in result]
@@ -123,6 +124,25 @@ class Database:
rows = self.neo4j_query(query, params)
return rows[0] if rows else None
+    def neo4j_count(self, query: str, params: dict = None) -> int:
+        """Run a Cypher query that returns ``... AS count`` and return that value.
+
+        Returns 0 when the query yields no row or the row has no ``count`` key.
+        """
+        first = self.neo4j_query_one(query, params)
+        if not first:
+            return 0
+        return first.get("count", 0)
+
+ # ── Service availability checks ──
+
+    def pg_available(self) -> bool:
+        """Tell whether a PostgreSQL pool object is set on this instance."""
+        pool = self.pg_pool
+        return pool is not None
+
+    def neo4j_available(self) -> bool:
+        """Tell whether a Neo4j driver object is set on this instance."""
+        driver = self.neo4j_driver
+        return driver is not None
+
+    def qdrant_available(self) -> bool:
+        """Tell whether a Qdrant client object is set on this instance."""
+        client = self.qdrant
+        return client is not None
+
+    def es_available(self) -> bool:
+        """Tell whether an Elasticsearch client object is set on this instance."""
+        client = self.es
+        return client is not None
+
# Global instance
db = Database()