Refactor narrator endpoints for improved search and profile retrieval

- Enhanced search functionality with pagination and normalization for Arabic input.
- Updated profile retrieval to include comprehensive data about narrators, including hadith counts, teachers, students, and family information.
- Introduced new endpoints for narrator interactions and network visualization.
- Improved error handling and response structures across endpoints.
- Added utility functions for database queries and service availability checks.
- Refactored search module to support combined semantic and full-text search with weighted scoring.
This commit is contained in:
salah 2026-03-02 21:51:04 +01:00
parent abb091685e
commit 9aa76cddaf
6 changed files with 831 additions and 663 deletions

View File

@ -1,14 +1,23 @@
"""
Pydantic response models for the Hadith Scholar API.
v2.0 changes:
- All fields that Neo4j/PG can return as null are now Optional with defaults.
- Added PaginationMeta / PaginatedResponse for paginated list endpoints.
- All existing model_config / json_schema_extra examples preserved.
"""
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from typing import Optional from typing import Optional
from datetime import datetime
# ── Common ───────────────────────────────────────────────────────────────── # ── Pagination (NEW in v2.0) ───────────────────────────────────────────────
class PaginationMeta(BaseModel): class PaginationMeta(BaseModel):
total: int total: int = Field(description="Total matching items")
page: int page: int = Field(description="Current page (1-indexed)")
per_page: int per_page: int = Field(description="Items per page")
pages: int pages: int = Field(description="Total pages")
model_config = { model_config = {
"json_schema_extra": { "json_schema_extra": {
@ -26,12 +35,12 @@ class PaginatedResponse(BaseModel):
class HadithSummary(BaseModel): class HadithSummary(BaseModel):
id: str = Field(description="Unique hadith UUID") id: str = Field(description="Unique hadith UUID")
collection: str = Field(description="Collection name in English") collection: Optional[str] = Field(None, description="Collection name in English")
hadith_number: int = Field(description="Hadith number within collection") hadith_number: Optional[int] = Field(None, description="Hadith number within collection")
grade: Optional[str] = Field(None, description="Grading: Sahih, Hasan, Da'if, etc.") grade: Optional[str] = Field(None, description="Grading: Sahih, Hasan, Da'if, etc.")
arabic_text: Optional[str] = Field(None, description="Full Arabic text (may be truncated in list views)") arabic_text: Optional[str] = Field(None, description="Arabic text (truncated in lists)")
matn_text: Optional[str] = Field(None, description="Body text only (without isnad)") sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only")
sanad_text: Optional[str] = Field(None, description="Chain of narration text only") matn_text: Optional[str] = Field(None, description="Matn (body) text only")
model_config = { model_config = {
"json_schema_extra": { "json_schema_extra": {
@ -40,36 +49,28 @@ class HadithSummary(BaseModel):
"collection": "Sahih Bukhari", "collection": "Sahih Bukhari",
"hadith_number": 1, "hadith_number": 1,
"grade": "Sahih", "grade": "Sahih",
"arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ...", "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ...",
"matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ وَإِنَّمَا لِكُلِّ امْرِئٍ مَا نَوَى...", "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ...",
"sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ قَالَ حَدَّثَنَا يَحْيَى بْنُ سَعِيدٍ الأَنْصَارِيُّ" "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ...",
}] }]
} }
} }
class TopicTag(BaseModel): class TopicTag(BaseModel):
topic_arabic: str = Field(description="Topic name in Arabic, e.g. الصلاة") topic_arabic: str = Field("", description="Topic name in Arabic")
topic_english: str = Field(description="Topic name in English, e.g. Prayer") topic_english: str = Field("", description="Topic name in English")
category: str = Field(description="Broad Islamic category: عقيدة، فقه، سيرة، أخلاق، تفسير") category: str = Field("", description="Topic category (فقه, عقيدة, سيرة, etc.)")
model_config = {
"json_schema_extra": {
"examples": [{
"topic_arabic": "النية",
"topic_english": "Intention",
"category": "فقه"
}]
}
}
class NarratorInChain(BaseModel): class NarratorInChain(BaseModel):
order: int = Field(description="Position in chain: 1=closest to compiler, last=closest to Prophet ﷺ") order: Optional[int] = Field(None, description="Position in chain (1 = compiler-end)")
name_arabic: str = Field(description="Narrator's Arabic name as it appears in the hadith text") name_arabic: str = Field(description="Narrator Arabic name")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration of the name") name_transliterated: str = Field("", description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA (أبو/أم), NISBA (attributional), or TITLE (رسول الله)") entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
transmission_verb: Optional[str] = Field(None, description="Exact Arabic transmission verb: حدثنا، أخبرنا، عن، سمعت") transmission_verb: Optional[str] = Field(None, description="حدثنا, أخبرنا, عن, سمعت, etc.")
generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.")
reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.")
model_config = { model_config = {
"json_schema_extra": { "json_schema_extra": {
@ -78,7 +79,9 @@ class NarratorInChain(BaseModel):
"name_arabic": "الْحُمَيْدِيُّ", "name_arabic": "الْحُمَيْدِيُّ",
"name_transliterated": "al-Humaydi", "name_transliterated": "al-Humaydi",
"entity_type": "NISBA", "entity_type": "NISBA",
"transmission_verb": "حَدَّثَنَا" "transmission_verb": "حَدَّثَنَا",
"generation": "تابع التابعين",
"reliability_grade": "ثقة",
}] }]
} }
} }
@ -86,11 +89,14 @@ class NarratorInChain(BaseModel):
class HadithDetail(BaseModel): class HadithDetail(BaseModel):
id: str = Field(description="Unique hadith UUID") id: str = Field(description="Unique hadith UUID")
collection: str = Field(description="Collection English name") collection: Optional[str] = Field(None, description="Collection name")
hadith_number: int = Field(description="Number within collection") hadith_number: Optional[int] = Field(None, description="Hadith number")
grade: Optional[str] = Field(None, description="Hadith grade") book_number: Optional[int] = Field(None, description="Book number within collection")
arabic_text: Optional[str] = Field(None, description="Complete Arabic text") grade: Optional[str] = Field(None, description="Grading")
sanad_text: Optional[str] = Field(None, description="Isnad (chain) text only") arabic_text: Optional[str] = Field(None, description="Full Arabic text")
english_text: Optional[str] = Field(None, description="English translation")
urdu_text: Optional[str] = Field(None, description="Urdu translation")
sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only")
matn_text: Optional[str] = Field(None, description="Matn (body) text only") matn_text: Optional[str] = Field(None, description="Matn (body) text only")
narrator_chain: list[NarratorInChain] = Field(default_factory=list, description="Ordered narrator chain from Neo4j graph") narrator_chain: list[NarratorInChain] = Field(default_factory=list, description="Ordered narrator chain from Neo4j graph")
topics: list[TopicTag] = Field(default_factory=list, description="Topic tags for searchability") topics: list[TopicTag] = Field(default_factory=list, description="Topic tags for searchability")
@ -121,10 +127,10 @@ class HadithDetail(BaseModel):
class NarratorSummary(BaseModel): class NarratorSummary(BaseModel):
name_arabic: str = Field(description="Primary Arabic name") name_arabic: str = Field(description="Primary Arabic name")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration") name_transliterated: str = Field("", description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE") entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين") generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين")
reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، صدوق، ضعيف، متروك") reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade: ثقة، صدوق، ضعيف")
hadith_count: int = Field(0, description="Number of hadiths this narrator appears in") hadith_count: int = Field(0, description="Number of hadiths this narrator appears in")
model_config = { model_config = {
@ -142,6 +148,7 @@ class NarratorSummary(BaseModel):
class NameForm(BaseModel): class NameForm(BaseModel):
"""Alternative name forms for a narrator (kunya, nisba, laqab, etc.)."""
name: str = Field(description="Alternative name form") name: str = Field(description="Alternative name form")
type: str = Field(description="Name type: PERSON, KUNYA, NISBA, TITLE") type: str = Field(description="Name type: PERSON, KUNYA, NISBA, TITLE")
@ -165,9 +172,10 @@ class PlaceRelation(BaseModel):
class NarratorProfile(BaseModel): class NarratorProfile(BaseModel):
"""Complete narrator profile — the mobile app profile page."""
name_arabic: str = Field(description="Primary Arabic name") name_arabic: str = Field(description="Primary Arabic name")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration") name_transliterated: str = Field("", description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE") entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
full_nasab: Optional[str] = Field(None, description="Full lineage: فلان بن فلان بن فلان") full_nasab: Optional[str] = Field(None, description="Full lineage: فلان بن فلان بن فلان")
kunya: Optional[str] = Field(None, description="أبو/أم name (e.g. أبو هريرة)") kunya: Optional[str] = Field(None, description="أبو/أم name (e.g. أبو هريرة)")
nisba: Optional[str] = Field(None, description="Attributional name (e.g. البخاري، المدني، الزهري)") nisba: Optional[str] = Field(None, description="Attributional name (e.g. البخاري، المدني، الزهري)")
@ -186,6 +194,8 @@ class NarratorProfile(BaseModel):
hadiths: list[HadithSummary] = Field(default_factory=list, description="Sample hadiths narrated (max 50)") hadiths: list[HadithSummary] = Field(default_factory=list, description="Sample hadiths narrated (max 50)")
teachers: list[NarratorSummary] = Field(default_factory=list, description="Known teachers / شيوخ") teachers: list[NarratorSummary] = Field(default_factory=list, description="Known teachers / شيوخ")
students: list[NarratorSummary] = Field(default_factory=list, description="Known students / تلاميذ") students: list[NarratorSummary] = Field(default_factory=list, description="Known students / تلاميذ")
name_forms: list[NameForm] = Field(default_factory=list, description="Alternative name forms")
family: Optional[FamilyInfo] = Field(None, description="Family info if known")
places: list[PlaceRelation] = Field(default_factory=list, description="Associated places (born, lived, died, traveled)") places: list[PlaceRelation] = Field(default_factory=list, description="Associated places (born, lived, died, traveled)")
tribes: list[str] = Field(default_factory=list, description="Tribal affiliations (e.g. قريش، دوس، الأنصار)") tribes: list[str] = Field(default_factory=list, description="Tribal affiliations (e.g. قريش، دوس، الأنصار)")
bio_verified: bool = Field(False, description="Whether biography has been manually verified against classical sources") bio_verified: bool = Field(False, description="Whether biography has been manually verified against classical sources")
@ -208,12 +218,11 @@ class NarratorProfile(BaseModel):
"birth_year_ce": None, "birth_year_ce": None,
"death_year_ce": 676, "death_year_ce": 676,
"biography_summary_arabic": "أبو هريرة الدوسي، صحابي جليل، أكثر الصحابة رواية للحديث النبوي. أسلم عام خيبر ولازم النبي ﷺ.", "biography_summary_arabic": "أبو هريرة الدوسي، صحابي جليل، أكثر الصحابة رواية للحديث النبوي. أسلم عام خيبر ولازم النبي ﷺ.",
"biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith. He accepted Islam during Khaybar and remained close to the Prophet ﷺ.", "biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith.",
"total_hadiths_narrated_approx": 5374, "total_hadiths_narrated_approx": 5374,
"hadith_count": 142, "hadith_count": 142,
"hadiths": [], "teachers": [{"name_arabic": "النبي ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": "نبي", "reliability_grade": None, "hadith_count": 0}],
"teachers": [{"name_arabic": "رسول الله ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": None, "reliability_grade": None, "hadith_count": 0}], "students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 0}],
"students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 89}],
"places": [{"place": "المدينة", "relation": "LIVED_IN"}], "places": [{"place": "المدينة", "relation": "LIVED_IN"}],
"tribes": ["دوس"], "tribes": ["دوس"],
"bio_verified": False, "bio_verified": False,
@ -222,27 +231,27 @@ class NarratorProfile(BaseModel):
} }
# ── Isnad Chain ──────────────────────────────────────────────────────────── # ── Isnad Chain (D3-ready) ─────────────────────────────────────────────────
class IsnadNode(BaseModel): class IsnadNode(BaseModel):
name_arabic: str = Field(description="Narrator Arabic name") name_arabic: str = Field(description="Narrator Arabic name")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration") name_transliterated: str = Field("", description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE") entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
generation: Optional[str] = Field(None, description="طبقة") generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.")
reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade") reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.")
class IsnadLink(BaseModel): class IsnadLink(BaseModel):
source: str = Field(description="name_arabic of narrator who received the hadith") source: str = Field(description="name_arabic of narrator who heard")
target: str = Field(description="name_arabic of narrator they received it from") target: str = Field(description="name_arabic of narrator who transmitted")
transmission_verb: Optional[str] = Field(None, description="Exact verb: حدثنا، أخبرنا، عن، سمعت، أنبأنا") transmission_verb: Optional[str] = Field(None, description="حدثنا, عن, أخبرنا, etc.")
class IsnadChain(BaseModel): class IsnadChain(BaseModel):
hadith_id: str = Field(description="UUID of the hadith") hadith_id: str = Field(description="Hadith UUID")
collection: str = Field(description="Collection name") collection: Optional[str] = Field(None, description="Collection name")
hadith_number: int = Field(description="Hadith number") hadith_number: Optional[int] = Field(None, description="Hadith number")
nodes: list[IsnadNode] = Field(default_factory=list, description="Narrator nodes for graph visualization") nodes: list[IsnadNode] = Field(default_factory=list, description="Narrators in the chain")
links: list[IsnadLink] = Field(default_factory=list, description="Directed edges: source heard from target") links: list[IsnadLink] = Field(default_factory=list, description="Directed edges: source heard from target")
model_config = { model_config = {
@ -271,12 +280,12 @@ class IsnadChain(BaseModel):
class NarratorInteraction(BaseModel): class NarratorInteraction(BaseModel):
narrator_a: str = Field(description="First narrator Arabic name") narrator_a: str = Field(description="First narrator Arabic name")
narrator_a_transliterated: Optional[str] = Field(None, description="First narrator transliteration") narrator_a_transliterated: str = Field("", description="First narrator transliteration")
narrator_b: str = Field(description="Second narrator Arabic name") narrator_b: str = Field(description="Second narrator Arabic name")
narrator_b_transliterated: Optional[str] = Field(None, description="Second narrator transliteration") narrator_b_transliterated: str = Field("", description="Second narrator transliteration")
relationship_type: str = Field(description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF") relationship_type: str = Field("", description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF")
shared_hadith_count: int = Field(0, description="Number of hadiths connecting them") shared_hadith_count: int = Field(0, description="Number of hadiths connecting them")
hadith_ids: list[str] = Field(default_factory=list, description="IDs of connecting hadiths (max 20)") hadith_ids: list[str] = Field(default_factory=list, description="IDs of shared hadiths (max 20)")
model_config = { model_config = {
"json_schema_extra": { "json_schema_extra": {
@ -295,7 +304,7 @@ class NarratorInteraction(BaseModel):
class NarratorConnection(BaseModel): class NarratorConnection(BaseModel):
narrator: str = Field(description="Connected narrator Arabic name") narrator: str = Field(description="Connected narrator Arabic name")
narrator_transliterated: Optional[str] = Field(None, description="Transliteration") narrator_transliterated: str = Field("", description="Transliteration")
connection_type: str = Field(description="Relationship type") connection_type: str = Field(description="Relationship type")
direction: str = Field(description="'incoming' (they → this) or 'outgoing' (this → them)") direction: str = Field(description="'incoming' (they → this) or 'outgoing' (this → them)")
@ -306,12 +315,26 @@ class NarratorNetwork(BaseModel):
total_connections: int = 0 total_connections: int = 0
class PathNode(BaseModel):
    """A single entry in the ``path`` list of a ``WhoMetWhoResult``.

    Only ``name_arabic`` is required; the transliteration defaults to an
    empty string and ``generation`` to ``None``, matching the null-tolerant
    defaults used by the other narrator models in this module.
    """
    name_arabic: str = Field(description="Arabic name")
    name_transliterated: str = Field("", description="Latin transliteration")
    generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.")
class WhoMetWhoResult(BaseModel):
    """Response model pairing two narrators with a list of path nodes.

    Only the two narrator names are required. ``path`` and
    ``relationship_types`` default to empty lists and ``path_length`` to
    ``None``, so a result with no path can be built from the names alone.
    """
    # NOTE(review): whether these hold Arabic names or another identifier is
    # not visible in this module — confirm against the endpoint that builds it.
    narrator_a: str = Field(description="First narrator")
    narrator_b: str = Field(description="Second narrator")
    path: list[PathNode] = Field(default_factory=list, description="Path nodes")
    path_length: Optional[int] = Field(None, description="Path length; null when not provided")
    relationship_types: list[str] = Field(default_factory=list, description="Relationship types")
# ── Search ───────────────────────────────────────────────────────────────── # ── Search ─────────────────────────────────────────────────────────────────
class SemanticSearchResult(BaseModel): class SemanticSearchResult(BaseModel):
hadith: HadithSummary = Field(description="Matching hadith") hadith: HadithSummary = Field(description="Matching hadith")
score: float = Field(description="Cosine similarity score (0-1, higher = more relevant)") score: float = Field(description="Cosine similarity score (0-1, higher = more relevant)")
collection: Optional[str] = Field(None, description="Collection name") collection: str = Field("", description="Collection name")
model_config = { model_config = {
"json_schema_extra": { "json_schema_extra": {
@ -332,8 +355,8 @@ class SemanticSearchResult(BaseModel):
class FullTextSearchResult(BaseModel): class FullTextSearchResult(BaseModel):
hadith: HadithSummary = Field(description="Matching hadith") hadith: HadithSummary = Field(description="Matching hadith")
score: float = Field(description="Elasticsearch relevance score (higher = more relevant)") score: float = Field(description="Elasticsearch relevance score")
highlights: list[str] = Field(default_factory=list, description="Text fragments with <em>highlighted</em> matches") highlights: list[str] = Field(default_factory=list, description="Text fragments with <em> highlighted matches")
model_config = { model_config = {
"json_schema_extra": { "json_schema_extra": {
@ -346,7 +369,27 @@ class FullTextSearchResult(BaseModel):
"arabic_text": "..." "arabic_text": "..."
}, },
"score": 12.45, "score": 12.45,
"highlights": ["...عن <em>الصلاة</em> في المسجد الحرام..."] "highlights": ["...عن <em>الصلاة</em> في المسجد..."]
}] }]
} }
} }
class CombinedSearchResult(BaseModel):
    """Search hit carrying per-source scores alongside a combined score.

    ``semantic_score`` and ``fulltext_score`` are each optional and default
    to ``None``; ``combined_score`` defaults to ``0.0``. ``hadith`` and
    ``source`` are required.
    """
    hadith: HadithSummary = Field(description="Matching hadith")
    semantic_score: Optional[float] = Field(None, description="Semantic search score; null when absent")
    fulltext_score: Optional[float] = Field(None, description="Full-text search score; null when absent")
    # NOTE(review): how the two scores are weighted into this value is decided
    # by the caller, not by this model — see the search module.
    combined_score: float = Field(0.0, description="Combined score")
    source: str = Field(description="semantic, fulltext, or both")
# ── Stats ──────────────────────────────────────────────────────────────────
class SystemStats(BaseModel):
    # Seven optional integer statistics, every one defaulting to None.
    # (Documented with comments rather than a docstring so the generated
    # JSON schema stays exactly as before.)
    # NOTE(review): the name suffixes (_pg, _neo4j, _qdrant, _es) presumably
    # name the backing store each figure comes from, with None meaning the
    # figure was unavailable — confirm against the stats endpoint.
    hadiths_pg: Optional[int] = Field(None)
    narrators_neo4j: Optional[int] = Field(None)
    places_neo4j: Optional[int] = Field(None)
    tribes_neo4j: Optional[int] = Field(None)
    relationships_neo4j: Optional[int] = Field(None)
    embeddings_qdrant: Optional[int] = Field(None)
    documents_es: Optional[int] = Field(None)

View File

@ -1,27 +1,33 @@
""" """
Isnad chain endpoints — chain visualization data for hadith detail views. Isnad chain endpoints — chain visualization data (D3-ready nodes + links).
""" """
from fastapi import APIRouter, Query, HTTPException from fastapi import APIRouter, Query, Path, HTTPException
from app.services.database import db from app.services.database import db
from app.models.schemas import IsnadChain, IsnadNode, IsnadLink from app.utils.arabic import normalize_name
from app.models.schemas import (
IsnadChain, IsnadNode, IsnadLink,
PaginatedResponse, PaginationMeta,
)
router = APIRouter(prefix="/chains", tags=["Isnad Chains"]) router = APIRouter(prefix="/chains", tags=["Isnad Chains"])
@router.get("/hadith/{hadith_id}", response_model=IsnadChain, def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
summary="Get isnad chain for a hadith", pages = max(1, (total + per_page - 1) // per_page)
description="Returns the complete isnad (chain of narration) as a graph structure " return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages)
"with nodes (narrators) and links (transmission relationships). "
"Ready for visualization with D3.js, vis.js, Cytoscape.js, or any graph library. "
"Each node includes narrator metadata (generation, reliability); " # ── Chain for a single hadith ──────────────────────────────────────────────
"each link includes the transmission verb (حدثنا، عن، أخبرنا).")
async def get_isnad_chain(hadith_id: str): @router.get("/hadith/{hadith_id}", response_model=IsnadChain)
async def get_isnad_chain(
hadith_id: str = Path(..., description="Hadith UUID"),
):
""" """
Get the full isnad chain for a hadith as a graph (nodes + links) Get the isnad chain for a hadith as a directed graph (nodes + links).
ready for visualization (D3.js, vis.js, etc.). Returns D3-compatible format for frontend visualization.
""" """
# Get hadith info
hadith = db.neo4j_query_one(""" hadith = db.neo4j_query_one("""
MATCH (h:Hadith {id: $hid}) MATCH (h:Hadith {id: $hid})
RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number
@ -30,108 +36,113 @@ async def get_isnad_chain(hadith_id: str):
if not hadith: if not hadith:
raise HTTPException(status_code=404, detail="Hadith not found in graph") raise HTTPException(status_code=404, detail="Hadith not found in graph")
# Get chain nodes # Narrator nodes in the chain
nodes = db.neo4j_query(""" nodes_rows = db.neo4j_query("""
MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid}) MATCH (n:Narrator)-[a:APPEARS_IN]->(h:Hadith {id: $hid})
RETURN n.name_arabic AS name_arabic, RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated, n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type, n.entity_type AS entity_type,
n.generation AS generation, n.generation AS generation,
n.reliability_grade AS reliability_grade, n.reliability_grade AS reliability_grade
r.chain_order AS chain_order ORDER BY a.chain_order
ORDER BY r.chain_order
""", {"hid": hadith_id}) """, {"hid": hadith_id})
# Get chain links (NARRATED_FROM within this hadith's narrators) # Transmission links — NARRATED_FROM edges store hadith_ids as array
links = db.neo4j_query(""" links_rows = db.neo4j_query("""
MATCH (a:Narrator)-[r1:APPEARS_IN]->(h:Hadith {id: $hid}) MATCH (a:Narrator)-[nf:NARRATED_FROM]->(b:Narrator)
MATCH (b:Narrator)-[r2:APPEARS_IN]->(h)
MATCH (a)-[nf:NARRATED_FROM]->(b)
WHERE $hid IN nf.hadith_ids WHERE $hid IN nf.hadith_ids
RETURN a.name_arabic AS source, RETURN a.name_arabic AS source,
b.name_arabic AS target, b.name_arabic AS target,
nf.transmission_verb AS transmission_verb nf.transmission_verb AS transmission_verb
ORDER BY a.name_arabic
""", {"hid": hadith_id}) """, {"hid": hadith_id})
# If no NARRATED_FROM edges with hadith_id, fall back to chain order
if not links and len(nodes) > 1:
sorted_nodes = sorted(nodes, key=lambda n: n.get("chain_order") or 999)
links = []
for i in range(len(sorted_nodes) - 1):
links.append({
"source": sorted_nodes[i]["name_arabic"],
"target": sorted_nodes[i + 1]["name_arabic"],
"transmission_verb": None,
})
return IsnadChain( return IsnadChain(
hadith_id=str(hadith["id"]), hadith_id=hadith_id,
collection=hadith["collection"] or "", collection=hadith.get("collection"),
hadith_number=hadith["hadith_number"] or 0, hadith_number=hadith.get("hadith_number"),
nodes=[IsnadNode(**n) for n in nodes], nodes=[IsnadNode(**r) for r in nodes_rows],
links=[IsnadLink(**l) for l in links], links=[IsnadLink(**r) for r in links_rows],
) )
@router.get("/narrator/{name_arabic}", response_model=list[IsnadChain], # ── All chains containing a narrator (paginated) ──────────────────────────
summary="Get all chains for a narrator",
description="Returns all isnad chains that include a specific narrator. " @router.get("/narrator/{name_arabic}", response_model=PaginatedResponse)
"Useful for visualizing how a narrator connects to the Prophet ﷺ " async def chains_by_narrator(
"through different transmission paths. " name_arabic: str = Path(..., description="Narrator Arabic name"),
"Example: `/chains/narrator/الزهري`") page: int = Query(1, ge=1),
async def get_narrator_chains( per_page: int = Query(10, ge=1, le=50),
name_arabic: str,
limit: int = Query(10, ge=1, le=50, description="Maximum chains to return"),
): ):
""" """
Get all isnad chains that include a specific narrator. All isnad chains containing a narrator.
Useful for seeing how a narrator connects to the Prophet ﷺ. Useful for seeing how a narrator connects to the Prophet ﷺ across collections.
""" """
q_norm = normalize_name(name_arabic)
skip = (page - 1) * per_page
total = db.neo4j_count("""
MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
WHERE toLower(n.name_arabic) CONTAINS toLower($name)
RETURN count(DISTINCT h) AS count
""", {"name": q_norm})
hadith_ids = db.neo4j_query(""" hadith_ids = db.neo4j_query("""
MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith) MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
RETURN h.id AS id WHERE toLower(n.name_arabic) CONTAINS toLower($name)
LIMIT $limit RETURN DISTINCT h.id AS id
""", {"name": name_arabic, "limit": limit}) ORDER BY h.id
SKIP $skip LIMIT $limit
""", {"name": q_norm, "skip": skip, "limit": per_page})
chains = [] chains = []
for row in hadith_ids: for row in hadith_ids:
chain = await get_isnad_chain(str(row["id"])) chain = await get_isnad_chain(str(row["id"]))
chains.append(chain) chains.append(chain)
return chains return PaginatedResponse(
data=chains,
meta=_paginate(total, page, per_page),
)
@router.get("/common-chains", response_model=list[dict], # ── Common chains between two narrators (paginated) ───────────────────────
summary="Find shared chains between two narrators",
description="Find hadiths where both narrators appear in the same isnad chain. " @router.get("/common", response_model=PaginatedResponse)
"Useful for verifying narrator relationships and finding corroborating chains. "
"Example: `/chains/common-chains?narrator_a=الزهري&narrator_b=أنس بن مالك`")
async def find_common_chains( async def find_common_chains(
narrator_a: str = Query( narrator_a: str = Query(..., description="First narrator (Arabic)"),
..., description="First narrator (Arabic). Example: الزهري", narrator_b: str = Query(..., description="Second narrator (Arabic)"),
examples=["الزهري"], page: int = Query(1, ge=1),
), per_page: int = Query(10, ge=1, le=50),
narrator_b: str = Query(
..., description="Second narrator (Arabic). Example: أنس بن مالك",
examples=["أنس بن مالك"],
),
limit: int = Query(10, ge=1, le=50, description="Maximum results"),
): ):
""" """Find hadiths where both narrators appear in the same chain."""
Find hadiths where both narrators appear in the same chain. a_norm = normalize_name(narrator_a)
Useful for verifying narrator relationships. b_norm = normalize_name(narrator_b)
""" skip = (page - 1) * per_page
total = db.neo4j_count("""
MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator)
WHERE toLower(a.name_arabic) CONTAINS toLower($a)
AND toLower(b.name_arabic) CONTAINS toLower($b)
AND a <> b
RETURN count(DISTINCT h) AS count
""", {"a": a_norm, "b": b_norm})
rows = db.neo4j_query(""" rows = db.neo4j_query("""
MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator) MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator)
WHERE a.name_arabic CONTAINS $name_a WHERE toLower(a.name_arabic) CONTAINS toLower($a)
AND b.name_arabic CONTAINS $name_b AND toLower(b.name_arabic) CONTAINS toLower($b)
AND a <> b AND a <> b
RETURN h.id AS hadith_id, RETURN DISTINCT h.id AS hadith_id,
h.collection AS collection, h.collection AS collection,
h.hadith_number AS hadith_number, h.hadith_number AS hadith_number,
a.name_arabic AS narrator_a, a.name_arabic AS narrator_a,
b.name_arabic AS narrator_b b.name_arabic AS narrator_b
LIMIT $limit ORDER BY h.collection, h.hadith_number
""", {"name_a": narrator_a, "name_b": narrator_b, "limit": limit}) SKIP $skip LIMIT $limit
""", {"a": a_norm, "b": b_norm, "skip": skip, "limit": per_page})
return [dict(r) for r in rows] return PaginatedResponse(
data=[dict(r) for r in rows],
meta=_paginate(total, page, per_page),
)

View File

@ -1,10 +1,13 @@
""" """
Hadith endpoints — details, listing, search by keyword/narrator/topic/place. Hadith endpoints — details, listing, search by keyword/narrator/topic/place.
All query parameters are Arabic-normalized for consistent matching.
All list endpoints support pagination via page + per_page.
""" """
from fastapi import APIRouter, Query, HTTPException from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional from typing import Optional
from app.services.database import db from app.services.database import db
from app.utils.arabic import normalize_query
from app.models.schemas import ( from app.models.schemas import (
HadithDetail, HadithSummary, NarratorInChain, TopicTag, HadithDetail, HadithSummary, NarratorInChain, TopicTag,
PaginatedResponse, PaginationMeta, PaginatedResponse, PaginationMeta,
@ -13,36 +16,44 @@ from app.models.schemas import (
router = APIRouter(prefix="/hadiths", tags=["Hadiths"]) router = APIRouter(prefix="/hadiths", tags=["Hadiths"])
@router.get("/{hadith_id}", response_model=HadithDetail, def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
summary="Get hadith by ID", pages = max(1, (total + per_page - 1) // per_page)
description="Retrieve full hadith details including Arabic text, sanad/matn separation, " return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages)
"ordered narrator chain from the knowledge graph, and topic tags.")
async def get_hadith(hadith_id: str):
# ── Single hadith by ID ────────────────────────────────────────────────────
@router.get("/{hadith_id}", response_model=HadithDetail)
async def get_hadith(hadith_id: str = Path(..., description="Hadith UUID")):
"""Get full hadith details by ID, including narrator chain and topics from Neo4j.""" """Get full hadith details by ID, including narrator chain and topics from Neo4j."""
# Base hadith from PostgreSQL # Base hadith from PostgreSQL
hadith = db.pg_query_one(""" hadith = db.pg_query_one("""
SELECT h.id, c.name_english AS collection, h.hadith_number, SELECT h.id, c.name_english AS collection, h.hadith_number,
h.grade, h.arabic_text, h.sanad, h.matn h.book_number, h.grade, h.arabic_text, h.english_text,
h.urdu_text, h.sanad, h.matn
FROM hadiths h FROM hadiths h
JOIN collections c ON c.id = h.collection_id JOIN collections c ON c.id = h.collection_id
WHERE h.id = %s WHERE h.id::text = %s
""", (hadith_id,)) """, (hadith_id,))
if not hadith: if not hadith:
raise HTTPException(status_code=404, detail="Hadith not found") raise HTTPException(status_code=404, detail="Hadith not found")
# Enrich with chain + topics from Neo4j # Narrator chain from Neo4j
chain = db.neo4j_query(""" chain = db.neo4j_query("""
MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid}) MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
RETURN n.name_arabic AS name_arabic, RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated, n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type, n.entity_type AS entity_type,
n.generation AS generation,
n.reliability_grade AS reliability_grade,
r.chain_order AS order, r.chain_order AS order,
r.transmission_verb AS transmission_verb r.transmission_verb AS transmission_verb
ORDER BY r.chain_order ORDER BY r.chain_order
""", {"hid": hadith_id}) """, {"hid": hadith_id})
# Topics from Neo4j
topics = db.neo4j_query(""" topics = db.neo4j_query("""
MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic) MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic)
RETURN t.topic_arabic AS topic_arabic, RETURN t.topic_arabic AS topic_arabic,
@ -52,10 +63,13 @@ async def get_hadith(hadith_id: str):
return HadithDetail( return HadithDetail(
id=str(hadith["id"]), id=str(hadith["id"]),
collection=hadith["collection"], collection=hadith.get("collection"),
hadith_number=hadith["hadith_number"], hadith_number=hadith.get("hadith_number"),
grade=hadith["grade"], book_number=hadith.get("book_number"),
arabic_text=hadith["arabic_text"], grade=hadith.get("grade"),
arabic_text=hadith.get("arabic_text"),
english_text=hadith.get("english_text"),
urdu_text=hadith.get("urdu_text"),
sanad_text=hadith.get("sanad"), sanad_text=hadith.get("sanad"),
matn_text=hadith.get("matn"), matn_text=hadith.get("matn"),
narrator_chain=[NarratorInChain(**c) for c in chain], narrator_chain=[NarratorInChain(**c) for c in chain],
@ -63,183 +77,155 @@ async def get_hadith(hadith_id: str):
) )
# ── By collection + number ─────────────────────────────────────────────────

@router.get("/by-ref/{collection}/{number}", response_model=HadithDetail)
async def get_hadith_by_reference(
    collection: str = Path(..., description="Collection name, e.g. 'Sahih Bukhari'"),
    number: int = Path(..., description="Hadith number within the collection"),
):
    """Lookup hadith by collection name + hadith number."""
    # Resolve the reference to the hadith's primary key. The collection name
    # is compared with "=", so it has to equal name_english exactly.
    row = db.pg_query_one("""
        SELECT h.id FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s AND h.hadith_number = %s
    """, (collection, number))
    if not row:
        raise HTTPException(status_code=404, detail=f"Hadith {collection} #{number} not found")
    # Delegate to the by-ID handler so both routes return the same payload.
    return await get_hadith(str(row["id"]))
# ── List by collection (paginated) ─────────────────────────────────────────

@router.get("/collection/{collection_name}", response_model=PaginatedResponse)
async def list_by_collection(
    collection_name: str = Path(..., description="Collection name"),
    page: int = Query(1, ge=1, description="Page number"),
    per_page: int = Query(20, ge=1, le=100, description="Items per page"),
):
    """List hadiths in a collection with pagination."""
    # Count with the same filter as the page query so the metadata agrees
    # with the rows that are returned.
    total = db.pg_count("""
        SELECT count(*) FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s
    """, (collection_name,))

    offset = (page - 1) * per_page
    rows = db.pg_query("""
        SELECT h.id, c.name_english AS collection, h.hadith_number,
               h.grade, LEFT(h.arabic_text, 300) AS arabic_text
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s
        ORDER BY h.hadith_number
        LIMIT %s OFFSET %s
    """, (collection_name, per_page, offset))

    # The id column is stringified explicitly; every other column is passed
    # through to the summary model under its SQL alias.
    data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Keyword search (paginated, normalized) ─────────────────────────────────

@router.get("/search/keyword", response_model=PaginatedResponse)
async def search_keyword(
    q: str = Query(..., min_length=2, description="Arabic keyword(s) — diacritics stripped automatically"),
    collection: Optional[str] = Query(None, description="Filter by collection"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Keyword search in Arabic hadith text. Query is normalized for consistent matching."""
    q_norm = normalize_query(q)
    # Escape LIKE metacharacters so user input is matched literally: without
    # this a query such as "__" or "%%" would match every hadith. Backslash is
    # PostgreSQL's default LIKE escape character and must be escaped first.
    pattern = q_norm.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    # The WHERE clause is assembled from fixed fragments only; every
    # user-supplied value travels as a bound parameter.
    where = "WHERE h.arabic_text ILIKE %s"
    params: list = [f"%{pattern}%"]
    if collection:
        where += " AND c.name_english = %s"
        params.append(collection)

    total = db.pg_count(
        f"SELECT count(*) FROM hadiths h JOIN collections c ON c.id = h.collection_id {where}",
        tuple(params),
    )

    offset = (page - 1) * per_page
    rows = db.pg_query(
        f"SELECT h.id, c.name_english AS collection, h.hadith_number, "
        f"h.grade, LEFT(h.arabic_text, 300) AS arabic_text "
        f"FROM hadiths h JOIN collections c ON c.id = h.collection_id "
        f"{where} ORDER BY c.name_english, h.hadith_number "
        f"LIMIT %s OFFSET %s",
        tuple(params + [per_page, offset]),
    )

    data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Search by topic (paginated, normalized) ────────────────────────────────

@router.get("/search/topic", response_model=PaginatedResponse)
async def search_by_topic(
    q: str = Query(..., min_length=2, description="Topic keyword (Arabic or English)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Find hadiths by topic tag from the knowledge graph."""
    q_norm = normalize_query(q)
    skip = (page - 1) * per_page

    # A hadith can carry several matching topics, so both the count and the
    # page query de-duplicate on the hadith.
    total = db.neo4j_count("""
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
           OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
    """, {"q": q_norm})

    rows = db.neo4j_query("""
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
           OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
    """, {"q": q_norm, "skip": skip, "limit": per_page})

    data = [HadithSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Search by narrator (paginated, normalized) ─────────────────────────────

@router.get("/search/narrator", response_model=PaginatedResponse)
async def search_by_narrator(
    q: str = Query(..., min_length=2, description="Narrator name (Arabic)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Find all hadiths where a narrator appears in the chain."""
    q_norm = normalize_query(q)
    skip = (page - 1) * per_page

    # Several narrators can match the substring and appear in the same
    # hadith, so both queries de-duplicate on the hadith.
    total = db.neo4j_count("""
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
    """, {"q": q_norm})

    rows = db.neo4j_query("""
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
    """, {"q": q_norm, "skip": skip, "limit": per_page})

    data = [HadithSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))

View File

@ -1,317 +1,436 @@
"""
Narrator endpoints — search, profiles, network queries.
All queries normalize Arabic input to match post-dedup graph data.
"""
from fastapi import APIRouter, Query, HTTPException from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional from typing import Optional
from app.services.database import db from app.services.database import db
from app.utils.arabic import normalize_query, normalize_name
from app.models.schemas import ( from app.models.schemas import (
NarratorProfile, NarratorSummary, HadithSummary, NarratorSummary, NarratorProfile, NarratorInteraction,
NarratorInteraction, PlaceRelation, NarratorConnection, NarratorNetwork,
PaginatedResponse, PaginationMeta, WhoMetWhoResult, PathNode, PlaceRelation, NameForm, FamilyInfo,
HadithSummary, PaginatedResponse, PaginationMeta,
) )
router = APIRouter(prefix="/narrators", tags=["Narrators"]) router = APIRouter(prefix="/narrators", tags=["Narrators"])
def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
    """Build the pagination metadata attached to a paginated list response.

    ``pages`` is the ceiling of ``total / per_page`` and never drops below 1,
    so an empty result set is still reported as a single (empty) page.
    """
    pages = max(1, (total + per_page - 1) // per_page)
    return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages)
# ── Search narrators by name (paginated, normalized) ───────────────────────

@router.get("/search", response_model=PaginatedResponse)
async def search_narrators(
    q: str = Query(..., min_length=2, description="Narrator name (Arabic). Diacritics stripped automatically."),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """
    Search narrators by Arabic name. Input is normalized to match
    the deduplicated graph (diacritics stripped, characters unified).
    """
    q_norm = normalize_query(q)
    skip = (page - 1) * per_page

    total = db.neo4j_count("""
        MATCH (n:Narrator)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN count(n) AS count
    """, {"q": q_norm})

    # OPTIONAL MATCH keeps narrators that appear in no hadith (count 0);
    # results are ranked by how many distinct hadiths each narrator appears in.
    rows = db.neo4j_query("""
        MATCH (n:Narrator)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
        ORDER BY hadith_count DESC
        SKIP $skip LIMIT $limit
    """, {"q": q_norm, "skip": skip, "limit": per_page})

    data = [NarratorSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Full narrator profile ──────────────────────────────────────────────────

def _unique_summaries(rows: list) -> list:
    """Convert rows to NarratorSummary objects, keeping the first row per name_arabic."""
    seen = set()
    unique = []
    for row in rows:
        name = row["name_arabic"]
        if name in seen:
            continue
        seen.add(name)
        unique.append(NarratorSummary(**row))
    return unique


@router.get("/profile/{name_arabic}", response_model=NarratorProfile)
async def get_narrator_profile(
    name_arabic: str = Path(..., description="Narrator Arabic name (exact or close match)"),
):
    """
    Complete narrator profile — biography, hadiths, teachers, students, places, tribes.
    This is the mobile app profile page query.
    """
    q_norm = normalize_name(name_arabic)

    # Find the narrator node — exact first, then CONTAINS fallback
    narrator = db.neo4j_query_one("""
        MATCH (n:Narrator)
        WHERE toLower(n.name_arabic) = toLower($q)
        RETURN n
    """, {"q": q_norm})

    if not narrator:
        # Several names may contain the query; order by name length (then by
        # name) so the closest match is chosen deterministically instead of
        # whichever node the database happens to return first.
        narrator = db.neo4j_query_one("""
            MATCH (n:Narrator)
            WHERE toLower(n.name_arabic) CONTAINS toLower($q)
            RETURN n
            ORDER BY size(n.name_arabic), n.name_arabic
            LIMIT 1
        """, {"q": q_norm})

    if not narrator:
        raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}")

    n = narrator.get("n", {})
    # All follow-up queries key on the stored name, which can differ from the
    # request value when the CONTAINS fallback matched.
    actual_name = n.get("name_arabic", q_norm)

    # Hadith count
    stats = db.neo4j_query_one("""
        MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
        RETURN count(DISTINCT h) AS hadith_count
    """, {"name": actual_name}) or {}

    # Teachers: narrator NARRATED_FROM teacher + teacher TEACHER_OF narrator
    teachers_nf = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:NARRATED_FROM]->(t:Narrator)
        OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith)
        RETURN t.name_arabic AS name_arabic,
               t.name_transliterated AS name_transliterated,
               t.entity_type AS entity_type,
               t.generation AS generation,
               t.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    teachers_to = db.neo4j_query("""
        MATCH (t:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name})
        OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith)
        RETURN t.name_arabic AS name_arabic,
               t.name_transliterated AS name_transliterated,
               t.entity_type AS entity_type,
               t.generation AS generation,
               t.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    # The same teacher can be reachable through both relationship types.
    teachers = _unique_summaries(teachers_nf + teachers_to)

    # Students: student NARRATED_FROM narrator + narrator TEACHER_OF student
    students_nf = db.neo4j_query("""
        MATCH (s:Narrator)-[:NARRATED_FROM]->(n:Narrator {name_arabic: $name})
        OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith)
        RETURN s.name_arabic AS name_arabic,
               s.name_transliterated AS name_transliterated,
               s.entity_type AS entity_type,
               s.generation AS generation,
               s.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    students_to = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(s:Narrator)
        OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith)
        RETURN s.name_arabic AS name_arabic,
               s.name_transliterated AS name_transliterated,
               s.entity_type AS entity_type,
               s.generation AS generation,
               s.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    students = _unique_summaries(students_nf + students_to)

    # Places
    places_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[r]->(p:Place)
        WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
        RETURN p.name_arabic AS place, type(r) AS relation
    """, {"name": actual_name})

    # Tribes
    tribe_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe)
        RETURN t.name_arabic AS name
    """, {"name": actual_name})

    # Name forms (alternative names via RELATED_TO)
    name_form_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:RELATED_TO]-(alt:Narrator)
        WHERE alt.name_arabic <> $name
        RETURN alt.name_arabic AS name, alt.entity_type AS type
    """, {"name": actual_name})

    # Family info
    family_row = db.neo4j_query_one("""
        MATCH (n:Narrator {name_arabic: $name})
        RETURN n.father AS father, n.mother AS mother,
               n.spouse AS spouse, n.children AS children
    """, {"name": actual_name})

    # Only attach a family section when at least one field holds a value.
    family = None
    if family_row and any(family_row.get(k) for k in ["father", "mother", "spouse", "children"]):
        family = FamilyInfo(
            father=family_row.get("father"),
            mother=family_row.get("mother"),
            spouse=family_row.get("spouse"),
            children=family_row.get("children") or [],
        )

    # Sample hadiths (capped at 50; hadith_count carries the full total)
    hadith_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
        RETURN h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        LIMIT 50
    """, {"name": actual_name})

    return NarratorProfile(
        name_arabic=n.get("name_arabic", actual_name),
        name_transliterated=n.get("name_transliterated", ""),
        entity_type=n.get("entity_type", ""),
        full_nasab=n.get("full_nasab"),
        kunya=n.get("kunya"),
        nisba=n.get("nisba"),
        laqab=n.get("laqab"),
        generation=n.get("generation"),
        reliability_grade=n.get("reliability_grade"),
        reliability_detail=n.get("reliability_detail"),
        birth_year_hijri=n.get("birth_year_hijri"),
        death_year_hijri=n.get("death_year_hijri"),
        birth_year_ce=n.get("birth_year_ce"),
        death_year_ce=n.get("death_year_ce"),
        biography_summary_arabic=n.get("biography_summary_arabic"),
        biography_summary_english=n.get("biography_summary_english"),
        total_hadiths_narrated_approx=n.get("total_hadiths_narrated_approx"),
        hadith_count=stats.get("hadith_count", 0),
        hadiths=[HadithSummary(**r) for r in hadith_rows],
        teachers=teachers,
        students=students,
        name_forms=[NameForm(**r) for r in name_form_rows],
        family=family,
        places=[PlaceRelation(**r) for r in places_rows],
        tribes=[r["name"] for r in tribe_rows],
        bio_verified=n.get("bio_verified", False),
    )
# ── Narrators by generation (paginated, normalized) ────────────────────────

@router.get("/by-generation/{generation}", response_model=PaginatedResponse)
async def narrators_by_generation(
    generation: str = Path(..., description="Generation: صحابي, تابعي, تابع التابعين, نبي"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """List narrators by generation (e.g. Companions, Successors)."""
    q_norm = normalize_query(generation)
    skip = (page - 1) * per_page

    total = db.neo4j_count("""
        MATCH (n:Narrator)
        WHERE toLower(n.generation) CONTAINS toLower($gen)
        RETURN count(n) AS count
    """, {"gen": q_norm})

    # OPTIONAL MATCH keeps narrators that appear in no hadith (count 0).
    rows = db.neo4j_query("""
        MATCH (n:Narrator)
        WHERE toLower(n.generation) CONTAINS toLower($gen)
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
        ORDER BY hadith_count DESC
        SKIP $skip LIMIT $limit
    """, {"gen": q_norm, "skip": skip, "limit": per_page})

    data = [NarratorSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Narrators by place (paginated, normalized) ─────────────────────────────

@router.get("/by-place/{place_name}", response_model=PaginatedResponse)
async def narrators_by_place(
    place_name: str = Path(..., description="Place name in Arabic (e.g. مكة)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(50, ge=1, le=100),
):
    """
    Narrators associated with a place (born, lived, died, traveled).
    Input is normalized — مكة المكرمة matches مكه المكرمه.
    """
    q_norm = normalize_query(place_name)
    skip = (page - 1) * per_page

    # A narrator can be linked to the same place by several relationship
    # types, so the count de-duplicates on the narrator.
    total = db.neo4j_count("""
        MATCH (n:Narrator)-[r]->(p:Place)
        WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
          AND toLower(p.name_arabic) CONTAINS toLower($place)
        RETURN count(DISTINCT n) AS count
    """, {"place": q_norm})

    rows = db.neo4j_query("""
        MATCH (n:Narrator)-[r]->(p:Place)
        WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
          AND toLower(p.name_arabic) CONTAINS toLower($place)
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN DISTINCT n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
        ORDER BY hadith_count DESC
        SKIP $skip LIMIT $limit
    """, {"place": q_norm, "skip": skip, "limit": per_page})

    data = [NarratorSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction], # ── Narrator interactions ──────────────────────────────────────────────────
summary="Get all narrator interactions",
description="Lists all relationships for a narrator: who they narrated from, " @router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction])
"who narrated from them, their teachers, and their students. " async def narrator_interactions(
"Each interaction includes shared hadith count. " name_arabic: str = Path(..., description="Narrator Arabic name"),
"Example: `/narrators/interactions/الزهري`") limit: int = Query(50, ge=1, le=200),
async def get_interactions(
name_arabic: str,
limit: int = Query(50, ge=1, le=200, description="Maximum interactions to return"),
): ):
""" """All direct relationships for a narrator — who they narrated from/to."""
Get all interactions of a narrator who they narrated from, q_norm = normalize_name(name_arabic)
who narrated from them, teachers, students.
"""
rows = db.neo4j_query(""" rows = db.neo4j_query("""
MATCH (n:Narrator {name_arabic: $name}) MATCH (a:Narrator)-[r]-(b:Narrator)
OPTIONAL MATCH (n)-[r1:NARRATED_FROM]->(other1:Narrator) WHERE toLower(a.name_arabic) CONTAINS toLower($name)
WITH n, collect(DISTINCT { AND type(r) IN ['NARRATED_FROM', 'TEACHER_OF']
narrator_b: other1.name_arabic, WITH a, b, type(r) AS rel_type
narrator_b_trans: other1.name_transliterated, OPTIONAL MATCH (a)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b)
type: 'NARRATED_FROM',
hadith_ids: r1.hadith_ids
}) AS outgoing
OPTIONAL MATCH (other2:Narrator)-[r2:NARRATED_FROM]->(n)
WITH n, outgoing, collect(DISTINCT {
narrator_b: other2.name_arabic,
narrator_b_trans: other2.name_transliterated,
type: 'HEARD_BY',
hadith_ids: r2.hadith_ids
}) AS incoming
OPTIONAL MATCH (teacher:Narrator)-[r3:TEACHER_OF]->(n)
WITH n, outgoing, incoming, collect(DISTINCT {
narrator_b: teacher.name_arabic,
narrator_b_trans: teacher.name_transliterated,
type: 'TEACHER_OF',
hadith_ids: []
}) AS teacher_rels
OPTIONAL MATCH (n)-[r4:TEACHER_OF]->(student:Narrator)
WITH n, outgoing, incoming, teacher_rels, collect(DISTINCT {
narrator_b: student.name_arabic,
narrator_b_trans: student.name_transliterated,
type: 'STUDENT_OF',
hadith_ids: []
}) AS student_rels
RETURN n.name_arabic AS narrator_a,
n.name_transliterated AS narrator_a_trans,
outgoing + incoming + teacher_rels + student_rels AS interactions
""", {"name": name_arabic})
if not rows:
raise HTTPException(status_code=404, detail="Narrator not found")
result = []
row = rows[0]
for interaction in row["interactions"]:
if not interaction.get("narrator_b"):
continue
hadith_ids = interaction.get("hadith_ids") or []
result.append(NarratorInteraction(
narrator_a=row["narrator_a"],
narrator_a_transliterated=row.get("narrator_a_trans") or "",
narrator_b=interaction["narrator_b"],
narrator_b_transliterated=interaction.get("narrator_b_trans") or "",
relationship_type=interaction["type"],
shared_hadith_count=len(hadith_ids),
hadith_ids=[str(h) for h in hadith_ids[:20]],
))
return result[:limit]
@router.get("/who-met-who", response_model=list[NarratorInteraction],
summary="Check if two narrators are connected",
description="Finds the shortest path between two narrators in the knowledge graph. "
"Reveals whether they had a direct or indirect relationship through "
"narration chains, teacher/student bonds, or shared connections. "
"Example: `/narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`")
async def who_met_who(
narrator_a: str = Query(
..., description="First narrator name (Arabic). Example: الزهري",
examples=["الزهري", "أبو هريرة"],
),
narrator_b: str = Query(
..., description="Second narrator name (Arabic). Example: أنس بن مالك",
examples=["أنس بن مالك", "عمر بن الخطاب"],
),
):
"""
Check if two narrators had a relationship did they meet,
narrate from each other, or share a teacher/student bond?
"""
rows = db.neo4j_query("""
MATCH (a:Narrator), (b:Narrator)
WHERE a.name_arabic CONTAINS $name_a
AND b.name_arabic CONTAINS $name_b
OPTIONAL MATCH path = shortestPath((a)-[*..6]-(b))
WITH a, b, path,
[r IN relationships(path) | {
type: type(r),
from: startNode(r).name_arabic,
from_trans: startNode(r).name_transliterated,
to: endNode(r).name_arabic,
to_trans: endNode(r).name_transliterated
}] AS rels
RETURN a.name_arabic AS narrator_a, RETURN a.name_arabic AS narrator_a,
a.name_transliterated AS narrator_a_trans, a.name_transliterated AS narrator_a_transliterated,
b.name_arabic AS narrator_b, b.name_arabic AS narrator_b,
b.name_transliterated AS narrator_b_trans, b.name_transliterated AS narrator_b_transliterated,
length(path) AS distance, rel_type AS relationship_type,
rels count(DISTINCT h) AS shared_hadith_count,
""", {"name_a": narrator_a, "name_b": narrator_b}) collect(DISTINCT h.id)[..20] AS hadith_ids
ORDER BY shared_hadith_count DESC
LIMIT $limit
""", {"name": q_norm, "limit": limit})
if not rows or rows[0].get("distance") is None: return [NarratorInteraction(**r) for r in rows]
return []
row = rows[0]
return [NarratorInteraction( # ── Narrator network (graph visualization) ─────────────────────────────────
narrator_a=rel["from"],
narrator_a_transliterated=rel.get("from_trans") or "", @router.get("/network/{name_arabic}", response_model=NarratorNetwork)
narrator_b=rel["to"], async def narrator_network(
narrator_b_transliterated=rel.get("to_trans") or "", name_arabic: str = Path(..., description="Narrator Arabic name"),
relationship_type=rel["type"], limit: int = Query(50, ge=1, le=200),
) for rel in (row.get("rels") or [])] ):
"""
Get a narrator's connection network — all incoming/outgoing relationships.
Useful for network visualization.
"""
q_norm = normalize_name(name_arabic)
# Center narrator
center_row = db.neo4j_query_one("""
MATCH (n:Narrator)
WHERE toLower(n.name_arabic) CONTAINS toLower($name)
OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
n.generation AS generation,
n.reliability_grade AS reliability_grade,
count(DISTINCT h) AS hadith_count
""", {"name": q_norm})
if not center_row:
raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}")
# Connections
conn_rows = db.neo4j_query("""
MATCH (a:Narrator)-[r]-(b:Narrator)
WHERE toLower(a.name_arabic) CONTAINS toLower($name)
AND type(r) IN ['NARRATED_FROM', 'TEACHER_OF']
RETURN b.name_arabic AS narrator,
b.name_transliterated AS narrator_transliterated,
type(r) AS connection_type,
CASE WHEN startNode(r) = a THEN 'outgoing' ELSE 'incoming' END AS direction
LIMIT $limit
""", {"name": q_norm, "limit": limit})
return NarratorNetwork(
center=NarratorSummary(**center_row),
connections=[NarratorConnection(**r) for r in conn_rows],
total_connections=len(conn_rows),
)
# ── Who met who (shortest path) ────────────────────────────────────────────
@router.get("/who-met-who", response_model=WhoMetWhoResult)
async def who_met_who(
    narrator_a: str = Query(..., description="First narrator (Arabic)"),
    narrator_b: str = Query(..., description="Second narrator (Arabic)"),
):
    """
    Shortest path between two narrators in the knowledge graph.
    Useful to see how a narrator connects to the Prophet ﷺ.

    Both inputs are passed through ``normalize_name`` and matched as
    case-insensitive substrings of ``Narrator.name_arabic``. One matching
    pair of *distinct* narrators is picked and the shortest path between
    them (at most 10 relationships, any type, any direction) is returned.

    Raises:
        HTTPException: 404 when the query yields no row, i.e. no pair of
            distinct narrators matched or no path exists within 10 hops.
    """
    a_norm = normalize_name(narrator_a)
    b_norm = normalize_name(narrator_b)

    # `a <> b` matters: with substring matching both inputs can resolve to the
    # same node, and Neo4j's shortestPath() rejects identical start/end nodes
    # by default, which would surface as a server error instead of a 404.
    row = db.neo4j_query_one("""
        MATCH (a:Narrator), (b:Narrator)
        WHERE toLower(a.name_arabic) CONTAINS toLower($a)
          AND toLower(b.name_arabic) CONTAINS toLower($b)
          AND a <> b
        WITH a, b LIMIT 1
        MATCH path = shortestPath((a)-[*..10]-(b))
        RETURN [n IN nodes(path) |
                {name_arabic: n.name_arabic,
                 name_transliterated: n.name_transliterated,
                 generation: n.generation}] AS path_nodes,
               [r IN relationships(path) | type(r)] AS rel_types,
               length(path) AS path_length
    """, {"a": a_norm, "b": b_norm})

    if not row:
        raise HTTPException(
            status_code=404,
            detail=f"No path found between '{narrator_a}' and '{narrator_b}'",
        )

    # The response echoes the caller's original (un-normalized) names.
    return WhoMetWhoResult(
        narrator_a=narrator_a,
        narrator_b=narrator_b,
        path=[PathNode(**n) for n in (row.get("path_nodes") or [])],
        path_length=row.get("path_length"),
        # `or []` guards against a present-but-null value, same as path_nodes.
        relationship_types=row.get("rel_types") or [],
    )

View File

@ -6,12 +6,16 @@ from typing import Optional
from app.services.database import db from app.services.database import db
from app.config import get_settings from app.config import get_settings
from app.models.schemas import SemanticSearchResult, FullTextSearchResult, HadithSummary from app.utils.arabic import normalize_query
from app.models.schemas import (
SemanticSearchResult, FullTextSearchResult, CombinedSearchResult,
HadithSummary,
)
router = APIRouter(prefix="/search", tags=["Search"]) router = APIRouter(prefix="/search", tags=["Search"])
async def get_embedding(text: str) -> list[float]: async def _get_embedding(text: str) -> list[float]:
"""Get embedding vector from TEI (BGE-M3).""" """Get embedding vector from TEI (BGE-M3)."""
settings = get_settings() settings = get_settings()
response = await db.http_client.post( response = await db.http_client.post(
@ -22,7 +26,6 @@ async def get_embedding(text: str) -> list[float]:
raise HTTPException(status_code=502, detail=f"TEI embedding failed: {response.text}") raise HTTPException(status_code=502, detail=f"TEI embedding failed: {response.text}")
embeddings = response.json() embeddings = response.json()
# TEI returns list of embeddings; we sent one input
if isinstance(embeddings, list) and len(embeddings) > 0: if isinstance(embeddings, list) and len(embeddings) > 0:
if isinstance(embeddings[0], list): if isinstance(embeddings[0], list):
return embeddings[0] return embeddings[0]
@ -30,34 +33,25 @@ async def get_embedding(text: str) -> list[float]:
raise HTTPException(status_code=502, detail="Unexpected TEI response format") raise HTTPException(status_code=502, detail="Unexpected TEI response format")
@router.get("/semantic", response_model=list[SemanticSearchResult], # ── Semantic search ─────────────────────────────────────────────────────────
summary="Semantic search (find by meaning)",
description="Search hadiths by meaning using BGE-M3 multilingual embeddings + Qdrant. " @router.get("/semantic", response_model=list[SemanticSearchResult])
"Supports cross-language queries: search in English and find Arabic hadiths, or vice versa. "
"Example: `what did the prophet say about fasting` → finds Arabic hadiths about صيام")
async def semantic_search( async def semantic_search(
q: str = Query( q: str = Query(..., min_length=2, description="Search query (any language — Arabic, English, etc.)"),
..., min_length=2, collection: Optional[str] = Query(None, description="Filter by collection name"),
description="Search query in any language. The embedding model handles Arabic, English, and Urdu.", limit: int = Query(10, ge=1, le=50),
examples=["what is the reward of prayer", "أحاديث عن الصيام", "حكم الربا"],
),
collection: Optional[str] = Query(
None,
description="Filter by collection name. Example: Sahih Bukhari",
),
limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
): ):
""" """
Semantic search find hadiths by meaning, not just keywords. Semantic search find hadiths by meaning, not just keywords.
Supports Arabic, English, and cross-language queries. Supports cross-language queries (English query Arabic results).
Uses BGE-M3 embeddings + Qdrant vector search. Uses BGE-M3 embeddings + Qdrant vector search.
""" """
if not db.qdrant_available():
raise HTTPException(status_code=503, detail="Qdrant unavailable")
settings = get_settings() settings = get_settings()
query_vector = await _get_embedding(q)
# Get query embedding from TEI
query_vector = await get_embedding(q)
# Build Qdrant filter if collection specified
query_filter = None query_filter = None
if collection: if collection:
from qdrant_client.models import Filter, FieldCondition, MatchValue from qdrant_client.models import Filter, FieldCondition, MatchValue
@ -65,7 +59,6 @@ async def semantic_search(
must=[FieldCondition(key="collection", match=MatchValue(value=collection))] must=[FieldCondition(key="collection", match=MatchValue(value=collection))]
) )
# Search Qdrant
results = db.qdrant.search( results = db.qdrant.search(
collection_name=settings.qdrant_collection, collection_name=settings.qdrant_collection,
query_vector=query_vector, query_vector=query_vector,
@ -80,8 +73,8 @@ async def semantic_search(
output.append(SemanticSearchResult( output.append(SemanticSearchResult(
hadith=HadithSummary( hadith=HadithSummary(
id=str(payload.get("id", hit.id)), id=str(payload.get("id", hit.id)),
collection=payload.get("collection", ""), collection=payload.get("collection"),
hadith_number=payload.get("hadith_number", 0), hadith_number=payload.get("hadith_number"),
grade=payload.get("grade"), grade=payload.get("grade"),
arabic_text=(payload.get("arabic_text") or "")[:300], arabic_text=(payload.get("arabic_text") or "")[:300],
), ),
@ -92,74 +85,53 @@ async def semantic_search(
return output return output
@router.get("/fulltext", response_model=list[FullTextSearchResult], # ── Full-text Arabic search ─────────────────────────────────────────────────
summary="Full-text Arabic search",
description="Keyword search using Elasticsearch with Arabic morphological analysis (stemming, root extraction). " @router.get("/fulltext", response_model=list[FullTextSearchResult])
"Returns highlighted text fragments showing where matches occurred. "
"Handles both vocalized (الصَّلاة) and unvocalized (الصلاة) Arabic.")
async def fulltext_search( async def fulltext_search(
q: str = Query( q: str = Query(..., min_length=2, description="Arabic text search query"),
..., min_length=2, collection: Optional[str] = Query(None, description="Filter by collection"),
description="Arabic text search query. Examples: الصلاة (prayer), النكاح (marriage), الجهاد (jihad)", limit: int = Query(10, ge=1, le=50),
examples=["الصلاة", "صيام رمضان", "بيع وشراء"],
),
collection: Optional[str] = Query(
None,
description="Filter by collection. Example: Sahih Muslim",
),
limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
): ):
""" """
Full-text Arabic search using Elasticsearch. Full-text Arabic search using Elasticsearch.
Supports Arabic morphological analysis. Supports Arabic morphological analysis (root-based matching).
""" """
if not db.es_available():
raise HTTPException(status_code=503, detail="Elasticsearch unavailable")
settings = get_settings() settings = get_settings()
# Build ES query must = [{"multi_match": {
must = [
{
"multi_match": {
"query": q, "query": q,
"fields": ["arabic_text^3", "arabic_normalized^2", "matn", "sanad"], "fields": ["arabic_text^3", "english_text", "urdu_text"],
"type": "best_fields", "type": "best_fields",
"analyzer": "arabic", "analyzer": "arabic",
} }}]
}
]
if collection: if collection:
must.append({"match": {"collection_name": collection}}) must.append({"match": {"collection": collection}})
body = { body = {
"query": {"bool": {"must": must}}, "query": {"bool": {"must": must}},
"highlight": { "highlight": {
"fields": { "fields": {"arabic_text": {"fragment_size": 200, "number_of_fragments": 3}},
"arabic_text": {"fragment_size": 200, "number_of_fragments": 2},
"matn": {"fragment_size": 200, "number_of_fragments": 1},
}
}, },
"size": limit, "size": limit,
} }
try: resp = db.es.search(index=settings.es_index, body=body)
response = db.es.search(index=settings.es_index, body=body) hits = resp.get("hits", {}).get("hits", [])
except Exception as e:
# ES index might not exist yet
raise HTTPException(status_code=503, detail=f"Elasticsearch error: {str(e)}")
output = [] output = []
for hit in response["hits"]["hits"]: for hit in hits:
src = hit["_source"] src = hit["_source"]
highlights = [] highlights = hit.get("highlight", {}).get("arabic_text", [])
if "highlight" in hit:
for field_highlights in hit["highlight"].values():
highlights.extend(field_highlights)
output.append(FullTextSearchResult( output.append(FullTextSearchResult(
hadith=HadithSummary( hadith=HadithSummary(
id=str(src.get("id", hit["_id"])), id=str(src.get("id", hit["_id"])),
collection=src.get("collection_name", ""), collection=src.get("collection"),
hadith_number=src.get("hadith_number", 0), hadith_number=src.get("hadith_number"),
grade=src.get("grade"), grade=src.get("grade"),
arabic_text=(src.get("arabic_text") or "")[:300], arabic_text=(src.get("arabic_text") or "")[:300],
), ),
@ -170,38 +142,55 @@ async def fulltext_search(
return output return output
@router.get("/combined", response_model=dict, # ── Combined search (semantic + fulltext) ───────────────────────────────────
summary="Combined search (semantic + full-text)",
description="Runs both semantic and full-text search in parallel and returns merged results. " @router.get("/combined", response_model=list[CombinedSearchResult])
"Best for the mobile app search bar — gives both meaning-based and keyword-based results. "
"Returns `{semantic: [...], fulltext: [...], query: '...'}`")
async def combined_search( async def combined_search(
q: str = Query( q: str = Query(..., min_length=2, description="Search query"),
..., min_length=2, collection: Optional[str] = Query(None),
description="Search query. Works with Arabic keywords or natural language in any language.", limit: int = Query(10, ge=1, le=50),
examples=["الصلاة في وقتها", "hadith about charity"], semantic_weight: float = Query(0.6, ge=0, le=1, description="Weight for semantic score (0-1)"),
),
collection: Optional[str] = Query(None, description="Filter by collection name"),
limit: int = Query(10, ge=1, le=20, description="Results per search type (max 20)"),
): ):
""" """Combined semantic + full-text search. Results merged and ranked by weighted score."""
Combined search runs both semantic and full-text in parallel, results_map: dict[str, CombinedSearchResult] = {}
returns merged results. Best for the mobile app search bar.
"""
import asyncio
semantic_task = semantic_search(q=q, collection=collection, limit=limit) # Semantic
# Full-text only makes sense for Arabic queries if db.qdrant_available():
fulltext_task = fulltext_search(q=q, collection=collection, limit=limit) try:
sem_results = await semantic_search(q=q, collection=collection, limit=limit)
semantic_results, fulltext_results = await asyncio.gather( for sr in sem_results:
semantic_task, hid = sr.hadith.id
fulltext_task, results_map[hid] = CombinedSearchResult(
return_exceptions=True, hadith=sr.hadith,
semantic_score=sr.score,
combined_score=sr.score * semantic_weight,
source="semantic",
) )
except Exception:
pass
return { # Full-text
"semantic": semantic_results if not isinstance(semantic_results, Exception) else [], if db.es_available():
"fulltext": fulltext_results if not isinstance(fulltext_results, Exception) else [], try:
"query": q, ft_results = await fulltext_search(q=q, collection=collection, limit=limit)
} ft_weight = 1.0 - semantic_weight
for fr in ft_results:
hid = fr.hadith.id
norm_score = min(fr.score / 20.0, 1.0)
if hid in results_map:
existing = results_map[hid]
existing.fulltext_score = norm_score
existing.combined_score += norm_score * ft_weight
existing.source = "both"
else:
results_map[hid] = CombinedSearchResult(
hadith=fr.hadith,
fulltext_score=norm_score,
combined_score=norm_score * ft_weight,
source="fulltext",
)
except Exception:
pass
results = sorted(results_map.values(), key=lambda x: x.combined_score, reverse=True)
return results[:limit]

View File

@ -1,6 +1,7 @@
""" """
Database connection manager initializes and provides access to Database connections PostgreSQL, Neo4j, Qdrant, Elasticsearch, TEI.
PostgreSQL, Neo4j, Qdrant, and Elasticsearch clients. Resilient startup: each backend wrapped in try/except so the app
starts even if some services are temporarily unavailable.
""" """
import psycopg2 import psycopg2
import psycopg2.pool import psycopg2.pool
@ -53,7 +54,7 @@ class Database:
except Exception as e: except Exception as e:
print(f"⚠️ Neo4j failed: {e}") print(f"⚠️ Neo4j failed: {e}")
# Qdrant # Qdrant (URL-based connection — matches the working k8s setup)
try: try:
self.qdrant = QdrantClient( self.qdrant = QdrantClient(
url=f"http://{settings.qdrant_host}:{settings.qdrant_port}", url=f"http://{settings.qdrant_host}:{settings.qdrant_port}",
@ -92,13 +93,6 @@ class Database:
# ── PostgreSQL helpers ── # ── PostgreSQL helpers ──
def get_pg(self):
conn = self.pg_pool.getconn()
try:
yield conn
finally:
self.pg_pool.putconn(conn)
def pg_query(self, query: str, params: tuple = None) -> list[dict]: def pg_query(self, query: str, params: tuple = None) -> list[dict]:
conn = self.pg_pool.getconn() conn = self.pg_pool.getconn()
try: try:
@ -112,9 +106,16 @@ class Database:
rows = self.pg_query(query, params) rows = self.pg_query(query, params)
return rows[0] if rows else None return rows[0] if rows else None
def pg_count(self, query: str, params: tuple = None) -> int:
    """Run a count query through ``pg_query_one`` and return its ``count`` column.

    Returns 0 when no row comes back (or the row is empty), and also when
    the row has no ``count`` key.
    """
    first = self.pg_query_one(query, params)
    if not first:
        return 0
    return first.get("count", 0)
# ── Neo4j helpers ── # ── Neo4j helpers ──
def neo4j_query(self, query: str, params: dict = None) -> list[dict]:
    """Run a Cypher query and return every result record as a plain dict.

    Returns an empty list, without running anything, when ``neo4j_driver``
    is not set.
    """
    driver = self.neo4j_driver
    if not driver:
        return []
    bound = params or {}
    with driver.session() as session:
        # Materialize inside the ``with`` so records are read before the
        # session is closed.
        return [dict(rec) for rec in session.run(query, bound)]
@ -123,6 +124,25 @@ class Database:
rows = self.neo4j_query(query, params) rows = self.neo4j_query(query, params)
return rows[0] if rows else None return rows[0] if rows else None
def neo4j_count(self, query: str, params: dict = None) -> int:
    """Run a ``RETURN count(...) AS count`` query and return that value.

    Falls back to 0 when ``neo4j_query_one`` gives back no row (or an empty
    one), or when the row lacks a ``count`` key.
    """
    record = self.neo4j_query_one(query, params)
    return record.get("count", 0) if record else 0
# ── Service availability checks ──
def pg_available(self) -> bool:
    """Report whether ``pg_pool`` has been set (is not ``None``)."""
    pool = self.pg_pool
    return pool is not None
def neo4j_available(self) -> bool:
    """Report whether ``neo4j_driver`` has been set (is not ``None``)."""
    driver = self.neo4j_driver
    return driver is not None
def qdrant_available(self) -> bool:
    """Report whether the ``qdrant`` client has been set (is not ``None``)."""
    client = self.qdrant
    return client is not None
def es_available(self) -> bool:
    """Report whether the ``es`` client has been set (is not ``None``)."""
    client = self.es
    return client is not None
# Global instance # Global instance
db = Database() db = Database()