diff --git a/app/models/schemas.py b/app/models/schemas.py index c5e4e7c..5447914 100644 --- a/app/models/schemas.py +++ b/app/models/schemas.py @@ -1,14 +1,23 @@ +""" +Pydantic response models for the Hadith Scholar API. + +v2.0 changes: + - All fields that Neo4j/PG can return as null are now Optional with defaults. + - Added PaginationMeta / PaginatedResponse for paginated list endpoints. + - All existing model_config / json_schema_extra examples preserved. +""" from pydantic import BaseModel, Field from typing import Optional +from datetime import datetime -# ── Common ───────────────────────────────────────────────────────────────── +# ── Pagination (NEW in v2.0) ─────────────────────────────────────────────── class PaginationMeta(BaseModel): - total: int - page: int - per_page: int - pages: int + total: int = Field(description="Total matching items") + page: int = Field(description="Current page (1-indexed)") + per_page: int = Field(description="Items per page") + pages: int = Field(description="Total pages") model_config = { "json_schema_extra": { @@ -26,12 +35,12 @@ class PaginatedResponse(BaseModel): class HadithSummary(BaseModel): id: str = Field(description="Unique hadith UUID") - collection: str = Field(description="Collection name in English") - hadith_number: int = Field(description="Hadith number within collection") + collection: Optional[str] = Field(None, description="Collection name in English") + hadith_number: Optional[int] = Field(None, description="Hadith number within collection") grade: Optional[str] = Field(None, description="Grading: Sahih, Hasan, Da'if, etc.") - arabic_text: Optional[str] = Field(None, description="Full Arabic text (may be truncated in list views)") - matn_text: Optional[str] = Field(None, description="Body text only (without isnad)") - sanad_text: Optional[str] = Field(None, description="Chain of narration text only") + arabic_text: Optional[str] = Field(None, description="Arabic text (truncated in lists)") + sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only") + matn_text: Optional[str] = Field(None, description="Matn (body) text only") model_config = { "json_schema_extra": { @@ -40,36 +49,28 @@ class HadithSummary(BaseModel): "collection": "Sahih Bukhari", "hadith_number": 1, "grade": "Sahih", - "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ...", - "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ وَإِنَّمَا لِكُلِّ امْرِئٍ مَا نَوَى...", - "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ قَالَ حَدَّثَنَا يَحْيَى بْنُ سَعِيدٍ الأَنْصَارِيُّ" + "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ...", + "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ...", + "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ...", }] } } class TopicTag(BaseModel): - topic_arabic: str = Field(description="Topic name in Arabic, e.g. الصلاة") - topic_english: str = Field(description="Topic name in English, e.g. Prayer") - category: str = Field(description="Broad Islamic category: عقيدة، فقه، سيرة، أخلاق، تفسير") - - model_config = { - "json_schema_extra": { - "examples": [{ - "topic_arabic": "النية", - "topic_english": "Intention", - "category": "فقه" - }] - } - } + topic_arabic: str = Field("", description="Topic name in Arabic") + topic_english: str = Field("", description="Topic name in English") + category: str = Field("", description="Topic category (فقه, عقيدة, سيرة, etc.)") class NarratorInChain(BaseModel): - order: int = Field(description="Position in chain: 1=closest to compiler, last=closest to Prophet ﷺ") - name_arabic: str = Field(description="Narrator's Arabic name as it appears in the hadith text") - name_transliterated: Optional[str] = Field(None, description="Latin transliteration of the name") - entity_type: Optional[str] = Field(None, description="PERSON, KUNYA (أبو/أم), NISBA (attributional), or TITLE (رسول الله)") - transmission_verb: Optional[str] = Field(None, description="Exact Arabic transmission verb: حدثنا، أخبرنا، عن، سمعت") + order: Optional[int] = Field(None, description="Position in chain (1 = compiler-end)") + name_arabic: str = Field(description="Narrator Arabic name") + name_transliterated: str = Field("", description="Latin transliteration") + entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE") + transmission_verb: Optional[str] = Field(None, description="حدثنا, أخبرنا, عن, سمعت, etc.") + generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.") + reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.") model_config = { "json_schema_extra": { @@ -78,7 +79,9 @@ class NarratorInChain(BaseModel): "name_arabic": "الْحُمَيْدِيُّ", "name_transliterated": "al-Humaydi", "entity_type": "NISBA", - "transmission_verb": "حَدَّثَنَا" + "transmission_verb": "حَدَّثَنَا", + "generation": "تابع التابعين", + "reliability_grade": "ثقة", }] } } @@ -86,11 +89,14 @@ class NarratorInChain(BaseModel): class HadithDetail(BaseModel): id: str = Field(description="Unique hadith UUID") - collection: str = Field(description="Collection English name") - hadith_number: int = Field(description="Number within collection") - grade: Optional[str] = Field(None, description="Hadith grade") - arabic_text: Optional[str] = Field(None, description="Complete Arabic text") - sanad_text: Optional[str] = Field(None, description="Isnad (chain) text only") + collection: Optional[str] = Field(None, description="Collection name") + hadith_number: Optional[int] = Field(None, description="Hadith number") + book_number: Optional[int] = Field(None, description="Book number within collection") + grade: Optional[str] = Field(None, description="Grading") + arabic_text: Optional[str] = Field(None, description="Full Arabic text") + english_text: Optional[str] = Field(None, description="English translation") + urdu_text: Optional[str] = Field(None, description="Urdu translation") + sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only") matn_text: Optional[str] = Field(None, description="Matn (body) text only") narrator_chain: list[NarratorInChain] = Field(default_factory=list, description="Ordered narrator chain from Neo4j graph") topics: list[TopicTag] = Field(default_factory=list, description="Topic tags for searchability") @@ -121,10 +127,10 @@ class HadithDetail(BaseModel): class NarratorSummary(BaseModel): name_arabic: str = Field(description="Primary Arabic name") - name_transliterated: Optional[str] = Field(None, description="Latin transliteration") - entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE") + name_transliterated: str = Field("", description="Latin transliteration") + entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE") generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين") - reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، صدوق، ضعيف، متروك") + reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade: ثقة، صدوق، ضعيف") hadith_count: int = Field(0, description="Number of hadiths this narrator appears in") model_config = { @@ -142,6 +148,7 @@ class NarratorSummary(BaseModel): class NameForm(BaseModel): + """Alternative name forms for a narrator (kunya, nisba, laqab, etc.).""" name: str = Field(description="Alternative name form") type: str = Field(description="Name type: PERSON, KUNYA, NISBA, TITLE") @@ -165,9 +172,10 @@ class PlaceRelation(BaseModel): class NarratorProfile(BaseModel): + """Complete narrator profile — the mobile app profile page.""" name_arabic: str = Field(description="Primary Arabic name") - name_transliterated: Optional[str] = Field(None, description="Latin transliteration") - entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE") + name_transliterated: str = Field("", description="Latin transliteration") + entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE") full_nasab: Optional[str] = Field(None, description="Full lineage: فلان بن فلان بن فلان") kunya: Optional[str] = Field(None, description="أبو/أم name (e.g. أبو هريرة)") nisba: Optional[str] = Field(None, description="Attributional name (e.g. البخاري، المدني، الزهري)") @@ -186,6 +194,8 @@ class NarratorProfile(BaseModel): hadiths: list[HadithSummary] = Field(default_factory=list, description="Sample hadiths narrated (max 50)") teachers: list[NarratorSummary] = Field(default_factory=list, description="Known teachers / شيوخ") students: list[NarratorSummary] = Field(default_factory=list, description="Known students / تلاميذ") + name_forms: list[NameForm] = Field(default_factory=list, description="Alternative name forms") + family: Optional[FamilyInfo] = Field(None, description="Family info if known") places: list[PlaceRelation] = Field(default_factory=list, description="Associated places (born, lived, died, traveled)") tribes: list[str] = Field(default_factory=list, description="Tribal affiliations (e.g. قريش، دوس، الأنصار)") bio_verified: bool = Field(False, description="Whether biography has been manually verified against classical sources") @@ -208,12 +218,11 @@ class NarratorProfile(BaseModel): "birth_year_ce": None, "death_year_ce": 676, "biography_summary_arabic": "أبو هريرة الدوسي، صحابي جليل، أكثر الصحابة رواية للحديث النبوي. أسلم عام خيبر ولازم النبي ﷺ.", - "biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith. He accepted Islam during Khaybar and remained close to the Prophet ﷺ.", + "biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith.", "total_hadiths_narrated_approx": 5374, "hadith_count": 142, - "hadiths": [], - "teachers": [{"name_arabic": "رسول الله ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": None, "reliability_grade": None, "hadith_count": 0}], - "students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 89}], + "teachers": [{"name_arabic": "النبي ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": "نبي", "reliability_grade": None, "hadith_count": 0}], + "students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 0}], "places": [{"place": "المدينة", "relation": "LIVED_IN"}], "tribes": ["دوس"], "bio_verified": False, @@ -222,27 +231,27 @@ class NarratorProfile(BaseModel): } -# ── Isnad Chain ──────────────────────────────────────────────────────────── +# ── Isnad Chain (D3-ready) ───────────────────────────────────────────────── class IsnadNode(BaseModel): name_arabic: str = Field(description="Narrator Arabic name") - name_transliterated: Optional[str] = Field(None, description="Latin transliteration") - entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE") - generation: Optional[str] = Field(None, description="طبقة") - reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade") + name_transliterated: str = Field("", description="Latin transliteration") + entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE") + generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.") + reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.") class IsnadLink(BaseModel): - source: str = Field(description="name_arabic of narrator who received the hadith") - target: str = Field(description="name_arabic of narrator they received it from") - transmission_verb: Optional[str] = Field(None, description="Exact verb: حدثنا، أخبرنا، عن، سمعت، أنبأنا") + source: str = Field(description="name_arabic of narrator who heard") + target: str = Field(description="name_arabic of narrator who transmitted") + transmission_verb: Optional[str] = Field(None, description="حدثنا, عن, أخبرنا, etc.") class IsnadChain(BaseModel): - hadith_id: str = Field(description="UUID of the hadith") - collection: str = Field(description="Collection name") - hadith_number: int = Field(description="Hadith number") - nodes: list[IsnadNode] = Field(default_factory=list, description="Narrator nodes for graph visualization") + hadith_id: str = Field(description="Hadith UUID") + collection: Optional[str] = Field(None, description="Collection name") + hadith_number: Optional[int] = Field(None, description="Hadith number") + nodes: list[IsnadNode] = Field(default_factory=list, description="Narrators in the chain") links: list[IsnadLink] = Field(default_factory=list, description="Directed edges: source heard from target") model_config = { @@ -271,12 +280,12 @@ class IsnadChain(BaseModel): class NarratorInteraction(BaseModel): narrator_a: str = Field(description="First narrator Arabic name") - narrator_a_transliterated: Optional[str] = Field(None, description="First narrator transliteration") + narrator_a_transliterated: str = Field("", description="First narrator transliteration") narrator_b: str = Field(description="Second narrator Arabic name") - narrator_b_transliterated: Optional[str] = Field(None, description="Second narrator transliteration") - relationship_type: str = Field(description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF") + narrator_b_transliterated: str = Field("", description="Second narrator transliteration") + relationship_type: str = Field("", description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF") shared_hadith_count: int = Field(0, description="Number of hadiths connecting them") - hadith_ids: list[str] = Field(default_factory=list, description="IDs of connecting hadiths (max 20)") + hadith_ids: list[str] = Field(default_factory=list, description="IDs of shared hadiths (max 20)") model_config = { "json_schema_extra": { @@ -295,7 +304,7 @@ class NarratorInteraction(BaseModel): class NarratorConnection(BaseModel): narrator: str = Field(description="Connected narrator Arabic name") - narrator_transliterated: Optional[str] = Field(None, description="Transliteration") + narrator_transliterated: str = Field("", description="Transliteration") connection_type: str = Field(description="Relationship type") direction: str = Field(description="'incoming' (they → this) or 'outgoing' (this → them)") @@ -306,12 +315,26 @@ class NarratorNetwork(BaseModel): total_connections: int = 0 +class PathNode(BaseModel): + name_arabic: str + name_transliterated: str = "" + generation: Optional[str] = None + + +class WhoMetWhoResult(BaseModel): + narrator_a: str + narrator_b: str + path: list[PathNode] = Field(default_factory=list) + path_length: Optional[int] = None + relationship_types: list[str] = Field(default_factory=list) + + # ── Search ───────────────────────────────────────────────────────────────── class SemanticSearchResult(BaseModel): hadith: HadithSummary = Field(description="Matching hadith") score: float = Field(description="Cosine similarity score (0-1, higher = more relevant)") - collection: Optional[str] = Field(None, description="Collection name") + collection: str = Field("", description="Collection name") model_config = { "json_schema_extra": { @@ -332,8 +355,8 @@ class SemanticSearchResult(BaseModel): class FullTextSearchResult(BaseModel): hadith: HadithSummary = Field(description="Matching hadith") - score: float = Field(description="Elasticsearch relevance score (higher = more relevant)") - highlights: list[str] = Field(default_factory=list, description="Text fragments with highlighted matches") + score: float = Field(description="Elasticsearch relevance score") + highlights: list[str] = Field(default_factory=list, description="Text fragments with highlighted matches") model_config = { "json_schema_extra": { @@ -346,7 +369,27 @@ class FullTextSearchResult(BaseModel): "arabic_text": "..." }, "score": 12.45, - "highlights": ["...عن الصلاة في المسجد الحرام..."] + "highlights": ["...عن الصلاة في المسجد..."] }] } } + + +class CombinedSearchResult(BaseModel): + hadith: HadithSummary + semantic_score: Optional[float] = None + fulltext_score: Optional[float] = None + combined_score: float = 0.0 + source: str = Field(description="semantic, fulltext, or both") + + +# ── Stats ────────────────────────────────────────────────────────────────── + +class SystemStats(BaseModel): + hadiths_pg: Optional[int] = None + narrators_neo4j: Optional[int] = None + places_neo4j: Optional[int] = None + tribes_neo4j: Optional[int] = None + relationships_neo4j: Optional[int] = None + embeddings_qdrant: Optional[int] = None + documents_es: Optional[int] = None diff --git a/app/routers/chains.py b/app/routers/chains.py index 7d37d46..e9bb58d 100644 --- a/app/routers/chains.py +++ b/app/routers/chains.py @@ -1,27 +1,33 @@ """ -Isnad chain endpoints — chain visualization data for hadith detail views. +Isnad chain endpoints — chain visualization data (D3-ready nodes + links). """ -from fastapi import APIRouter, Query, HTTPException +from fastapi import APIRouter, Query, Path, HTTPException from app.services.database import db -from app.models.schemas import IsnadChain, IsnadNode, IsnadLink +from app.utils.arabic import normalize_name +from app.models.schemas import ( + IsnadChain, IsnadNode, IsnadLink, + PaginatedResponse, PaginationMeta, +) router = APIRouter(prefix="/chains", tags=["Isnad Chains"]) -@router.get("/hadith/{hadith_id}", response_model=IsnadChain, - summary="Get isnad chain for a hadith", - description="Returns the complete isnad (chain of narration) as a graph structure " - "with nodes (narrators) and links (transmission relationships). " - "Ready for visualization with D3.js, vis.js, Cytoscape.js, or any graph library. " - "Each node includes narrator metadata (generation, reliability); " - "each link includes the transmission verb (حدثنا، عن، أخبرنا).") -async def get_isnad_chain(hadith_id: str): +def _paginate(total: int, page: int, per_page: int) -> PaginationMeta: + pages = max(1, (total + per_page - 1) // per_page) + return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages) + + +# ── Chain for a single hadith ────────────────────────────────────────────── + +@router.get("/hadith/{hadith_id}", response_model=IsnadChain) +async def get_isnad_chain( + hadith_id: str = Path(..., description="Hadith UUID"), +): """ - Get the full isnad chain for a hadith as a graph (nodes + links) - ready for visualization (D3.js, vis.js, etc.). + Get the isnad chain for a hadith as a directed graph (nodes + links). + Returns D3-compatible format for frontend visualization. """ - # Get hadith info hadith = db.neo4j_query_one(""" MATCH (h:Hadith {id: $hid}) RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number @@ -30,108 +36,113 @@ async def get_isnad_chain(hadith_id: str): if not hadith: raise HTTPException(status_code=404, detail="Hadith not found in graph") - # Get chain nodes - nodes = db.neo4j_query(""" - MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid}) + # Narrator nodes in the chain + nodes_rows = db.neo4j_query(""" + MATCH (n:Narrator)-[a:APPEARS_IN]->(h:Hadith {id: $hid}) RETURN n.name_arabic AS name_arabic, n.name_transliterated AS name_transliterated, n.entity_type AS entity_type, n.generation AS generation, - n.reliability_grade AS reliability_grade, - r.chain_order AS chain_order - ORDER BY r.chain_order + n.reliability_grade AS reliability_grade + ORDER BY a.chain_order """, {"hid": hadith_id}) - # Get chain links (NARRATED_FROM within this hadith's narrators) - links = db.neo4j_query(""" - MATCH (a:Narrator)-[r1:APPEARS_IN]->(h:Hadith {id: $hid}) - MATCH (b:Narrator)-[r2:APPEARS_IN]->(h) - MATCH (a)-[nf:NARRATED_FROM]->(b) + # Transmission links — NARRATED_FROM edges store hadith_ids as array + links_rows = db.neo4j_query(""" + MATCH (a:Narrator)-[nf:NARRATED_FROM]->(b:Narrator) WHERE $hid IN nf.hadith_ids RETURN a.name_arabic AS source, b.name_arabic AS target, nf.transmission_verb AS transmission_verb + ORDER BY a.name_arabic """, {"hid": hadith_id}) - # If no NARRATED_FROM edges with hadith_id, fall back to chain order - if not links and len(nodes) > 1: - sorted_nodes = sorted(nodes, key=lambda n: n.get("chain_order") or 999) - links = [] - for i in range(len(sorted_nodes) - 1): - links.append({ - "source": sorted_nodes[i]["name_arabic"], - "target": sorted_nodes[i + 1]["name_arabic"], - "transmission_verb": None, - }) - return IsnadChain( - hadith_id=str(hadith["id"]), - collection=hadith["collection"] or "", - hadith_number=hadith["hadith_number"] or 0, - nodes=[IsnadNode(**n) for n in nodes], - links=[IsnadLink(**l) for l in links], + hadith_id=hadith_id, + collection=hadith.get("collection"), + hadith_number=hadith.get("hadith_number"), + nodes=[IsnadNode(**r) for r in nodes_rows], + links=[IsnadLink(**r) for r in links_rows], ) -@router.get("/narrator/{name_arabic}", response_model=list[IsnadChain], - summary="Get all chains for a narrator", - description="Returns all isnad chains that include a specific narrator. " - "Useful for visualizing how a narrator connects to the Prophet ﷺ " - "through different transmission paths. " - "Example: `/chains/narrator/الزهري`") -async def get_narrator_chains( - name_arabic: str, - limit: int = Query(10, ge=1, le=50, description="Maximum chains to return"), +# ── All chains containing a narrator (paginated) ────────────────────────── + +@router.get("/narrator/{name_arabic}", response_model=PaginatedResponse) +async def chains_by_narrator( + name_arabic: str = Path(..., description="Narrator Arabic name"), + page: int = Query(1, ge=1), + per_page: int = Query(10, ge=1, le=50), ): """ - Get all isnad chains that include a specific narrator. - Useful for seeing how a narrator connects to the Prophet ﷺ. + All isnad chains containing a narrator. + Useful for seeing how a narrator connects to the Prophet ﷺ across collections. """ + q_norm = normalize_name(name_arabic) + skip = (page - 1) * per_page + + total = db.neo4j_count(""" + MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith) + WHERE toLower(n.name_arabic) CONTAINS toLower($name) + RETURN count(DISTINCT h) AS count + """, {"name": q_norm}) + hadith_ids = db.neo4j_query(""" - MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith) - RETURN h.id AS id - LIMIT $limit - """, {"name": name_arabic, "limit": limit}) + MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith) + WHERE toLower(n.name_arabic) CONTAINS toLower($name) + RETURN DISTINCT h.id AS id + ORDER BY h.id + SKIP $skip LIMIT $limit + """, {"name": q_norm, "skip": skip, "limit": per_page}) chains = [] for row in hadith_ids: chain = await get_isnad_chain(str(row["id"])) chains.append(chain) - return chains + return PaginatedResponse( + data=chains, + meta=_paginate(total, page, per_page), + ) -@router.get("/common-chains", response_model=list[dict], - summary="Find shared chains between two narrators", - description="Find hadiths where both narrators appear in the same isnad chain. " - "Useful for verifying narrator relationships and finding corroborating chains. " - "Example: `/chains/common-chains?narrator_a=الزهري&narrator_b=أنس بن مالك`") +# ── Common chains between two narrators (paginated) ─────────────────────── + +@router.get("/common", response_model=PaginatedResponse) async def find_common_chains( - narrator_a: str = Query( - ..., description="First narrator (Arabic). Example: الزهري", - examples=["الزهري"], - ), - narrator_b: str = Query( - ..., description="Second narrator (Arabic). Example: أنس بن مالك", - examples=["أنس بن مالك"], - ), - limit: int = Query(10, ge=1, le=50, description="Maximum results"), + narrator_a: str = Query(..., description="First narrator (Arabic)"), + narrator_b: str = Query(..., description="Second narrator (Arabic)"), + page: int = Query(1, ge=1), + per_page: int = Query(10, ge=1, le=50), ): - """ - Find hadiths where both narrators appear in the same chain. - Useful for verifying narrator relationships. - """ + """Find hadiths where both narrators appear in the same chain.""" + a_norm = normalize_name(narrator_a) + b_norm = normalize_name(narrator_b) + skip = (page - 1) * per_page + + total = db.neo4j_count(""" + MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator) + WHERE toLower(a.name_arabic) CONTAINS toLower($a) + AND toLower(b.name_arabic) CONTAINS toLower($b) + AND a <> b + RETURN count(DISTINCT h) AS count + """, {"a": a_norm, "b": b_norm}) + rows = db.neo4j_query(""" MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator) - WHERE a.name_arabic CONTAINS $name_a - AND b.name_arabic CONTAINS $name_b + WHERE toLower(a.name_arabic) CONTAINS toLower($a) + AND toLower(b.name_arabic) CONTAINS toLower($b) AND a <> b - RETURN h.id AS hadith_id, + RETURN DISTINCT h.id AS hadith_id, h.collection AS collection, h.hadith_number AS hadith_number, a.name_arabic AS narrator_a, b.name_arabic AS narrator_b - LIMIT $limit - """, {"name_a": narrator_a, "name_b": narrator_b, "limit": limit}) + ORDER BY h.collection, h.hadith_number + SKIP $skip LIMIT $limit + """, {"a": a_norm, "b": b_norm, "skip": skip, "limit": per_page}) - return [dict(r) for r in rows] + return PaginatedResponse( + data=[dict(r) for r in rows], + meta=_paginate(total, page, per_page), + ) diff --git a/app/routers/hadiths.py b/app/routers/hadiths.py index 226baf7..2c0162a 100644 --- a/app/routers/hadiths.py +++ b/app/routers/hadiths.py @@ -1,10 +1,13 @@ """ Hadith endpoints — details, listing, search by keyword/narrator/topic/place. +All query parameters are Arabic-normalized for consistent matching. +All list endpoints support pagination via page + per_page. """ -from fastapi import APIRouter, Query, HTTPException +from fastapi import APIRouter, Query, Path, HTTPException from typing import Optional from app.services.database import db +from app.utils.arabic import normalize_query from app.models.schemas import ( HadithDetail, HadithSummary, NarratorInChain, TopicTag, PaginatedResponse, PaginationMeta, @@ -13,36 +16,44 @@ from app.models.schemas import ( router = APIRouter(prefix="/hadiths", tags=["Hadiths"]) -@router.get("/{hadith_id}", response_model=HadithDetail, - summary="Get hadith by ID", - description="Retrieve full hadith details including Arabic text, sanad/matn separation, " - "ordered narrator chain from the knowledge graph, and topic tags.") -async def get_hadith(hadith_id: str): +def _paginate(total: int, page: int, per_page: int) -> PaginationMeta: + pages = max(1, (total + per_page - 1) // per_page) + return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages) + + +# ── Single hadith by ID ──────────────────────────────────────────────────── + +@router.get("/{hadith_id}", response_model=HadithDetail) +async def get_hadith(hadith_id: str = Path(..., description="Hadith UUID")): """Get full hadith details by ID, including narrator chain and topics from Neo4j.""" # Base hadith from PostgreSQL hadith = db.pg_query_one(""" SELECT h.id, c.name_english AS collection, h.hadith_number, - h.grade, h.arabic_text, h.sanad, h.matn + h.book_number, h.grade, h.arabic_text, h.english_text, + h.urdu_text, h.sanad, h.matn FROM hadiths h JOIN collections c ON c.id = h.collection_id - WHERE h.id = %s + WHERE h.id::text = %s """, (hadith_id,)) if not hadith: raise HTTPException(status_code=404, detail="Hadith not found") - # Enrich with chain + topics from Neo4j + # Narrator chain from Neo4j chain = db.neo4j_query(""" MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid}) RETURN n.name_arabic AS name_arabic, n.name_transliterated AS name_transliterated, n.entity_type AS entity_type, + n.generation AS generation, + n.reliability_grade AS reliability_grade, r.chain_order AS order, r.transmission_verb AS transmission_verb ORDER BY r.chain_order """, {"hid": hadith_id}) + # Topics from Neo4j topics = db.neo4j_query(""" MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic) RETURN t.topic_arabic AS topic_arabic, @@ -52,10 +63,13 @@ async def get_hadith(hadith_id: str): return HadithDetail( id=str(hadith["id"]), - collection=hadith["collection"], - hadith_number=hadith["hadith_number"], - grade=hadith["grade"], - arabic_text=hadith["arabic_text"], + collection=hadith.get("collection"), + hadith_number=hadith.get("hadith_number"), + book_number=hadith.get("book_number"), + grade=hadith.get("grade"), + arabic_text=hadith.get("arabic_text"), + english_text=hadith.get("english_text"), + urdu_text=hadith.get("urdu_text"), sanad_text=hadith.get("sanad"), matn_text=hadith.get("matn"), narrator_chain=[NarratorInChain(**c) for c in chain], @@ -63,183 +77,155 @@ async def get_hadith(hadith_id: str): ) -@router.get("/collection/{collection_name}", response_model=PaginatedResponse, - summary="List hadiths by collection", - description="Paginated listing of hadiths in a specific collection. " - "Collection names use partial matching (e.g. 'bukhari' matches 'Sahih Bukhari').") +# ── By collection + number ───────────────────────────────────────────────── + +@router.get("/by-ref/{collection}/{number}", response_model=HadithDetail) +async def get_hadith_by_reference( + collection: str = Path(..., description="Collection name, e.g. 'Sahih Bukhari'"), + number: int = Path(..., description="Hadith number within the collection"), +): + """Lookup hadith by collection name + hadith number.""" + row = db.pg_query_one(""" + SELECT h.id FROM hadiths h + JOIN collections c ON c.id = h.collection_id + WHERE c.name_english = %s AND h.hadith_number = %s + """, (collection, number)) + if not row: + raise HTTPException(status_code=404, detail=f"Hadith {collection} #{number} not found") + return await get_hadith(str(row["id"])) + + +# ── List by collection (paginated) ───────────────────────────────────────── + +@router.get("/collection/{collection_name}", response_model=PaginatedResponse) async def list_by_collection( - collection_name: str, + collection_name: str = Path(..., description="Collection name"), page: int = Query(1, ge=1, description="Page number"), - per_page: int = Query(20, ge=1, le=100, description="Results per page"), + per_page: int = Query(20, ge=1, le=100, description="Items per page"), ): """List hadiths in a collection with pagination.""" - offset = (page - 1) * per_page - - total_row = db.pg_query_one(""" - SELECT COUNT(*) AS total - FROM hadiths h + total = db.pg_count(""" + SELECT count(*) FROM hadiths h JOIN collections c ON c.id = h.collection_id - WHERE c.name_english ILIKE %s - """, (f"%{collection_name}%",)) - total = total_row["total"] if total_row else 0 + WHERE c.name_english = %s + """, (collection_name,)) + offset = (page - 1) * per_page rows = db.pg_query(""" SELECT h.id, c.name_english AS collection, h.hadith_number, h.grade, LEFT(h.arabic_text, 300) AS arabic_text FROM hadiths h JOIN collections c ON c.id = h.collection_id - WHERE c.name_english ILIKE %s + WHERE c.name_english = %s ORDER BY h.hadith_number LIMIT %s OFFSET %s - """, (f"%{collection_name}%", per_page, offset)) + """, (collection_name, per_page, offset)) - return PaginatedResponse( - meta=PaginationMeta( - total=total, page=page, per_page=per_page, - pages=(total + per_page - 1) // per_page, - ), - data=[HadithSummary( - id=str(r["id"]), collection=r["collection"], - hadith_number=r["hadith_number"], grade=r["grade"], - arabic_text=r["arabic_text"], - ) for r in rows], - ) + data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows] + return PaginatedResponse(data=data, meta=_paginate(total, page, per_page)) -@router.get("/number/{collection_name}/{number}", response_model=HadithDetail) -async def get_by_number(collection_name: str, number: int): - """Get a hadith by collection name and number.""" - hadith = db.pg_query_one(""" - SELECT h.id - FROM hadiths h - JOIN collections c ON c.id = h.collection_id - WHERE c.name_english ILIKE %s AND h.hadith_number = %s - """, (f"%{collection_name}%", number)) +# ── Keyword search (paginated, normalized) ───────────────────────────────── - if not hadith: - raise HTTPException(status_code=404, detail=f"Hadith #{number} not found in {collection_name}") - - return await get_hadith(str(hadith["id"])) - - -@router.get("/search/keyword", response_model=PaginatedResponse, - summary="Search hadiths by Arabic keyword", - description="Full-text keyword search across all hadith Arabic text. " - "Supports both vocalized (مَكَّةَ) and unvocalized (مكة) Arabic.") -async def search_by_keyword( - q: str = Query( - ..., min_length=2, - description="Arabic keyword to search. Examples: صلاة (prayer), زكاة (zakat), صيام (fasting), حج (hajj), نية (intention)", - examples=["صلاة", "الجنة", "رمضان"], - ), - collection: Optional[str] = Query( - None, - description="Filter by collection name. Examples: Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood", - examples=["Sahih Bukhari"], - ), - grade: Optional[str] = Query( - None, - description="Filter by hadith grade. Examples: Sahih, Hasan, Da'if", - examples=["Sahih"], - ), - page: int = Query(1, ge=1, description="Page number (1-indexed)"), - per_page: int = Query(20, ge=1, le=100, description="Results per page (max 100)"), +@router.get("/search/keyword", response_model=PaginatedResponse) +async def search_keyword( + q: str = Query(..., min_length=2, description="Arabic keyword(s) — diacritics stripped automatically"), + collection: Optional[str] = Query(None, description="Filter by collection"), + page: int = Query(1, ge=1), + per_page: int = Query(20, ge=1, le=100), ): - """Search hadiths by Arabic keyword in text.""" - offset = (page - 1) * per_page - - conditions = ["h.arabic_text ILIKE %s"] - params = [f"%{q}%"] + """Keyword search in Arabic hadith text. Query is normalized for consistent matching.""" + q_norm = normalize_query(q) + where = "WHERE h.arabic_text ILIKE %s" + params: list = [f"%{q_norm}%"] if collection: - conditions.append("c.name_english ILIKE %s") - params.append(f"%{collection}%") - if grade: - conditions.append("h.grade ILIKE %s") - params.append(f"%{grade}%") + where += " AND c.name_english = %s" + params.append(collection) - where = " AND ".join(conditions) - - total_row = db.pg_query_one(f""" - SELECT COUNT(*) AS total - FROM hadiths h - JOIN collections c ON c.id = h.collection_id - WHERE {where} - """, tuple(params)) - total = total_row["total"] if total_row else 0 - - params.extend([per_page, offset]) - rows = db.pg_query(f""" - SELECT h.id, c.name_english AS collection, h.hadith_number, - h.grade, LEFT(h.arabic_text, 300) AS arabic_text - FROM hadiths h - JOIN collections c ON c.id = h.collection_id - WHERE {where} - ORDER BY c.name_english, h.hadith_number - LIMIT %s OFFSET %s - """, tuple(params)) - - return PaginatedResponse( - meta=PaginationMeta( - total=total, page=page, per_page=per_page, - pages=(total + per_page - 1) // per_page, - ), - data=[HadithSummary( - id=str(r["id"]), collection=r["collection"], - hadith_number=r["hadith_number"], grade=r["grade"], - arabic_text=r["arabic_text"], - ) for r in rows], + total = db.pg_count( + f"SELECT count(*) FROM hadiths h JOIN collections c ON c.id = h.collection_id {where}", + tuple(params), + ) + offset = (page - 1) * per_page + rows = db.pg_query( + f"SELECT h.id, c.name_english AS collection, h.hadith_number, " + f"h.grade, LEFT(h.arabic_text, 300) AS arabic_text " + f"FROM hadiths h JOIN collections c ON c.id = h.collection_id " + f"{where} ORDER BY c.name_english, h.hadith_number " + f"LIMIT %s OFFSET %s", + tuple(params + [per_page, offset]), ) - -@router.get("/search/topic/{topic}", response_model=list[HadithSummary]) -async def search_by_topic(topic: str, limit: int = Query(20, ge=1, le=100)): - """Search hadiths by topic tag (from Neo4j).""" - rows = db.neo4j_query(""" - CALL db.index.fulltext.queryNodes('hadith_arabic_text', $topic) - YIELD node, score - RETURN node.id AS id, - node.collection AS collection, - node.hadith_number AS hadith_number, - node.grade AS grade, - left(node.matn_text, 300) AS matn_text, - score - ORDER BY score DESC - LIMIT $limit - """, {"topic": topic, "limit": limit}) - - return [HadithSummary( - id=str(r["id"]), collection=r["collection"] or "", - hadith_number=r["hadith_number"] or 0, grade=r["grade"], - matn_text=r["matn_text"], - ) for r in rows] + data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows] + return PaginatedResponse(data=data, meta=_paginate(total, page, per_page)) -@router.get("/search/narrator/{narrator_name}", response_model=list[HadithSummary], - summary="Find hadiths by narrator", - description="Find all hadiths where a specific narrator appears in the chain. " - "Searches both Arabic name and transliteration. " - "Example: `/hadiths/search/narrator/أبو هريرة`") -async def search_by_narrator( - narrator_name: str, - limit: int = Query(50, ge=1, le=200, description="Maximum results"), +# ── Search by topic (paginated, normalized) ──────────────────────────────── + +@router.get("/search/topic", response_model=PaginatedResponse) +async def search_by_topic( + q: str = Query(..., min_length=2, description="Topic keyword (Arabic or English)"), + page: int = Query(1, ge=1), + per_page: int = Query(20, ge=1, le=100), ): - """Find all hadiths narrated by a specific person.""" + """Find hadiths by topic tag from the knowledge graph.""" + q_norm = normalize_query(q) + skip = (page - 1) * per_page + + total = db.neo4j_count(""" + MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith) + WHERE toLower(t.topic_arabic) CONTAINS toLower($q) + OR toLower(t.topic_english) CONTAINS toLower($q) + RETURN count(DISTINCT h) AS count + """, {"q": q_norm}) + rows = db.neo4j_query(""" - MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith) - WHERE n.name_arabic CONTAINS $name - OR n.name_transliterated CONTAINS $name - RETURN h.id AS id, + MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith) + WHERE toLower(t.topic_arabic) CONTAINS toLower($q) + OR toLower(t.topic_english) CONTAINS toLower($q) + RETURN DISTINCT h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number, h.grade AS grade, - left(h.matn_text, 300) AS matn_text + substring(h.arabic_text, 0, 300) AS arabic_text ORDER BY h.collection, h.hadith_number - LIMIT $limit - """, {"name": narrator_name, "limit": limit}) + SKIP $skip LIMIT $limit + """, {"q": q_norm, "skip": skip, "limit": per_page}) - return [HadithSummary( - id=str(r["id"]), collection=r["collection"] or "", - hadith_number=r["hadith_number"] or 0, grade=r["grade"], - matn_text=r["matn_text"], - ) for r in rows] + data = [HadithSummary(**r) for r in rows] + return PaginatedResponse(data=data, meta=_paginate(total, page, per_page)) + + +# ── Search by narrator (paginated, normalized) ───────────────────────────── + +@router.get("/search/narrator", response_model=PaginatedResponse) +async def search_by_narrator( + q: str = Query(..., min_length=2, description="Narrator name (Arabic)"), + page: int = Query(1, ge=1), + per_page: int = Query(20, ge=1, le=100), +): + """Find all hadiths where a narrator appears in the chain.""" + q_norm = normalize_query(q) + skip = (page - 1) * per_page + + total = db.neo4j_count(""" + MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith) + WHERE toLower(n.name_arabic) CONTAINS toLower($q) + RETURN count(DISTINCT h) AS count + """, {"q": q_norm}) + + rows = db.neo4j_query(""" + MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith) + WHERE toLower(n.name_arabic) CONTAINS toLower($q) + RETURN DISTINCT h.id AS id, + h.collection AS collection, + h.hadith_number AS hadith_number, + h.grade AS grade, + substring(h.arabic_text, 0, 300) AS arabic_text + ORDER BY h.collection, h.hadith_number + SKIP $skip LIMIT $limit + """, {"q": q_norm, "skip": skip, "limit": per_page}) + + data = [HadithSummary(**r) for r in rows] + return PaginatedResponse(data=data, meta=_paginate(total, page, per_page)) diff --git a/app/routers/narrators.py b/app/routers/narrators.py index 7a61dc4..9f42a93 100644 --- a/app/routers/narrators.py +++ b/app/routers/narrators.py @@ -1,317 +1,436 @@ """ -Narrator endpoints — profiles, teacher/student network, relationships, who met who. +Narrator endpoints — search, profiles, network queries. +All queries normalize Arabic input to match post-dedup graph data. """ -from fastapi import APIRouter, Query, HTTPException +from fastapi import APIRouter, Query, Path, HTTPException from typing import Optional from app.services.database import db +from app.utils.arabic import normalize_query, normalize_name from app.models.schemas import ( - NarratorProfile, NarratorSummary, HadithSummary, - NarratorInteraction, PlaceRelation, - PaginatedResponse, PaginationMeta, + NarratorSummary, NarratorProfile, NarratorInteraction, + NarratorConnection, NarratorNetwork, + WhoMetWhoResult, PathNode, PlaceRelation, NameForm, FamilyInfo, + HadithSummary, PaginatedResponse, PaginationMeta, ) router = APIRouter(prefix="/narrators", tags=["Narrators"]) -@router.get("/search", response_model=list[NarratorSummary], - summary="Search narrators by name", - description="Full-text search across narrator names in both Arabic and Latin transliteration. " - "Uses Neo4j full-text index for fast matching.") +def _paginate(total: int, page: int, per_page: int) -> PaginationMeta: + pages = max(1, (total + per_page - 1) // per_page) + return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages) + + +# ── Search narrators by name (paginated, normalized) ─────────────────────── + +@router.get("/search", response_model=PaginatedResponse) async def search_narrators( - q: str = Query( - ..., min_length=2, - description="Narrator name in Arabic or transliteration. Examples: أبو هريرة, الزهري, Anas, Bukhari", - examples=["أبو هريرة", "الزهري", "Anas ibn Malik"], - ), - limit: int = Query(20, ge=1, le=100, description="Maximum results to return"), + q: str = Query(..., min_length=2, description="Narrator name (Arabic). Diacritics stripped automatically."), + page: int = Query(1, ge=1), + per_page: int = Query(20, ge=1, le=100), ): - """Search narrators by name (Arabic or transliterated).""" + """ + Search narrators by Arabic name. Input is normalized to match + the deduplicated graph (diacritics stripped, characters unified). + """ + q_norm = normalize_query(q) + skip = (page - 1) * per_page + + total = db.neo4j_count(""" + MATCH (n:Narrator) + WHERE toLower(n.name_arabic) CONTAINS toLower($q) + RETURN count(n) AS count + """, {"q": q_norm}) + rows = db.neo4j_query(""" - CALL db.index.fulltext.queryNodes('narrator_names', $query) - YIELD node, score - WITH node AS n, score + MATCH (n:Narrator) + WHERE toLower(n.name_arabic) CONTAINS toLower($q) OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith) RETURN n.name_arabic AS name_arabic, n.name_transliterated AS name_transliterated, n.entity_type AS entity_type, n.generation AS generation, n.reliability_grade AS reliability_grade, - count(h) AS hadith_count, - score - ORDER BY score DESC - LIMIT $limit - """, {"query": q, "limit": limit}) + count(DISTINCT h) AS hadith_count + ORDER BY hadith_count DESC + SKIP $skip LIMIT $limit + """, {"q": q_norm, "skip": skip, "limit": per_page}) - return [NarratorSummary(**r) for r in rows] + data = [NarratorSummary(**r) for r in rows] + return PaginatedResponse(data=data, meta=_paginate(total, page, per_page)) -@router.get("/profile/{name_arabic}", response_model=NarratorProfile, - summary="Get full narrator profile", - description="Complete narrator profile for the mobile app. Includes biography from classical " - "scholarship (Tahdhib al-Kamal, Taqrib al-Tahdhib), teacher/student network, " - "hadiths narrated, places, and tribal affiliations. " - "Example: `/narrators/profile/أبو هريرة`") -async def get_narrator_profile(name_arabic: str): +# ── Full narrator profile ────────────────────────────────────────────────── + +@router.get("/profile/{name_arabic}", response_model=NarratorProfile) +async def get_narrator_profile( + name_arabic: str = Path(..., description="Narrator Arabic name (exact or close match)"), +): """ - Full narrator profile — biography, hadiths, teachers, students, - places, tribes. Powers the mobile app profile page. + Complete narrator profile — biography, hadiths, teachers, students, places, tribes. + This is the mobile app profile page query. """ - # Basic info + q_norm = normalize_name(name_arabic) + + # Find the narrator node — exact first, then CONTAINS fallback narrator = db.neo4j_query_one(""" - MATCH (n:Narrator {name_arabic: $name}) - RETURN n.name_arabic AS name_arabic, - n.name_transliterated AS name_transliterated, - n.entity_type AS entity_type, - n.full_nasab AS full_nasab, - n.kunya AS kunya, - n.nisba AS nisba, - n.laqab AS laqab, - n.generation AS generation, - n.reliability_grade AS reliability_grade, - n.reliability_detail AS reliability_detail, - n.birth_year_hijri AS birth_year_hijri, - n.death_year_hijri AS death_year_hijri, - n.birth_year_ce AS birth_year_ce, - n.death_year_ce AS death_year_ce, - n.biography_summary_arabic AS biography_summary_arabic, - n.biography_summary_english AS biography_summary_english, - n.total_hadiths_narrated_approx AS total_hadiths_narrated_approx, - n.bio_verified AS bio_verified - """, {"name": name_arabic}) + MATCH (n:Narrator) + WHERE toLower(n.name_arabic) = toLower($q) + RETURN n + """, {"q": q_norm}) if not narrator: - raise HTTPException(status_code=404, detail="Narrator not found") + narrator = db.neo4j_query_one(""" + MATCH (n:Narrator) + WHERE toLower(n.name_arabic) CONTAINS toLower($q) + RETURN n + """, {"q": q_norm}) - # Hadiths - hadiths = db.neo4j_query(""" + if not narrator: + raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}") + + n = narrator.get("n", {}) + actual_name = n.get("name_arabic", q_norm) + + # Hadith count + collections + stats = db.neo4j_query_one(""" + MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith) + RETURN count(DISTINCT h) AS hadith_count, + collect(DISTINCT h.collection) AS collections + """, {"name": actual_name}) or {} + + # Teachers: narrator NARRATED_FROM teacher + teacher TEACHER_OF narrator + teachers_nf = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:NARRATED_FROM]->(t:Narrator) + OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith) + RETURN t.name_arabic AS name_arabic, + t.name_transliterated AS name_transliterated, + t.entity_type AS entity_type, + t.generation AS generation, + t.reliability_grade AS reliability_grade, + count(DISTINCT h) AS hadith_count + """, {"name": actual_name}) + + teachers_to = db.neo4j_query(""" + MATCH (t:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name}) + OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith) + RETURN t.name_arabic AS name_arabic, + t.name_transliterated AS name_transliterated, + t.entity_type AS entity_type, + t.generation AS generation, + t.reliability_grade AS reliability_grade, + count(DISTINCT h) AS hadith_count + """, {"name": actual_name}) + + # Deduplicate teachers + seen_teachers = set() + teachers = [] + for r in teachers_nf + teachers_to: + if r["name_arabic"] not in seen_teachers: + seen_teachers.add(r["name_arabic"]) + teachers.append(NarratorSummary(**r)) + + # Students: student NARRATED_FROM narrator + narrator TEACHER_OF student + students_nf = db.neo4j_query(""" + MATCH (s:Narrator)-[:NARRATED_FROM]->(n:Narrator {name_arabic: $name}) + OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith) + RETURN s.name_arabic AS name_arabic, + s.name_transliterated AS name_transliterated, + s.entity_type AS entity_type, + s.generation AS generation, + s.reliability_grade AS reliability_grade, + count(DISTINCT h) AS hadith_count + """, {"name": actual_name}) + + students_to = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(s:Narrator) + OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith) + RETURN s.name_arabic AS name_arabic, + s.name_transliterated AS name_transliterated, + s.entity_type AS entity_type, + s.generation AS generation, + s.reliability_grade AS reliability_grade, + count(DISTINCT h) AS hadith_count + """, {"name": actual_name}) + + seen_students = set() + students = [] + for r in students_nf + students_to: + if r["name_arabic"] not in seen_students: + seen_students.add(r["name_arabic"]) + students.append(NarratorSummary(**r)) + + # Places + places_rows = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[r]->(p:Place) + WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO'] + RETURN p.name_arabic AS place, type(r) AS relation + """, {"name": actual_name}) + + # Tribes + tribe_rows = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe) + RETURN t.name_arabic AS name + """, {"name": actual_name}) + + # Name forms (alternative names via RELATED_TO) + name_form_rows = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:RELATED_TO]-(alt:Narrator) + WHERE alt.name_arabic <> $name + RETURN alt.name_arabic AS name, alt.entity_type AS type + """, {"name": actual_name}) + + # Family info + family_row = db.neo4j_query_one(""" + MATCH (n:Narrator {name_arabic: $name}) + RETURN n.father AS father, n.mother AS mother, + n.spouse AS spouse, n.children AS children + """, {"name": actual_name}) + family = None + if family_row and any(family_row.get(k) for k in ["father", "mother", "spouse", "children"]): + family = FamilyInfo( + father=family_row.get("father"), + mother=family_row.get("mother"), + spouse=family_row.get("spouse"), + children=family_row.get("children") or [], + ) + + # Sample hadiths + hadith_rows = db.neo4j_query(""" MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith) RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number, h.grade AS grade, - left(h.matn_text, 200) AS matn_text + substring(h.arabic_text, 0, 300) AS arabic_text ORDER BY h.collection, h.hadith_number LIMIT 50 - """, {"name": name_arabic}) - - # Teachers (who taught this narrator) - teachers = db.neo4j_query(""" - MATCH (teacher:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name}) - OPTIONAL MATCH (teacher)-[:APPEARS_IN]->(h:Hadith) - RETURN teacher.name_arabic AS name_arabic, - teacher.name_transliterated AS name_transliterated, - teacher.entity_type AS entity_type, - teacher.generation AS generation, - teacher.reliability_grade AS reliability_grade, - count(h) AS hadith_count - """, {"name": name_arabic}) - - # Students (who this narrator taught) - students = db.neo4j_query(""" - MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(student:Narrator) - OPTIONAL MATCH (student)-[:APPEARS_IN]->(h:Hadith) - RETURN student.name_arabic AS name_arabic, - student.name_transliterated AS name_transliterated, - student.entity_type AS entity_type, - student.generation AS generation, - student.reliability_grade AS reliability_grade, - count(h) AS hadith_count - """, {"name": name_arabic}) - - # Places - places = db.neo4j_query(""" - MATCH (n:Narrator {name_arabic: $name})-[r:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place) - RETURN p.name_arabic AS place, type(r) AS relation - """, {"name": name_arabic}) - - # Tribes - tribes_rows = db.neo4j_query(""" - MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe) - RETURN t.name_arabic AS tribe - """, {"name": name_arabic}) + """, {"name": actual_name}) return NarratorProfile( - **narrator, - hadith_count=len(hadiths), - hadiths=[HadithSummary( - id=str(h["id"]), collection=h["collection"] or "", - hadith_number=h["hadith_number"] or 0, grade=h["grade"], - matn_text=h["matn_text"], - ) for h in hadiths], - teachers=[NarratorSummary(**t) for t in teachers], - students=[NarratorSummary(**s) for s in students], - places=[PlaceRelation(**p) for p in places], - tribes=[t["tribe"] for t in tribes_rows], + name_arabic=n.get("name_arabic", actual_name), + name_transliterated=n.get("name_transliterated", ""), + entity_type=n.get("entity_type", ""), + full_nasab=n.get("full_nasab"), + kunya=n.get("kunya"), + nisba=n.get("nisba"), + laqab=n.get("laqab"), + generation=n.get("generation"), + reliability_grade=n.get("reliability_grade"), + reliability_detail=n.get("reliability_detail"), + birth_year_hijri=n.get("birth_year_hijri"), + death_year_hijri=n.get("death_year_hijri"), + birth_year_ce=n.get("birth_year_ce"), + death_year_ce=n.get("death_year_ce"), + biography_summary_arabic=n.get("biography_summary_arabic"), + biography_summary_english=n.get("biography_summary_english"), + total_hadiths_narrated_approx=n.get("total_hadiths_narrated_approx"), + hadith_count=stats.get("hadith_count", 0), + hadiths=[HadithSummary(**r) for r in hadith_rows], + teachers=teachers, + students=students, + name_forms=[NameForm(**r) for r in name_form_rows], + family=family, + places=[PlaceRelation(**r) for r in places_rows], + tribes=[r["name"] for r in tribe_rows], + bio_verified=n.get("bio_verified", False), ) -@router.get("/by-generation/{generation}", response_model=list[NarratorSummary]) +# ── Narrators by generation (paginated, normalized) ──────────────────────── + +@router.get("/by-generation/{generation}", response_model=PaginatedResponse) async def narrators_by_generation( - generation: str, - limit: int = Query(50, ge=1, le=200), + generation: str = Path(..., description="Generation: صحابي, تابعي, تابع التابعين, نبي"), + page: int = Query(1, ge=1), + per_page: int = Query(20, ge=1, le=100), ): - """List narrators by generation (صحابي, تابعي, etc.).""" + """List narrators by generation (e.g. Companions, Successors).""" + q_norm = normalize_query(generation) + skip = (page - 1) * per_page + + total = db.neo4j_count(""" + MATCH (n:Narrator) + WHERE toLower(n.generation) CONTAINS toLower($gen) + RETURN count(n) AS count + """, {"gen": q_norm}) + rows = db.neo4j_query(""" MATCH (n:Narrator) - WHERE n.generation CONTAINS $gen + WHERE toLower(n.generation) CONTAINS toLower($gen) OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith) RETURN n.name_arabic AS name_arabic, n.name_transliterated AS name_transliterated, n.entity_type AS entity_type, n.generation AS generation, n.reliability_grade AS reliability_grade, - count(h) AS hadith_count + count(DISTINCT h) AS hadith_count ORDER BY hadith_count DESC - LIMIT $limit - """, {"gen": generation, "limit": limit}) + SKIP $skip LIMIT $limit + """, {"gen": q_norm, "skip": skip, "limit": per_page}) - return [NarratorSummary(**r) for r in rows] + data = [NarratorSummary(**r) for r in rows] + return PaginatedResponse(data=data, meta=_paginate(total, page, per_page)) -@router.get("/by-place/{place_name}", response_model=list[NarratorSummary]) +# ── Narrators by place (paginated, normalized) ───────────────────────────── + +@router.get("/by-place/{place_name}", response_model=PaginatedResponse) async def narrators_by_place( - place_name: str, - limit: int = Query(50, ge=1, le=200), + place_name: str = Path(..., description="Place name in Arabic (e.g. مكة)"), + page: int = Query(1, ge=1), + per_page: int = Query(50, ge=1, le=100), ): - """Find narrators associated with a place.""" + """ + Narrators associated with a place (born, lived, died, traveled). + Input is normalized — مكة المكرمة matches مكه المكرمه. + """ + q_norm = normalize_query(place_name) + skip = (page - 1) * per_page + + total = db.neo4j_count(""" + MATCH (n:Narrator)-[r]->(p:Place) + WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO'] + AND toLower(p.name_arabic) CONTAINS toLower($place) + RETURN count(DISTINCT n) AS count + """, {"place": q_norm}) + rows = db.neo4j_query(""" - MATCH (n:Narrator)-[:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place) - WHERE p.name_arabic CONTAINS $place + MATCH (n:Narrator)-[r]->(p:Place) + WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO'] + AND toLower(p.name_arabic) CONTAINS toLower($place) OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith) RETURN DISTINCT n.name_arabic AS name_arabic, n.name_transliterated AS name_transliterated, n.entity_type AS entity_type, n.generation AS generation, n.reliability_grade AS reliability_grade, - count(h) AS hadith_count + count(DISTINCT h) AS hadith_count ORDER BY hadith_count DESC - LIMIT $limit - """, {"place": place_name, "limit": limit}) + SKIP $skip LIMIT $limit + """, {"place": q_norm, "skip": skip, "limit": per_page}) - return [NarratorSummary(**r) for r in rows] + data = [NarratorSummary(**r) for r in rows] + return PaginatedResponse(data=data, meta=_paginate(total, page, per_page)) -@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction], - summary="Get all narrator interactions", - description="Lists all relationships for a narrator: who they narrated from, " - "who narrated from them, their teachers, and their students. " - "Each interaction includes shared hadith count. " - "Example: `/narrators/interactions/الزهري`") -async def get_interactions( - name_arabic: str, - limit: int = Query(50, ge=1, le=200, description="Maximum interactions to return"), +# ── Narrator interactions ────────────────────────────────────────────────── + +@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction]) +async def narrator_interactions( + name_arabic: str = Path(..., description="Narrator Arabic name"), + limit: int = Query(50, ge=1, le=200), ): - """ - Get all interactions of a narrator — who they narrated from, - who narrated from them, teachers, students. - """ + """All direct relationships for a narrator — who they narrated from/to.""" + q_norm = normalize_name(name_arabic) + rows = db.neo4j_query(""" - MATCH (n:Narrator {name_arabic: $name}) - OPTIONAL MATCH (n)-[r1:NARRATED_FROM]->(other1:Narrator) - WITH n, collect(DISTINCT { - narrator_b: other1.name_arabic, - narrator_b_trans: other1.name_transliterated, - type: 'NARRATED_FROM', - hadith_ids: r1.hadith_ids - }) AS outgoing - OPTIONAL MATCH (other2:Narrator)-[r2:NARRATED_FROM]->(n) - WITH n, outgoing, collect(DISTINCT { - narrator_b: other2.name_arabic, - narrator_b_trans: other2.name_transliterated, - type: 'HEARD_BY', - hadith_ids: r2.hadith_ids - }) AS incoming - OPTIONAL MATCH (teacher:Narrator)-[r3:TEACHER_OF]->(n) - WITH n, outgoing, incoming, collect(DISTINCT { - narrator_b: teacher.name_arabic, - narrator_b_trans: teacher.name_transliterated, - type: 'TEACHER_OF', - hadith_ids: [] - }) AS teacher_rels - OPTIONAL MATCH (n)-[r4:TEACHER_OF]->(student:Narrator) - WITH n, outgoing, incoming, teacher_rels, collect(DISTINCT { - narrator_b: student.name_arabic, - narrator_b_trans: student.name_transliterated, - type: 'STUDENT_OF', - hadith_ids: [] - }) AS student_rels - RETURN n.name_arabic AS narrator_a, - n.name_transliterated AS narrator_a_trans, - outgoing + incoming + teacher_rels + student_rels AS interactions - """, {"name": name_arabic}) - - if not rows: - raise HTTPException(status_code=404, detail="Narrator not found") - - result = [] - row = rows[0] - for interaction in row["interactions"]: - if not interaction.get("narrator_b"): - continue - hadith_ids = interaction.get("hadith_ids") or [] - result.append(NarratorInteraction( - narrator_a=row["narrator_a"], - narrator_a_transliterated=row.get("narrator_a_trans") or "", - narrator_b=interaction["narrator_b"], - narrator_b_transliterated=interaction.get("narrator_b_trans") or "", - relationship_type=interaction["type"], - shared_hadith_count=len(hadith_ids), - hadith_ids=[str(h) for h in hadith_ids[:20]], - )) - - return result[:limit] - - -@router.get("/who-met-who", response_model=list[NarratorInteraction], - summary="Check if two narrators are connected", - description="Finds the shortest path between two narrators in the knowledge graph. " - "Reveals whether they had a direct or indirect relationship through " - "narration chains, teacher/student bonds, or shared connections. " - "Example: `/narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`") -async def who_met_who( - narrator_a: str = Query( - ..., description="First narrator name (Arabic). Example: الزهري", - examples=["الزهري", "أبو هريرة"], - ), - narrator_b: str = Query( - ..., description="Second narrator name (Arabic). Example: أنس بن مالك", - examples=["أنس بن مالك", "عمر بن الخطاب"], - ), -): - """ - Check if two narrators had a relationship — did they meet, - narrate from each other, or share a teacher/student bond? - """ - rows = db.neo4j_query(""" - MATCH (a:Narrator), (b:Narrator) - WHERE a.name_arabic CONTAINS $name_a - AND b.name_arabic CONTAINS $name_b - OPTIONAL MATCH path = shortestPath((a)-[*..6]-(b)) - WITH a, b, path, - [r IN relationships(path) | { - type: type(r), - from: startNode(r).name_arabic, - from_trans: startNode(r).name_transliterated, - to: endNode(r).name_arabic, - to_trans: endNode(r).name_transliterated - }] AS rels + MATCH (a:Narrator)-[r]-(b:Narrator) + WHERE toLower(a.name_arabic) CONTAINS toLower($name) + AND type(r) IN ['NARRATED_FROM', 'TEACHER_OF'] + WITH a, b, type(r) AS rel_type + OPTIONAL MATCH (a)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b) RETURN a.name_arabic AS narrator_a, - a.name_transliterated AS narrator_a_trans, + a.name_transliterated AS narrator_a_transliterated, b.name_arabic AS narrator_b, - b.name_transliterated AS narrator_b_trans, - length(path) AS distance, - rels - """, {"name_a": narrator_a, "name_b": narrator_b}) + b.name_transliterated AS narrator_b_transliterated, + rel_type AS relationship_type, + count(DISTINCT h) AS shared_hadith_count, + collect(DISTINCT h.id)[..20] AS hadith_ids + ORDER BY shared_hadith_count DESC + LIMIT $limit + """, {"name": q_norm, "limit": limit}) - if not rows or rows[0].get("distance") is None: - return [] + return [NarratorInteraction(**r) for r in rows] - row = rows[0] - return [NarratorInteraction( - narrator_a=rel["from"], - narrator_a_transliterated=rel.get("from_trans") or "", - narrator_b=rel["to"], - narrator_b_transliterated=rel.get("to_trans") or "", - relationship_type=rel["type"], - ) for rel in (row.get("rels") or [])] + +# ── Narrator network (graph visualization) ───────────────────────────────── + +@router.get("/network/{name_arabic}", response_model=NarratorNetwork) +async def narrator_network( + name_arabic: str = Path(..., description="Narrator Arabic name"), + limit: int = Query(50, ge=1, le=200), +): + """ + Get a narrator's connection network — all incoming/outgoing relationships. + Useful for network visualization. + """ + q_norm = normalize_name(name_arabic) + + # Center narrator + center_row = db.neo4j_query_one(""" + MATCH (n:Narrator) + WHERE toLower(n.name_arabic) CONTAINS toLower($name) + OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith) + RETURN n.name_arabic AS name_arabic, + n.name_transliterated AS name_transliterated, + n.entity_type AS entity_type, + n.generation AS generation, + n.reliability_grade AS reliability_grade, + count(DISTINCT h) AS hadith_count + """, {"name": q_norm}) + + if not center_row: + raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}") + + # Connections + conn_rows = db.neo4j_query(""" + MATCH (a:Narrator)-[r]-(b:Narrator) + WHERE toLower(a.name_arabic) CONTAINS toLower($name) + AND type(r) IN ['NARRATED_FROM', 'TEACHER_OF'] + RETURN b.name_arabic AS narrator, + b.name_transliterated AS narrator_transliterated, + type(r) AS connection_type, + CASE WHEN startNode(r) = a THEN 'outgoing' ELSE 'incoming' END AS direction + LIMIT $limit + """, {"name": q_norm, "limit": limit}) + + return NarratorNetwork( + center=NarratorSummary(**center_row), + connections=[NarratorConnection(**r) for r in conn_rows], + total_connections=len(conn_rows), + ) + + +# ── Who met who (shortest path) ──────────────────────────────────────────── + +@router.get("/who-met-who", response_model=WhoMetWhoResult) +async def who_met_who( + narrator_a: str = Query(..., description="First narrator (Arabic)"), + narrator_b: str = Query(..., description="Second narrator (Arabic)"), +): + """ + Shortest path between two narrators in the knowledge graph. + Useful to see how a narrator connects to the Prophet ﷺ. + """ + a_norm = normalize_name(narrator_a) + b_norm = normalize_name(narrator_b) + + row = db.neo4j_query_one(""" + MATCH (a:Narrator), (b:Narrator) + WHERE toLower(a.name_arabic) CONTAINS toLower($a) + AND toLower(b.name_arabic) CONTAINS toLower($b) + WITH a, b LIMIT 1 + MATCH path = shortestPath((a)-[*..10]-(b)) + RETURN [n IN nodes(path) | + {name_arabic: n.name_arabic, + name_transliterated: n.name_transliterated, + generation: n.generation}] AS path_nodes, + [r IN relationships(path) | type(r)] AS rel_types, + length(path) AS path_length + """, {"a": a_norm, "b": b_norm}) + + if not row: + raise HTTPException( + status_code=404, + detail=f"No path found between '{narrator_a}' and '{narrator_b}'", + ) + + return WhoMetWhoResult( + narrator_a=narrator_a, + narrator_b=narrator_b, + path=[PathNode(**n) for n in (row.get("path_nodes") or [])], + path_length=row.get("path_length"), + relationship_types=row.get("rel_types", []), + ) diff --git a/app/routers/search.py b/app/routers/search.py index 3155ab2..e168adb 100644 --- a/app/routers/search.py +++ b/app/routers/search.py @@ -6,12 +6,16 @@ from typing import Optional from app.services.database import db from app.config import get_settings -from app.models.schemas import SemanticSearchResult, FullTextSearchResult, HadithSummary +from app.utils.arabic import normalize_query +from app.models.schemas import ( + SemanticSearchResult, FullTextSearchResult, CombinedSearchResult, + HadithSummary, +) router = APIRouter(prefix="/search", tags=["Search"]) -async def get_embedding(text: str) -> list[float]: +async def _get_embedding(text: str) -> list[float]: """Get embedding vector from TEI (BGE-M3).""" settings = get_settings() response = await db.http_client.post( @@ -22,7 +26,6 @@ async def get_embedding(text: str) -> list[float]: raise HTTPException(status_code=502, detail=f"TEI embedding failed: {response.text}") embeddings = response.json() - # TEI returns list of embeddings; we sent one input if isinstance(embeddings, list) and len(embeddings) > 0: if isinstance(embeddings[0], list): return embeddings[0] @@ -30,34 +33,25 @@ async def get_embedding(text: str) -> list[float]: raise HTTPException(status_code=502, detail="Unexpected TEI response format") -@router.get("/semantic", response_model=list[SemanticSearchResult], - summary="Semantic search (find by meaning)", - description="Search hadiths by meaning using BGE-M3 multilingual embeddings + Qdrant. " - "Supports cross-language queries: search in English and find Arabic hadiths, or vice versa. " - "Example: `what did the prophet say about fasting` → finds Arabic hadiths about صيام") +# ── Semantic search ───────────────────────────────────────────────────────── + +@router.get("/semantic", response_model=list[SemanticSearchResult]) async def semantic_search( - q: str = Query( - ..., min_length=2, - description="Search query in any language. The embedding model handles Arabic, English, and Urdu.", - examples=["what is the reward of prayer", "أحاديث عن الصيام", "حكم الربا"], - ), - collection: Optional[str] = Query( - None, - description="Filter by collection name. Example: Sahih Bukhari", - ), - limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"), + q: str = Query(..., min_length=2, description="Search query (any language — Arabic, English, etc.)"), + collection: Optional[str] = Query(None, description="Filter by collection name"), + limit: int = Query(10, ge=1, le=50), ): """ Semantic search — find hadiths by meaning, not just keywords. - Supports Arabic, English, and cross-language queries. + Supports cross-language queries (English query → Arabic results). Uses BGE-M3 embeddings + Qdrant vector search. """ + if not db.qdrant_available(): + raise HTTPException(status_code=503, detail="Qdrant unavailable") + settings = get_settings() + query_vector = await _get_embedding(q) - # Get query embedding from TEI - query_vector = await get_embedding(q) - - # Build Qdrant filter if collection specified query_filter = None if collection: from qdrant_client.models import Filter, FieldCondition, MatchValue @@ -65,7 +59,6 @@ async def semantic_search( must=[FieldCondition(key="collection", match=MatchValue(value=collection))] ) - # Search Qdrant results = db.qdrant.search( collection_name=settings.qdrant_collection, query_vector=query_vector, @@ -80,8 +73,8 @@ async def semantic_search( output.append(SemanticSearchResult( hadith=HadithSummary( id=str(payload.get("id", hit.id)), - collection=payload.get("collection", ""), - hadith_number=payload.get("hadith_number", 0), + collection=payload.get("collection"), + hadith_number=payload.get("hadith_number"), grade=payload.get("grade"), arabic_text=(payload.get("arabic_text") or "")[:300], ), @@ -92,74 +85,53 @@ async def semantic_search( return output -@router.get("/fulltext", response_model=list[FullTextSearchResult], - summary="Full-text Arabic search", - description="Keyword search using Elasticsearch with Arabic morphological analysis (stemming, root extraction). " - "Returns highlighted text fragments showing where matches occurred. " - "Handles both vocalized (الصَّلاة) and unvocalized (الصلاة) Arabic.") +# ── Full-text Arabic search ───────────────────────────────────────────────── + +@router.get("/fulltext", response_model=list[FullTextSearchResult]) async def fulltext_search( - q: str = Query( - ..., min_length=2, - description="Arabic text search query. Examples: الصلاة (prayer), النكاح (marriage), الجهاد (jihad)", - examples=["الصلاة", "صيام رمضان", "بيع وشراء"], - ), - collection: Optional[str] = Query( - None, - description="Filter by collection. Example: Sahih Muslim", - ), - limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"), + q: str = Query(..., min_length=2, description="Arabic text search query"), + collection: Optional[str] = Query(None, description="Filter by collection"), + limit: int = Query(10, ge=1, le=50), ): """ Full-text Arabic search using Elasticsearch. - Supports Arabic morphological analysis. + Supports Arabic morphological analysis (root-based matching). """ + if not db.es_available(): + raise HTTPException(status_code=503, detail="Elasticsearch unavailable") + settings = get_settings() - # Build ES query - must = [ - { - "multi_match": { - "query": q, - "fields": ["arabic_text^3", "arabic_normalized^2", "matn", "sanad"], - "type": "best_fields", - "analyzer": "arabic", - } - } - ] + must = [{"multi_match": { + "query": q, + "fields": ["arabic_text^3", "english_text", "urdu_text"], + "type": "best_fields", + "analyzer": "arabic", + }}] if collection: - must.append({"match": {"collection_name": collection}}) + must.append({"match": {"collection": collection}}) body = { "query": {"bool": {"must": must}}, "highlight": { - "fields": { - "arabic_text": {"fragment_size": 200, "number_of_fragments": 2}, - "matn": {"fragment_size": 200, "number_of_fragments": 1}, - } + "fields": {"arabic_text": {"fragment_size": 200, "number_of_fragments": 3}}, }, "size": limit, } - try: - response = db.es.search(index=settings.es_index, body=body) - except Exception as e: - # ES index might not exist yet - raise HTTPException(status_code=503, detail=f"Elasticsearch error: {str(e)}") + resp = db.es.search(index=settings.es_index, body=body) + hits = resp.get("hits", {}).get("hits", []) output = [] - for hit in response["hits"]["hits"]: + for hit in hits: src = hit["_source"] - highlights = [] - if "highlight" in hit: - for field_highlights in hit["highlight"].values(): - highlights.extend(field_highlights) - + highlights = hit.get("highlight", {}).get("arabic_text", []) output.append(FullTextSearchResult( hadith=HadithSummary( id=str(src.get("id", hit["_id"])), - collection=src.get("collection_name", ""), - hadith_number=src.get("hadith_number", 0), + collection=src.get("collection"), + hadith_number=src.get("hadith_number"), grade=src.get("grade"), arabic_text=(src.get("arabic_text") or "")[:300], ), @@ -170,38 +142,55 @@ async def fulltext_search( return output -@router.get("/combined", response_model=dict, - summary="Combined search (semantic + full-text)", - description="Runs both semantic and full-text search in parallel and returns merged results. " - "Best for the mobile app search bar — gives both meaning-based and keyword-based results. " - "Returns `{semantic: [...], fulltext: [...], query: '...'}`") +# ── Combined search (semantic + fulltext) ─────────────────────────────────── + +@router.get("/combined", response_model=list[CombinedSearchResult]) async def combined_search( - q: str = Query( - ..., min_length=2, - description="Search query. Works with Arabic keywords or natural language in any language.", - examples=["الصلاة في وقتها", "hadith about charity"], - ), - collection: Optional[str] = Query(None, description="Filter by collection name"), - limit: int = Query(10, ge=1, le=20, description="Results per search type (max 20)"), + q: str = Query(..., min_length=2, description="Search query"), + collection: Optional[str] = Query(None), + limit: int = Query(10, ge=1, le=50), + semantic_weight: float = Query(0.6, ge=0, le=1, description="Weight for semantic score (0-1)"), ): - """ - Combined search — runs both semantic and full-text in parallel, - returns merged results. Best for the mobile app search bar. - """ - import asyncio + """Combined semantic + full-text search. Results merged and ranked by weighted score.""" + results_map: dict[str, CombinedSearchResult] = {} - semantic_task = semantic_search(q=q, collection=collection, limit=limit) - # Full-text only makes sense for Arabic queries - fulltext_task = fulltext_search(q=q, collection=collection, limit=limit) + # Semantic + if db.qdrant_available(): + try: + sem_results = await semantic_search(q=q, collection=collection, limit=limit) + for sr in sem_results: + hid = sr.hadith.id + results_map[hid] = CombinedSearchResult( + hadith=sr.hadith, + semantic_score=sr.score, + combined_score=sr.score * semantic_weight, + source="semantic", + ) + except Exception: + pass - semantic_results, fulltext_results = await asyncio.gather( - semantic_task, - fulltext_task, - return_exceptions=True, - ) + # Full-text + if db.es_available(): + try: + ft_results = await fulltext_search(q=q, collection=collection, limit=limit) + ft_weight = 1.0 - semantic_weight + for fr in ft_results: + hid = fr.hadith.id + norm_score = min(fr.score / 20.0, 1.0) + if hid in results_map: + existing = results_map[hid] + existing.fulltext_score = norm_score + existing.combined_score += norm_score * ft_weight + existing.source = "both" + else: + results_map[hid] = CombinedSearchResult( + hadith=fr.hadith, + fulltext_score=norm_score, + combined_score=norm_score * ft_weight, + source="fulltext", + ) + except Exception: + pass - return { - "semantic": semantic_results if not isinstance(semantic_results, Exception) else [], - "fulltext": fulltext_results if not isinstance(fulltext_results, Exception) else [], - "query": q, - } + results = sorted(results_map.values(), key=lambda x: x.combined_score, reverse=True) + return results[:limit] diff --git a/app/services/database.py b/app/services/database.py index 1c5ad62..d2a33bc 100644 --- a/app/services/database.py +++ b/app/services/database.py @@ -1,6 +1,7 @@ """ -Database connection manager — initializes and provides access to -PostgreSQL, Neo4j, Qdrant, and Elasticsearch clients. +Database connections — PostgreSQL, Neo4j, Qdrant, Elasticsearch, TEI. +Resilient startup: each backend wrapped in try/except so the app +starts even if some services are temporarily unavailable. """ import psycopg2 import psycopg2.pool @@ -53,7 +54,7 @@ class Database: except Exception as e: print(f"⚠️ Neo4j failed: {e}") - # Qdrant + # Qdrant (URL-based connection — matches the working k8s setup) try: self.qdrant = QdrantClient( url=f"http://{settings.qdrant_host}:{settings.qdrant_port}", @@ -92,13 +93,6 @@ class Database: # ── PostgreSQL helpers ── - def get_pg(self): - conn = self.pg_pool.getconn() - try: - yield conn - finally: - self.pg_pool.putconn(conn) - def pg_query(self, query: str, params: tuple = None) -> list[dict]: conn = self.pg_pool.getconn() try: @@ -112,9 +106,16 @@ class Database: rows = self.pg_query(query, params) return rows[0] if rows else None + def pg_count(self, query: str, params: tuple = None) -> int: + """Execute a SELECT count(*) query and return the integer.""" + row = self.pg_query_one(query, params) + return row.get("count", 0) if row else 0 + # ── Neo4j helpers ── def neo4j_query(self, query: str, params: dict = None) -> list[dict]: + if not self.neo4j_driver: + return [] with self.neo4j_driver.session() as session: result = session.run(query, params or {}) return [dict(record) for record in result] @@ -123,6 +124,25 @@ class Database: rows = self.neo4j_query(query, params) return rows[0] if rows else None + def neo4j_count(self, query: str, params: dict = None) -> int: + """Execute a RETURN count(...) AS count query and return the integer.""" + row = self.neo4j_query_one(query, params) + return row.get("count", 0) if row else 0 + + # ── Service availability checks ── + + def pg_available(self) -> bool: + return self.pg_pool is not None + + def neo4j_available(self) -> bool: + return self.neo4j_driver is not None + + def qdrant_available(self) -> bool: + return self.qdrant is not None + + def es_available(self) -> bool: + return self.es is not None + # Global instance db = Database()