Refactor narrator endpoints for improved search and profile retrieval

- Enhanced search functionality with pagination and normalization for Arabic input.
- Updated profile retrieval to include comprehensive data about narrators, including hadith counts, teachers, students, and family information.
- Introduced new endpoints for narrator interactions and network visualization.
- Improved error handling and response structures across endpoints.
- Added utility functions for database queries and service availability checks.
- Refactored search module to support combined semantic and full-text search with weighted scoring.
This commit is contained in:
salah 2026-03-02 21:51:04 +01:00
parent abb091685e
commit 9aa76cddaf
6 changed files with 831 additions and 663 deletions

View File

@ -1,14 +1,23 @@
"""
Pydantic response models for the Hadith Scholar API.
v2.0 changes:
- All fields that Neo4j/PG can return as null are now Optional with defaults.
- Added PaginationMeta / PaginatedResponse for paginated list endpoints.
- All existing model_config / json_schema_extra examples preserved.
"""
from pydantic import BaseModel, Field
from typing import Optional
from datetime import datetime
# ── Common ─────────────────────────────────────────────────────────────────
# ── Pagination (NEW in v2.0) ───────────────────────────────────────────────
class PaginationMeta(BaseModel):
total: int
page: int
per_page: int
pages: int
total: int = Field(description="Total matching items")
page: int = Field(description="Current page (1-indexed)")
per_page: int = Field(description="Items per page")
pages: int = Field(description="Total pages")
model_config = {
"json_schema_extra": {
@ -26,12 +35,12 @@ class PaginatedResponse(BaseModel):
class HadithSummary(BaseModel):
id: str = Field(description="Unique hadith UUID")
collection: str = Field(description="Collection name in English")
hadith_number: int = Field(description="Hadith number within collection")
collection: Optional[str] = Field(None, description="Collection name in English")
hadith_number: Optional[int] = Field(None, description="Hadith number within collection")
grade: Optional[str] = Field(None, description="Grading: Sahih, Hasan, Da'if, etc.")
arabic_text: Optional[str] = Field(None, description="Full Arabic text (may be truncated in list views)")
matn_text: Optional[str] = Field(None, description="Body text only (without isnad)")
sanad_text: Optional[str] = Field(None, description="Chain of narration text only")
arabic_text: Optional[str] = Field(None, description="Arabic text (truncated in lists)")
sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only")
matn_text: Optional[str] = Field(None, description="Matn (body) text only")
model_config = {
"json_schema_extra": {
@ -40,36 +49,28 @@ class HadithSummary(BaseModel):
"collection": "Sahih Bukhari",
"hadith_number": 1,
"grade": "Sahih",
"arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ...",
"matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ وَإِنَّمَا لِكُلِّ امْرِئٍ مَا نَوَى...",
"sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ قَالَ حَدَّثَنَا يَحْيَى بْنُ سَعِيدٍ الأَنْصَارِيُّ"
"arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ...",
"sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ...",
"matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ...",
}]
}
}
class TopicTag(BaseModel):
topic_arabic: str = Field(description="Topic name in Arabic, e.g. الصلاة")
topic_english: str = Field(description="Topic name in English, e.g. Prayer")
category: str = Field(description="Broad Islamic category: عقيدة، فقه، سيرة، أخلاق، تفسير")
model_config = {
"json_schema_extra": {
"examples": [{
"topic_arabic": "النية",
"topic_english": "Intention",
"category": "فقه"
}]
}
}
topic_arabic: str = Field("", description="Topic name in Arabic")
topic_english: str = Field("", description="Topic name in English")
category: str = Field("", description="Topic category (فقه, عقيدة, سيرة, etc.)")
class NarratorInChain(BaseModel):
order: int = Field(description="Position in chain: 1=closest to compiler, last=closest to Prophet ﷺ")
name_arabic: str = Field(description="Narrator's Arabic name as it appears in the hadith text")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration of the name")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA (أبو/أم), NISBA (attributional), or TITLE (رسول الله)")
transmission_verb: Optional[str] = Field(None, description="Exact Arabic transmission verb: حدثنا، أخبرنا، عن، سمعت")
order: Optional[int] = Field(None, description="Position in chain (1 = compiler-end)")
name_arabic: str = Field(description="Narrator Arabic name")
name_transliterated: str = Field("", description="Latin transliteration")
entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
transmission_verb: Optional[str] = Field(None, description="حدثنا, أخبرنا, عن, سمعت, etc.")
generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.")
reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.")
model_config = {
"json_schema_extra": {
@ -78,7 +79,9 @@ class NarratorInChain(BaseModel):
"name_arabic": "الْحُمَيْدِيُّ",
"name_transliterated": "al-Humaydi",
"entity_type": "NISBA",
"transmission_verb": "حَدَّثَنَا"
"transmission_verb": "حَدَّثَنَا",
"generation": "تابع التابعين",
"reliability_grade": "ثقة",
}]
}
}
@ -86,11 +89,14 @@ class NarratorInChain(BaseModel):
class HadithDetail(BaseModel):
id: str = Field(description="Unique hadith UUID")
collection: str = Field(description="Collection English name")
hadith_number: int = Field(description="Number within collection")
grade: Optional[str] = Field(None, description="Hadith grade")
arabic_text: Optional[str] = Field(None, description="Complete Arabic text")
sanad_text: Optional[str] = Field(None, description="Isnad (chain) text only")
collection: Optional[str] = Field(None, description="Collection name")
hadith_number: Optional[int] = Field(None, description="Hadith number")
book_number: Optional[int] = Field(None, description="Book number within collection")
grade: Optional[str] = Field(None, description="Grading")
arabic_text: Optional[str] = Field(None, description="Full Arabic text")
english_text: Optional[str] = Field(None, description="English translation")
urdu_text: Optional[str] = Field(None, description="Urdu translation")
sanad_text: Optional[str] = Field(None, description="Sanad (chain) text only")
matn_text: Optional[str] = Field(None, description="Matn (body) text only")
narrator_chain: list[NarratorInChain] = Field(default_factory=list, description="Ordered narrator chain from Neo4j graph")
topics: list[TopicTag] = Field(default_factory=list, description="Topic tags for searchability")
@ -121,10 +127,10 @@ class HadithDetail(BaseModel):
class NarratorSummary(BaseModel):
name_arabic: str = Field(description="Primary Arabic name")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE")
name_transliterated: str = Field("", description="Latin transliteration")
entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين")
reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، صدوق، ضعيف، متروك")
reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade: ثقة، صدوق، ضعيف")
hadith_count: int = Field(0, description="Number of hadiths this narrator appears in")
model_config = {
@ -142,6 +148,7 @@ class NarratorSummary(BaseModel):
class NameForm(BaseModel):
"""Alternative name forms for a narrator (kunya, nisba, laqab, etc.)."""
name: str = Field(description="Alternative name form")
type: str = Field(description="Name type: PERSON, KUNYA, NISBA, TITLE")
@ -165,9 +172,10 @@ class PlaceRelation(BaseModel):
class NarratorProfile(BaseModel):
"""Complete narrator profile — the mobile app profile page."""
name_arabic: str = Field(description="Primary Arabic name")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE")
name_transliterated: str = Field("", description="Latin transliteration")
entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
full_nasab: Optional[str] = Field(None, description="Full lineage: فلان بن فلان بن فلان")
kunya: Optional[str] = Field(None, description="أبو/أم name (e.g. أبو هريرة)")
nisba: Optional[str] = Field(None, description="Attributional name (e.g. البخاري، المدني، الزهري)")
@ -186,6 +194,8 @@ class NarratorProfile(BaseModel):
hadiths: list[HadithSummary] = Field(default_factory=list, description="Sample hadiths narrated (max 50)")
teachers: list[NarratorSummary] = Field(default_factory=list, description="Known teachers / شيوخ")
students: list[NarratorSummary] = Field(default_factory=list, description="Known students / تلاميذ")
name_forms: list[NameForm] = Field(default_factory=list, description="Alternative name forms")
family: Optional[FamilyInfo] = Field(None, description="Family info if known")
places: list[PlaceRelation] = Field(default_factory=list, description="Associated places (born, lived, died, traveled)")
tribes: list[str] = Field(default_factory=list, description="Tribal affiliations (e.g. قريش، دوس، الأنصار)")
bio_verified: bool = Field(False, description="Whether biography has been manually verified against classical sources")
@ -208,12 +218,11 @@ class NarratorProfile(BaseModel):
"birth_year_ce": None,
"death_year_ce": 676,
"biography_summary_arabic": "أبو هريرة الدوسي، صحابي جليل، أكثر الصحابة رواية للحديث النبوي. أسلم عام خيبر ولازم النبي ﷺ.",
"biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith. He accepted Islam during Khaybar and remained close to the Prophet ﷺ.",
"biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith.",
"total_hadiths_narrated_approx": 5374,
"hadith_count": 142,
"hadiths": [],
"teachers": [{"name_arabic": "رسول الله ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": None, "reliability_grade": None, "hadith_count": 0}],
"students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 89}],
"teachers": [{"name_arabic": "النبي ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": "نبي", "reliability_grade": None, "hadith_count": 0}],
"students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 0}],
"places": [{"place": "المدينة", "relation": "LIVED_IN"}],
"tribes": ["دوس"],
"bio_verified": False,
@ -222,27 +231,27 @@ class NarratorProfile(BaseModel):
}
# ── Isnad Chain ────────────────────────────────────────────────────────────
# ── Isnad Chain (D3-ready) ─────────────────────────────────────────────────
class IsnadNode(BaseModel):
name_arabic: str = Field(description="Narrator Arabic name")
name_transliterated: Optional[str] = Field(None, description="Latin transliteration")
entity_type: Optional[str] = Field(None, description="PERSON, KUNYA, NISBA, TITLE")
generation: Optional[str] = Field(None, description="طبقة")
reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade")
name_transliterated: str = Field("", description="Latin transliteration")
entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
generation: Optional[str] = Field(None, description="صحابي, تابعي, etc.")
reliability_grade: Optional[str] = Field(None, description="ثقة, صدوق, ضعيف, etc.")
class IsnadLink(BaseModel):
source: str = Field(description="name_arabic of narrator who received the hadith")
target: str = Field(description="name_arabic of narrator they received it from")
transmission_verb: Optional[str] = Field(None, description="Exact verb: حدثنا، أخبرنا، عن، سمعت، أنبأنا")
source: str = Field(description="name_arabic of narrator who heard")
target: str = Field(description="name_arabic of narrator who transmitted")
transmission_verb: Optional[str] = Field(None, description="حدثنا, عن, أخبرنا, etc.")
class IsnadChain(BaseModel):
hadith_id: str = Field(description="UUID of the hadith")
collection: str = Field(description="Collection name")
hadith_number: int = Field(description="Hadith number")
nodes: list[IsnadNode] = Field(default_factory=list, description="Narrator nodes for graph visualization")
hadith_id: str = Field(description="Hadith UUID")
collection: Optional[str] = Field(None, description="Collection name")
hadith_number: Optional[int] = Field(None, description="Hadith number")
nodes: list[IsnadNode] = Field(default_factory=list, description="Narrators in the chain")
links: list[IsnadLink] = Field(default_factory=list, description="Directed edges: source heard from target")
model_config = {
@ -271,12 +280,12 @@ class IsnadChain(BaseModel):
class NarratorInteraction(BaseModel):
narrator_a: str = Field(description="First narrator Arabic name")
narrator_a_transliterated: Optional[str] = Field(None, description="First narrator transliteration")
narrator_a_transliterated: str = Field("", description="First narrator transliteration")
narrator_b: str = Field(description="Second narrator Arabic name")
narrator_b_transliterated: Optional[str] = Field(None, description="Second narrator transliteration")
relationship_type: str = Field(description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF")
narrator_b_transliterated: str = Field("", description="Second narrator transliteration")
relationship_type: str = Field("", description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF")
shared_hadith_count: int = Field(0, description="Number of hadiths connecting them")
hadith_ids: list[str] = Field(default_factory=list, description="IDs of connecting hadiths (max 20)")
hadith_ids: list[str] = Field(default_factory=list, description="IDs of shared hadiths (max 20)")
model_config = {
"json_schema_extra": {
@ -295,7 +304,7 @@ class NarratorInteraction(BaseModel):
class NarratorConnection(BaseModel):
narrator: str = Field(description="Connected narrator Arabic name")
narrator_transliterated: Optional[str] = Field(None, description="Transliteration")
narrator_transliterated: str = Field("", description="Transliteration")
connection_type: str = Field(description="Relationship type")
direction: str = Field(description="'incoming' (they → this) or 'outgoing' (this → them)")
@ -306,12 +315,26 @@ class NarratorNetwork(BaseModel):
total_connections: int = 0
class PathNode(BaseModel):
    # Element type of WhoMetWhoResult.path. Carries the same name/generation
    # fields used by the narrator models in this module.
    # Only name_arabic is required; the remaining fields fall back to defaults.
    # (Plain comments are used instead of a docstring so the generated JSON
    # schema is left untouched.)
    name_arabic: str = Field(...)
    name_transliterated: str = Field(default="")
    generation: Optional[str] = Field(default=None)
class WhoMetWhoResult(BaseModel):
    # Response envelope pairing two narrator names with a list of PathNode
    # entries and a list of relationship-type strings.
    # NOTE(review): presumably `path` is the route linking narrator_a to
    # narrator_b and `path_length` its hop count (None when no path exists) —
    # confirm against the endpoint that builds this model; it is not visible here.
    narrator_a: str = Field(...)
    narrator_b: str = Field(...)
    path: list[PathNode] = Field(default_factory=list)
    path_length: Optional[int] = Field(default=None)
    relationship_types: list[str] = Field(default_factory=list)
# ── Search ─────────────────────────────────────────────────────────────────
class SemanticSearchResult(BaseModel):
hadith: HadithSummary = Field(description="Matching hadith")
score: float = Field(description="Cosine similarity score (0-1, higher = more relevant)")
collection: Optional[str] = Field(None, description="Collection name")
collection: str = Field("", description="Collection name")
model_config = {
"json_schema_extra": {
@ -332,8 +355,8 @@ class SemanticSearchResult(BaseModel):
class FullTextSearchResult(BaseModel):
hadith: HadithSummary = Field(description="Matching hadith")
score: float = Field(description="Elasticsearch relevance score (higher = more relevant)")
highlights: list[str] = Field(default_factory=list, description="Text fragments with <em>highlighted</em> matches")
score: float = Field(description="Elasticsearch relevance score")
highlights: list[str] = Field(default_factory=list, description="Text fragments with <em> highlighted matches")
model_config = {
"json_schema_extra": {
@ -346,7 +369,27 @@ class FullTextSearchResult(BaseModel):
"arabic_text": "..."
},
"score": 12.45,
"highlights": ["...عن <em>الصلاة</em> في المسجد الحرام..."]
"highlights": ["...عن <em>الصلاة</em> في المسجد..."]
}]
}
}
class CombinedSearchResult(BaseModel):
    # A single search hit carrying a hadith plus up to two per-engine scores.
    # Either per-engine score may be None; `source` names which engine(s)
    # produced the hit ("semantic", "fulltext", or "both").
    # NOTE(review): how combined_score is derived from the two scores is not
    # visible in this module — check the search service before relying on it.
    hadith: HadithSummary = Field(...)
    semantic_score: Optional[float] = Field(default=None)
    fulltext_score: Optional[float] = Field(default=None)
    combined_score: float = Field(default=0.0)
    source: str = Field(..., description="semantic, fulltext, or both")
# ── Stats ──────────────────────────────────────────────────────────────────
class SystemStats(BaseModel):
    # Seven optional integer counters; every one defaults to None.
    # NOTE(review): the field suffixes suggest one counter per backing store
    # (pg, neo4j, qdrant, es), with None presumably meaning the store could
    # not be queried — confirm against the stats endpoint.
    hadiths_pg: Optional[int] = Field(default=None)
    narrators_neo4j: Optional[int] = Field(default=None)
    places_neo4j: Optional[int] = Field(default=None)
    tribes_neo4j: Optional[int] = Field(default=None)
    relationships_neo4j: Optional[int] = Field(default=None)
    embeddings_qdrant: Optional[int] = Field(default=None)
    documents_es: Optional[int] = Field(default=None)

View File

@ -1,27 +1,33 @@
"""
Isnad chain endpoints — chain visualization data for hadith detail views.
Isnad chain endpoints — chain visualization data (D3-ready nodes + links).
"""
from fastapi import APIRouter, Query, HTTPException
from fastapi import APIRouter, Query, Path, HTTPException
from app.services.database import db
from app.models.schemas import IsnadChain, IsnadNode, IsnadLink
from app.utils.arabic import normalize_name
from app.models.schemas import (
IsnadChain, IsnadNode, IsnadLink,
PaginatedResponse, PaginationMeta,
)
router = APIRouter(prefix="/chains", tags=["Isnad Chains"])
@router.get("/hadith/{hadith_id}", response_model=IsnadChain,
summary="Get isnad chain for a hadith",
description="Returns the complete isnad (chain of narration) as a graph structure "
"with nodes (narrators) and links (transmission relationships). "
"Ready for visualization with D3.js, vis.js, Cytoscape.js, or any graph library. "
"Each node includes narrator metadata (generation, reliability); "
"each link includes the transmission verb (حدثنا، عن، أخبرنا).")
async def get_isnad_chain(hadith_id: str):
def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
    """Build the PaginationMeta block for a paginated list response.

    The page count is ``total / per_page`` rounded up (for a positive
    ``per_page``) and never less than 1, so an empty result set still
    reports a single page.
    """
    page_count = (total + per_page - 1) // per_page
    if page_count < 1:
        page_count = 1
    return PaginationMeta(total=total, page=page, per_page=per_page, pages=page_count)
# ── Chain for a single hadith ──────────────────────────────────────────────
@router.get("/hadith/{hadith_id}", response_model=IsnadChain)
async def get_isnad_chain(
hadith_id: str = Path(..., description="Hadith UUID"),
):
"""
Get the full isnad chain for a hadith as a graph (nodes + links)
ready for visualization (D3.js, vis.js, etc.).
Get the isnad chain for a hadith as a directed graph (nodes + links).
Returns D3-compatible format for frontend visualization.
"""
# Get hadith info
hadith = db.neo4j_query_one("""
MATCH (h:Hadith {id: $hid})
RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number
@ -30,108 +36,113 @@ async def get_isnad_chain(hadith_id: str):
if not hadith:
raise HTTPException(status_code=404, detail="Hadith not found in graph")
# Get chain nodes
nodes = db.neo4j_query("""
MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
# Narrator nodes in the chain
nodes_rows = db.neo4j_query("""
MATCH (n:Narrator)-[a:APPEARS_IN]->(h:Hadith {id: $hid})
RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
n.generation AS generation,
n.reliability_grade AS reliability_grade,
r.chain_order AS chain_order
ORDER BY r.chain_order
n.reliability_grade AS reliability_grade
ORDER BY a.chain_order
""", {"hid": hadith_id})
# Get chain links (NARRATED_FROM within this hadith's narrators)
links = db.neo4j_query("""
MATCH (a:Narrator)-[r1:APPEARS_IN]->(h:Hadith {id: $hid})
MATCH (b:Narrator)-[r2:APPEARS_IN]->(h)
MATCH (a)-[nf:NARRATED_FROM]->(b)
# Transmission links — NARRATED_FROM edges store hadith_ids as array
links_rows = db.neo4j_query("""
MATCH (a:Narrator)-[nf:NARRATED_FROM]->(b:Narrator)
WHERE $hid IN nf.hadith_ids
RETURN a.name_arabic AS source,
b.name_arabic AS target,
nf.transmission_verb AS transmission_verb
ORDER BY a.name_arabic
""", {"hid": hadith_id})
# If no NARRATED_FROM edges with hadith_id, fall back to chain order
if not links and len(nodes) > 1:
sorted_nodes = sorted(nodes, key=lambda n: n.get("chain_order") or 999)
links = []
for i in range(len(sorted_nodes) - 1):
links.append({
"source": sorted_nodes[i]["name_arabic"],
"target": sorted_nodes[i + 1]["name_arabic"],
"transmission_verb": None,
})
return IsnadChain(
hadith_id=str(hadith["id"]),
collection=hadith["collection"] or "",
hadith_number=hadith["hadith_number"] or 0,
nodes=[IsnadNode(**n) for n in nodes],
links=[IsnadLink(**l) for l in links],
hadith_id=hadith_id,
collection=hadith.get("collection"),
hadith_number=hadith.get("hadith_number"),
nodes=[IsnadNode(**r) for r in nodes_rows],
links=[IsnadLink(**r) for r in links_rows],
)
@router.get("/narrator/{name_arabic}", response_model=list[IsnadChain],
summary="Get all chains for a narrator",
description="Returns all isnad chains that include a specific narrator. "
"Useful for visualizing how a narrator connects to the Prophet ﷺ "
"through different transmission paths. "
"Example: `/chains/narrator/الزهري`")
async def get_narrator_chains(
name_arabic: str,
limit: int = Query(10, ge=1, le=50, description="Maximum chains to return"),
# ── All chains containing a narrator (paginated) ──────────────────────────
@router.get("/narrator/{name_arabic}", response_model=PaginatedResponse)
async def chains_by_narrator(
name_arabic: str = Path(..., description="Narrator Arabic name"),
page: int = Query(1, ge=1),
per_page: int = Query(10, ge=1, le=50),
):
"""
Get all isnad chains that include a specific narrator.
Useful for seeing how a narrator connects to the Prophet ﷺ.
All isnad chains containing a narrator.
Useful for seeing how a narrator connects to the Prophet ﷺ across collections.
"""
q_norm = normalize_name(name_arabic)
skip = (page - 1) * per_page
total = db.neo4j_count("""
MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
WHERE toLower(n.name_arabic) CONTAINS toLower($name)
RETURN count(DISTINCT h) AS count
""", {"name": q_norm})
hadith_ids = db.neo4j_query("""
MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
RETURN h.id AS id
LIMIT $limit
""", {"name": name_arabic, "limit": limit})
MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
WHERE toLower(n.name_arabic) CONTAINS toLower($name)
RETURN DISTINCT h.id AS id
ORDER BY h.id
SKIP $skip LIMIT $limit
""", {"name": q_norm, "skip": skip, "limit": per_page})
chains = []
for row in hadith_ids:
chain = await get_isnad_chain(str(row["id"]))
chains.append(chain)
return chains
return PaginatedResponse(
data=chains,
meta=_paginate(total, page, per_page),
)
@router.get("/common-chains", response_model=list[dict],
summary="Find shared chains between two narrators",
description="Find hadiths where both narrators appear in the same isnad chain. "
"Useful for verifying narrator relationships and finding corroborating chains. "
"Example: `/chains/common-chains?narrator_a=الزهري&narrator_b=أنس بن مالك`")
# ── Common chains between two narrators (paginated) ───────────────────────
@router.get("/common", response_model=PaginatedResponse)
async def find_common_chains(
narrator_a: str = Query(
..., description="First narrator (Arabic). Example: الزهري",
examples=["الزهري"],
),
narrator_b: str = Query(
..., description="Second narrator (Arabic). Example: أنس بن مالك",
examples=["أنس بن مالك"],
),
limit: int = Query(10, ge=1, le=50, description="Maximum results"),
narrator_a: str = Query(..., description="First narrator (Arabic)"),
narrator_b: str = Query(..., description="Second narrator (Arabic)"),
page: int = Query(1, ge=1),
per_page: int = Query(10, ge=1, le=50),
):
"""
Find hadiths where both narrators appear in the same chain.
Useful for verifying narrator relationships.
"""
"""Find hadiths where both narrators appear in the same chain."""
a_norm = normalize_name(narrator_a)
b_norm = normalize_name(narrator_b)
skip = (page - 1) * per_page
total = db.neo4j_count("""
MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator)
WHERE toLower(a.name_arabic) CONTAINS toLower($a)
AND toLower(b.name_arabic) CONTAINS toLower($b)
AND a <> b
RETURN count(DISTINCT h) AS count
""", {"a": a_norm, "b": b_norm})
rows = db.neo4j_query("""
MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator)
WHERE a.name_arabic CONTAINS $name_a
AND b.name_arabic CONTAINS $name_b
WHERE toLower(a.name_arabic) CONTAINS toLower($a)
AND toLower(b.name_arabic) CONTAINS toLower($b)
AND a <> b
RETURN h.id AS hadith_id,
RETURN DISTINCT h.id AS hadith_id,
h.collection AS collection,
h.hadith_number AS hadith_number,
a.name_arabic AS narrator_a,
b.name_arabic AS narrator_b
LIMIT $limit
""", {"name_a": narrator_a, "name_b": narrator_b, "limit": limit})
ORDER BY h.collection, h.hadith_number
SKIP $skip LIMIT $limit
""", {"a": a_norm, "b": b_norm, "skip": skip, "limit": per_page})
return [dict(r) for r in rows]
return PaginatedResponse(
data=[dict(r) for r in rows],
meta=_paginate(total, page, per_page),
)

View File

@ -1,10 +1,13 @@
"""
Hadith endpoints — details, listing, search by keyword/narrator/topic/place.
All query parameters are Arabic-normalized for consistent matching.
All list endpoints support pagination via page + per_page.
"""
from fastapi import APIRouter, Query, HTTPException
from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional
from app.services.database import db
from app.utils.arabic import normalize_query
from app.models.schemas import (
HadithDetail, HadithSummary, NarratorInChain, TopicTag,
PaginatedResponse, PaginationMeta,
@ -13,36 +16,44 @@ from app.models.schemas import (
router = APIRouter(prefix="/hadiths", tags=["Hadiths"])
@router.get("/{hadith_id}", response_model=HadithDetail,
summary="Get hadith by ID",
description="Retrieve full hadith details including Arabic text, sanad/matn separation, "
"ordered narrator chain from the knowledge graph, and topic tags.")
async def get_hadith(hadith_id: str):
def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
    """Return pagination metadata for ``total`` items split into pages.

    ``pages`` is computed with integer ceiling division (valid for a
    positive ``per_page``) and is clamped to a minimum of 1, so a query
    with no matches is still described as one (empty) page.
    """
    # Integer ceiling division: add (per_page - 1) before flooring.
    page_count = (total + per_page - 1) // per_page
    if page_count < 1:
        page_count = 1
    return PaginationMeta(total=total, page=page, per_page=per_page, pages=page_count)
# ── Single hadith by ID ────────────────────────────────────────────────────
@router.get("/{hadith_id}", response_model=HadithDetail)
async def get_hadith(hadith_id: str = Path(..., description="Hadith UUID")):
"""Get full hadith details by ID, including narrator chain and topics from Neo4j."""
# Base hadith from PostgreSQL
hadith = db.pg_query_one("""
SELECT h.id, c.name_english AS collection, h.hadith_number,
h.grade, h.arabic_text, h.sanad, h.matn
h.book_number, h.grade, h.arabic_text, h.english_text,
h.urdu_text, h.sanad, h.matn
FROM hadiths h
JOIN collections c ON c.id = h.collection_id
WHERE h.id = %s
WHERE h.id::text = %s
""", (hadith_id,))
if not hadith:
raise HTTPException(status_code=404, detail="Hadith not found")
# Enrich with chain + topics from Neo4j
# Narrator chain from Neo4j
chain = db.neo4j_query("""
MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
RETURN n.name_arabic AS name_arabic,
n.name_transliterated AS name_transliterated,
n.entity_type AS entity_type,
n.generation AS generation,
n.reliability_grade AS reliability_grade,
r.chain_order AS order,
r.transmission_verb AS transmission_verb
ORDER BY r.chain_order
""", {"hid": hadith_id})
# Topics from Neo4j
topics = db.neo4j_query("""
MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic)
RETURN t.topic_arabic AS topic_arabic,
@ -52,10 +63,13 @@ async def get_hadith(hadith_id: str):
return HadithDetail(
id=str(hadith["id"]),
collection=hadith["collection"],
hadith_number=hadith["hadith_number"],
grade=hadith["grade"],
arabic_text=hadith["arabic_text"],
collection=hadith.get("collection"),
hadith_number=hadith.get("hadith_number"),
book_number=hadith.get("book_number"),
grade=hadith.get("grade"),
arabic_text=hadith.get("arabic_text"),
english_text=hadith.get("english_text"),
urdu_text=hadith.get("urdu_text"),
sanad_text=hadith.get("sanad"),
matn_text=hadith.get("matn"),
narrator_chain=[NarratorInChain(**c) for c in chain],
@ -63,183 +77,155 @@ async def get_hadith(hadith_id: str):
)
@router.get("/collection/{collection_name}", response_model=PaginatedResponse,
summary="List hadiths by collection",
description="Paginated listing of hadiths in a specific collection. "
"Collection names use partial matching (e.g. 'bukhari' matches 'Sahih Bukhari').")
# ── By collection + number ─────────────────────────────────────────────────
@router.get("/by-ref/{collection}/{number}", response_model=HadithDetail)
async def get_hadith_by_reference(
    collection: str = Path(..., description="Collection name, e.g. 'Sahih Bukhari'"),
    number: int = Path(..., description="Hadith number within the collection"),
):
    """Look up a hadith by collection name and hadith number.

    The collection is matched exactly against the collection's English name.
    The matching hadith's id is then resolved through ``get_hadith``, so the
    response is the same full detail payload as the by-id endpoint.

    Raises:
        HTTPException: 404 when no hadith matches the collection/number pair.
    """
    # Resolve the reference to an id only; the detail payload is built by get_hadith.
    # The query text below is kept verbatim.
    match = db.pg_query_one("""
SELECT h.id FROM hadiths h
JOIN collections c ON c.id = h.collection_id
WHERE c.name_english = %s AND h.hadith_number = %s
""", (collection, number))
    if match:
        return await get_hadith(str(match["id"]))
    raise HTTPException(status_code=404, detail=f"Hadith {collection} #{number} not found")
# ── List by collection (paginated) ─────────────────────────────────────────
@router.get("/collection/{collection_name}", response_model=PaginatedResponse)
async def list_by_collection(
collection_name: str,
collection_name: str = Path(..., description="Collection name"),
page: int = Query(1, ge=1, description="Page number"),
per_page: int = Query(20, ge=1, le=100, description="Results per page"),
per_page: int = Query(20, ge=1, le=100, description="Items per page"),
):
"""List hadiths in a collection with pagination."""
offset = (page - 1) * per_page
total_row = db.pg_query_one("""
SELECT COUNT(*) AS total
FROM hadiths h
total = db.pg_count("""
SELECT count(*) FROM hadiths h
JOIN collections c ON c.id = h.collection_id
WHERE c.name_english ILIKE %s
""", (f"%{collection_name}%",))
total = total_row["total"] if total_row else 0
WHERE c.name_english = %s
""", (collection_name,))
offset = (page - 1) * per_page
rows = db.pg_query("""
SELECT h.id, c.name_english AS collection, h.hadith_number,
h.grade, LEFT(h.arabic_text, 300) AS arabic_text
FROM hadiths h
JOIN collections c ON c.id = h.collection_id
WHERE c.name_english ILIKE %s
WHERE c.name_english = %s
ORDER BY h.hadith_number
LIMIT %s OFFSET %s
""", (f"%{collection_name}%", per_page, offset))
""", (collection_name, per_page, offset))
return PaginatedResponse(
meta=PaginationMeta(
total=total, page=page, per_page=per_page,
pages=(total + per_page - 1) // per_page,
),
data=[HadithSummary(
id=str(r["id"]), collection=r["collection"],
hadith_number=r["hadith_number"], grade=r["grade"],
arabic_text=r["arabic_text"],
) for r in rows],
)
data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
@router.get("/number/{collection_name}/{number}", response_model=HadithDetail)
async def get_by_number(collection_name: str, number: int):
"""Get a hadith by collection name and number."""
hadith = db.pg_query_one("""
SELECT h.id
FROM hadiths h
JOIN collections c ON c.id = h.collection_id
WHERE c.name_english ILIKE %s AND h.hadith_number = %s
""", (f"%{collection_name}%", number))
# ── Keyword search (paginated, normalized) ─────────────────────────────────
if not hadith:
raise HTTPException(status_code=404, detail=f"Hadith #{number} not found in {collection_name}")
return await get_hadith(str(hadith["id"]))
@router.get("/search/keyword", response_model=PaginatedResponse)
async def search_keyword(
    q: str = Query(..., min_length=2, description="Arabic keyword(s) — diacritics stripped automatically"),
    collection: Optional[str] = Query(None, description="Filter by collection"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Keyword search in Arabic hadith text. Query is normalized for consistent matching.

    Args:
        q: Keyword(s); passed through ``normalize_query`` before matching.
        collection: Optional exact English collection name to filter on.
        page: 1-indexed page number.
        per_page: Page size (1-100).

    Returns:
        A ``PaginatedResponse`` whose ``data`` holds ``HadithSummary`` items
        (text truncated to 300 characters) ordered by collection and number.
    """
    q_norm = normalize_query(q)

    # The WHERE clause is assembled from fixed fragments only; every user value
    # travels as a bound parameter.
    where = "WHERE h.arabic_text ILIKE %s"
    params: list = [f"%{q_norm}%"]
    if collection:
        where += " AND c.name_english = %s"
        params.append(collection)

    total = db.pg_count(
        f"SELECT count(*) FROM hadiths h JOIN collections c ON c.id = h.collection_id {where}",
        tuple(params),
    )

    offset = (page - 1) * per_page
    rows = db.pg_query(
        f"SELECT h.id, c.name_english AS collection, h.hadith_number, "
        f"h.grade, LEFT(h.arabic_text, 300) AS arabic_text "
        f"FROM hadiths h JOIN collections c ON c.id = h.collection_id "
        f"{where} ORDER BY c.name_english, h.hadith_number "
        f"LIMIT %s OFFSET %s",
        tuple(params + [per_page, offset]),
    )

    # The id column is stringified explicitly; all other columns map 1:1 onto the model.
    data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Search by topic (paginated, normalized) ────────────────────────────────
@router.get("/search/topic", response_model=PaginatedResponse)
async def search_by_topic(
    q: str = Query(..., min_length=2, description="Topic keyword (Arabic or English)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Find hadiths by topic tag from the knowledge graph.

    A hadith matches when any linked ``Topic`` node contains the normalized
    query in its Arabic or English label.

    Args:
        q: Topic keyword; passed through ``normalize_query`` before matching.
        page: 1-indexed page number.
        per_page: Page size (1-100).

    Returns:
        A ``PaginatedResponse`` of ``HadithSummary`` items ordered by
        collection and hadith number.
    """
    q_norm = normalize_query(q)
    skip = (page - 1) * per_page

    # Counted with DISTINCT because one hadith can carry several matching topics.
    total = db.neo4j_count("""
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
           OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
    """, {"q": q_norm})

    rows = db.neo4j_query("""
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
           OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
    """, {"q": q_norm, "skip": skip, "limit": per_page})

    data = [HadithSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Search by narrator (paginated, normalized) ─────────────────────────────
@router.get("/search/narrator", response_model=PaginatedResponse)
async def search_by_narrator(
    q: str = Query(..., min_length=2, description="Narrator name (Arabic)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Find all hadiths where a narrator appears in the chain.

    The query is passed through ``normalize_query`` and matched as a
    substring of ``Narrator.name_arabic``; results are the distinct hadiths
    those narrators appear in, ordered by collection and hadith number.
    """
    needle = normalize_query(q)

    # Total distinct hadiths across every narrator whose name contains the query.
    match_count = db.neo4j_count("""
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
    """, {"q": needle})

    page_params = {"q": needle, "skip": (page - 1) * per_page, "limit": per_page}
    records = db.neo4j_query("""
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
    """, page_params)

    summaries = []
    for record in records:
        summaries.append(HadithSummary(**record))
    return PaginatedResponse(data=summaries, meta=_paginate(match_count, page, per_page))

View File

@ -1,317 +1,436 @@
"""
Narrator endpoints profiles, teacher/student network, relationships, who met who.
Narrator endpoints search, profiles, network queries.
All queries normalize Arabic input to match post-dedup graph data.
"""
from fastapi import APIRouter, Query, HTTPException
from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional
from app.services.database import db
from app.utils.arabic import normalize_query, normalize_name
from app.models.schemas import (
NarratorProfile, NarratorSummary, HadithSummary,
NarratorInteraction, PlaceRelation,
PaginatedResponse, PaginationMeta,
NarratorSummary, NarratorProfile, NarratorInteraction,
NarratorConnection, NarratorNetwork,
WhoMetWhoResult, PathNode, PlaceRelation, NameForm, FamilyInfo,
HadithSummary, PaginatedResponse, PaginationMeta,
)
router = APIRouter(prefix="/narrators", tags=["Narrators"])
def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
    """Build the pagination metadata for a list response.

    Args:
        total: Total number of matching items.
        page: Current 1-indexed page.
        per_page: Page size; callers validate it as >= 1.

    Returns:
        ``PaginationMeta`` whose ``pages`` is the ceiling of
        ``total / per_page``, never less than 1 (an empty result still
        reports a single page).
    """
    pages = max(1, (total + per_page - 1) // per_page)
    return PaginationMeta(total=total, page=page, per_page=per_page, pages=pages)
# ── Search narrators by name (paginated, normalized) ───────────────────────
@router.get("/search", response_model=PaginatedResponse)
async def search_narrators(
    q: str = Query(..., min_length=2, description="Narrator name (Arabic). Diacritics stripped automatically."),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """
    Search narrators by Arabic name. Input is normalized to match
    the deduplicated graph (diacritics stripped, characters unified).

    Args:
        q: Name fragment; passed through ``normalize_query`` and matched as a
            substring of ``Narrator.name_arabic``.
        page: 1-indexed page number.
        per_page: Page size (1-100).

    Returns:
        A ``PaginatedResponse`` of ``NarratorSummary`` items, most-narrated first.
    """
    q_norm = normalize_query(q)
    skip = (page - 1) * per_page

    total = db.neo4j_count("""
        MATCH (n:Narrator)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN count(n) AS count
    """, {"q": q_norm})

    # OPTIONAL MATCH keeps narrators that appear in no hadith (hadith_count = 0).
    rows = db.neo4j_query("""
        MATCH (n:Narrator)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
        ORDER BY hadith_count DESC
        SKIP $skip LIMIT $limit
    """, {"q": q_norm, "skip": skip, "limit": per_page})

    data = [NarratorSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
def _unique_narrators(rows: list) -> list:
    """Convert rows to ``NarratorSummary`` objects, keeping the first row per Arabic name."""
    seen = set()
    unique = []
    for row in rows:
        if row["name_arabic"] not in seen:
            seen.add(row["name_arabic"])
            unique.append(NarratorSummary(**row))
    return unique


# ── Full narrator profile ──────────────────────────────────────────────────
@router.get("/profile/{name_arabic}", response_model=NarratorProfile)
async def get_narrator_profile(
    name_arabic: str = Path(..., description="Narrator Arabic name (exact or close match)"),
):
    """
    Complete narrator profile: biography, hadiths, teachers, students, places, tribes.
    This is the mobile app profile page query.

    The narrator is resolved once (exact name first, substring fallback) and
    every follow-up query is anchored to that node's exact ``name_arabic``.

    Raises:
        HTTPException: 404 when no narrator matches the given name.
    """
    q_norm = normalize_name(name_arabic)

    # Find the narrator node — exact first, then CONTAINS fallback
    narrator = db.neo4j_query_one("""
        MATCH (n:Narrator)
        WHERE toLower(n.name_arabic) = toLower($q)
        RETURN n
    """, {"q": q_norm})

    if not narrator:
        narrator = db.neo4j_query_one("""
            MATCH (n:Narrator)
            WHERE toLower(n.name_arabic) CONTAINS toLower($q)
            RETURN n
        """, {"q": q_norm})

    if not narrator:
        raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}")

    n = narrator.get("n", {})
    actual_name = n.get("name_arabic", q_norm)

    # Hadith count + collections
    stats = db.neo4j_query_one("""
        MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
        RETURN count(DISTINCT h) AS hadith_count,
               collect(DISTINCT h.collection) AS collections
    """, {"name": actual_name}) or {}

    # Teachers: narrator NARRATED_FROM teacher + teacher TEACHER_OF narrator
    teachers_nf = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:NARRATED_FROM]->(t:Narrator)
        OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith)
        RETURN t.name_arabic AS name_arabic,
               t.name_transliterated AS name_transliterated,
               t.entity_type AS entity_type,
               t.generation AS generation,
               t.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    teachers_to = db.neo4j_query("""
        MATCH (t:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name})
        OPTIONAL MATCH (t)-[:APPEARS_IN]->(h:Hadith)
        RETURN t.name_arabic AS name_arabic,
               t.name_transliterated AS name_transliterated,
               t.entity_type AS entity_type,
               t.generation AS generation,
               t.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    # The same person can be reachable through both relationship types.
    teachers = _unique_narrators(teachers_nf + teachers_to)

    # Students: student NARRATED_FROM narrator + narrator TEACHER_OF student
    students_nf = db.neo4j_query("""
        MATCH (s:Narrator)-[:NARRATED_FROM]->(n:Narrator {name_arabic: $name})
        OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith)
        RETURN s.name_arabic AS name_arabic,
               s.name_transliterated AS name_transliterated,
               s.entity_type AS entity_type,
               s.generation AS generation,
               s.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    students_to = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(s:Narrator)
        OPTIONAL MATCH (s)-[:APPEARS_IN]->(h:Hadith)
        RETURN s.name_arabic AS name_arabic,
               s.name_transliterated AS name_transliterated,
               s.entity_type AS entity_type,
               s.generation AS generation,
               s.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": actual_name})

    students = _unique_narrators(students_nf + students_to)

    # Places
    places_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[r]->(p:Place)
        WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
        RETURN p.name_arabic AS place, type(r) AS relation
    """, {"name": actual_name})

    # Tribes
    tribe_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe)
        RETURN t.name_arabic AS name
    """, {"name": actual_name})

    # Name forms (alternative names via RELATED_TO)
    name_form_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:RELATED_TO]-(alt:Narrator)
        WHERE alt.name_arabic <> $name
        RETURN alt.name_arabic AS name, alt.entity_type AS type
    """, {"name": actual_name})

    # Family info
    family_row = db.neo4j_query_one("""
        MATCH (n:Narrator {name_arabic: $name})
        RETURN n.father AS father, n.mother AS mother,
               n.spouse AS spouse, n.children AS children
    """, {"name": actual_name})

    # Only attach a family section when at least one field is populated.
    family = None
    if family_row and any(family_row.get(k) for k in ["father", "mother", "spouse", "children"]):
        family = FamilyInfo(
            father=family_row.get("father"),
            mother=family_row.get("mother"),
            spouse=family_row.get("spouse"),
            children=family_row.get("children") or [],
        )

    # Sample hadiths
    hadith_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
        RETURN h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        LIMIT 50
    """, {"name": actual_name})

    return NarratorProfile(
        name_arabic=n.get("name_arabic", actual_name),
        name_transliterated=n.get("name_transliterated", ""),
        entity_type=n.get("entity_type", ""),
        full_nasab=n.get("full_nasab"),
        kunya=n.get("kunya"),
        nisba=n.get("nisba"),
        laqab=n.get("laqab"),
        generation=n.get("generation"),
        reliability_grade=n.get("reliability_grade"),
        reliability_detail=n.get("reliability_detail"),
        birth_year_hijri=n.get("birth_year_hijri"),
        death_year_hijri=n.get("death_year_hijri"),
        birth_year_ce=n.get("birth_year_ce"),
        death_year_ce=n.get("death_year_ce"),
        biography_summary_arabic=n.get("biography_summary_arabic"),
        biography_summary_english=n.get("biography_summary_english"),
        total_hadiths_narrated_approx=n.get("total_hadiths_narrated_approx"),
        hadith_count=stats.get("hadith_count", 0),
        hadiths=[HadithSummary(**r) for r in hadith_rows],
        teachers=teachers,
        students=students,
        name_forms=[NameForm(**r) for r in name_form_rows],
        family=family,
        places=[PlaceRelation(**r) for r in places_rows],
        tribes=[r["name"] for r in tribe_rows],
        bio_verified=n.get("bio_verified", False),
    )
# ── Narrators by generation (paginated, normalized) ────────────────────────
@router.get("/by-generation/{generation}", response_model=PaginatedResponse)
async def narrators_by_generation(
    generation: str = Path(..., description="Generation: صحابي, تابعي, تابع التابعين, نبي"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """List narrators by generation (e.g. Companions, Successors).

    Args:
        generation: Generation label; passed through ``normalize_query`` and
            matched as a substring of ``Narrator.generation``.
        page: 1-indexed page number.
        per_page: Page size (1-100).

    Returns:
        A ``PaginatedResponse`` of ``NarratorSummary`` items, most-narrated first.
    """
    q_norm = normalize_query(generation)
    skip = (page - 1) * per_page

    total = db.neo4j_count("""
        MATCH (n:Narrator)
        WHERE toLower(n.generation) CONTAINS toLower($gen)
        RETURN count(n) AS count
    """, {"gen": q_norm})

    rows = db.neo4j_query("""
        MATCH (n:Narrator)
        WHERE toLower(n.generation) CONTAINS toLower($gen)
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
        ORDER BY hadith_count DESC
        SKIP $skip LIMIT $limit
    """, {"gen": q_norm, "skip": skip, "limit": per_page})

    data = [NarratorSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Narrators by place (paginated, normalized) ─────────────────────────────
@router.get("/by-place/{place_name}", response_model=PaginatedResponse)
async def narrators_by_place(
    place_name: str = Path(..., description="Place name in Arabic (e.g. مكة)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(50, ge=1, le=100),
):
    """
    Narrators associated with a place (born, lived, died, traveled).
    Input is normalized, so مكة المكرمة matches مكه المكرمه.

    Args:
        place_name: Place name; passed through ``normalize_query`` and matched
            as a substring of ``Place.name_arabic``.
        page: 1-indexed page number.
        per_page: Page size (1-100, default 50).

    Returns:
        A ``PaginatedResponse`` of ``NarratorSummary`` items, most-narrated first.
    """
    q_norm = normalize_query(place_name)
    skip = (page - 1) * per_page

    # DISTINCT: a narrator can be linked to the same place by several relation types.
    total = db.neo4j_count("""
        MATCH (n:Narrator)-[r]->(p:Place)
        WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
          AND toLower(p.name_arabic) CONTAINS toLower($place)
        RETURN count(DISTINCT n) AS count
    """, {"place": q_norm})

    rows = db.neo4j_query("""
        MATCH (n:Narrator)-[r]->(p:Place)
        WHERE type(r) IN ['BORN_IN', 'LIVED_IN', 'DIED_IN', 'TRAVELED_TO']
          AND toLower(p.name_arabic) CONTAINS toLower($place)
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN DISTINCT n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
        ORDER BY hadith_count DESC
        SKIP $skip LIMIT $limit
    """, {"place": q_norm, "skip": skip, "limit": per_page})

    data = [NarratorSummary(**r) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Narrator interactions ──────────────────────────────────────────────────
@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction])
async def narrator_interactions(
    name_arabic: str = Path(..., description="Narrator Arabic name"),
    limit: int = Query(50, ge=1, le=200),
):
    """All direct relationships for a narrator — who they narrated from/to.

    Matches narrators whose Arabic name contains the normalized input and
    returns their ``NARRATED_FROM`` / ``TEACHER_OF`` edges (either direction)
    together with the hadiths both endpoints appear in.

    Args:
        name_arabic: Narrator name; passed through ``normalize_name``.
        limit: Maximum number of interaction rows (1-200).

    Returns:
        ``NarratorInteraction`` items ordered by shared hadith count,
        descending. At most 20 hadith ids are included per interaction.
        An unknown narrator yields an empty list.
    """
    q_norm = normalize_name(name_arabic)

    rows = db.neo4j_query("""
        MATCH (a:Narrator)-[r]-(b:Narrator)
        WHERE toLower(a.name_arabic) CONTAINS toLower($name)
          AND type(r) IN ['NARRATED_FROM', 'TEACHER_OF']
        WITH a, b, type(r) AS rel_type
        OPTIONAL MATCH (a)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b)
        RETURN a.name_arabic AS narrator_a,
               a.name_transliterated AS narrator_a_transliterated,
               b.name_arabic AS narrator_b,
               b.name_transliterated AS narrator_b_transliterated,
               rel_type AS relationship_type,
               count(DISTINCT h) AS shared_hadith_count,
               collect(DISTINCT h.id)[..20] AS hadith_ids
        ORDER BY shared_hadith_count DESC
        LIMIT $limit
    """, {"name": q_norm, "limit": limit})

    return [NarratorInteraction(**r) for r in rows]
# ── Narrator network (graph visualization) ─────────────────────────────────
@router.get("/network/{name_arabic}", response_model=NarratorNetwork)
async def narrator_network(
    name_arabic: str = Path(..., description="Narrator Arabic name"),
    limit: int = Query(50, ge=1, le=200),
):
    """
    Get a narrator's connection network — all incoming/outgoing relationships.
    Useful for network visualization.

    Args:
        name_arabic: Narrator name; passed through ``normalize_name`` and
            matched as a substring to resolve the center node.
        limit: Maximum number of connections returned (1-200).

    Returns:
        ``NarratorNetwork`` with the center narrator, its connections, and
        ``total_connections`` equal to the number of connections returned.

    Raises:
        HTTPException: 404 when no narrator matches the given name.
    """
    q_norm = normalize_name(name_arabic)

    # Center narrator
    center_row = db.neo4j_query_one("""
        MATCH (n:Narrator)
        WHERE toLower(n.name_arabic) CONTAINS toLower($name)
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(DISTINCT h) AS hadith_count
    """, {"name": q_norm})

    if not center_row:
        raise HTTPException(status_code=404, detail=f"Narrator not found: {name_arabic}")

    # Connections — anchored to the resolved center's exact name. Repeating the
    # substring match here would merge the edges of every narrator whose name
    # contains the query into a network that reports a single center.
    conn_rows = db.neo4j_query("""
        MATCH (a:Narrator {name_arabic: $name})-[r]-(b:Narrator)
        WHERE type(r) IN ['NARRATED_FROM', 'TEACHER_OF']
        RETURN b.name_arabic AS narrator,
               b.name_transliterated AS narrator_transliterated,
               type(r) AS connection_type,
               CASE WHEN startNode(r) = a THEN 'outgoing' ELSE 'incoming' END AS direction
        LIMIT $limit
    """, {"name": center_row["name_arabic"], "limit": limit})

    return NarratorNetwork(
        center=NarratorSummary(**center_row),
        connections=[NarratorConnection(**r) for r in conn_rows],
        total_connections=len(conn_rows),
    )
# ── Who met who (shortest path) ────────────────────────────────────────────
@router.get("/who-met-who", response_model=WhoMetWhoResult)
async def who_met_who(
    narrator_a: str = Query(..., description="First narrator (Arabic)"),
    narrator_b: str = Query(..., description="Second narrator (Arabic)"),
):
    """
    Shortest path between two narrators in the knowledge graph.
    Useful to see how a narrator connects to the Prophet ﷺ.

    Args:
        narrator_a: First narrator name; passed through ``normalize_name``.
        narrator_b: Second narrator name; passed through ``normalize_name``.

    Returns:
        ``WhoMetWhoResult`` with the nodes on the path, its length, and the
        relationship type of each hop. Paths are searched up to 10 hops.

    Raises:
        HTTPException: 400 when either name is empty after normalization;
            404 when no pair of distinct narrators matches or no path exists.
    """
    a_norm = normalize_name(narrator_a)
    b_norm = normalize_name(narrator_b)

    # An empty needle would make CONTAINS match every narrator and pair up
    # two arbitrary nodes, so reject it up front.
    if not a_norm or not b_norm:
        raise HTTPException(status_code=400, detail="Both narrator names must be non-empty")

    # `a <> b`: shortestPath raises by default when start and end are the same
    # node, which happens whenever both inputs resolve to one narrator.
    row = db.neo4j_query_one("""
        MATCH (a:Narrator), (b:Narrator)
        WHERE toLower(a.name_arabic) CONTAINS toLower($a)
          AND toLower(b.name_arabic) CONTAINS toLower($b)
          AND a <> b
        WITH a, b LIMIT 1
        MATCH path = shortestPath((a)-[*..10]-(b))
        RETURN [n IN nodes(path) |
                {name_arabic: n.name_arabic,
                 name_transliterated: n.name_transliterated,
                 generation: n.generation}] AS path_nodes,
               [r IN relationships(path) | type(r)] AS rel_types,
               length(path) AS path_length
    """, {"a": a_norm, "b": b_norm})

    if not row:
        raise HTTPException(
            status_code=404,
            detail=f"No path found between '{narrator_a}' and '{narrator_b}'",
        )

    return WhoMetWhoResult(
        narrator_a=narrator_a,
        narrator_b=narrator_b,
        path=[PathNode(**n) for n in (row.get("path_nodes") or [])],
        path_length=row.get("path_length"),
        # `or []` also covers a key that is present but null.
        relationship_types=row.get("rel_types") or [],
    )

View File

@ -6,12 +6,16 @@ from typing import Optional
from app.services.database import db
from app.config import get_settings
from app.models.schemas import SemanticSearchResult, FullTextSearchResult, HadithSummary
from app.utils.arabic import normalize_query
from app.models.schemas import (
SemanticSearchResult, FullTextSearchResult, CombinedSearchResult,
HadithSummary,
)
router = APIRouter(prefix="/search", tags=["Search"])
async def get_embedding(text: str) -> list[float]:
async def _get_embedding(text: str) -> list[float]:
"""Get embedding vector from TEI (BGE-M3)."""
settings = get_settings()
response = await db.http_client.post(
@ -22,7 +26,6 @@ async def get_embedding(text: str) -> list[float]:
raise HTTPException(status_code=502, detail=f"TEI embedding failed: {response.text}")
embeddings = response.json()
# TEI returns list of embeddings; we sent one input
if isinstance(embeddings, list) and len(embeddings) > 0:
if isinstance(embeddings[0], list):
return embeddings[0]
@ -30,34 +33,25 @@ async def get_embedding(text: str) -> list[float]:
raise HTTPException(status_code=502, detail="Unexpected TEI response format")
@router.get("/semantic", response_model=list[SemanticSearchResult],
summary="Semantic search (find by meaning)",
description="Search hadiths by meaning using BGE-M3 multilingual embeddings + Qdrant. "
"Supports cross-language queries: search in English and find Arabic hadiths, or vice versa. "
"Example: `what did the prophet say about fasting` → finds Arabic hadiths about صيام")
# ── Semantic search ─────────────────────────────────────────────────────────
@router.get("/semantic", response_model=list[SemanticSearchResult])
async def semantic_search(
q: str = Query(
..., min_length=2,
description="Search query in any language. The embedding model handles Arabic, English, and Urdu.",
examples=["what is the reward of prayer", "أحاديث عن الصيام", "حكم الربا"],
),
collection: Optional[str] = Query(
None,
description="Filter by collection name. Example: Sahih Bukhari",
),
limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
q: str = Query(..., min_length=2, description="Search query (any language — Arabic, English, etc.)"),
collection: Optional[str] = Query(None, description="Filter by collection name"),
limit: int = Query(10, ge=1, le=50),
):
"""
Semantic search find hadiths by meaning, not just keywords.
Supports Arabic, English, and cross-language queries.
Supports cross-language queries (English query Arabic results).
Uses BGE-M3 embeddings + Qdrant vector search.
"""
if not db.qdrant_available():
raise HTTPException(status_code=503, detail="Qdrant unavailable")
settings = get_settings()
query_vector = await _get_embedding(q)
# Get query embedding from TEI
query_vector = await get_embedding(q)
# Build Qdrant filter if collection specified
query_filter = None
if collection:
from qdrant_client.models import Filter, FieldCondition, MatchValue
@ -65,7 +59,6 @@ async def semantic_search(
must=[FieldCondition(key="collection", match=MatchValue(value=collection))]
)
# Search Qdrant
results = db.qdrant.search(
collection_name=settings.qdrant_collection,
query_vector=query_vector,
@ -80,8 +73,8 @@ async def semantic_search(
output.append(SemanticSearchResult(
hadith=HadithSummary(
id=str(payload.get("id", hit.id)),
collection=payload.get("collection", ""),
hadith_number=payload.get("hadith_number", 0),
collection=payload.get("collection"),
hadith_number=payload.get("hadith_number"),
grade=payload.get("grade"),
arabic_text=(payload.get("arabic_text") or "")[:300],
),
@ -92,74 +85,53 @@ async def semantic_search(
return output
@router.get("/fulltext", response_model=list[FullTextSearchResult],
summary="Full-text Arabic search",
description="Keyword search using Elasticsearch with Arabic morphological analysis (stemming, root extraction). "
"Returns highlighted text fragments showing where matches occurred. "
"Handles both vocalized (الصَّلاة) and unvocalized (الصلاة) Arabic.")
# ── Full-text Arabic search ─────────────────────────────────────────────────
@router.get("/fulltext", response_model=list[FullTextSearchResult])
async def fulltext_search(
q: str = Query(
..., min_length=2,
description="Arabic text search query. Examples: الصلاة (prayer), النكاح (marriage), الجهاد (jihad)",
examples=["الصلاة", "صيام رمضان", "بيع وشراء"],
),
collection: Optional[str] = Query(
None,
description="Filter by collection. Example: Sahih Muslim",
),
limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
q: str = Query(..., min_length=2, description="Arabic text search query"),
collection: Optional[str] = Query(None, description="Filter by collection"),
limit: int = Query(10, ge=1, le=50),
):
"""
Full-text Arabic search using Elasticsearch.
Supports Arabic morphological analysis.
Supports Arabic morphological analysis (root-based matching).
"""
if not db.es_available():
raise HTTPException(status_code=503, detail="Elasticsearch unavailable")
settings = get_settings()
# Build ES query
must = [
{
"multi_match": {
must = [{"multi_match": {
"query": q,
"fields": ["arabic_text^3", "arabic_normalized^2", "matn", "sanad"],
"fields": ["arabic_text^3", "english_text", "urdu_text"],
"type": "best_fields",
"analyzer": "arabic",
}
}
]
}}]
if collection:
must.append({"match": {"collection_name": collection}})
must.append({"match": {"collection": collection}})
body = {
"query": {"bool": {"must": must}},
"highlight": {
"fields": {
"arabic_text": {"fragment_size": 200, "number_of_fragments": 2},
"matn": {"fragment_size": 200, "number_of_fragments": 1},
}
"fields": {"arabic_text": {"fragment_size": 200, "number_of_fragments": 3}},
},
"size": limit,
}
try:
response = db.es.search(index=settings.es_index, body=body)
except Exception as e:
# ES index might not exist yet
raise HTTPException(status_code=503, detail=f"Elasticsearch error: {str(e)}")
resp = db.es.search(index=settings.es_index, body=body)
hits = resp.get("hits", {}).get("hits", [])
output = []
for hit in response["hits"]["hits"]:
for hit in hits:
src = hit["_source"]
highlights = []
if "highlight" in hit:
for field_highlights in hit["highlight"].values():
highlights.extend(field_highlights)
highlights = hit.get("highlight", {}).get("arabic_text", [])
output.append(FullTextSearchResult(
hadith=HadithSummary(
id=str(src.get("id", hit["_id"])),
collection=src.get("collection_name", ""),
hadith_number=src.get("hadith_number", 0),
collection=src.get("collection"),
hadith_number=src.get("hadith_number"),
grade=src.get("grade"),
arabic_text=(src.get("arabic_text") or "")[:300],
),
@ -170,38 +142,55 @@ async def fulltext_search(
return output
# ── Combined search (semantic + fulltext) ───────────────────────────────────
@router.get("/combined", response_model=list[CombinedSearchResult])
async def combined_search(
    q: str = Query(..., min_length=2, description="Search query"),
    collection: Optional[str] = Query(None),
    limit: int = Query(10, ge=1, le=50),
    semantic_weight: float = Query(0.6, ge=0, le=1, description="Weight for semantic score (0-1)"),
):
    """Combined semantic + full-text search. Results merged and ranked by weighted score.

    Each backend is queried only when ``db`` reports it as available, and a
    failure in one backend is logged and skipped so the other can still
    contribute results.

    Scoring:
      * a semantic hit contributes ``score * semantic_weight``;
      * a full-text hit contributes ``min(score / 20.0, 1.0) * (1 - semantic_weight)``;
      * a hadith found by both backends gets the sum and ``source="both"``.

    Args:
        q: The search query, forwarded unchanged to both searches.
        collection: Optional collection filter, forwarded to both searches.
        limit: Maximum hits requested from each backend and maximum items returned.
        semantic_weight: Share of the combined score given to the semantic
            score; the full-text share is ``1 - semantic_weight``.

    Returns:
        Up to ``limit`` ``CombinedSearchResult`` items, sorted by
        ``combined_score`` in descending order. The list is empty when
        neither backend produced results.
    """
    import logging

    log = logging.getLogger(__name__)

    # NOTE(review): 20.0 looks like an assumed ceiling for raw full-text
    # relevance scores, used to squash them into [0, 1] — confirm against
    # the scores the index actually produces.
    fulltext_score_cap = 20.0

    # Keyed by hadith id so a hadith returned by both backends is merged.
    results_map: dict[str, CombinedSearchResult] = {}

    # Semantic
    if db.qdrant_available():
        try:
            sem_results = await semantic_search(q=q, collection=collection, limit=limit)
            for sr in sem_results:
                hid = sr.hadith.id
                results_map[hid] = CombinedSearchResult(
                    hadith=sr.hadith,
                    semantic_score=sr.score,
                    combined_score=sr.score * semantic_weight,
                    source="semantic",
                )
        except Exception:
            # Best-effort: keep serving full-text results, but leave a trace
            # instead of silently discarding the failure.
            log.warning("Combined search: semantic search failed", exc_info=True)

    # Full-text
    if db.es_available():
        ft_weight = 1.0 - semantic_weight
        try:
            ft_results = await fulltext_search(q=q, collection=collection, limit=limit)
            for fr in ft_results:
                hid = fr.hadith.id
                norm_score = min(fr.score / fulltext_score_cap, 1.0)
                if hid in results_map:
                    existing = results_map[hid]
                    existing.fulltext_score = norm_score
                    existing.combined_score += norm_score * ft_weight
                    existing.source = "both"
                else:
                    results_map[hid] = CombinedSearchResult(
                        hadith=fr.hadith,
                        fulltext_score=norm_score,
                        combined_score=norm_score * ft_weight,
                        source="fulltext",
                    )
        except Exception:
            # Best-effort: semantic results gathered so far are still returned.
            log.warning("Combined search: full-text search failed", exc_info=True)

    results = sorted(results_map.values(), key=lambda x: x.combined_score, reverse=True)
    return results[:limit]

View File

@ -1,6 +1,7 @@
"""
Database connection manager initializes and provides access to
PostgreSQL, Neo4j, Qdrant, and Elasticsearch clients.
Database connections PostgreSQL, Neo4j, Qdrant, Elasticsearch, TEI.
Resilient startup: each backend wrapped in try/except so the app
starts even if some services are temporarily unavailable.
"""
import psycopg2
import psycopg2.pool
@ -53,7 +54,7 @@ class Database:
except Exception as e:
print(f"⚠️ Neo4j failed: {e}")
# Qdrant
# Qdrant (URL-based connection — matches the working k8s setup)
try:
self.qdrant = QdrantClient(
url=f"http://{settings.qdrant_host}:{settings.qdrant_port}",
@ -92,13 +93,6 @@ class Database:
# ── PostgreSQL helpers ──
def get_pg(self):
    """Yield one connection borrowed from ``self.pg_pool``.

    The connection is handed back to the pool in a ``finally`` clause, so it
    is returned even when the consumer raises or the generator is closed
    early.
    """
    connection = self.pg_pool.getconn()
    try:
        yield connection
    finally:
        self.pg_pool.putconn(connection)
def pg_query(self, query: str, params: tuple = None) -> list[dict]:
conn = self.pg_pool.getconn()
try:
@ -112,9 +106,16 @@ class Database:
rows = self.pg_query(query, params)
return rows[0] if rows else None
def pg_count(self, query: str, params: tuple = None) -> int:
    """Execute a SELECT count(*) query and return the integer.

    The value is read from the ``"count"`` key of the row returned by
    ``pg_query_one``.

    Args:
        query: SQL text, passed through to ``pg_query_one``.
        params: Optional query parameters, passed through unchanged.

    Returns:
        The ``count`` value of the single row, or 0 when there is no row,
        the row has no ``"count"`` key, or the value is NULL.
    """
    row = self.pg_query_one(query, params)
    if not row:
        return 0
    count = row.get("count")
    # A present-but-NULL column would otherwise leak None through an
    # int-annotated API; .get()'s default only covers a missing key.
    return 0 if count is None else count
# ── Neo4j helpers ──
def neo4j_query(self, query: str, params: dict = None) -> list[dict]:
    """Run a Cypher query and return every record converted to a plain dict.

    When no Neo4j driver is set on this instance the query is not run and an
    empty list is returned. A missing ``params`` is sent as an empty dict.
    """
    driver = self.neo4j_driver
    if not driver:
        return []
    bound = params or {}
    with driver.session() as session:
        records = session.run(query, bound)
        rows = []
        for record in records:
            rows.append(dict(record))
        return rows
@ -123,6 +124,25 @@ class Database:
rows = self.neo4j_query(query, params)
return rows[0] if rows else None
def neo4j_count(self, query: str, params: dict = None) -> int:
    """Execute a RETURN count(...) AS count query and return the integer.

    The value is read from the ``"count"`` key of the record returned by
    ``neo4j_query_one``, so the Cypher must alias its result ``AS count``.

    Args:
        query: Cypher text, passed through to ``neo4j_query_one``.
        params: Optional query parameters, passed through unchanged.

    Returns:
        The ``count`` value of the single record, or 0 when there is no
        record, the record has no ``"count"`` key, or the value is null.
    """
    row = self.neo4j_query_one(query, params)
    if not row:
        return 0
    count = row.get("count")
    # A present-but-null value would otherwise leak None through an
    # int-annotated API; .get()'s default only covers a missing key.
    return 0 if count is None else count
# ── Service availability checks ──
def pg_available(self) -> bool:
    """Return True when a PostgreSQL pool object is set on this instance.

    Only checks that ``self.pg_pool`` is not None; it does not contact the
    server.
    """
    pool = self.pg_pool
    return pool is not None
def neo4j_available(self) -> bool:
    """Return True when a Neo4j driver object is set on this instance.

    Only checks that ``self.neo4j_driver`` is not None; it does not contact
    the server.
    """
    driver = self.neo4j_driver
    return driver is not None
def qdrant_available(self) -> bool:
    """Return True when a Qdrant client object is set on this instance.

    Only checks that ``self.qdrant`` is not None; it does not contact the
    server.
    """
    client = self.qdrant
    return client is not None
def es_available(self) -> bool:
    """Return True when an Elasticsearch client object is set on this instance.

    Only checks that ``self.es`` is not None; it does not contact the server.
    """
    client = self.es
    return client is not None
# Global instance
db = Database()