feat: Implement Hadith and Narrator endpoints with search functionality

- Added Hadith endpoints for retrieving details, listing by collection, and searching by keyword, topic, and narrator.
- Introduced Narrator endpoints for searching narrators, retrieving profiles, and exploring interactions.
- Created search endpoints for semantic and full-text search capabilities using Qdrant and Elasticsearch.
- Established a database connection manager for PostgreSQL, Neo4j, Qdrant, and Elasticsearch.
- Configured Kubernetes deployment with necessary secrets and environment variables for the API.
- Updated requirements.txt with necessary dependencies for the application.
This commit is contained in:
salah 2026-02-26 22:17:58 +01:00
commit 9d51393c86
16 changed files with 1826 additions and 0 deletions

22
.env.example Normal file
View File

@ -0,0 +1,22 @@
# Hadith Scholar API — Environment Variables
# Copy to .env and fill in values
# All variables share the HADITH_ prefix.
# PostgreSQL
HADITH_PG_HOST=pg.betelgeusebytes.io
HADITH_PG_PORT=5432
HADITH_PG_DBNAME=REPLACE_ME
HADITH_PG_USER=REPLACE_ME
HADITH_PG_PASSWORD=REPLACE_ME
HADITH_PG_SSLMODE=require
# Neo4j
HADITH_NEO4J_URI=neo4j+ssc://neo4j.betelgeusebytes.io:7687
HADITH_NEO4J_USER=neo4j
HADITH_NEO4J_PASSWORD=NEO4J-PASS
# Qdrant
HADITH_QDRANT_HOST=qdrant.vector.svc.cluster.local
HADITH_QDRANT_PORT=6333
HADITH_QDRANT_COLLECTION=hadiths
# Elasticsearch
HADITH_ES_HOST=http://elasticsearch.elastic.svc.cluster.local:9200
HADITH_ES_INDEX=hadiths
# TEI (embeddings)
HADITH_TEI_URL=http://tei.ml.svc.cluster.local:80

17
Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Image for the API: installs build dependencies, Python requirements, then the source tree.
FROM python:3.12-slim
WORKDIR /app
# Install system deps for psycopg2
# The apt package lists are deleted in the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libpq-dev gcc && \
    rm -rf /var/lib/apt/lists/*
# requirements.txt is copied on its own first, so the pip layer below can be
# reused from the build cache when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# NOTE(review): this copies the whole build context, including a local .env if
# one exists and no .dockerignore excludes it — confirm.
COPY . .
# Port 8000 matches the --port passed to uvicorn below.
EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

0
app/__init__.py Normal file
View File

42
app/config.py Normal file
View File

@ -0,0 +1,42 @@
from pydantic_settings import BaseSettings
from functools import lru_cache
class Settings(BaseSettings):
    """Runtime configuration for the Hadith Scholar API.

    Values are read from environment variables carrying the ``HADITH_``
    prefix (for example ``HADITH_PG_HOST``) and from a ``.env`` file in the
    working directory; the defaults below apply when neither provides a value.
    """

    # Pydantic v2 configuration. This replaces the deprecated inner
    # ``class Config`` and uses a plain dict, so no extra import is needed.
    model_config = {
        "env_file": ".env",
        "env_prefix": "HADITH_",
    }

    app_name: str = "Hadith Scholar API"
    app_version: str = "0.1.0"
    debug: bool = False

    # PostgreSQL
    pg_host: str = "pg.betelgeusebytes.io"
    pg_port: int = 5432
    # "REPLACE_ME" values are placeholders and must be overridden per deployment.
    pg_dbname: str = "REPLACE_ME"
    pg_user: str = "REPLACE_ME"
    pg_password: str = "REPLACE_ME"
    pg_sslmode: str = "require"

    # Neo4j
    neo4j_uri: str = "neo4j+ssc://neo4j.betelgeusebytes.io:7687"
    neo4j_user: str = "neo4j"
    neo4j_password: str = "NEO4J-PASS"

    # Qdrant
    qdrant_host: str = "qdrant.vector.svc.cluster.local"
    qdrant_port: int = 6333
    qdrant_collection: str = "hadiths"

    # Elasticsearch
    es_host: str = "http://elasticsearch.elastic.svc.cluster.local:9200"
    es_index: str = "hadiths"

    # TEI (embeddings)
    tei_url: str = "http://tei.ml.svc.cluster.local:80"
@lru_cache()
def get_settings() -> Settings:
    """Build the application settings once and serve the cached instance afterwards."""
    loaded_settings = Settings()
    return loaded_settings

223
app/main.py Normal file
View File

@ -0,0 +1,223 @@
"""
Hadith Scholar API — FastAPI application.

Endpoints:
    /hadiths   — hadith details, search by keyword/narrator/topic/collection
    /narrators — narrator profiles, interactions, who-met-who
    /chains    — isnad chain visualization data
    /search    — semantic (Qdrant) + full-text Arabic (Elasticsearch)
"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from app.config import get_settings
from app.services.database import db
from app.routers import hadiths, narrators, chains, search
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Connect all databases on startup, disconnect on shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Yields:
        Control to the running application once ``db.connect()`` has completed.
    """
    await db.connect()
    try:
        yield
    finally:
        # Runs even when the serving phase ends with an exception or a
        # cancellation, so the connections are not left open.
        await db.disconnect()
# Module-level settings object, read when the application and the root endpoint are built.
settings = get_settings()

# OpenAPI tag descriptions, passed to FastAPI as ``openapi_tags``.
# Each "name" must match the ``tags=[...]`` value used by the corresponding routes.
TAGS_METADATA = [
    {
        "name": "Hadiths",
        "description": (
            "Access and search hadith texts from 8+ major collections "
            "(Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, "
            "Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, and more). "
            "Retrieve full hadith details including narrator chains, topic tags, "
            "and sanad/matn separation. Search by Arabic keyword, narrator name, "
            "topic, or collection."
        ),
    },
    {
        "name": "Narrators",
        "description": (
            "Narrator (رجال الحديث) profiles and relationships. "
            "Each narrator includes biographical data from classical scholarship "
            "(Tahdhib al-Kamal, Taqrib al-Tahdhib, etc.): full nasab, kunya, nisba, "
            "generation (طبقة), reliability grading (جرح وتعديل), birth/death dates, "
            "teachers, students, places, and tribal affiliations. "
            "Use the who-met-who endpoint to explore narrator connections."
        ),
    },
    {
        "name": "Isnad Chains",
        "description": (
            "Isnad (chain of narration / إسناد) visualization data. "
            "Returns graph-ready structures (nodes + links) for rendering "
            "narrator chains using D3.js, vis.js, Cytoscape, or any graph library. "
            "Each node includes narrator metadata; each link includes the "
            "transmission verb (حدثنا، أخبرنا، عن، سمعت)."
        ),
    },
    {
        "name": "Search",
        "description": (
            "Multi-modal hadith search. **Semantic search** uses BGE-M3 multilingual "
            "embeddings + Qdrant to find hadiths by meaning (supports Arabic, English, "
            "and cross-language queries). **Full-text search** uses Elasticsearch with "
            "Arabic morphological analysis for exact and fuzzy keyword matching. "
            "**Combined search** runs both in parallel for the best results."
        ),
    },
    {
        # Tag used by the endpoints defined directly on the application object.
        "name": "Root",
        "description": "Health checks, statistics, and API metadata.",
    },
]
# The ASGI application object; title and version come from the settings object.
app = FastAPI(
    title=settings.app_name,
    version=settings.app_version,
    # Markdown shown in the generated API docs. The lines stay flush-left on
    # purpose: any leading whitespace would become part of the string.
    description="""
# Hadith Scholar API — حَدِيثٌ
Production-grade REST API for analyzing Islamic hadith literature across 8+ major collections.
## Core Capabilities
| Feature | Backend | Endpoint |
|---------|---------|----------|
| Hadith lookup & keyword search | PostgreSQL | `GET /hadiths/*` |
| Narrator profiles & biography | Neo4j | `GET /narrators/*` |
| Isnad chain visualization | Neo4j | `GET /chains/*` |
| Semantic search (by meaning) | Qdrant + BGE-M3 | `GET /search/semantic` |
| Full-text Arabic search | Elasticsearch | `GET /search/fulltext` |
| Narrator relationships | Neo4j | `GET /narrators/who-met-who` |
## Data Sources
- **~41,000 hadiths** from Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, Muwatta Malik, and more
- **Narrator knowledge graph** with biographies, teacher/student networks, places, tribes
- **1024-dim multilingual embeddings** (BGE-M3) for semantic search across Arabic/English/Urdu
## Authentication
Currently open (no auth required). API keys will be added in a future version.
## Arabic Text
All Arabic text preserves original diacritics (تشكيل). Search endpoints accept both vocalized and unvocalized Arabic.
## Example Queries
- Search for hadiths about prayer: `GET /hadiths/search/keyword?q=صلاة`
- Find narrator profile: `GET /narrators/profile/أبو هريرة`
- Semantic search: `GET /search/semantic?q=what did the prophet say about fasting`
- Who met who: `GET /narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`
""",
    # Paths of the interactive docs, ReDoc, and the raw OpenAPI schema.
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
    openapi_tags=TAGS_METADATA,
    # Runs the lifespan context manager around the application's lifetime.
    lifespan=lifespan,
    license_info={
        "name": "MIT",
    },
    contact={
        "name": "Hadith Scholar API",
        "url": "https://betelgeusebytes.io",
    },
)
# CORS — allow all origins for development; tighten for production.
# Credentials stay disabled: a wildcard origin must not be combined with
# credentialed requests, and the API is currently open (no auth), so no
# cookies or Authorization headers need to cross origins. When authentication
# is added, list the allowed origins explicitly before enabling credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Register routers
# Each router module exposes a ``router`` object; they are mounted in this order.
app.include_router(hadiths.router)
app.include_router(narrators.router)
app.include_router(chains.router)
app.include_router(search.router)
@app.get("/", tags=["Root"])
async def root():
    # API landing payload: name, version, docs path and the router prefixes.
    # (Kept as a comment: a docstring here would be published as the
    # operation description in the OpenAPI schema.)
    endpoint_prefixes = {
        "hadiths": "/hadiths",
        "narrators": "/narrators",
        "chains": "/chains",
        "search": "/search",
    }
    landing = {
        "name": settings.app_name,
        "version": settings.app_version,
        "docs": "/docs",
        "endpoints": endpoint_prefixes,
    }
    return landing
@app.get("/health", tags=["Root"])
async def health():
    """Health check — verifies all service connections."""
    services = {}
    report = {"status": "ok", "services": services}

    def mark_failed(service_name, detail):
        # Record the failure text and downgrade the overall status.
        services[service_name] = detail
        report["status"] = "degraded"

    # PostgreSQL
    try:
        db.pg_query_one("SELECT 1 AS ok")
    except Exception as exc:
        mark_failed("postgresql", f"error: {exc}")
    else:
        services["postgresql"] = "ok"

    # Neo4j
    try:
        db.neo4j_query_one("RETURN 1 AS ok")
    except Exception as exc:
        mark_failed("neo4j", f"error: {exc}")
    else:
        services["neo4j"] = "ok"

    # Qdrant
    try:
        db.qdrant.get_collections()
    except Exception as exc:
        mark_failed("qdrant", f"error: {exc}")
    else:
        services["qdrant"] = "ok"

    # Elasticsearch: ping() reports failure through its return value, so a
    # falsy result is handled separately from a raised exception.
    try:
        es_alive = db.es.ping()
    except Exception as exc:
        mark_failed("elasticsearch", f"error: {exc}")
    else:
        if es_alive:
            services["elasticsearch"] = "ok"
        else:
            mark_failed("elasticsearch", "unreachable")

    return report
@app.get("/stats", tags=["Root"])
async def stats():
    """Database statistics."""
    # Row counts from the relational store.
    pg_stats = db.pg_query_one("""
        SELECT
            (SELECT COUNT(*) FROM hadiths) AS total_hadiths,
            (SELECT COUNT(*) FROM collections) AS total_collections
    """)
    # Node and relationship counts from the graph.
    # Every stage after the first uses OPTIONAL MATCH: a plain MATCH that finds
    # nothing (for example a graph with no Tribe nodes yet) drops the carried
    # row, and the whole query would then return no result instead of zeros.
    neo4j_stats = db.neo4j_query_one("""
        MATCH (h:Hadith) WITH count(h) AS hadiths
        OPTIONAL MATCH (n:Narrator) WITH hadiths, count(n) AS narrators
        OPTIONAL MATCH (p:Place) WITH hadiths, narrators, count(p) AS places
        OPTIONAL MATCH (t:Tribe) WITH hadiths, narrators, places, count(t) AS tribes
        OPTIONAL MATCH ()-[r]->() WITH hadiths, narrators, places, tribes, count(r) AS relationships
        RETURN hadiths, narrators, places, tribes, relationships
    """)
    return {
        "postgresql": pg_stats,
        "neo4j_graph": neo4j_stats,
    }

0
app/models/__init__.py Normal file
View File

352
app/models/schemas.py Normal file
View File

@ -0,0 +1,352 @@
from pydantic import BaseModel, Field
from typing import Optional
# ── Common ─────────────────────────────────────────────────────────────────
# Paging counters that accompany a paginated list of results.
class PaginationMeta(BaseModel):
    total: int      # total number of matching rows
    page: int       # current page number
    per_page: int   # page size
    pages: int      # total number of pages
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{"total": 6986, "page": 1, "per_page": 20, "pages": 350}]
        }
    }
# Generic paginated envelope: paging counters plus one page of items.
class PaginatedResponse(BaseModel):
    meta: PaginationMeta
    data: list  # untyped list; the item type depends on the endpoint
# ── Hadith ─────────────────────────────────────────────────────────────────
# Compact hadith record; only id, collection and hadith_number are required.
class HadithSummary(BaseModel):
    id: str = Field(description="Unique hadith UUID")
    collection: str = Field(description="Collection name in English")
    hadith_number: int = Field(description="Hadith number within collection")
    grade: Optional[str] = Field(None, description="Grading: Sahih, Hasan, Da'if, etc.")
    arabic_text: Optional[str] = Field(None, description="Full Arabic text (may be truncated in list views)")
    matn_text: Optional[str] = Field(None, description="Body text only (without isnad)")
    sanad_text: Optional[str] = Field(None, description="Chain of narration text only")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "id": "dcf8df41-3185-4e20-a9af-db3696a48c79",
                "collection": "Sahih Bukhari",
                "hadith_number": 1,
                "grade": "Sahih",
                "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ...",
                "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ وَإِنَّمَا لِكُلِّ امْرِئٍ مَا نَوَى...",
                "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ قَالَ حَدَّثَنَا يَحْيَى بْنُ سَعِيدٍ الأَنْصَارِيُّ"
            }]
        }
    }
# A topic label attached to a hadith; all three fields are required.
class TopicTag(BaseModel):
    topic_arabic: str = Field(description="Topic name in Arabic, e.g. الصلاة")
    topic_english: str = Field(description="Topic name in English, e.g. Prayer")
    category: str = Field(description="Broad Islamic category: عقيدة، فقه، سيرة، أخلاق، تفسير")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "topic_arabic": "النية",
                "topic_english": "Intention",
                "category": "فقه"
            }]
        }
    }
# One narrator entry inside a hadith's ordered narrator chain.
# name_transliterated and entity_type default to "" but are typed ``str``,
# so an explicit None is not accepted for them.
class NarratorInChain(BaseModel):
    order: int = Field(description="Position in chain: 1=closest to compiler, last=closest to Prophet ﷺ")
    name_arabic: str = Field(description="Narrator's Arabic name as it appears in the hadith text")
    name_transliterated: str = Field("", description="Latin transliteration of the name")
    entity_type: str = Field("", description="PERSON, KUNYA (أبو/أم), NISBA (attributional), or TITLE (رسول الله)")
    transmission_verb: Optional[str] = Field(None, description="Exact Arabic transmission verb: حدثنا، أخبرنا، عن، سمعت")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "order": 1,
                "name_arabic": "الْحُمَيْدِيُّ",
                "name_transliterated": "al-Humaydi",
                "entity_type": "NISBA",
                "transmission_verb": "حَدَّثَنَا"
            }]
        }
    }
# Full hadith record: the text fields plus the narrator chain and topic tags.
# narrator_chain and topics default to empty lists.
class HadithDetail(BaseModel):
    id: str = Field(description="Unique hadith UUID")
    collection: str = Field(description="Collection English name")
    hadith_number: int = Field(description="Number within collection")
    grade: Optional[str] = Field(None, description="Hadith grade")
    arabic_text: Optional[str] = Field(None, description="Complete Arabic text")
    sanad_text: Optional[str] = Field(None, description="Isnad (chain) text only")
    matn_text: Optional[str] = Field(None, description="Matn (body) text only")
    narrator_chain: list[NarratorInChain] = Field(default_factory=list, description="Ordered narrator chain from Neo4j graph")
    topics: list[TopicTag] = Field(default_factory=list, description="Topic tags for searchability")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "id": "dcf8df41-3185-4e20-a9af-db3696a48c79",
                "collection": "Sahih Bukhari",
                "hadith_number": 1,
                "grade": "Sahih",
                "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ...",
                "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ...",
                "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ...",
                "narrator_chain": [
                    {"order": 1, "name_arabic": "الْحُمَيْدِيُّ", "name_transliterated": "al-Humaydi", "entity_type": "NISBA", "transmission_verb": "حَدَّثَنَا"},
                    {"order": 2, "name_arabic": "سُفْيَانُ", "name_transliterated": "Sufyan", "entity_type": "PERSON", "transmission_verb": "حَدَّثَنَا"},
                ],
                "topics": [
                    {"topic_arabic": "النية", "topic_english": "Intention", "category": "فقه"},
                ]
            }]
        }
    }
# ── Narrator ───────────────────────────────────────────────────────────────
# Compact narrator record; only name_arabic is required.
class NarratorSummary(BaseModel):
    name_arabic: str = Field(description="Primary Arabic name")
    name_transliterated: str = Field("", description="Latin transliteration")
    entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
    generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين")
    reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، صدوق، ضعيف، متروك")
    hadith_count: int = Field(0, description="Number of hadiths this narrator appears in")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "name_arabic": "أَبُو هُرَيْرَةَ",
                "name_transliterated": "Abu Hurayrah",
                "entity_type": "KUNYA",
                "generation": "صحابي",
                "reliability_grade": "ثقة",
                "hadith_count": 5374
            }]
        }
    }
# An alternative form of a narrator's name together with its name type.
class NameForm(BaseModel):
    name: str = Field(description="Alternative name form")
    type: str = Field(description="Name type: PERSON, KUNYA, NISBA, TITLE")
# Family relations; every field is optional and children defaults to an empty list.
class FamilyInfo(BaseModel):
    father: Optional[str] = None
    mother: Optional[str] = None
    spouse: Optional[str] = None
    children: list[str] = Field(default_factory=list)
# Links a place to the kind of relation it has with a narrator.
class PlaceRelation(BaseModel):
    place: str = Field(description="Place name in Arabic")
    relation: str = Field(description="BORN_IN, LIVED_IN, DIED_IN, or TRAVELED_TO")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{"place": "المدينة", "relation": "LIVED_IN"}]
        }
    }
# Full narrator profile: naming, grading, dates, biography and related entities.
# Only name_arabic is required; every other field has a default.
class NarratorProfile(BaseModel):
    # Identity and name components
    name_arabic: str = Field(description="Primary Arabic name")
    name_transliterated: str = Field("", description="Latin transliteration")
    entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
    full_nasab: Optional[str] = Field(None, description="Full lineage: فلان بن فلان بن فلان")
    kunya: Optional[str] = Field(None, description="أبو/أم name (e.g. أبو هريرة)")
    nisba: Optional[str] = Field(None, description="Attributional name (e.g. البخاري، المدني، الزهري)")
    laqab: Optional[str] = Field(None, description="Title or epithet (e.g. أمير المؤمنين في الحديث)")
    # Generation and reliability grading
    generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين، أتباع تابع التابعين")
    reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، ثقة حافظ، صدوق، ضعيف، متروك")
    reliability_detail: Optional[str] = Field(None, description="Extended grading explanation from scholars")
    # Dates in both calendars
    birth_year_hijri: Optional[int] = Field(None, description="Birth year (Hijri calendar)")
    death_year_hijri: Optional[int] = Field(None, description="Death year (Hijri calendar)")
    birth_year_ce: Optional[int] = Field(None, description="Birth year (CE)")
    death_year_ce: Optional[int] = Field(None, description="Death year (CE)")
    # Biography and hadith counts
    biography_summary_arabic: Optional[str] = Field(None, description="2-3 sentence biography in Arabic")
    biography_summary_english: Optional[str] = Field(None, description="2-3 sentence biography in English")
    total_hadiths_narrated_approx: Optional[int] = Field(None, description="Approximate total hadiths narrated across all collections")
    hadith_count: int = Field(0, description="Hadiths in current database")
    # Related entities; each list defaults to empty
    hadiths: list[HadithSummary] = Field(default_factory=list, description="Sample hadiths narrated (max 50)")
    teachers: list[NarratorSummary] = Field(default_factory=list, description="Known teachers / شيوخ")
    students: list[NarratorSummary] = Field(default_factory=list, description="Known students / تلاميذ")
    places: list[PlaceRelation] = Field(default_factory=list, description="Associated places (born, lived, died, traveled)")
    tribes: list[str] = Field(default_factory=list, description="Tribal affiliations (e.g. قريش، دوس، الأنصار)")
    bio_verified: bool = Field(False, description="Whether biography has been manually verified against classical sources")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "name_arabic": "أَبُو هُرَيْرَةَ",
                "name_transliterated": "Abu Hurayrah",
                "entity_type": "KUNYA",
                "full_nasab": "عبد الرحمن بن صخر الدوسي",
                "kunya": "أبو هريرة",
                "nisba": "الدوسي",
                "laqab": None,
                "generation": "صحابي",
                "reliability_grade": "ثقة",
                "reliability_detail": "صحابي جليل، أكثر الصحابة رواية للحديث",
                "birth_year_hijri": None,
                "death_year_hijri": 57,
                "birth_year_ce": None,
                "death_year_ce": 676,
                "biography_summary_arabic": "أبو هريرة الدوسي، صحابي جليل، أكثر الصحابة رواية للحديث النبوي. أسلم عام خيبر ولازم النبي ﷺ.",
                "biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith. He accepted Islam during Khaybar and remained close to the Prophet ﷺ.",
                "total_hadiths_narrated_approx": 5374,
                "hadith_count": 142,
                "hadiths": [],
                "teachers": [{"name_arabic": "رسول الله ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": None, "reliability_grade": None, "hadith_count": 0}],
                "students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 89}],
                "places": [{"place": "المدينة", "relation": "LIVED_IN"}],
                "tribes": ["دوس"],
                "bio_verified": False,
            }]
        }
    }
# ── Isnad Chain ────────────────────────────────────────────────────────────
# A narrator node in an isnad graph.
# name_transliterated and entity_type default to "" but are typed ``str``,
# so an explicit None is not accepted for them.
class IsnadNode(BaseModel):
    name_arabic: str = Field(description="Narrator Arabic name")
    name_transliterated: str = Field("", description="Latin transliteration")
    entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE")
    generation: Optional[str] = Field(None, description="طبقة")
    reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade")
# A directed edge in an isnad graph; both ends are identified by name_arabic.
class IsnadLink(BaseModel):
    source: str = Field(description="name_arabic of narrator who received the hadith")
    target: str = Field(description="name_arabic of narrator they received it from")
    transmission_verb: Optional[str] = Field(None, description="Exact verb: حدثنا، أخبرنا، عن، سمعت، أنبأنا")
# Graph form of one hadith's isnad: the hadith's identity plus nodes and links.
# nodes and links default to empty lists.
class IsnadChain(BaseModel):
    hadith_id: str = Field(description="UUID of the hadith")
    collection: str = Field(description="Collection name")
    hadith_number: int = Field(description="Hadith number")
    nodes: list[IsnadNode] = Field(default_factory=list, description="Narrator nodes for graph visualization")
    links: list[IsnadLink] = Field(default_factory=list, description="Directed edges: source heard from target")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "hadith_id": "dcf8df41-3185-4e20-a9af-db3696a48c79",
                "collection": "Sahih Bukhari",
                "hadith_number": 1,
                "nodes": [
                    {"name_arabic": "الْحُمَيْدِيُّ", "name_transliterated": "al-Humaydi", "entity_type": "NISBA", "generation": "تابع التابعين", "reliability_grade": "ثقة"},
                    {"name_arabic": "سُفْيَانُ بْنُ عُيَيْنَةَ", "name_transliterated": "Sufyan ibn Uyaynah", "entity_type": "PERSON", "generation": "تابع التابعين", "reliability_grade": "ثقة"},
                    {"name_arabic": "يَحْيَى بْنُ سَعِيدٍ", "name_transliterated": "Yahya ibn Sa'id al-Ansari", "entity_type": "PERSON", "generation": "تابعي", "reliability_grade": "ثقة"},
                    {"name_arabic": "عُمَرُ بْنُ الْخَطَّابِ", "name_transliterated": "Umar ibn al-Khattab", "entity_type": "PERSON", "generation": "صحابي", "reliability_grade": "ثقة"},
                ],
                "links": [
                    {"source": "الْحُمَيْدِيُّ", "target": "سُفْيَانُ بْنُ عُيَيْنَةَ", "transmission_verb": "حَدَّثَنَا"},
                    {"source": "سُفْيَانُ بْنُ عُيَيْنَةَ", "target": "يَحْيَى بْنُ سَعِيدٍ", "transmission_verb": "حَدَّثَنَا"},
                    {"source": "يَحْيَى بْنُ سَعِيدٍ", "target": "عُمَرُ بْنُ الْخَطَّابِ", "transmission_verb": "عن"},
                ]
            }]
        }
    }
# ── Relationships / Who Met Who ────────────────────────────────────────────
# A relationship between two narrators and the hadiths that connect them.
class NarratorInteraction(BaseModel):
    narrator_a: str = Field(description="First narrator Arabic name")
    narrator_a_transliterated: str = Field("", description="First narrator transliteration")
    narrator_b: str = Field(description="Second narrator Arabic name")
    narrator_b_transliterated: str = Field("", description="Second narrator transliteration")
    relationship_type: str = Field(description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF")
    shared_hadith_count: int = Field(0, description="Number of hadiths connecting them")
    hadith_ids: list[str] = Field(default_factory=list, description="IDs of connecting hadiths (max 20)")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "narrator_a": "الزهري",
                "narrator_a_transliterated": "al-Zuhri",
                "narrator_b": "أنس بن مالك",
                "narrator_b_transliterated": "Anas ibn Malik",
                "relationship_type": "NARRATED_FROM",
                "shared_hadith_count": 23,
                "hadith_ids": ["abc-123", "def-456"]
            }]
        }
    }
# One connection from a central narrator to another narrator, with its direction.
class NarratorConnection(BaseModel):
    narrator: str = Field(description="Connected narrator Arabic name")
    narrator_transliterated: str = Field("", description="Transliteration")
    connection_type: str = Field(description="Relationship type")
    direction: str = Field(description="'incoming' (they → this) or 'outgoing' (this → them)")
# A central narrator together with the list of its connections.
class NarratorNetwork(BaseModel):
    center: NarratorSummary
    connections: list[NarratorConnection] = Field(default_factory=list)
    # NOTE(review): presumably the full count even when ``connections`` is truncated — confirm against the router.
    total_connections: int = 0
# ── Search ─────────────────────────────────────────────────────────────────
# One semantic-search hit: the matching hadith and its similarity score.
# ``collection`` is a top-level field in addition to ``hadith.collection``.
class SemanticSearchResult(BaseModel):
    hadith: HadithSummary = Field(description="Matching hadith")
    score: float = Field(description="Cosine similarity score (0-1, higher = more relevant)")
    collection: str = Field("", description="Collection name")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "hadith": {
                    "id": "abc-123",
                    "collection": "Sahih Bukhari",
                    "hadith_number": 1,
                    "grade": "Sahih",
                    "arabic_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ..."
                },
                "score": 0.9234,
                "collection": "Sahih Bukhari"
            }]
        }
    }
# One full-text search hit: the matching hadith, its score and highlighted fragments.
class FullTextSearchResult(BaseModel):
    hadith: HadithSummary = Field(description="Matching hadith")
    score: float = Field(description="Elasticsearch relevance score (higher = more relevant)")
    highlights: list[str] = Field(default_factory=list, description="Text fragments with <em>highlighted</em> matches")
    # Example payload attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "hadith": {
                    "id": "abc-123",
                    "collection": "Sahih Muslim",
                    "hadith_number": 1599,
                    "grade": "Sahih",
                    "arabic_text": "..."
                },
                "score": 12.45,
                "highlights": ["...عن <em>الصلاة</em> في المسجد الحرام..."]
            }]
        }
    }

0
app/routers/__init__.py Normal file
View File

137
app/routers/chains.py Normal file
View File

@ -0,0 +1,137 @@
"""
Isnad chain endpoints — chain visualization data for hadith detail views.
"""
from fastapi import APIRouter, Query, HTTPException
from app.services.database import db
from app.models.schemas import IsnadChain, IsnadNode, IsnadLink
# Router for the isnad-chain endpoints; every route below is served under /chains.
router = APIRouter(prefix="/chains", tags=["Isnad Chains"])
@router.get("/hadith/{hadith_id}", response_model=IsnadChain,
            summary="Get isnad chain for a hadith",
            description="Returns the complete isnad (chain of narration) as a graph structure "
                        "with nodes (narrators) and links (transmission relationships). "
                        "Ready for visualization with D3.js, vis.js, Cytoscape.js, or any graph library. "
                        "Each node includes narrator metadata (generation, reliability); "
                        "each link includes the transmission verb (حدثنا، عن، أخبرنا).")
async def get_isnad_chain(hadith_id: str):
    """Return the isnad chain of one hadith as a graph (nodes + links).

    Args:
        hadith_id: Value of the ``id`` property of the ``Hadith`` graph node.

    Returns:
        An ``IsnadChain`` holding the hadith's narrators as nodes and the
        transmission relationships between them as links.

    Raises:
        HTTPException: 404 when no ``Hadith`` node has this id.
    """
    # Get hadith info
    hadith = db.neo4j_query_one("""
        MATCH (h:Hadith {id: $hid})
        RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number
    """, {"hid": hadith_id})
    if not hadith:
        raise HTTPException(status_code=404, detail="Hadith not found in graph")

    # Get chain nodes
    node_rows = db.neo4j_query("""
        MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               r.chain_order AS chain_order
        ORDER BY r.chain_order
    """, {"hid": hadith_id})

    # Get chain links (NARRATED_FROM within this hadith's narrators)
    link_rows = db.neo4j_query("""
        MATCH (a:Narrator)-[r1:APPEARS_IN]->(h:Hadith {id: $hid})
        MATCH (b:Narrator)-[r2:APPEARS_IN]->(h)
        MATCH (a)-[nf:NARRATED_FROM]->(b)
        WHERE $hid IN nf.hadith_ids
        RETURN a.name_arabic AS source,
               b.name_arabic AS target,
               nf.transmission_verb AS transmission_verb
    """, {"hid": hadith_id})

    # If no NARRATED_FROM edges with hadith_id, fall back to chain order
    if not link_rows and len(node_rows) > 1:
        def chain_position(row):
            # Only a missing order sorts last; an order of 0 is a real position.
            position = row.get("chain_order")
            return 999 if position is None else position

        ordered = sorted(node_rows, key=chain_position)
        link_rows = [
            {
                "source": current["name_arabic"],
                "target": following["name_arabic"],
                "transmission_verb": None,
            }
            for current, following in zip(ordered, ordered[1:])
        ]

    # Build the nodes field by field. The query always returns every column,
    # so a property missing on the graph node arrives as None, which the
    # ``str`` fields reject; None is mapped to their "" default instead.
    nodes = [
        IsnadNode(
            name_arabic=row["name_arabic"],
            name_transliterated=row.get("name_transliterated") or "",
            entity_type=row.get("entity_type") or "",
            generation=row.get("generation"),
            reliability_grade=row.get("reliability_grade"),
        )
        for row in node_rows
    ]
    links = [
        IsnadLink(
            source=row["source"],
            target=row["target"],
            transmission_verb=row.get("transmission_verb"),
        )
        for row in link_rows
    ]

    return IsnadChain(
        hadith_id=str(hadith["id"]),
        collection=hadith["collection"] or "",
        hadith_number=hadith["hadith_number"] or 0,
        nodes=nodes,
        links=links,
    )
@router.get("/narrator/{name_arabic}", response_model=list[IsnadChain],
            summary="Get all chains for a narrator",
            description="Returns all isnad chains that include a specific narrator. "
                        "Useful for visualizing how a narrator connects to the Prophet ﷺ "
                        "through different transmission paths. "
                        "Example: `/chains/narrator/الزهري`")
async def get_narrator_chains(
    name_arabic: str,
    limit: int = Query(10, ge=1, le=50, description="Maximum chains to return"),
):
    """Return the isnad chain of each hadith in which the given narrator appears.

    The narrator is matched on an exact ``name_arabic`` value, and at most
    ``limit`` hadiths are expanded into chains.
    """
    query_params = {"name": name_arabic, "limit": limit}
    hadith_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
        RETURN h.id AS id
        LIMIT $limit
    """, query_params)
    # Chains are built one after another, in the order the ids came back.
    return [await get_isnad_chain(str(row["id"])) for row in hadith_rows]
@router.get("/common-chains", response_model=list[dict],
            summary="Find shared chains between two narrators",
            description="Find hadiths where both narrators appear in the same isnad chain. "
                        "Useful for verifying narrator relationships and finding corroborating chains. "
                        "Example: `/chains/common-chains?narrator_a=الزهري&narrator_b=أنس بن مالك`")
async def find_common_chains(
    narrator_a: str = Query(
        ..., description="First narrator (Arabic). Example: الزهري",
        examples=["الزهري"],
    ),
    narrator_b: str = Query(
        ..., description="Second narrator (Arabic). Example: أنس بن مالك",
        examples=["أنس بن مالك"],
    ),
    limit: int = Query(10, ge=1, le=50, description="Maximum results"),
):
    """List hadiths whose chain contains both narrators.

    Both names are matched by substring (``CONTAINS``) against
    ``name_arabic``, and the two matched narrators must be distinct nodes.
    Each result row is returned as a plain dict.
    """
    query_params = {"name_a": narrator_a, "name_b": narrator_b, "limit": limit}
    shared_rows = db.neo4j_query("""
        MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator)
        WHERE a.name_arabic CONTAINS $name_a
          AND b.name_arabic CONTAINS $name_b
          AND a <> b
        RETURN h.id AS hadith_id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               a.name_arabic AS narrator_a,
               b.name_arabic AS narrator_b
        LIMIT $limit
    """, query_params)
    return list(map(dict, shared_rows))

245
app/routers/hadiths.py Normal file
View File

@ -0,0 +1,245 @@
"""
Hadith endpoints — details, listing, search by keyword/narrator/topic/place.
"""
from typing import Optional

from fastapi import APIRouter, HTTPException, Path, Query

from app.models.schemas import (
    HadithDetail, HadithSummary, NarratorInChain, TopicTag,
    PaginatedResponse, PaginationMeta,
)
from app.services.database import db
# Router for the hadith endpoints; every route below is served under /hadiths.
router = APIRouter(prefix="/hadiths", tags=["Hadiths"])
@router.get("/{hadith_id}", response_model=HadithDetail,
            summary="Get hadith by ID",
            description="Retrieve full hadith details including Arabic text, sanad/matn separation, "
                        "ordered narrator chain from the knowledge graph, and topic tags.")
async def get_hadith(hadith_id: str):
    """Get full hadith details by ID, including narrator chain and topics from Neo4j.

    Args:
        hadith_id: Primary key of the hadith row; the same value identifies
            the ``Hadith`` node in the graph.

    Returns:
        A ``HadithDetail`` combining the PostgreSQL row with the narrator
        chain and topic tags read from Neo4j.

    Raises:
        HTTPException: 404 when PostgreSQL has no hadith with this id.
    """
    # Base hadith from PostgreSQL
    hadith = db.pg_query_one("""
        SELECT h.id, c.name_english AS collection, h.hadith_number,
               h.grade, h.arabic_text, h.sanad, h.matn
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE h.id = %s
    """, (hadith_id,))
    if not hadith:
        raise HTTPException(status_code=404, detail="Hadith not found")

    # Enrich with chain + topics from Neo4j
    chain_rows = db.neo4j_query("""
        MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               r.chain_order AS order,
               r.transmission_verb AS transmission_verb
        ORDER BY r.chain_order
    """, {"hid": hadith_id})
    topic_rows = db.neo4j_query("""
        MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic)
        RETURN t.topic_arabic AS topic_arabic,
               t.topic_english AS topic_english,
               t.category AS category
    """, {"hid": hadith_id})

    # Build the chain field by field. The query always returns every column,
    # so a property missing on the graph node arrives as None, which the
    # ``str`` fields reject; None is mapped to their "" default instead.
    narrator_chain = [
        NarratorInChain(
            order=row["order"],
            name_arabic=row["name_arabic"],
            name_transliterated=row.get("name_transliterated") or "",
            entity_type=row.get("entity_type") or "",
            transmission_verb=row.get("transmission_verb"),
        )
        for row in chain_rows
    ]

    return HadithDetail(
        id=str(hadith["id"]),
        collection=hadith["collection"],
        hadith_number=hadith["hadith_number"],
        grade=hadith["grade"],
        arabic_text=hadith["arabic_text"],
        sanad_text=hadith.get("sanad"),
        matn_text=hadith.get("matn"),
        narrator_chain=narrator_chain,
        topics=[TopicTag(**row) for row in topic_rows],
    )
@router.get("/collection/{collection_name}", response_model=PaginatedResponse,
            summary="List hadiths by collection",
            description="Paginated listing of hadiths in a specific collection. "
                        "Collection names use partial matching (e.g. 'bukhari' matches 'Sahih Bukhari').")
async def list_by_collection(
    # Declared with fastapi.Path: ``Field`` is not imported in this module
    # (it raised NameError on import) and is not how a path parameter is declared.
    collection_name: str = Path(..., description="Collection name (partial match). Examples: bukhari, muslim, tirmidhi, abudawud"),
    page: int = Query(1, ge=1, description="Page number"),
    per_page: int = Query(20, ge=1, le=100, description="Results per page"),
):
    """List hadiths in a collection with pagination.

    Args:
        collection_name: Case-insensitive substring of the collection's English name.
        page: 1-based page number.
        per_page: Page size (1-100).

    Returns:
        A ``PaginatedResponse`` whose ``data`` holds ``HadithSummary`` items
        ordered by hadith number, with ``arabic_text`` cut to 300 characters.
    """
    offset = (page - 1) * per_page
    name_pattern = f"%{collection_name}%"

    total_row = db.pg_query_one("""
        SELECT COUNT(*) AS total
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english ILIKE %s
    """, (name_pattern,))
    total = total_row["total"] if total_row else 0

    rows = db.pg_query("""
        SELECT h.id, c.name_english AS collection, h.hadith_number,
               h.grade, LEFT(h.arabic_text, 300) AS arabic_text
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english ILIKE %s
        ORDER BY h.hadith_number
        LIMIT %s OFFSET %s
    """, (name_pattern, per_page, offset))

    return PaginatedResponse(
        meta=PaginationMeta(
            total=total, page=page, per_page=per_page,
            # Ceiling division: a partially filled last page still counts.
            pages=(total + per_page - 1) // per_page,
        ),
        data=[HadithSummary(
            id=str(r["id"]), collection=r["collection"],
            hadith_number=r["hadith_number"], grade=r["grade"],
            arabic_text=r["arabic_text"],
        ) for r in rows],
    )
@router.get("/number/{collection_name}/{number}", response_model=HadithDetail)
async def get_by_number(collection_name: str, number: int):
    """Get a hadith by collection name and number."""
    # The collection name is a case-insensitive partial match, so more than one
    # collection can contain the requested number. Order the candidates and
    # take exactly one so the same request always resolves to the same hadith.
    hadith = db.pg_query_one("""
        SELECT h.id
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english ILIKE %s AND h.hadith_number = %s
        ORDER BY c.name_english, h.id
        LIMIT 1
    """, (f"%{collection_name}%", number))
    if not hadith:
        raise HTTPException(status_code=404, detail=f"Hadith #{number} not found in {collection_name}")
    # Delegate to the detail endpoint so both routes return the same payload.
    return await get_hadith(str(hadith["id"]))
@router.get("/search/keyword", response_model=PaginatedResponse,
            summary="Search hadiths by Arabic keyword",
            description="Full-text keyword search across all hadith Arabic text. "
                        "Supports both vocalized (مَكَّةَ) and unvocalized (مكة) Arabic.")
async def search_by_keyword(
    q: str = Query(
        ..., min_length=2,
        description="Arabic keyword to search. Examples: صلاة (prayer), زكاة (zakat), صيام (fasting), حج (hajj), نية (intention)",
        examples=["صلاة", "الجنة", "رمضان"],
    ),
    collection: Optional[str] = Query(
        None,
        description="Filter by collection name. Examples: Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood",
        examples=["Sahih Bukhari"],
    ),
    grade: Optional[str] = Query(
        None,
        description="Filter by hadith grade. Examples: Sahih, Hasan, Da'if",
        examples=["Sahih"],
    ),
    page: int = Query(1, ge=1, description="Page number (1-indexed)"),
    per_page: int = Query(20, ge=1, le=100, description="Results per page (max 100)"),
):
    """Search hadiths by Arabic keyword in text.

    Args:
        q: Keyword matched as a literal substring (ILIKE) of ``h.arabic_text``.
        collection: Optional partial, case-insensitive collection-name filter.
        grade: Optional partial, case-insensitive grade filter.
        page: 1-indexed page number.
        per_page: Page size (1-100).

    Returns:
        A ``PaginatedResponse`` of ``HadithSummary`` rows (Arabic text
        truncated to 300 characters) plus pagination metadata.
    """
    # NOTE(review): the route description promises vocalized/unvocalized
    # matching, but this query is a plain ILIKE on arabic_text — confirm the
    # column is stored normalized, or adjust the description.
    offset = (page - 1) * per_page

    # Escape LIKE metacharacters so a keyword containing %, _ or \ is matched
    # literally instead of acting as a wildcard (backslash is PostgreSQL's
    # default LIKE escape character).
    literal_q = q.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    conditions = ["h.arabic_text ILIKE %s"]
    filter_params = [f"%{literal_q}%"]
    if collection:
        conditions.append("c.name_english ILIKE %s")
        filter_params.append(f"%{collection}%")
    if grade:
        conditions.append("h.grade ILIKE %s")
        filter_params.append(f"%{grade}%")
    # Only fixed SQL fragments are interpolated; every user value is bound.
    where = " AND ".join(conditions)

    total_row = db.pg_query_one(f"""
        SELECT COUNT(*) AS total
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE {where}
    """, tuple(filter_params))
    total = total_row["total"] if total_row else 0

    # Build the page query's parameters separately instead of mutating the
    # list shared with the count query; h.id makes the page order stable.
    rows = db.pg_query(f"""
        SELECT h.id, c.name_english AS collection, h.hadith_number,
               h.grade, LEFT(h.arabic_text, 300) AS arabic_text
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE {where}
        ORDER BY c.name_english, h.hadith_number, h.id
        LIMIT %s OFFSET %s
    """, (*filter_params, per_page, offset))

    return PaginatedResponse(
        meta=PaginationMeta(
            total=total, page=page, per_page=per_page,
            pages=(total + per_page - 1) // per_page,
        ),
        data=[HadithSummary(
            id=str(r["id"]), collection=r["collection"],
            hadith_number=r["hadith_number"], grade=r["grade"],
            arabic_text=r["arabic_text"],
        ) for r in rows],
    )
@router.get("/search/topic/{topic}", response_model=list[HadithSummary])
async def search_by_topic(topic: str, limit: int = Query(20, ge=1, le=100)):
    """Find hadiths for a topic term using Neo4j, best matches first."""
    # The term is handed to the 'hadith_arabic_text' full-text index; rows come
    # back ordered by relevance score and capped at ``limit``.
    cypher = """
        CALL db.index.fulltext.queryNodes('hadith_arabic_text', $topic)
        YIELD node, score
        RETURN node.id AS id,
               node.collection AS collection,
               node.hadith_number AS hadith_number,
               node.grade AS grade,
               left(node.matn_text, 300) AS matn_text,
               score
        ORDER BY score DESC
        LIMIT $limit
    """
    bindings = {"topic": topic, "limit": limit}

    summaries = []
    for record in db.neo4j_query(cypher, bindings):
        summaries.append(HadithSummary(
            id=str(record["id"]),
            # Missing graph properties fall back to empty/zero values.
            collection=record["collection"] or "",
            hadith_number=record["hadith_number"] or 0,
            grade=record["grade"],
            matn_text=record["matn_text"],
        ))
    return summaries
@router.get("/search/narrator/{narrator_name}", response_model=list[HadithSummary],
            summary="Find hadiths by narrator",
            description="Find all hadiths where a specific narrator appears in the chain. "
                        "Searches both Arabic name and transliteration. "
                        "Example: `/hadiths/search/narrator/أبو هريرة`")
async def search_by_narrator(
    narrator_name: str,
    limit: int = Query(50, ge=1, le=200, description="Maximum results"),
):
    """Find all hadiths narrated by a specific person.

    Args:
        narrator_name: Substring matched (case-sensitively, via ``CONTAINS``)
            against the narrator's Arabic name or transliteration.
        limit: Maximum number of hadiths to return (1-200).

    Returns:
        ``HadithSummary`` items ordered by collection and hadith number, with
        the matn truncated to 300 characters.
    """
    # A substring can match several narrators, and more than one of them may
    # appear in the same chain. DISTINCT keeps each hadith to a single row so
    # duplicates do not eat into the LIMIT.
    rows = db.neo4j_query("""
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE n.name_arabic CONTAINS $name
           OR n.name_transliterated CONTAINS $name
        RETURN DISTINCT h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               left(h.matn_text, 300) AS matn_text
        ORDER BY collection, hadith_number
        LIMIT $limit
    """, {"name": narrator_name, "limit": limit})
    return [HadithSummary(
        id=str(r["id"]), collection=r["collection"] or "",
        hadith_number=r["hadith_number"] or 0, grade=r["grade"],
        matn_text=r["matn_text"],
    ) for r in rows]

317
app/routers/narrators.py Normal file
View File

@ -0,0 +1,317 @@
"""
Narrator endpoints — profiles, teacher/student network, relationships, who met who.
"""
from fastapi import APIRouter, Query, HTTPException
from typing import Optional
from app.services.database import db
from app.models.schemas import (
NarratorProfile, NarratorSummary, HadithSummary,
NarratorInteraction, PlaceRelation,
PaginatedResponse, PaginationMeta,
)
router = APIRouter(prefix="/narrators", tags=["Narrators"])
@router.get("/search", response_model=list[NarratorSummary],
            summary="Search narrators by name",
            description="Full-text search across narrator names in both Arabic and Latin transliteration. "
                        "Uses Neo4j full-text index for fast matching.")
async def search_narrators(
    q: str = Query(
        ..., min_length=2,
        description="Narrator name in Arabic or transliteration. Examples: أبو هريرة, الزهري, Anas, Bukhari",
        examples=["أبو هريرة", "الزهري", "Anas ibn Malik"],
    ),
    limit: int = Query(20, ge=1, le=100, description="Maximum results to return"),
):
    """Look up narrators whose Arabic or transliterated name matches ``q``.

    The query text goes to the 'narrator_names' full-text index; each match is
    returned with its relevance score and the number of hadiths it appears in,
    highest score first, capped at ``limit``.
    """
    cypher = """
        CALL db.index.fulltext.queryNodes('narrator_names', $query)
        YIELD node, score
        WITH node AS n, score
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(h) AS hadith_count,
               score
        ORDER BY score DESC
        LIMIT $limit
    """
    bindings = {"query": q, "limit": limit}

    summaries = []
    for record in db.neo4j_query(cypher, bindings):
        # Column aliases line up with NarratorSummary's field names.
        summaries.append(NarratorSummary(**record))
    return summaries
@router.get("/profile/{name_arabic}", response_model=NarratorProfile,
            summary="Get full narrator profile",
            description="Complete narrator profile for the mobile app. Includes biography from classical "
                        "scholarship (Tahdhib al-Kamal, Taqrib al-Tahdhib), teacher/student network, "
                        "hadiths narrated, places, and tribal affiliations. "
                        "Example: `/narrators/profile/أبو هريرة`")
async def get_narrator_profile(name_arabic: str):
    """
    Full narrator profile — biography, hadiths, teachers, students,
    places, tribes. Powers the mobile app profile page.

    Args:
        name_arabic: Exact ``name_arabic`` of the narrator node.

    Returns:
        A ``NarratorProfile`` with the narrator's properties, the total number
        of hadiths they appear in, a sample of up to 50 of those hadiths,
        teachers, students, place relations and tribe names.

    Raises:
        HTTPException: 404 when no narrator has that exact Arabic name.
    """
    params = {"name": name_arabic}

    # Basic info
    narrator = db.neo4j_query_one("""
        MATCH (n:Narrator {name_arabic: $name})
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.full_nasab AS full_nasab,
               n.kunya AS kunya,
               n.nisba AS nisba,
               n.laqab AS laqab,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               n.reliability_detail AS reliability_detail,
               n.birth_year_hijri AS birth_year_hijri,
               n.death_year_hijri AS death_year_hijri,
               n.birth_year_ce AS birth_year_ce,
               n.death_year_ce AS death_year_ce,
               n.biography_summary_arabic AS biography_summary_arabic,
               n.biography_summary_english AS biography_summary_english,
               n.total_hadiths_narrated_approx AS total_hadiths_narrated_approx,
               n.bio_verified AS bio_verified
    """, params)
    if not narrator:
        raise HTTPException(status_code=404, detail="Narrator not found")

    # Total hadith count. Computed separately because the listing below is
    # capped at 50 rows, so its length would under-report prolific narrators.
    count_row = db.neo4j_query_one("""
        MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
        RETURN count(h) AS total
    """, params)
    hadith_count = count_row["total"] if count_row else 0

    # Hadiths (sample of at most 50, matn truncated to 200 characters)
    hadiths = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith)
        RETURN h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               left(h.matn_text, 200) AS matn_text
        ORDER BY h.collection, h.hadith_number
        LIMIT 50
    """, params)

    # Teachers (who taught this narrator)
    teachers = db.neo4j_query("""
        MATCH (teacher:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name})
        OPTIONAL MATCH (teacher)-[:APPEARS_IN]->(h:Hadith)
        RETURN teacher.name_arabic AS name_arabic,
               teacher.name_transliterated AS name_transliterated,
               teacher.entity_type AS entity_type,
               teacher.generation AS generation,
               teacher.reliability_grade AS reliability_grade,
               count(h) AS hadith_count
    """, params)

    # Students (who this narrator taught)
    students = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(student:Narrator)
        OPTIONAL MATCH (student)-[:APPEARS_IN]->(h:Hadith)
        RETURN student.name_arabic AS name_arabic,
               student.name_transliterated AS name_transliterated,
               student.entity_type AS entity_type,
               student.generation AS generation,
               student.reliability_grade AS reliability_grade,
               count(h) AS hadith_count
    """, params)

    # Places
    places = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[r:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place)
        RETURN p.name_arabic AS place, type(r) AS relation
    """, params)

    # Tribes
    tribes_rows = db.neo4j_query("""
        MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe)
        RETURN t.name_arabic AS tribe
    """, params)

    return NarratorProfile(
        **narrator,
        hadith_count=hadith_count,
        hadiths=[HadithSummary(
            id=str(h["id"]), collection=h["collection"] or "",
            hadith_number=h["hadith_number"] or 0, grade=h["grade"],
            matn_text=h["matn_text"],
        ) for h in hadiths],
        teachers=[NarratorSummary(**t) for t in teachers],
        students=[NarratorSummary(**s) for s in students],
        places=[PlaceRelation(**p) for p in places],
        tribes=[t["tribe"] for t in tribes_rows],
    )
@router.get("/by-generation/{generation}", response_model=list[NarratorSummary])
async def narrators_by_generation(
    generation: str,
    limit: int = Query(50, ge=1, le=200),
):
    """List narrators belonging to a generation (صحابي, تابعي, etc.)."""
    # Substring match on the generation property; narrators with the most
    # hadith appearances come first.
    cypher = """
        MATCH (n:Narrator)
        WHERE n.generation CONTAINS $gen
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(h) AS hadith_count
        ORDER BY hadith_count DESC
        LIMIT $limit
    """
    bindings = {"gen": generation, "limit": limit}

    summaries = []
    for record in db.neo4j_query(cypher, bindings):
        summaries.append(NarratorSummary(**record))
    return summaries
@router.get("/by-place/{place_name}", response_model=list[NarratorSummary])
async def narrators_by_place(
    place_name: str,
    limit: int = Query(50, ge=1, le=200),
):
    """Find narrators associated with a place."""
    # A narrator can reach matching places through several relationships
    # (e.g. born in and died in the same city). Collapse to one row per
    # narrator *before* counting hadiths, otherwise count(h) is multiplied by
    # the number of matching place relationships.
    rows = db.neo4j_query("""
        MATCH (n:Narrator)-[:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place)
        WHERE p.name_arabic CONTAINS $place
        WITH DISTINCT n
        OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith)
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               count(h) AS hadith_count
        ORDER BY hadith_count DESC
        LIMIT $limit
    """, {"place": place_name, "limit": limit})
    return [NarratorSummary(**r) for r in rows]
@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction],
            summary="Get all narrator interactions",
            description="Lists all relationships for a narrator: who they narrated from, "
                        "who narrated from them, their teachers, and their students. "
                        "Each interaction includes shared hadith count. "
                        "Example: `/narrators/interactions/الزهري`")
async def get_interactions(
    name_arabic: str,
    limit: int = Query(50, ge=1, le=200, description="Maximum interactions to return"),
):
    """
    Collect every relationship of one narrator: outgoing and incoming
    narration links plus teacher and student links.

    A single Cypher query gathers the four relationship groups into one
    combined list; that list is converted to ``NarratorInteraction`` models
    and truncated to ``limit``. Raises a 404 when the narrator is unknown.
    """
    cypher = """
        MATCH (n:Narrator {name_arabic: $name})
        OPTIONAL MATCH (n)-[r1:NARRATED_FROM]->(other1:Narrator)
        WITH n, collect(DISTINCT {
        narrator_b: other1.name_arabic,
        narrator_b_trans: other1.name_transliterated,
        type: 'NARRATED_FROM',
        hadith_ids: r1.hadith_ids
        }) AS outgoing
        OPTIONAL MATCH (other2:Narrator)-[r2:NARRATED_FROM]->(n)
        WITH n, outgoing, collect(DISTINCT {
        narrator_b: other2.name_arabic,
        narrator_b_trans: other2.name_transliterated,
        type: 'HEARD_BY',
        hadith_ids: r2.hadith_ids
        }) AS incoming
        OPTIONAL MATCH (teacher:Narrator)-[r3:TEACHER_OF]->(n)
        WITH n, outgoing, incoming, collect(DISTINCT {
        narrator_b: teacher.name_arabic,
        narrator_b_trans: teacher.name_transliterated,
        type: 'TEACHER_OF',
        hadith_ids: []
        }) AS teacher_rels
        OPTIONAL MATCH (n)-[r4:TEACHER_OF]->(student:Narrator)
        WITH n, outgoing, incoming, teacher_rels, collect(DISTINCT {
        narrator_b: student.name_arabic,
        narrator_b_trans: student.name_transliterated,
        type: 'STUDENT_OF',
        hadith_ids: []
        }) AS student_rels
        RETURN n.name_arabic AS narrator_a,
               n.name_transliterated AS narrator_a_trans,
               outgoing + incoming + teacher_rels + student_rels AS interactions
    """
    records = db.neo4j_query(cypher, {"name": name_arabic})
    if not records:
        raise HTTPException(status_code=404, detail="Narrator not found")

    subject = records[0]

    def _as_interaction(entry):
        """Convert one collected relationship map into a response model."""
        ids = entry.get("hadith_ids") or []
        return NarratorInteraction(
            narrator_a=subject["narrator_a"],
            narrator_a_transliterated=subject.get("narrator_a_trans") or "",
            narrator_b=entry["narrator_b"],
            narrator_b_transliterated=entry.get("narrator_b_trans") or "",
            relationship_type=entry["type"],
            # The count reflects every id; the id list itself is cut to 20.
            shared_hadith_count=len(ids),
            hadith_ids=[str(h) for h in ids[:20]],
        )

    # An OPTIONAL MATCH with no hit still contributes a map whose narrator_b
    # is null; those placeholders are skipped.
    interactions = [
        _as_interaction(entry)
        for entry in subject["interactions"]
        if entry.get("narrator_b")
    ]
    return interactions[:limit]
@router.get("/who-met-who", response_model=list[NarratorInteraction],
            summary="Check if two narrators are connected",
            description="Finds the shortest path between two narrators in the knowledge graph. "
                        "Reveals whether they had a direct or indirect relationship through "
                        "narration chains, teacher/student bonds, or shared connections. "
                        "Example: `/narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`")
async def who_met_who(
    narrator_a: str = Query(
        ..., description="First narrator name (Arabic). Example: الزهري",
        examples=["الزهري", "أبو هريرة"],
    ),
    narrator_b: str = Query(
        ..., description="Second narrator name (Arabic). Example: أنس بن مالك",
        examples=["أنس بن مالك", "عمر بن الخطاب"],
    ),
):
    """
    Check if two narrators had a relationship — did they meet,
    narrate from each other, or share a teacher/student bond?

    Args:
        narrator_a: Substring of the first narrator's Arabic name.
        narrator_b: Substring of the second narrator's Arabic name.

    Returns:
        One ``NarratorInteraction`` per relationship on the shortest path
        (at most 6 hops) between the best-connected pair of matching
        narrators, or an empty list when no such path exists.
    """
    # Both names are substring matches, so the MATCH yields a cartesian set of
    # candidate pairs. Two safeguards over taking an arbitrary first row:
    #   * ``a <> b`` drops self-pairs (both substrings hitting the same node),
    #     which shortestPath does not accept as start/end by default;
    #   * requiring a path and ordering by its length picks the closest
    #     connected pair instead of reporting "no connection" just because the
    #     first candidate pair happened to be unconnected.
    rows = db.neo4j_query("""
        MATCH (a:Narrator), (b:Narrator)
        WHERE a.name_arabic CONTAINS $name_a
          AND b.name_arabic CONTAINS $name_b
          AND a <> b
        MATCH path = shortestPath((a)-[*..6]-(b))
        WITH a, b, path,
             [r IN relationships(path) | {
                 type: type(r),
                 from: startNode(r).name_arabic,
                 from_trans: startNode(r).name_transliterated,
                 to: endNode(r).name_arabic,
                 to_trans: endNode(r).name_transliterated
             }] AS rels
        RETURN a.name_arabic AS narrator_a,
               a.name_transliterated AS narrator_a_trans,
               b.name_arabic AS narrator_b,
               b.name_transliterated AS narrator_b_trans,
               length(path) AS distance,
               rels
        ORDER BY distance ASC
        LIMIT 1
    """, {"name_a": narrator_a, "name_b": narrator_b})
    if not rows:
        return []

    row = rows[0]
    return [NarratorInteraction(
        narrator_a=rel["from"],
        narrator_a_transliterated=rel.get("from_trans") or "",
        narrator_b=rel["to"],
        narrator_b_transliterated=rel.get("to_trans") or "",
        relationship_type=rel["type"],
    ) for rel in (row.get("rels") or [])]

207
app/routers/search.py Normal file
View File

@ -0,0 +1,207 @@
"""
Search endpoints — semantic search (Qdrant + TEI) and full-text Arabic (Elasticsearch).
"""
from fastapi import APIRouter, Query, HTTPException
from typing import Optional
from app.services.database import db
from app.config import get_settings
from app.models.schemas import SemanticSearchResult, FullTextSearchResult, HadithSummary
router = APIRouter(prefix="/search", tags=["Search"])
async def get_embedding(text: str) -> list[float]:
    """Get embedding vector from TEI (BGE-M3).

    Posts ``text`` to ``{tei_url}/embed`` through the shared async HTTP client
    and returns the embedding for that single input.

    Args:
        text: The text to embed.

    Returns:
        The embedding as a list of floats.

    Raises:
        HTTPException: 502 when the TEI service cannot be reached, answers
            with a non-200 status, or returns a body that is not a non-empty
            JSON list.
    """
    # Function-scope import: this module does not import httpx at the top, but
    # db.http_client is an httpx.AsyncClient, so its errors are httpx errors.
    import httpx

    settings = get_settings()
    try:
        response = await db.http_client.post(
            f"{settings.tei_url}/embed",
            json={"inputs": text},
        )
    except httpx.HTTPError as exc:
        # Connection failures and timeouts are upstream problems: report them
        # as 502 like every other TEI failure instead of an unhandled 500.
        raise HTTPException(status_code=502, detail=f"TEI embedding request failed: {exc}") from exc

    if response.status_code != 200:
        raise HTTPException(status_code=502, detail=f"TEI embedding failed: {response.text}")

    try:
        embeddings = response.json()
    except ValueError as exc:  # body was not valid JSON
        raise HTTPException(status_code=502, detail="Unexpected TEI response format") from exc

    # TEI returns list of embeddings; we sent one input
    if isinstance(embeddings, list) and embeddings:
        first = embeddings[0]
        # Accept both a nested list (one vector per input) and a flat vector.
        return first if isinstance(first, list) else embeddings
    raise HTTPException(status_code=502, detail="Unexpected TEI response format")
@router.get("/semantic", response_model=list[SemanticSearchResult],
            summary="Semantic search (find by meaning)",
            description="Search hadiths by meaning using BGE-M3 multilingual embeddings + Qdrant. "
                        "Supports cross-language queries: search in English and find Arabic hadiths, or vice versa. "
                        "Example: `what did the prophet say about fasting` → finds Arabic hadiths about صيام")
async def semantic_search(
    q: str = Query(
        ..., min_length=2,
        description="Search query in any language. The embedding model handles Arabic, English, and Urdu.",
        examples=["what is the reward of prayer", "أحاديث عن الصيام", "حكم الربا"],
    ),
    collection: Optional[str] = Query(
        None,
        description="Filter by collection name. Example: Sahih Bukhari",
    ),
    limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
):
    """
    Semantic search — find hadiths by meaning, not just keywords.
    Supports Arabic, English, and cross-language queries.
    Uses BGE-M3 embeddings + Qdrant vector search.

    Args:
        q: Free-text query; embedded via ``get_embedding``.
        collection: Optional exact-value filter on the ``collection`` payload
            field.
        limit: Maximum number of hits.

    Returns:
        ``SemanticSearchResult`` items in the order Qdrant returns them, each
        with the score rounded to 4 decimals and the Arabic text truncated to
        300 characters.

    Raises:
        HTTPException: 502 from ``get_embedding``; 503 when the Qdrant search
            itself fails.
    """
    import asyncio

    settings = get_settings()

    # Get query embedding from TEI
    query_vector = await get_embedding(q)

    # Build Qdrant filter if collection specified
    query_filter = None
    if collection:
        from qdrant_client.models import Filter, FieldCondition, MatchValue
        query_filter = Filter(
            must=[FieldCondition(key="collection", match=MatchValue(value=collection))]
        )

    # Search Qdrant. The client call is synchronous, so run it in a worker
    # thread: calling it directly would block the event loop for every other
    # request and would stop /search/combined from overlapping its two
    # searches.
    try:
        results = await asyncio.to_thread(
            db.qdrant.search,
            collection_name=settings.qdrant_collection,
            query_vector=query_vector,
            limit=limit,
            query_filter=query_filter,
            with_payload=True,
        )
    except Exception as e:
        # Same mapping fulltext_search applies to Elasticsearch failures.
        raise HTTPException(status_code=503, detail=f"Qdrant error: {str(e)}") from e

    output = []
    for hit in results:
        payload = hit.payload or {}
        output.append(SemanticSearchResult(
            hadith=HadithSummary(
                # Fall back to the point id when the payload carries no id.
                id=str(payload.get("id", hit.id)),
                collection=payload.get("collection", ""),
                hadith_number=payload.get("hadith_number", 0),
                grade=payload.get("grade"),
                arabic_text=(payload.get("arabic_text") or "")[:300],
            ),
            score=round(hit.score, 4),
            collection=payload.get("collection", ""),
        ))
    return output
@router.get("/fulltext", response_model=list[FullTextSearchResult],
            summary="Full-text Arabic search",
            description="Keyword search using Elasticsearch with Arabic morphological analysis (stemming, root extraction). "
                        "Returns highlighted text fragments showing where matches occurred. "
                        "Handles both vocalized (الصَّلاة) and unvocalized (الصلاة) Arabic.")
async def fulltext_search(
    q: str = Query(
        ..., min_length=2,
        description="Arabic text search query. Examples: الصلاة (prayer), النكاح (marriage), الجهاد (jihad)",
        examples=["الصلاة", "صيام رمضان", "بيع وشراء"],
    ),
    collection: Optional[str] = Query(
        None,
        description="Filter by collection. Example: Sahih Muslim",
    ),
    limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"),
):
    """
    Full-text Arabic search using Elasticsearch.
    Supports Arabic morphological analysis.

    Args:
        q: Query text, matched with the ``arabic`` analyzer across the
            arabic_text, arabic_normalized, matn and sanad fields.
        collection: Optional ``match`` filter on ``collection_name``.
        limit: Maximum number of hits.

    Returns:
        ``FullTextSearchResult`` items with the score rounded to 4 decimals
        and the highlight fragments of all highlighted fields flattened into
        one list.

    Raises:
        HTTPException: 503 when the Elasticsearch request fails.
    """
    import asyncio

    settings = get_settings()

    # Build ES query
    must = [
        {
            "multi_match": {
                "query": q,
                # ^N boosts: original text outranks normalized, then matn/sanad.
                "fields": ["arabic_text^3", "arabic_normalized^2", "matn", "sanad"],
                "type": "best_fields",
                "analyzer": "arabic",
            }
        }
    ]
    if collection:
        must.append({"match": {"collection_name": collection}})

    body = {
        "query": {"bool": {"must": must}},
        "highlight": {
            "fields": {
                "arabic_text": {"fragment_size": 200, "number_of_fragments": 2},
                "matn": {"fragment_size": 200, "number_of_fragments": 1},
            }
        },
        "size": limit,
    }

    try:
        # The ES client call is synchronous; run it in a worker thread so it
        # does not block the event loop (and so /search/combined can overlap
        # it with the semantic search).
        response = await asyncio.to_thread(db.es.search, index=settings.es_index, body=body)
    except Exception as e:
        # ES index might not exist yet
        raise HTTPException(status_code=503, detail=f"Elasticsearch error: {str(e)}") from e

    output = []
    for hit in response["hits"]["hits"]:
        src = hit["_source"]
        highlights = []
        if "highlight" in hit:
            for field_highlights in hit["highlight"].values():
                highlights.extend(field_highlights)
        output.append(FullTextSearchResult(
            hadith=HadithSummary(
                # Fall back to the document id when _source carries no id.
                id=str(src.get("id", hit["_id"])),
                collection=src.get("collection_name", ""),
                hadith_number=src.get("hadith_number", 0),
                grade=src.get("grade"),
                arabic_text=(src.get("arabic_text") or "")[:300],
            ),
            score=round(hit["_score"], 4),
            highlights=highlights,
        ))
    return output
@router.get("/combined", response_model=dict,
            summary="Combined search (semantic + full-text)",
            description="Runs both semantic and full-text search in parallel and returns merged results. "
                        "Best for the mobile app search bar — gives both meaning-based and keyword-based results. "
                        "Returns `{semantic: [...], fulltext: [...], query: '...'}`")
async def combined_search(
    q: str = Query(
        ..., min_length=2,
        description="Search query. Works with Arabic keywords or natural language in any language.",
        examples=["الصلاة في وقتها", "hadith about charity"],
    ),
    collection: Optional[str] = Query(None, description="Filter by collection name"),
    limit: int = Query(10, ge=1, le=20, description="Results per search type (max 20)"),
):
    """
    Combined search — runs both semantic and full-text in parallel,
    returns merged results. Best for the mobile app search bar.

    Args:
        q: Query text passed unchanged to both searches.
        collection: Optional collection filter passed to both searches.
        limit: Maximum results per search type.

    Returns:
        A dict with ``semantic`` and ``fulltext`` result lists plus the
        original ``query``. A search that fails contributes an empty list
        rather than failing the whole request.
    """
    import asyncio
    import logging

    logger = logging.getLogger(__name__)

    # Call the sibling handlers directly with every argument supplied, so
    # their Query(...) defaults are never used as values.
    semantic_task = semantic_search(q=q, collection=collection, limit=limit)
    # Full-text only makes sense for Arabic queries
    fulltext_task = fulltext_search(q=q, collection=collection, limit=limit)

    semantic_results, fulltext_results = await asyncio.gather(
        semantic_task,
        fulltext_task,
        return_exceptions=True,
    )

    def _results_or_empty(label, outcome):
        """Return the result list, or [] (with a logged warning) on failure."""
        # BaseException, not Exception: with return_exceptions=True a cancelled
        # child is handed back as a CancelledError instance, which must not
        # leak into the response body.
        if isinstance(outcome, BaseException):
            # Degrading to an empty list is deliberate; doing so silently is
            # not — leave a trace for operators.
            logger.warning("combined_search: %s search failed: %r", label, outcome)
            return []
        return outcome

    return {
        "semantic": _results_or_empty("semantic", semantic_results),
        "fulltext": _results_or_empty("fulltext", fulltext_results),
        "query": q,
    }

0
app/services/__init__.py Normal file
View File

113
app/services/database.py Normal file
View File

@ -0,0 +1,113 @@
"""
Database connection manager — initializes and provides access to
PostgreSQL, Neo4j, Qdrant, and Elasticsearch clients.
"""
import psycopg2
import psycopg2.pool
import psycopg2.extras
from neo4j import GraphDatabase
from qdrant_client import QdrantClient
from elasticsearch import Elasticsearch
import httpx
from app.config import get_settings
class Database:
"""Singleton holding all DB connections."""
def __init__(self):
self.pg_pool = None
self.neo4j_driver = None
self.qdrant = None
self.es = None
self.http_client = None # for TEI embeddings
async def connect(self):
settings = get_settings()
# PostgreSQL connection pool
self.pg_pool = psycopg2.pool.ThreadedConnectionPool(
minconn=2,
maxconn=10,
host=settings.pg_host,
port=settings.pg_port,
dbname=settings.pg_dbname,
user=settings.pg_user,
password=settings.pg_password,
sslmode=settings.pg_sslmode,
)
print(f"✅ PostgreSQL pool created ({settings.pg_host})")
# Neo4j
self.neo4j_driver = GraphDatabase.driver(
settings.neo4j_uri,
auth=(settings.neo4j_user, settings.neo4j_password),
)
self.neo4j_driver.verify_connectivity()
print(f"✅ Neo4j connected ({settings.neo4j_uri})")
# Qdrant
self.qdrant = QdrantClient(
host=settings.qdrant_host,
port=settings.qdrant_port,
)
collections = self.qdrant.get_collections()
print(f"✅ Qdrant connected ({settings.qdrant_host}, {len(collections.collections)} collections)")
# Elasticsearch
self.es = Elasticsearch(settings.es_host)
if self.es.ping():
print(f"✅ Elasticsearch connected ({settings.es_host})")
else:
print(f"⚠️ Elasticsearch ping failed ({settings.es_host})")
# HTTP client for TEI embedding requests
self.http_client = httpx.AsyncClient(timeout=30.0)
print(f"✅ HTTP client ready (TEI: {settings.tei_url})")
async def disconnect(self):
if self.pg_pool:
self.pg_pool.closeall()
if self.neo4j_driver:
self.neo4j_driver.close()
if self.http_client:
await self.http_client.aclose()
print("🔌 All connections closed")
# ── PostgreSQL helpers ──
def get_pg(self):
conn = self.pg_pool.getconn()
try:
yield conn
finally:
self.pg_pool.putconn(conn)
def pg_query(self, query: str, params: tuple = None) -> list[dict]:
conn = self.pg_pool.getconn()
try:
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
cur.execute(query, params)
return [dict(row) for row in cur.fetchall()]
finally:
self.pg_pool.putconn(conn)
def pg_query_one(self, query: str, params: tuple = None) -> dict | None:
rows = self.pg_query(query, params)
return rows[0] if rows else None
# ── Neo4j helpers ──
def neo4j_query(self, query: str, params: dict = None) -> list[dict]:
with self.neo4j_driver.session() as session:
result = session.run(query, params or {})
return [dict(record) for record in result]
def neo4j_query_one(self, query: str, params: dict = None) -> dict | None:
rows = self.neo4j_query(query, params)
return rows[0] if rows else None
# Global instance
db = Database()

141
k8s/deployment.yaml Normal file
View File

@ -0,0 +1,141 @@
# Namespace that every other resource in this manifest is created in.
apiVersion: v1
kind: Namespace
metadata:
name: api
---
# Credentials consumed by the hadith-api Deployment through secretKeyRef.
# The values below are placeholders: replace them before applying, and keep
# real credentials out of version control.
apiVersion: v1
kind: Secret
metadata:
name: hadith-api-secrets
namespace: api
type: Opaque
# stringData takes plain-text values (no manual base64 encoding needed).
stringData:
PG_DBNAME: "REPLACE_ME"
PG_USER: "REPLACE_ME"
PG_PASSWORD: "REPLACE_ME"
NEO4J_PASSWORD: "NEO4J-PASS"
---
# Runs the API container as two replicas. All configuration is passed as
# HADITH_* environment variables; credentials come from hadith-api-secrets.
apiVersion: apps/v1
kind: Deployment
metadata:
name: hadith-api
namespace: api
labels:
app: hadith-api
spec:
replicas: 2
selector:
matchLabels:
app: hadith-api
template:
metadata:
labels:
app: hadith-api
spec:
containers:
- name: hadith-api
# NOTE(review): ":latest" is a mutable tag; consider pinning a version or digest.
image: registry.betelgeusebytes.io/hadith-api:latest # adjust to your registry
ports:
- containerPort: 8000
env:
# PostgreSQL: host/port/sslmode inline, db name and credentials from the Secret.
- name: HADITH_PG_HOST
value: "pg.betelgeusebytes.io"
- name: HADITH_PG_PORT
value: "5432"
- name: HADITH_PG_DBNAME
valueFrom:
secretKeyRef:
name: hadith-api-secrets
key: PG_DBNAME
- name: HADITH_PG_USER
valueFrom:
secretKeyRef:
name: hadith-api-secrets
key: PG_USER
- name: HADITH_PG_PASSWORD
valueFrom:
secretKeyRef:
name: hadith-api-secrets
key: PG_PASSWORD
- name: HADITH_PG_SSLMODE
value: "require"
# Neo4j: URI and user inline, password from the Secret.
- name: HADITH_NEO4J_URI
value: "neo4j+ssc://neo4j.betelgeusebytes.io:7687"
- name: HADITH_NEO4J_USER
value: "neo4j"
- name: HADITH_NEO4J_PASSWORD
valueFrom:
secretKeyRef:
name: hadith-api-secrets
key: NEO4J_PASSWORD
# Qdrant, Elasticsearch and TEI are addressed by in-cluster service DNS names.
- name: HADITH_QDRANT_HOST
value: "qdrant.vector.svc.cluster.local"
- name: HADITH_QDRANT_PORT
value: "6333"
- name: HADITH_QDRANT_COLLECTION
value: "hadiths"
- name: HADITH_ES_HOST
value: "http://elasticsearch.elastic.svc.cluster.local:9200"
- name: HADITH_ES_INDEX
value: "hadiths"
- name: HADITH_TEI_URL
value: "http://tei.ml.svc.cluster.local:80"
resources:
requests:
cpu: "250m"
memory: "256Mi"
limits:
cpu: "1000m"
memory: "512Mi"
# Both probes call GET /health on the container port.
# NOTE(review): confirm the application actually serves /health.
livenessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 10
periodSeconds: 30
readinessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 5
periodSeconds: 10
---
# Exposes the pods labelled app=hadith-api on port 80, forwarding to the
# container port 8000.
apiVersion: v1
kind: Service
metadata:
name: hadith-api
namespace: api
spec:
selector:
app: hadith-api
ports:
- port: 80
targetPort: 8000
protocol: TCP
---
# Routes all paths of api.betelgeusebytes.io to the hadith-api Service
# (port 80) through the nginx ingress class, with TLS stored in hadith-api-tls.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: hadith-api-ingress
namespace: api
annotations:
# cert-manager issuer used for the TLS certificate.
cert-manager.io/cluster-issuer: letsencrypt-prod
# Request bodies are capped at 10m; CORS is enabled at the ingress.
nginx.ingress.kubernetes.io/proxy-body-size: "10m"
nginx.ingress.kubernetes.io/enable-cors: "true"
spec:
ingressClassName: nginx
tls:
- hosts:
- api.betelgeusebytes.io
secretName: hadith-api-tls
rules:
- host: api.betelgeusebytes.io
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: hadith-api
port:
number: 80

10
requirements.txt Normal file
View File

@ -0,0 +1,10 @@
fastapi==0.115.0
uvicorn[standard]==0.30.0
psycopg2-binary==2.9.9
neo4j==5.25.0
qdrant-client==1.12.0
elasticsearch==8.14.0
pydantic==2.9.0
pydantic-settings==2.5.0
httpx==0.27.0
python-dotenv==1.0.1