commit 9d51393c86248fa72a818381eac592f77db3b493 Author: salah Date: Thu Feb 26 22:17:58 2026 +0100 feat: Implement Hadith and Narrator endpoints with search functionality - Added Hadith endpoints for retrieving details, listing by collection, and searching by keyword, topic, and narrator. - Introduced Narrator endpoints for searching narrators, retrieving profiles, and exploring interactions. - Created search endpoints for semantic and full-text search capabilities using Qdrant and Elasticsearch. - Established a database connection manager for PostgreSQL, Neo4j, Qdrant, and Elasticsearch. - Configured Kubernetes deployment with necessary secrets and environment variables for the API. - Updated requirements.txt with necessary dependencies for the application. diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..300a9df --- /dev/null +++ b/.env.example @@ -0,0 +1,22 @@ +# Hadith Scholar API — Environment Variables +# Copy to .env and fill in values + +HADITH_PG_HOST=pg.betelgeusebytes.io +HADITH_PG_PORT=5432 +HADITH_PG_DBNAME=REPLACE_ME +HADITH_PG_USER=REPLACE_ME +HADITH_PG_PASSWORD=REPLACE_ME +HADITH_PG_SSLMODE=require + +HADITH_NEO4J_URI=neo4j+ssc://neo4j.betelgeusebytes.io:7687 +HADITH_NEO4J_USER=neo4j +HADITH_NEO4J_PASSWORD=NEO4J-PASS + +HADITH_QDRANT_HOST=qdrant.vector.svc.cluster.local +HADITH_QDRANT_PORT=6333 +HADITH_QDRANT_COLLECTION=hadiths + +HADITH_ES_HOST=http://elasticsearch.elastic.svc.cluster.local:9200 +HADITH_ES_INDEX=hadiths + +HADITH_TEI_URL=http://tei.ml.svc.cluster.local:80 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b1666de --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install system deps for psycopg2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + libpq-dev gcc && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/config.py b/app/config.py new file mode 100644 index 0000000..9006aee --- /dev/null +++ b/app/config.py @@ -0,0 +1,42 @@ +from pydantic_settings import BaseSettings +from functools import lru_cache + + +class Settings(BaseSettings): + app_name: str = "Hadith Scholar API" + app_version: str = "0.1.0" + debug: bool = False + + # PostgreSQL + pg_host: str = "pg.betelgeusebytes.io" + pg_port: int = 5432 + pg_dbname: str = "REPLACE_ME" + pg_user: str = "REPLACE_ME" + pg_password: str = "REPLACE_ME" + pg_sslmode: str = "require" + + # Neo4j + neo4j_uri: str = "neo4j+ssc://neo4j.betelgeusebytes.io:7687" + neo4j_user: str = "neo4j" + neo4j_password: str = "NEO4J-PASS" + + # Qdrant + qdrant_host: str = "qdrant.vector.svc.cluster.local" + qdrant_port: int = 6333 + qdrant_collection: str = "hadiths" + + # Elasticsearch + es_host: str = "http://elasticsearch.elastic.svc.cluster.local:9200" + es_index: str = "hadiths" + + # TEI (embeddings) + tei_url: str = "http://tei.ml.svc.cluster.local:80" + + class Config: + env_file = ".env" + env_prefix = "HADITH_" + + +@lru_cache() +def get_settings() -> Settings: + return Settings() diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..f17d2f7 --- /dev/null +++ b/app/main.py @@ -0,0 +1,223 @@ +""" +Hadith Scholar API — FastAPI application. + +Endpoints: + /hadiths — hadith details, search by keyword/narrator/topic/collection + /narrators — narrator profiles, interactions, who-met-who + /chains — isnad chain visualization data + /search — semantic (Qdrant) + full-text Arabic (Elasticsearch) +""" +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +from contextlib import asynccontextmanager + +from app.config import get_settings +from app.services.database import db +from app.routers import hadiths, narrators, chains, search + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Connect all databases on startup, disconnect on shutdown.""" + await db.connect() + yield + await db.disconnect() + + +settings = get_settings() + +TAGS_METADATA = [ + { + "name": "Hadiths", + "description": ( + "Access and search hadith texts from 8+ major collections " + "(Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, " + "Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, and more). " + "Retrieve full hadith details including narrator chains, topic tags, " + "and sanad/matn separation. Search by Arabic keyword, narrator name, " + "topic, or collection." + ), + }, + { + "name": "Narrators", + "description": ( + "Narrator (رجال الحديث) profiles and relationships. " + "Each narrator includes biographical data from classical scholarship " + "(Tahdhib al-Kamal, Taqrib al-Tahdhib, etc.): full nasab, kunya, nisba, " + "generation (طبقة), reliability grading (جرح وتعديل), birth/death dates, " + "teachers, students, places, and tribal affiliations. " + "Use the who-met-who endpoint to explore narrator connections." + ), + }, + { + "name": "Isnad Chains", + "description": ( + "Isnad (chain of narration / إسناد) visualization data. " + "Returns graph-ready structures (nodes + links) for rendering " + "narrator chains using D3.js, vis.js, Cytoscape, or any graph library. " + "Each node includes narrator metadata; each link includes the " + "transmission verb (حدثنا، أخبرنا، عن، سمعت)." + ), + }, + { + "name": "Search", + "description": ( + "Multi-modal hadith search. **Semantic search** uses BGE-M3 multilingual " + "embeddings + Qdrant to find hadiths by meaning (supports Arabic, English, " + "and cross-language queries). **Full-text search** uses Elasticsearch with " + "Arabic morphological analysis for exact and fuzzy keyword matching. " + "**Combined search** runs both in parallel for the best results." + ), + }, + { + "name": "Root", + "description": "Health checks, statistics, and API metadata.", + }, +] + +app = FastAPI( + title=settings.app_name, + version=settings.app_version, + description=""" +# Hadith Scholar API — حَدِيثٌ + +Production-grade REST API for analyzing Islamic hadith literature across 8+ major collections. + +## Core Capabilities + +| Feature | Backend | Endpoint | +|---------|---------|----------| +| Hadith lookup & keyword search | PostgreSQL | `GET /hadiths/*` | +| Narrator profiles & biography | Neo4j | `GET /narrators/*` | +| Isnad chain visualization | Neo4j | `GET /chains/*` | +| Semantic search (by meaning) | Qdrant + BGE-M3 | `GET /search/semantic` | +| Full-text Arabic search | Elasticsearch | `GET /search/fulltext` | +| Narrator relationships | Neo4j | `GET /narrators/who-met-who` | + +## Data Sources +- **~41,000 hadiths** from Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, Muwatta Malik, and more +- **Narrator knowledge graph** with biographies, teacher/student networks, places, tribes +- **1024-dim multilingual embeddings** (BGE-M3) for semantic search across Arabic/English/Urdu + +## Authentication +Currently open (no auth required). API keys will be added in a future version. + +## Arabic Text +All Arabic text preserves original diacritics (تشكيل). Search endpoints accept both vocalized and unvocalized Arabic. + +## Example Queries +- Search for hadiths about prayer: `GET /hadiths/search/keyword?q=صلاة` +- Find narrator profile: `GET /narrators/profile/أبو هريرة` +- Semantic search: `GET /search/semantic?q=what did the prophet say about fasting` +- Who met who: `GET /narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك` + """, + docs_url="/docs", + redoc_url="/redoc", + openapi_url="/openapi.json", + openapi_tags=TAGS_METADATA, + lifespan=lifespan, + license_info={ + "name": "MIT", + }, + contact={ + "name": "Hadith Scholar API", + "url": "https://betelgeusebytes.io", + }, +) + +# CORS — allow all for development; tighten for production +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Register routers +app.include_router(hadiths.router) +app.include_router(narrators.router) +app.include_router(chains.router) +app.include_router(search.router) + + +@app.get("/", tags=["Root"]) +async def root(): + return { + "name": settings.app_name, + "version": settings.app_version, + "docs": "/docs", + "endpoints": { + "hadiths": "/hadiths", + "narrators": "/narrators", + "chains": "/chains", + "search": "/search", + }, + } + + +@app.get("/health", tags=["Root"]) +async def health(): + """Health check — verifies all service connections.""" + status = {"status": "ok", "services": {}} + + # PostgreSQL + try: + db.pg_query_one("SELECT 1 AS ok") + status["services"]["postgresql"] = "ok" + except Exception as e: + status["services"]["postgresql"] = f"error: {e}" + status["status"] = "degraded" + + # Neo4j + try: + db.neo4j_query_one("RETURN 1 AS ok") + status["services"]["neo4j"] = "ok" + except Exception as e: + status["services"]["neo4j"] = f"error: {e}" + status["status"] = "degraded" + + # Qdrant + try: + db.qdrant.get_collections() + status["services"]["qdrant"] = "ok" + except Exception as e: + status["services"]["qdrant"] = f"error: {e}" + status["status"] = "degraded" + + # Elasticsearch + try: + if db.es.ping(): + status["services"]["elasticsearch"] = "ok" + else: + status["services"]["elasticsearch"] = "unreachable" + status["status"] = "degraded" + except Exception as e: + status["services"]["elasticsearch"] = f"error: {e}" + status["status"] = "degraded" + + return status + + +@app.get("/stats", tags=["Root"]) +async def stats(): + """Database statistics.""" + pg_stats = db.pg_query_one(""" + SELECT + (SELECT COUNT(*) FROM hadiths) AS total_hadiths, + (SELECT COUNT(*) FROM collections) AS total_collections + """) + + neo4j_stats = db.neo4j_query_one(""" + MATCH (h:Hadith) WITH count(h) AS hadiths + MATCH (n:Narrator) WITH hadiths, count(n) AS narrators + MATCH (p:Place) WITH hadiths, narrators, count(p) AS places + MATCH (t:Tribe) WITH hadiths, narrators, places, count(t) AS tribes + MATCH ()-[r]->() WITH hadiths, narrators, places, tribes, count(r) AS relationships + RETURN hadiths, narrators, places, tribes, relationships + """) + + return { + "postgresql": pg_stats, + "neo4j_graph": neo4j_stats, + } diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/models/schemas.py b/app/models/schemas.py new file mode 100644 index 0000000..3b74fa6 --- /dev/null +++ b/app/models/schemas.py @@ -0,0 +1,352 @@ +from pydantic import BaseModel, Field +from typing import Optional + + +# ── Common ───────────────────────────────────────────────────────────────── + +class PaginationMeta(BaseModel): + total: int + page: int + per_page: int + pages: int + + model_config = { + "json_schema_extra": { + "examples": [{"total": 6986, "page": 1, "per_page": 20, "pages": 350}] + } + } + + +class PaginatedResponse(BaseModel): + meta: PaginationMeta + data: list + + +# ── Hadith ───────────────────────────────────────────────────────────────── + +class HadithSummary(BaseModel): + id: str = Field(description="Unique hadith UUID") + collection: str = Field(description="Collection name in English") + hadith_number: int = Field(description="Hadith number within collection") + grade: Optional[str] = Field(None, description="Grading: Sahih, Hasan, Da'if, etc.") + arabic_text: Optional[str] = Field(None, description="Full Arabic text (may be truncated in list views)") + matn_text: Optional[str] = Field(None, description="Body text only (without isnad)") + sanad_text: Optional[str] = Field(None, description="Chain of narration text only") + + model_config = { + "json_schema_extra": { + "examples": [{ + "id": "dcf8df41-3185-4e20-a9af-db3696a48c79", + "collection": "Sahih Bukhari", + "hadith_number": 1, + "grade": "Sahih", + "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ...", + "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ وَإِنَّمَا لِكُلِّ امْرِئٍ مَا نَوَى...", + "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ قَالَ حَدَّثَنَا سُفْيَانُ قَالَ حَدَّثَنَا يَحْيَى بْنُ سَعِيدٍ الأَنْصَارِيُّ" + }] + } + } + + +class TopicTag(BaseModel): + topic_arabic: str = Field(description="Topic name in Arabic, e.g. الصلاة") + topic_english: str = Field(description="Topic name in English, e.g. Prayer") + category: str = Field(description="Broad Islamic category: عقيدة، فقه، سيرة، أخلاق، تفسير") + + model_config = { + "json_schema_extra": { + "examples": [{ + "topic_arabic": "النية", + "topic_english": "Intention", + "category": "فقه" + }] + } + } + + +class NarratorInChain(BaseModel): + order: int = Field(description="Position in chain: 1=closest to compiler, last=closest to Prophet ﷺ") + name_arabic: str = Field(description="Narrator's Arabic name as it appears in the hadith text") + name_transliterated: str = Field("", description="Latin transliteration of the name") + entity_type: str = Field("", description="PERSON, KUNYA (أبو/أم), NISBA (attributional), or TITLE (رسول الله)") + transmission_verb: Optional[str] = Field(None, description="Exact Arabic transmission verb: حدثنا، أخبرنا، عن، سمعت") + + model_config = { + "json_schema_extra": { + "examples": [{ + "order": 1, + "name_arabic": "الْحُمَيْدِيُّ", + "name_transliterated": "al-Humaydi", + "entity_type": "NISBA", + "transmission_verb": "حَدَّثَنَا" + }] + } + } + + +class HadithDetail(BaseModel): + id: str = Field(description="Unique hadith UUID") + collection: str = Field(description="Collection English name") + hadith_number: int = Field(description="Number within collection") + grade: Optional[str] = Field(None, description="Hadith grade") + arabic_text: Optional[str] = Field(None, description="Complete Arabic text") + sanad_text: Optional[str] = Field(None, description="Isnad (chain) text only") + matn_text: Optional[str] = Field(None, description="Matn (body) text only") + narrator_chain: list[NarratorInChain] = Field(default_factory=list, description="Ordered narrator chain from Neo4j graph") + topics: list[TopicTag] = Field(default_factory=list, description="Topic tags for searchability") + + model_config = { + "json_schema_extra": { + "examples": [{ + "id": "dcf8df41-3185-4e20-a9af-db3696a48c79", + "collection": "Sahih Bukhari", + "hadith_number": 1, + "grade": "Sahih", + "arabic_text": "حَدَّثَنَا الْحُمَيْدِيُّ عَبْدُ اللَّهِ بْنُ الزُّبَيْرِ...", + "sanad_text": "حَدَّثَنَا الْحُمَيْدِيُّ...", + "matn_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ...", + "narrator_chain": [ + {"order": 1, "name_arabic": "الْحُمَيْدِيُّ", "name_transliterated": "al-Humaydi", "entity_type": "NISBA", "transmission_verb": "حَدَّثَنَا"}, + {"order": 2, "name_arabic": "سُفْيَانُ", "name_transliterated": "Sufyan", "entity_type": "PERSON", "transmission_verb": "حَدَّثَنَا"}, + ], + "topics": [ + {"topic_arabic": "النية", "topic_english": "Intention", "category": "فقه"}, + ] + }] + } + } + + +# ── Narrator ─────────────────────────────────────────────────────────────── + +class NarratorSummary(BaseModel): + name_arabic: str = Field(description="Primary Arabic name") + name_transliterated: str = Field("", description="Latin transliteration") + entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE") + generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين") + reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، صدوق، ضعيف، متروك") + hadith_count: int = Field(0, description="Number of hadiths this narrator appears in") + + model_config = { + "json_schema_extra": { + "examples": [{ + "name_arabic": "أَبُو هُرَيْرَةَ", + "name_transliterated": "Abu Hurayrah", + "entity_type": "KUNYA", + "generation": "صحابي", + "reliability_grade": "ثقة", + "hadith_count": 5374 + }] + } + } + + +class NameForm(BaseModel): + name: str = Field(description="Alternative name form") + type: str = Field(description="Name type: PERSON, KUNYA, NISBA, TITLE") + + +class FamilyInfo(BaseModel): + father: Optional[str] = None + mother: Optional[str] = None + spouse: Optional[str] = None + children: list[str] = Field(default_factory=list) + + +class PlaceRelation(BaseModel): + place: str = Field(description="Place name in Arabic") + relation: str = Field(description="BORN_IN, LIVED_IN, DIED_IN, or TRAVELED_TO") + + model_config = { + "json_schema_extra": { + "examples": [{"place": "المدينة", "relation": "LIVED_IN"}] + } + } + + +class NarratorProfile(BaseModel): + name_arabic: str = Field(description="Primary Arabic name") + name_transliterated: str = Field("", description="Latin transliteration") + entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE") + full_nasab: Optional[str] = Field(None, description="Full lineage: فلان بن فلان بن فلان") + kunya: Optional[str] = Field(None, description="أبو/أم name (e.g. أبو هريرة)") + nisba: Optional[str] = Field(None, description="Attributional name (e.g. البخاري، المدني، الزهري)") + laqab: Optional[str] = Field(None, description="Title or epithet (e.g. أمير المؤمنين في الحديث)") + generation: Optional[str] = Field(None, description="طبقة: صحابي، تابعي، تابع التابعين، أتباع تابع التابعين") + reliability_grade: Optional[str] = Field(None, description="جرح وتعديل: ثقة، ثقة حافظ، صدوق، ضعيف، متروك") + reliability_detail: Optional[str] = Field(None, description="Extended grading explanation from scholars") + birth_year_hijri: Optional[int] = Field(None, description="Birth year (Hijri calendar)") + death_year_hijri: Optional[int] = Field(None, description="Death year (Hijri calendar)") + birth_year_ce: Optional[int] = Field(None, description="Birth year (CE)") + death_year_ce: Optional[int] = Field(None, description="Death year (CE)") + biography_summary_arabic: Optional[str] = Field(None, description="2-3 sentence biography in Arabic") + biography_summary_english: Optional[str] = Field(None, description="2-3 sentence biography in English") + total_hadiths_narrated_approx: Optional[int] = Field(None, description="Approximate total hadiths narrated across all collections") + hadith_count: int = Field(0, description="Hadiths in current database") + hadiths: list[HadithSummary] = Field(default_factory=list, description="Sample hadiths narrated (max 50)") + teachers: list[NarratorSummary] = Field(default_factory=list, description="Known teachers / شيوخ") + students: list[NarratorSummary] = Field(default_factory=list, description="Known students / تلاميذ") + places: list[PlaceRelation] = Field(default_factory=list, description="Associated places (born, lived, died, traveled)") + tribes: list[str] = Field(default_factory=list, description="Tribal affiliations (e.g. قريش، دوس، الأنصار)") + bio_verified: bool = Field(False, description="Whether biography has been manually verified against classical sources") + + model_config = { + "json_schema_extra": { + "examples": [{ + "name_arabic": "أَبُو هُرَيْرَةَ", + "name_transliterated": "Abu Hurayrah", + "entity_type": "KUNYA", + "full_nasab": "عبد الرحمن بن صخر الدوسي", + "kunya": "أبو هريرة", + "nisba": "الدوسي", + "laqab": None, + "generation": "صحابي", + "reliability_grade": "ثقة", + "reliability_detail": "صحابي جليل، أكثر الصحابة رواية للحديث", + "birth_year_hijri": None, + "death_year_hijri": 57, + "birth_year_ce": None, + "death_year_ce": 676, + "biography_summary_arabic": "أبو هريرة الدوسي، صحابي جليل، أكثر الصحابة رواية للحديث النبوي. أسلم عام خيبر ولازم النبي ﷺ.", + "biography_summary_english": "Abu Hurayrah al-Dawsi, a prominent Companion and the most prolific narrator of hadith. He accepted Islam during Khaybar and remained close to the Prophet ﷺ.", + "total_hadiths_narrated_approx": 5374, + "hadith_count": 142, + "hadiths": [], + "teachers": [{"name_arabic": "رسول الله ﷺ", "name_transliterated": "Prophet Muhammad", "entity_type": "TITLE", "generation": None, "reliability_grade": None, "hadith_count": 0}], + "students": [{"name_arabic": "الزهري", "name_transliterated": "al-Zuhri", "entity_type": "NISBA", "generation": "تابعي", "reliability_grade": "ثقة", "hadith_count": 89}], + "places": [{"place": "المدينة", "relation": "LIVED_IN"}], + "tribes": ["دوس"], + "bio_verified": False, + }] + } + } + + +# ── Isnad Chain ──────────────────────────────────────────────────────────── + +class IsnadNode(BaseModel): + name_arabic: str = Field(description="Narrator Arabic name") + name_transliterated: str = Field("", description="Latin transliteration") + entity_type: str = Field("", description="PERSON, KUNYA, NISBA, TITLE") + generation: Optional[str] = Field(None, description="طبقة") + reliability_grade: Optional[str] = Field(None, description="جرح وتعديل grade") + + +class IsnadLink(BaseModel): + source: str = Field(description="name_arabic of narrator who received the hadith") + target: str = Field(description="name_arabic of narrator they received it from") + transmission_verb: Optional[str] = Field(None, description="Exact verb: حدثنا، أخبرنا، عن، سمعت، أنبأنا") + + +class IsnadChain(BaseModel): + hadith_id: str = Field(description="UUID of the hadith") + collection: str = Field(description="Collection name") + hadith_number: int = Field(description="Hadith number") + nodes: list[IsnadNode] = Field(default_factory=list, description="Narrator nodes for graph visualization") + links: list[IsnadLink] = Field(default_factory=list, description="Directed edges: source heard from target") + + model_config = { + "json_schema_extra": { + "examples": [{ + "hadith_id": "dcf8df41-3185-4e20-a9af-db3696a48c79", + "collection": "Sahih Bukhari", + "hadith_number": 1, + "nodes": [ + {"name_arabic": "الْحُمَيْدِيُّ", "name_transliterated": "al-Humaydi", "entity_type": "NISBA", "generation": "تابع التابعين", "reliability_grade": "ثقة"}, + {"name_arabic": "سُفْيَانُ بْنُ عُيَيْنَةَ", "name_transliterated": "Sufyan ibn Uyaynah", "entity_type": "PERSON", "generation": "تابع التابعين", "reliability_grade": "ثقة"}, + {"name_arabic": "يَحْيَى بْنُ سَعِيدٍ", "name_transliterated": "Yahya ibn Sa'id al-Ansari", "entity_type": "PERSON", "generation": "تابعي", "reliability_grade": "ثقة"}, + {"name_arabic": "عُمَرُ بْنُ الْخَطَّابِ", "name_transliterated": "Umar ibn al-Khattab", "entity_type": "PERSON", "generation": "صحابي", "reliability_grade": "ثقة"}, + ], + "links": [ + {"source": "الْحُمَيْدِيُّ", "target": "سُفْيَانُ بْنُ عُيَيْنَةَ", "transmission_verb": "حَدَّثَنَا"}, + {"source": "سُفْيَانُ بْنُ عُيَيْنَةَ", "target": "يَحْيَى بْنُ سَعِيدٍ", "transmission_verb": "حَدَّثَنَا"}, + {"source": "يَحْيَى بْنُ سَعِيدٍ", "target": "عُمَرُ بْنُ الْخَطَّابِ", "transmission_verb": "عن"}, + ] + }] + } + } + + +# ── Relationships / Who Met Who ──────────────────────────────────────────── + +class NarratorInteraction(BaseModel): + narrator_a: str = Field(description="First narrator Arabic name") + narrator_a_transliterated: str = Field("", description="First narrator transliteration") + narrator_b: str = Field(description="Second narrator Arabic name") + narrator_b_transliterated: str = Field("", description="Second narrator transliteration") + relationship_type: str = Field(description="NARRATED_FROM, TEACHER_OF, HEARD_BY, STUDENT_OF") + shared_hadith_count: int = Field(0, description="Number of hadiths connecting them") + hadith_ids: list[str] = Field(default_factory=list, description="IDs of connecting hadiths (max 20)") + + model_config = { + "json_schema_extra": { + "examples": [{ + "narrator_a": "الزهري", + "narrator_a_transliterated": "al-Zuhri", + "narrator_b": "أنس بن مالك", + "narrator_b_transliterated": "Anas ibn Malik", + "relationship_type": "NARRATED_FROM", + "shared_hadith_count": 23, + "hadith_ids": ["abc-123", "def-456"] + }] + } + } + + +class NarratorConnection(BaseModel): + narrator: str = Field(description="Connected narrator Arabic name") + narrator_transliterated: str = Field("", description="Transliteration") + connection_type: str = Field(description="Relationship type") + direction: str = Field(description="'incoming' (they → this) or 'outgoing' (this → them)") + + +class NarratorNetwork(BaseModel): + center: NarratorSummary + connections: list[NarratorConnection] = Field(default_factory=list) + total_connections: int = 0 + + +# ── Search ───────────────────────────────────────────────────────────────── + +class SemanticSearchResult(BaseModel): + hadith: HadithSummary = Field(description="Matching hadith") + score: float = Field(description="Cosine similarity score (0-1, higher = more relevant)") + collection: str = Field("", description="Collection name") + + model_config = { + "json_schema_extra": { + "examples": [{ + "hadith": { + "id": "abc-123", + "collection": "Sahih Bukhari", + "hadith_number": 1, + "grade": "Sahih", + "arabic_text": "إِنَّمَا الأَعْمَالُ بِالنِّيَّاتِ..." + }, + "score": 0.9234, + "collection": "Sahih Bukhari" + }] + } + } + + +class FullTextSearchResult(BaseModel): + hadith: HadithSummary = Field(description="Matching hadith") + score: float = Field(description="Elasticsearch relevance score (higher = more relevant)") + highlights: list[str] = Field(default_factory=list, description="Text fragments with highlighted matches") + + model_config = { + "json_schema_extra": { + "examples": [{ + "hadith": { + "id": "abc-123", + "collection": "Sahih Muslim", + "hadith_number": 1599, + "grade": "Sahih", + "arabic_text": "..." + }, + "score": 12.45, + "highlights": ["...عن الصلاة في المسجد الحرام..."] + }] + } + } diff --git a/app/routers/__init__.py b/app/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/routers/chains.py b/app/routers/chains.py new file mode 100644 index 0000000..7d37d46 --- /dev/null +++ b/app/routers/chains.py @@ -0,0 +1,137 @@ +""" +Isnad chain endpoints — chain visualization data for hadith detail views. +""" +from fastapi import APIRouter, Query, HTTPException + +from app.services.database import db +from app.models.schemas import IsnadChain, IsnadNode, IsnadLink + +router = APIRouter(prefix="/chains", tags=["Isnad Chains"]) + + +@router.get("/hadith/{hadith_id}", response_model=IsnadChain, + summary="Get isnad chain for a hadith", + description="Returns the complete isnad (chain of narration) as a graph structure " + "with nodes (narrators) and links (transmission relationships). " + "Ready for visualization with D3.js, vis.js, Cytoscape.js, or any graph library. " + "Each node includes narrator metadata (generation, reliability); " + "each link includes the transmission verb (حدثنا، عن، أخبرنا).") +async def get_isnad_chain(hadith_id: str): + """ + Get the full isnad chain for a hadith as a graph (nodes + links) + ready for visualization (D3.js, vis.js, etc.). + """ + # Get hadith info + hadith = db.neo4j_query_one(""" + MATCH (h:Hadith {id: $hid}) + RETURN h.id AS id, h.collection AS collection, h.hadith_number AS hadith_number + """, {"hid": hadith_id}) + + if not hadith: + raise HTTPException(status_code=404, detail="Hadith not found in graph") + + # Get chain nodes + nodes = db.neo4j_query(""" + MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid}) + RETURN n.name_arabic AS name_arabic, + n.name_transliterated AS name_transliterated, + n.entity_type AS entity_type, + n.generation AS generation, + n.reliability_grade AS reliability_grade, + r.chain_order AS chain_order + ORDER BY r.chain_order + """, {"hid": hadith_id}) + + # Get chain links (NARRATED_FROM within this hadith's narrators) + links = db.neo4j_query(""" + MATCH (a:Narrator)-[r1:APPEARS_IN]->(h:Hadith {id: $hid}) + MATCH (b:Narrator)-[r2:APPEARS_IN]->(h) + MATCH (a)-[nf:NARRATED_FROM]->(b) + WHERE $hid IN nf.hadith_ids + RETURN a.name_arabic AS source, + b.name_arabic AS target, + nf.transmission_verb AS transmission_verb + """, {"hid": hadith_id}) + + # If no NARRATED_FROM edges with hadith_id, fall back to chain order + if not links and len(nodes) > 1: + sorted_nodes = sorted(nodes, key=lambda n: n.get("chain_order") or 999) + links = [] + for i in range(len(sorted_nodes) - 1): + links.append({ + "source": sorted_nodes[i]["name_arabic"], + "target": sorted_nodes[i + 1]["name_arabic"], + "transmission_verb": None, + }) + + return IsnadChain( + hadith_id=str(hadith["id"]), + collection=hadith["collection"] or "", + hadith_number=hadith["hadith_number"] or 0, + nodes=[IsnadNode(**n) for n in nodes], + links=[IsnadLink(**l) for l in links], + ) + + +@router.get("/narrator/{name_arabic}", response_model=list[IsnadChain], + summary="Get all chains for a narrator", + description="Returns all isnad chains that include a specific narrator. " + "Useful for visualizing how a narrator connects to the Prophet ﷺ " + "through different transmission paths. " + "Example: `/chains/narrator/الزهري`") +async def get_narrator_chains( + name_arabic: str, + limit: int = Query(10, ge=1, le=50, description="Maximum chains to return"), +): + """ + Get all isnad chains that include a specific narrator. + Useful for seeing how a narrator connects to the Prophet ﷺ. + """ + hadith_ids = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith) + RETURN h.id AS id + LIMIT $limit + """, {"name": name_arabic, "limit": limit}) + + chains = [] + for row in hadith_ids: + chain = await get_isnad_chain(str(row["id"])) + chains.append(chain) + + return chains + + +@router.get("/common-chains", response_model=list[dict], + summary="Find shared chains between two narrators", + description="Find hadiths where both narrators appear in the same isnad chain. " + "Useful for verifying narrator relationships and finding corroborating chains. " + "Example: `/chains/common-chains?narrator_a=الزهري&narrator_b=أنس بن مالك`") +async def find_common_chains( + narrator_a: str = Query( + ..., description="First narrator (Arabic). Example: الزهري", + examples=["الزهري"], + ), + narrator_b: str = Query( + ..., description="Second narrator (Arabic). Example: أنس بن مالك", + examples=["أنس بن مالك"], + ), + limit: int = Query(10, ge=1, le=50, description="Maximum results"), +): + """ + Find hadiths where both narrators appear in the same chain. + Useful for verifying narrator relationships. + """ + rows = db.neo4j_query(""" + MATCH (a:Narrator)-[:APPEARS_IN]->(h:Hadith)<-[:APPEARS_IN]-(b:Narrator) + WHERE a.name_arabic CONTAINS $name_a + AND b.name_arabic CONTAINS $name_b + AND a <> b + RETURN h.id AS hadith_id, + h.collection AS collection, + h.hadith_number AS hadith_number, + a.name_arabic AS narrator_a, + b.name_arabic AS narrator_b + LIMIT $limit + """, {"name_a": narrator_a, "name_b": narrator_b, "limit": limit}) + + return [dict(r) for r in rows] diff --git a/app/routers/hadiths.py b/app/routers/hadiths.py new file mode 100644 index 0000000..013210e --- /dev/null +++ b/app/routers/hadiths.py @@ -0,0 +1,245 @@ +""" +Hadith endpoints — details, listing, search by keyword/narrator/topic/place. +""" +from fastapi import APIRouter, Query, HTTPException +from typing import Optional + +from app.services.database import db +from app.models.schemas import ( + HadithDetail, HadithSummary, NarratorInChain, TopicTag, + PaginatedResponse, PaginationMeta, +) + +router = APIRouter(prefix="/hadiths", tags=["Hadiths"]) + + +@router.get("/{hadith_id}", response_model=HadithDetail, + summary="Get hadith by ID", + description="Retrieve full hadith details including Arabic text, sanad/matn separation, " + "ordered narrator chain from the knowledge graph, and topic tags.") +async def get_hadith(hadith_id: str): + """Get full hadith details by ID, including narrator chain and topics from Neo4j.""" + + # Base hadith from PostgreSQL + hadith = db.pg_query_one(""" + SELECT h.id, c.name_english AS collection, h.hadith_number, + h.grade, h.arabic_text, h.sanad, h.matn + FROM hadiths h + JOIN collections c ON c.id = h.collection_id + WHERE h.id = %s + """, (hadith_id,)) + + if not hadith: + raise HTTPException(status_code=404, detail="Hadith not found") + + # Enrich with chain + topics from Neo4j + chain = db.neo4j_query(""" + MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid}) + RETURN n.name_arabic AS name_arabic, + n.name_transliterated AS name_transliterated, + n.entity_type AS entity_type, + r.chain_order AS order, + r.transmission_verb AS transmission_verb + ORDER BY r.chain_order + """, {"hid": hadith_id}) + + topics = db.neo4j_query(""" + MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic) + RETURN t.topic_arabic AS topic_arabic, + t.topic_english AS topic_english, + t.category AS category + """, {"hid": hadith_id}) + + return HadithDetail( + id=str(hadith["id"]), + collection=hadith["collection"], + hadith_number=hadith["hadith_number"], + grade=hadith["grade"], + arabic_text=hadith["arabic_text"], + sanad_text=hadith.get("sanad"), + matn_text=hadith.get("matn"), + narrator_chain=[NarratorInChain(**c) for c in chain], + topics=[TopicTag(**t) for t in topics], + ) + + +@router.get("/collection/{collection_name}", response_model=PaginatedResponse, + summary="List hadiths by collection", + description="Paginated listing of hadiths in a specific collection. " + "Collection names use partial matching (e.g. 'bukhari' matches 'Sahih Bukhari').") +async def list_by_collection( + collection_name: str = Field(description="Collection name (partial match). Examples: bukhari, muslim, tirmidhi, abudawud"), + page: int = Query(1, ge=1, description="Page number"), + per_page: int = Query(20, ge=1, le=100, description="Results per page"), +): + """List hadiths in a collection with pagination.""" + offset = (page - 1) * per_page + + total_row = db.pg_query_one(""" + SELECT COUNT(*) AS total + FROM hadiths h + JOIN collections c ON c.id = h.collection_id + WHERE c.name_english ILIKE %s + """, (f"%{collection_name}%",)) + total = total_row["total"] if total_row else 0 + + rows = db.pg_query(""" + SELECT h.id, c.name_english AS collection, h.hadith_number, + h.grade, LEFT(h.arabic_text, 300) AS arabic_text + FROM hadiths h + JOIN collections c ON c.id = h.collection_id + WHERE c.name_english ILIKE %s + ORDER BY h.hadith_number + LIMIT %s OFFSET %s + """, (f"%{collection_name}%", per_page, offset)) + + return PaginatedResponse( + meta=PaginationMeta( + total=total, page=page, per_page=per_page, + pages=(total + per_page - 1) // per_page, + ), + data=[HadithSummary( + id=str(r["id"]), collection=r["collection"], + hadith_number=r["hadith_number"], grade=r["grade"], + arabic_text=r["arabic_text"], + ) for r in rows], + ) + + +@router.get("/number/{collection_name}/{number}", response_model=HadithDetail) +async def get_by_number(collection_name: str, number: int): + """Get a hadith by collection name and number.""" + hadith = db.pg_query_one(""" + SELECT h.id + FROM hadiths h + JOIN collections c ON c.id = h.collection_id + WHERE c.name_english ILIKE %s AND h.hadith_number = %s + """, (f"%{collection_name}%", number)) + + if not hadith: + raise HTTPException(status_code=404, detail=f"Hadith #{number} not found in {collection_name}") + + return await get_hadith(str(hadith["id"])) + + +@router.get("/search/keyword", response_model=PaginatedResponse, + summary="Search hadiths by Arabic keyword", + description="Full-text keyword search across all hadith Arabic text. " + "Supports both vocalized (مَكَّةَ) and unvocalized (مكة) Arabic.") +async def search_by_keyword( + q: str = Query( + ..., min_length=2, + description="Arabic keyword to search. Examples: صلاة (prayer), زكاة (zakat), صيام (fasting), حج (hajj), نية (intention)", + examples=["صلاة", "الجنة", "رمضان"], + ), + collection: Optional[str] = Query( + None, + description="Filter by collection name. Examples: Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood", + examples=["Sahih Bukhari"], + ), + grade: Optional[str] = Query( + None, + description="Filter by hadith grade. Examples: Sahih, Hasan, Da'if", + examples=["Sahih"], + ), + page: int = Query(1, ge=1, description="Page number (1-indexed)"), + per_page: int = Query(20, ge=1, le=100, description="Results per page (max 100)"), +): + """Search hadiths by Arabic keyword in text.""" + offset = (page - 1) * per_page + + conditions = ["h.arabic_text ILIKE %s"] + params = [f"%{q}%"] + + if collection: + conditions.append("c.name_english ILIKE %s") + params.append(f"%{collection}%") + if grade: + conditions.append("h.grade ILIKE %s") + params.append(f"%{grade}%") + + where = " AND ".join(conditions) + + total_row = db.pg_query_one(f""" + SELECT COUNT(*) AS total + FROM hadiths h + JOIN collections c ON c.id = h.collection_id + WHERE {where} + """, tuple(params)) + total = total_row["total"] if total_row else 0 + + params.extend([per_page, offset]) + rows = db.pg_query(f""" + SELECT h.id, c.name_english AS collection, h.hadith_number, + h.grade, LEFT(h.arabic_text, 300) AS arabic_text + FROM hadiths h + JOIN collections c ON c.id = h.collection_id + WHERE {where} + ORDER BY c.name_english, h.hadith_number + LIMIT %s OFFSET %s + """, tuple(params)) + + return PaginatedResponse( + meta=PaginationMeta( + total=total, page=page, per_page=per_page, + pages=(total + per_page - 1) // per_page, + ), + data=[HadithSummary( + id=str(r["id"]), collection=r["collection"], + hadith_number=r["hadith_number"], grade=r["grade"], + arabic_text=r["arabic_text"], + ) for r in rows], + ) + + +@router.get("/search/topic/{topic}", response_model=list[HadithSummary]) +async def search_by_topic(topic: str, limit: int = Query(20, ge=1, le=100)): + """Search hadiths by topic tag (from Neo4j).""" + rows = db.neo4j_query(""" + CALL db.index.fulltext.queryNodes('hadith_arabic_text', $topic) + YIELD node, score + RETURN node.id AS id, + node.collection AS collection, + node.hadith_number AS hadith_number, + node.grade AS grade, + left(node.matn_text, 300) AS matn_text, + score + ORDER BY score DESC + LIMIT $limit + """, {"topic": topic, "limit": limit}) + + return [HadithSummary( + id=str(r["id"]), collection=r["collection"] or "", + hadith_number=r["hadith_number"] or 0, grade=r["grade"], + matn_text=r["matn_text"], + ) for r in rows] + + +@router.get("/search/narrator/{narrator_name}", response_model=list[HadithSummary], + summary="Find hadiths by narrator", + description="Find all hadiths where a specific narrator appears in the chain. " + "Searches both Arabic name and transliteration. " + "Example: `/hadiths/search/narrator/أبو هريرة`") +async def search_by_narrator( + narrator_name: str, + limit: int = Query(50, ge=1, le=200, description="Maximum results"), +): + """Find all hadiths narrated by a specific person.""" + rows = db.neo4j_query(""" + MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith) + WHERE n.name_arabic CONTAINS $name + OR n.name_transliterated CONTAINS $name + RETURN h.id AS id, + h.collection AS collection, + h.hadith_number AS hadith_number, + h.grade AS grade, + left(h.matn_text, 300) AS matn_text + ORDER BY h.collection, h.hadith_number + LIMIT $limit + """, {"name": narrator_name, "limit": limit}) + + return [HadithSummary( + id=str(r["id"]), collection=r["collection"] or "", + hadith_number=r["hadith_number"] or 0, grade=r["grade"], + matn_text=r["matn_text"], + ) for r in rows] diff --git a/app/routers/narrators.py b/app/routers/narrators.py new file mode 100644 index 0000000..7a61dc4 --- /dev/null +++ b/app/routers/narrators.py @@ -0,0 +1,317 @@ +""" +Narrator endpoints — profiles, teacher/student network, relationships, who met who. +""" +from fastapi import APIRouter, Query, HTTPException +from typing import Optional + +from app.services.database import db +from app.models.schemas import ( + NarratorProfile, NarratorSummary, HadithSummary, + NarratorInteraction, PlaceRelation, + PaginatedResponse, PaginationMeta, +) + +router = APIRouter(prefix="/narrators", tags=["Narrators"]) + + +@router.get("/search", response_model=list[NarratorSummary], + summary="Search narrators by name", + description="Full-text search across narrator names in both Arabic and Latin transliteration. " + "Uses Neo4j full-text index for fast matching.") +async def search_narrators( + q: str = Query( + ..., min_length=2, + description="Narrator name in Arabic or transliteration. Examples: أبو هريرة, الزهري, Anas, Bukhari", + examples=["أبو هريرة", "الزهري", "Anas ibn Malik"], + ), + limit: int = Query(20, ge=1, le=100, description="Maximum results to return"), +): + """Search narrators by name (Arabic or transliterated).""" + rows = db.neo4j_query(""" + CALL db.index.fulltext.queryNodes('narrator_names', $query) + YIELD node, score + WITH node AS n, score + OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith) + RETURN n.name_arabic AS name_arabic, + n.name_transliterated AS name_transliterated, + n.entity_type AS entity_type, + n.generation AS generation, + n.reliability_grade AS reliability_grade, + count(h) AS hadith_count, + score + ORDER BY score DESC + LIMIT $limit + """, {"query": q, "limit": limit}) + + return [NarratorSummary(**r) for r in rows] + + +@router.get("/profile/{name_arabic}", response_model=NarratorProfile, + summary="Get full narrator profile", + description="Complete narrator profile for the mobile app. Includes biography from classical " + "scholarship (Tahdhib al-Kamal, Taqrib al-Tahdhib), teacher/student network, " + "hadiths narrated, places, and tribal affiliations. " + "Example: `/narrators/profile/أبو هريرة`") +async def get_narrator_profile(name_arabic: str): + """ + Full narrator profile — biography, hadiths, teachers, students, + places, tribes. Powers the mobile app profile page. + """ + # Basic info + narrator = db.neo4j_query_one(""" + MATCH (n:Narrator {name_arabic: $name}) + RETURN n.name_arabic AS name_arabic, + n.name_transliterated AS name_transliterated, + n.entity_type AS entity_type, + n.full_nasab AS full_nasab, + n.kunya AS kunya, + n.nisba AS nisba, + n.laqab AS laqab, + n.generation AS generation, + n.reliability_grade AS reliability_grade, + n.reliability_detail AS reliability_detail, + n.birth_year_hijri AS birth_year_hijri, + n.death_year_hijri AS death_year_hijri, + n.birth_year_ce AS birth_year_ce, + n.death_year_ce AS death_year_ce, + n.biography_summary_arabic AS biography_summary_arabic, + n.biography_summary_english AS biography_summary_english, + n.total_hadiths_narrated_approx AS total_hadiths_narrated_approx, + n.bio_verified AS bio_verified + """, {"name": name_arabic}) + + if not narrator: + raise HTTPException(status_code=404, detail="Narrator not found") + + # Hadiths + hadiths = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:APPEARS_IN]->(h:Hadith) + RETURN h.id AS id, + h.collection AS collection, + h.hadith_number AS hadith_number, + h.grade AS grade, + left(h.matn_text, 200) AS matn_text + ORDER BY h.collection, h.hadith_number + LIMIT 50 + """, {"name": name_arabic}) + + # Teachers (who taught this narrator) + teachers = db.neo4j_query(""" + MATCH (teacher:Narrator)-[:TEACHER_OF]->(n:Narrator {name_arabic: $name}) + OPTIONAL MATCH (teacher)-[:APPEARS_IN]->(h:Hadith) + RETURN teacher.name_arabic AS name_arabic, + teacher.name_transliterated AS name_transliterated, + teacher.entity_type AS entity_type, + teacher.generation AS generation, + teacher.reliability_grade AS reliability_grade, + count(h) AS hadith_count + """, {"name": name_arabic}) + + # Students (who this narrator taught) + students = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:TEACHER_OF]->(student:Narrator) + OPTIONAL MATCH (student)-[:APPEARS_IN]->(h:Hadith) + RETURN student.name_arabic AS name_arabic, + student.name_transliterated AS name_transliterated, + student.entity_type AS entity_type, + student.generation AS generation, + student.reliability_grade AS reliability_grade, + count(h) AS hadith_count + """, {"name": name_arabic}) + + # Places + places = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[r:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place) + RETURN p.name_arabic AS place, type(r) AS relation + """, {"name": name_arabic}) + + # Tribes + tribes_rows = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name})-[:BELONGS_TO_TRIBE]->(t:Tribe) + RETURN t.name_arabic AS tribe + """, {"name": name_arabic}) + + return NarratorProfile( + **narrator, + hadith_count=len(hadiths), + hadiths=[HadithSummary( + id=str(h["id"]), collection=h["collection"] or "", + hadith_number=h["hadith_number"] or 0, grade=h["grade"], + matn_text=h["matn_text"], + ) for h in hadiths], + teachers=[NarratorSummary(**t) for t in teachers], + students=[NarratorSummary(**s) for s in students], + places=[PlaceRelation(**p) for p in places], + tribes=[t["tribe"] for t in tribes_rows], + ) + + +@router.get("/by-generation/{generation}", response_model=list[NarratorSummary]) +async def narrators_by_generation( + generation: str, + limit: int = Query(50, ge=1, le=200), +): + """List narrators by generation (صحابي, تابعي, etc.).""" + rows = db.neo4j_query(""" + MATCH (n:Narrator) + WHERE n.generation CONTAINS $gen + OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith) + RETURN n.name_arabic AS name_arabic, + n.name_transliterated AS name_transliterated, + n.entity_type AS entity_type, + n.generation AS generation, + n.reliability_grade AS reliability_grade, + count(h) AS hadith_count + ORDER BY hadith_count DESC + LIMIT $limit + """, {"gen": generation, "limit": limit}) + + return [NarratorSummary(**r) for r in rows] + + +@router.get("/by-place/{place_name}", response_model=list[NarratorSummary]) +async def narrators_by_place( + place_name: str, + limit: int = Query(50, ge=1, le=200), +): + """Find narrators associated with a place.""" + rows = db.neo4j_query(""" + MATCH (n:Narrator)-[:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(p:Place) + WHERE p.name_arabic CONTAINS $place + OPTIONAL MATCH (n)-[:APPEARS_IN]->(h:Hadith) + RETURN DISTINCT n.name_arabic AS name_arabic, + n.name_transliterated AS name_transliterated, + n.entity_type AS entity_type, + n.generation AS generation, + n.reliability_grade AS reliability_grade, + count(h) AS hadith_count + ORDER BY hadith_count DESC + LIMIT $limit + """, {"place": place_name, "limit": limit}) + + return [NarratorSummary(**r) for r in rows] + + +@router.get("/interactions/{name_arabic}", response_model=list[NarratorInteraction], + summary="Get all narrator interactions", + description="Lists all relationships for a narrator: who they narrated from, " + "who narrated from them, their teachers, and their students. " + "Each interaction includes shared hadith count. " + "Example: `/narrators/interactions/الزهري`") +async def get_interactions( + name_arabic: str, + limit: int = Query(50, ge=1, le=200, description="Maximum interactions to return"), +): + """ + Get all interactions of a narrator — who they narrated from, + who narrated from them, teachers, students. + """ + rows = db.neo4j_query(""" + MATCH (n:Narrator {name_arabic: $name}) + OPTIONAL MATCH (n)-[r1:NARRATED_FROM]->(other1:Narrator) + WITH n, collect(DISTINCT { + narrator_b: other1.name_arabic, + narrator_b_trans: other1.name_transliterated, + type: 'NARRATED_FROM', + hadith_ids: r1.hadith_ids + }) AS outgoing + OPTIONAL MATCH (other2:Narrator)-[r2:NARRATED_FROM]->(n) + WITH n, outgoing, collect(DISTINCT { + narrator_b: other2.name_arabic, + narrator_b_trans: other2.name_transliterated, + type: 'HEARD_BY', + hadith_ids: r2.hadith_ids + }) AS incoming + OPTIONAL MATCH (teacher:Narrator)-[r3:TEACHER_OF]->(n) + WITH n, outgoing, incoming, collect(DISTINCT { + narrator_b: teacher.name_arabic, + narrator_b_trans: teacher.name_transliterated, + type: 'TEACHER_OF', + hadith_ids: [] + }) AS teacher_rels + OPTIONAL MATCH (n)-[r4:TEACHER_OF]->(student:Narrator) + WITH n, outgoing, incoming, teacher_rels, collect(DISTINCT { + narrator_b: student.name_arabic, + narrator_b_trans: student.name_transliterated, + type: 'STUDENT_OF', + hadith_ids: [] + }) AS student_rels + RETURN n.name_arabic AS narrator_a, + n.name_transliterated AS narrator_a_trans, + outgoing + incoming + teacher_rels + student_rels AS interactions + """, {"name": name_arabic}) + + if not rows: + raise HTTPException(status_code=404, detail="Narrator not found") + + result = [] + row = rows[0] + for interaction in row["interactions"]: + if not interaction.get("narrator_b"): + continue + hadith_ids = interaction.get("hadith_ids") or [] + result.append(NarratorInteraction( + narrator_a=row["narrator_a"], + narrator_a_transliterated=row.get("narrator_a_trans") or "", + narrator_b=interaction["narrator_b"], + narrator_b_transliterated=interaction.get("narrator_b_trans") or "", + relationship_type=interaction["type"], + shared_hadith_count=len(hadith_ids), + hadith_ids=[str(h) for h in hadith_ids[:20]], + )) + + return result[:limit] + + +@router.get("/who-met-who", response_model=list[NarratorInteraction], + summary="Check if two narrators are connected", + description="Finds the shortest path between two narrators in the knowledge graph. " + "Reveals whether they had a direct or indirect relationship through " + "narration chains, teacher/student bonds, or shared connections. " + "Example: `/narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`") +async def who_met_who( + narrator_a: str = Query( + ..., description="First narrator name (Arabic). Example: الزهري", + examples=["الزهري", "أبو هريرة"], + ), + narrator_b: str = Query( + ..., description="Second narrator name (Arabic). Example: أنس بن مالك", + examples=["أنس بن مالك", "عمر بن الخطاب"], + ), +): + """ + Check if two narrators had a relationship — did they meet, + narrate from each other, or share a teacher/student bond? + """ + rows = db.neo4j_query(""" + MATCH (a:Narrator), (b:Narrator) + WHERE a.name_arabic CONTAINS $name_a + AND b.name_arabic CONTAINS $name_b + OPTIONAL MATCH path = shortestPath((a)-[*..6]-(b)) + WITH a, b, path, + [r IN relationships(path) | { + type: type(r), + from: startNode(r).name_arabic, + from_trans: startNode(r).name_transliterated, + to: endNode(r).name_arabic, + to_trans: endNode(r).name_transliterated + }] AS rels + RETURN a.name_arabic AS narrator_a, + a.name_transliterated AS narrator_a_trans, + b.name_arabic AS narrator_b, + b.name_transliterated AS narrator_b_trans, + length(path) AS distance, + rels + """, {"name_a": narrator_a, "name_b": narrator_b}) + + if not rows or rows[0].get("distance") is None: + return [] + + row = rows[0] + return [NarratorInteraction( + narrator_a=rel["from"], + narrator_a_transliterated=rel.get("from_trans") or "", + narrator_b=rel["to"], + narrator_b_transliterated=rel.get("to_trans") or "", + relationship_type=rel["type"], + ) for rel in (row.get("rels") or [])] diff --git a/app/routers/search.py b/app/routers/search.py new file mode 100644 index 0000000..3155ab2 --- /dev/null +++ b/app/routers/search.py @@ -0,0 +1,207 @@ +""" +Search endpoints — semantic search (Qdrant + TEI) and full-text Arabic (Elasticsearch). +""" +from fastapi import APIRouter, Query, HTTPException +from typing import Optional + +from app.services.database import db +from app.config import get_settings +from app.models.schemas import SemanticSearchResult, FullTextSearchResult, HadithSummary + +router = APIRouter(prefix="/search", tags=["Search"]) + + +async def get_embedding(text: str) -> list[float]: + """Get embedding vector from TEI (BGE-M3).""" + settings = get_settings() + response = await db.http_client.post( + f"{settings.tei_url}/embed", + json={"inputs": text}, + ) + if response.status_code != 200: + raise HTTPException(status_code=502, detail=f"TEI embedding failed: {response.text}") + + embeddings = response.json() + # TEI returns list of embeddings; we sent one input + if isinstance(embeddings, list) and len(embeddings) > 0: + if isinstance(embeddings[0], list): + return embeddings[0] + return embeddings + raise HTTPException(status_code=502, detail="Unexpected TEI response format") + + +@router.get("/semantic", response_model=list[SemanticSearchResult], + summary="Semantic search (find by meaning)", + description="Search hadiths by meaning using BGE-M3 multilingual embeddings + Qdrant. " + "Supports cross-language queries: search in English and find Arabic hadiths, or vice versa. " + "Example: `what did the prophet say about fasting` → finds Arabic hadiths about صيام") +async def semantic_search( + q: str = Query( + ..., min_length=2, + description="Search query in any language. The embedding model handles Arabic, English, and Urdu.", + examples=["what is the reward of prayer", "أحاديث عن الصيام", "حكم الربا"], + ), + collection: Optional[str] = Query( + None, + description="Filter by collection name. Example: Sahih Bukhari", + ), + limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"), +): + """ + Semantic search — find hadiths by meaning, not just keywords. + Supports Arabic, English, and cross-language queries. + Uses BGE-M3 embeddings + Qdrant vector search. + """ + settings = get_settings() + + # Get query embedding from TEI + query_vector = await get_embedding(q) + + # Build Qdrant filter if collection specified + query_filter = None + if collection: + from qdrant_client.models import Filter, FieldCondition, MatchValue + query_filter = Filter( + must=[FieldCondition(key="collection", match=MatchValue(value=collection))] + ) + + # Search Qdrant + results = db.qdrant.search( + collection_name=settings.qdrant_collection, + query_vector=query_vector, + limit=limit, + query_filter=query_filter, + with_payload=True, + ) + + output = [] + for hit in results: + payload = hit.payload or {} + output.append(SemanticSearchResult( + hadith=HadithSummary( + id=str(payload.get("id", hit.id)), + collection=payload.get("collection", ""), + hadith_number=payload.get("hadith_number", 0), + grade=payload.get("grade"), + arabic_text=(payload.get("arabic_text") or "")[:300], + ), + score=round(hit.score, 4), + collection=payload.get("collection", ""), + )) + + return output + + +@router.get("/fulltext", response_model=list[FullTextSearchResult], + summary="Full-text Arabic search", + description="Keyword search using Elasticsearch with Arabic morphological analysis (stemming, root extraction). " + "Returns highlighted text fragments showing where matches occurred. " + "Handles both vocalized (الصَّلاة) and unvocalized (الصلاة) Arabic.") +async def fulltext_search( + q: str = Query( + ..., min_length=2, + description="Arabic text search query. Examples: الصلاة (prayer), النكاح (marriage), الجهاد (jihad)", + examples=["الصلاة", "صيام رمضان", "بيع وشراء"], + ), + collection: Optional[str] = Query( + None, + description="Filter by collection. Example: Sahih Muslim", + ), + limit: int = Query(10, ge=1, le=50, description="Number of results (max 50)"), +): + """ + Full-text Arabic search using Elasticsearch. + Supports Arabic morphological analysis. + """ + settings = get_settings() + + # Build ES query + must = [ + { + "multi_match": { + "query": q, + "fields": ["arabic_text^3", "arabic_normalized^2", "matn", "sanad"], + "type": "best_fields", + "analyzer": "arabic", + } + } + ] + + if collection: + must.append({"match": {"collection_name": collection}}) + + body = { + "query": {"bool": {"must": must}}, + "highlight": { + "fields": { + "arabic_text": {"fragment_size": 200, "number_of_fragments": 2}, + "matn": {"fragment_size": 200, "number_of_fragments": 1}, + } + }, + "size": limit, + } + + try: + response = db.es.search(index=settings.es_index, body=body) + except Exception as e: + # ES index might not exist yet + raise HTTPException(status_code=503, detail=f"Elasticsearch error: {str(e)}") + + output = [] + for hit in response["hits"]["hits"]: + src = hit["_source"] + highlights = [] + if "highlight" in hit: + for field_highlights in hit["highlight"].values(): + highlights.extend(field_highlights) + + output.append(FullTextSearchResult( + hadith=HadithSummary( + id=str(src.get("id", hit["_id"])), + collection=src.get("collection_name", ""), + hadith_number=src.get("hadith_number", 0), + grade=src.get("grade"), + arabic_text=(src.get("arabic_text") or "")[:300], + ), + score=round(hit["_score"], 4), + highlights=highlights, + )) + + return output + + +@router.get("/combined", response_model=dict, + summary="Combined search (semantic + full-text)", + description="Runs both semantic and full-text search in parallel and returns merged results. " + "Best for the mobile app search bar — gives both meaning-based and keyword-based results. " + "Returns `{semantic: [...], fulltext: [...], query: '...'}`") +async def combined_search( + q: str = Query( + ..., min_length=2, + description="Search query. Works with Arabic keywords or natural language in any language.", + examples=["الصلاة في وقتها", "hadith about charity"], + ), + collection: Optional[str] = Query(None, description="Filter by collection name"), + limit: int = Query(10, ge=1, le=20, description="Results per search type (max 20)"), +): + """ + Combined search — runs both semantic and full-text in parallel, + returns merged results. Best for the mobile app search bar. + """ + import asyncio + + semantic_task = semantic_search(q=q, collection=collection, limit=limit) + # Full-text only makes sense for Arabic queries + fulltext_task = fulltext_search(q=q, collection=collection, limit=limit) + + semantic_results, fulltext_results = await asyncio.gather( + semantic_task, + fulltext_task, + return_exceptions=True, + ) + + return { + "semantic": semantic_results if not isinstance(semantic_results, Exception) else [], + "fulltext": fulltext_results if not isinstance(fulltext_results, Exception) else [], + "query": q, + } diff --git a/app/services/__init__.py b/app/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/services/database.py b/app/services/database.py new file mode 100644 index 0000000..5ad32df --- /dev/null +++ b/app/services/database.py @@ -0,0 +1,113 @@ +""" +Database connection manager — initializes and provides access to +PostgreSQL, Neo4j, Qdrant, and Elasticsearch clients. +""" +import psycopg2 +import psycopg2.pool +import psycopg2.extras +from neo4j import GraphDatabase +from qdrant_client import QdrantClient +from elasticsearch import Elasticsearch +import httpx + +from app.config import get_settings + + +class Database: + """Singleton holding all DB connections.""" + + def __init__(self): + self.pg_pool = None + self.neo4j_driver = None + self.qdrant = None + self.es = None + self.http_client = None # for TEI embeddings + + async def connect(self): + settings = get_settings() + + # PostgreSQL connection pool + self.pg_pool = psycopg2.pool.ThreadedConnectionPool( + minconn=2, + maxconn=10, + host=settings.pg_host, + port=settings.pg_port, + dbname=settings.pg_dbname, + user=settings.pg_user, + password=settings.pg_password, + sslmode=settings.pg_sslmode, + ) + print(f"✅ PostgreSQL pool created ({settings.pg_host})") + + # Neo4j + self.neo4j_driver = GraphDatabase.driver( + settings.neo4j_uri, + auth=(settings.neo4j_user, settings.neo4j_password), + ) + self.neo4j_driver.verify_connectivity() + print(f"✅ Neo4j connected ({settings.neo4j_uri})") + + # Qdrant + self.qdrant = QdrantClient( + host=settings.qdrant_host, + port=settings.qdrant_port, + ) + collections = self.qdrant.get_collections() + print(f"✅ Qdrant connected ({settings.qdrant_host}, {len(collections.collections)} collections)") + + # Elasticsearch + self.es = Elasticsearch(settings.es_host) + if self.es.ping(): + print(f"✅ Elasticsearch connected ({settings.es_host})") + else: + print(f"⚠️ Elasticsearch ping failed ({settings.es_host})") + + # HTTP client for TEI embedding requests + self.http_client = httpx.AsyncClient(timeout=30.0) + print(f"✅ HTTP client ready (TEI: {settings.tei_url})") + + async def disconnect(self): + if self.pg_pool: + self.pg_pool.closeall() + if self.neo4j_driver: + self.neo4j_driver.close() + if self.http_client: + await self.http_client.aclose() + print("🔌 All connections closed") + + # ── PostgreSQL helpers ── + + def get_pg(self): + conn = self.pg_pool.getconn() + try: + yield conn + finally: + self.pg_pool.putconn(conn) + + def pg_query(self, query: str, params: tuple = None) -> list[dict]: + conn = self.pg_pool.getconn() + try: + with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute(query, params) + return [dict(row) for row in cur.fetchall()] + finally: + self.pg_pool.putconn(conn) + + def pg_query_one(self, query: str, params: tuple = None) -> dict | None: + rows = self.pg_query(query, params) + return rows[0] if rows else None + + # ── Neo4j helpers ── + + def neo4j_query(self, query: str, params: dict = None) -> list[dict]: + with self.neo4j_driver.session() as session: + result = session.run(query, params or {}) + return [dict(record) for record in result] + + def neo4j_query_one(self, query: str, params: dict = None) -> dict | None: + rows = self.neo4j_query(query, params) + return rows[0] if rows else None + + +# Global instance +db = Database() diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml new file mode 100644 index 0000000..7c9dada --- /dev/null +++ b/k8s/deployment.yaml @@ -0,0 +1,141 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: api +--- +apiVersion: v1 +kind: Secret +metadata: + name: hadith-api-secrets + namespace: api +type: Opaque +stringData: + PG_DBNAME: "REPLACE_ME" + PG_USER: "REPLACE_ME" + PG_PASSWORD: "REPLACE_ME" + NEO4J_PASSWORD: "NEO4J-PASS" +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: hadith-api + namespace: api + labels: + app: hadith-api +spec: + replicas: 2 + selector: + matchLabels: + app: hadith-api + template: + metadata: + labels: + app: hadith-api + spec: + containers: + - name: hadith-api + image: registry.betelgeusebytes.io/hadith-api:latest # adjust to your registry + ports: + - containerPort: 8000 + env: + - name: HADITH_PG_HOST + value: "pg.betelgeusebytes.io" + - name: HADITH_PG_PORT + value: "5432" + - name: HADITH_PG_DBNAME + valueFrom: + secretKeyRef: + name: hadith-api-secrets + key: PG_DBNAME + - name: HADITH_PG_USER + valueFrom: + secretKeyRef: + name: hadith-api-secrets + key: PG_USER + - name: HADITH_PG_PASSWORD + valueFrom: + secretKeyRef: + name: hadith-api-secrets + key: PG_PASSWORD + - name: HADITH_PG_SSLMODE + value: "require" + - name: HADITH_NEO4J_URI + value: "neo4j+ssc://neo4j.betelgeusebytes.io:7687" + - name: HADITH_NEO4J_USER + value: "neo4j" + - name: HADITH_NEO4J_PASSWORD + valueFrom: + secretKeyRef: + name: hadith-api-secrets + key: NEO4J_PASSWORD + - name: HADITH_QDRANT_HOST + value: "qdrant.vector.svc.cluster.local" + - name: HADITH_QDRANT_PORT + value: "6333" + - name: HADITH_QDRANT_COLLECTION + value: "hadiths" + - name: HADITH_ES_HOST + value: "http://elasticsearch.elastic.svc.cluster.local:9200" + - name: HADITH_ES_INDEX + value: "hadiths" + - name: HADITH_TEI_URL + value: "http://tei.ml.svc.cluster.local:80" + resources: + requests: + cpu: "250m" + memory: "256Mi" + limits: + cpu: "1000m" + memory: "512Mi" + livenessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 10 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health + port: 8000 + initialDelaySeconds: 5 + periodSeconds: 10 +--- +apiVersion: v1 +kind: Service +metadata: + name: hadith-api + namespace: api +spec: + selector: + app: hadith-api + ports: + - port: 80 + targetPort: 8000 + protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: hadith-api-ingress + namespace: api + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + nginx.ingress.kubernetes.io/enable-cors: "true" +spec: + ingressClassName: nginx + tls: + - hosts: + - api.betelgeusebytes.io + secretName: hadith-api-tls + rules: + - host: api.betelgeusebytes.io + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: hadith-api + port: + number: 80 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..22d4d2e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +fastapi==0.115.0 +uvicorn[standard]==0.30.0 +psycopg2-binary==2.9.9 +neo4j==5.25.0 +qdrant-client==1.12.0 +elasticsearch==8.14.0 +pydantic==2.9.0 +pydantic-settings==2.5.0 +httpx==0.27.0 +python-dotenv==1.0.1