224 lines
7.5 KiB
Python
224 lines
7.5 KiB
Python
"""
|
|
Hadith Scholar API — FastAPI application.
|
|
|
|
Endpoints:
|
|
/hadiths — hadith details, search by keyword/narrator/topic/collection
|
|
/narrators — narrator profiles, interactions, who-met-who
|
|
/chains — isnad chain visualization data
|
|
/search — semantic (Qdrant) + full-text Arabic (Elasticsearch)
|
|
"""
|
|
from fastapi import FastAPI
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from contextlib import asynccontextmanager
|
|
|
|
from app.config import get_settings
|
|
from app.services.database import db
|
|
from app.routers import hadiths, narrators, chains, search
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Hold the database connections open for the lifetime of the app.

    Awaits ``db.connect()`` before the application starts serving and
    awaits ``db.disconnect()`` once it stops.

    Args:
        app: The FastAPI application this lifespan is attached to. It is
            not used inside the body.
    """
    await db.connect()
    try:
        yield
    finally:
        # An exception or cancellation thrown into the generator at the
        # ``yield`` would otherwise skip the disconnect and leak the
        # connections opened above.
        await db.disconnect()
|
|
|
|
|
|
# Application settings; `app_name` and `app_version` are read from this
# object when the FastAPI instance and the root payload are built.
settings = get_settings()

# Tag descriptions handed to FastAPI through `openapi_tags`. Each entry is a
# dict with a tag "name" and a human-readable "description"; the adjacent
# string literals in each description are joined into one string by Python.
TAGS_METADATA = [
    {
        "name": "Hadiths",
        "description": (
            "Access and search hadith texts from 8+ major collections "
            "(Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, "
            "Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, and more). "
            "Retrieve full hadith details including narrator chains, topic tags, "
            "and sanad/matn separation. Search by Arabic keyword, narrator name, "
            "topic, or collection."
        ),
    },
    {
        "name": "Narrators",
        "description": (
            "Narrator (رجال الحديث) profiles and relationships. "
            "Each narrator includes biographical data from classical scholarship "
            "(Tahdhib al-Kamal, Taqrib al-Tahdhib, etc.): full nasab, kunya, nisba, "
            "generation (طبقة), reliability grading (جرح وتعديل), birth/death dates, "
            "teachers, students, places, and tribal affiliations. "
            "Use the who-met-who endpoint to explore narrator connections."
        ),
    },
    {
        "name": "Isnad Chains",
        "description": (
            "Isnad (chain of narration / إسناد) visualization data. "
            "Returns graph-ready structures (nodes + links) for rendering "
            "narrator chains using D3.js, vis.js, Cytoscape, or any graph library. "
            "Each node includes narrator metadata; each link includes the "
            "transmission verb (حدثنا، أخبرنا، عن، سمعت)."
        ),
    },
    {
        "name": "Search",
        "description": (
            "Multi-modal hadith search. **Semantic search** uses BGE-M3 multilingual "
            "embeddings + Qdrant to find hadiths by meaning (supports Arabic, English, "
            "and cross-language queries). **Full-text search** uses Elasticsearch with "
            "Arabic morphological analysis for exact and fuzzy keyword matching. "
            "**Combined search** runs both in parallel for the best results."
        ),
    },
    {
        # Tag used by the "/", "/health" and "/stats" endpoints in this module.
        "name": "Root",
        "description": "Health checks, statistics, and API metadata.",
    },
]
|
|
|
|
# The ASGI application object. Title and version come from `settings`; the
# long `description` below is Markdown text supplied as the API description.
app = FastAPI(
    title=settings.app_name,
    version=settings.app_version,
    description="""
# Hadith Scholar API — حَدِيثٌ

Production-grade REST API for analyzing Islamic hadith literature across 8+ major collections.

## Core Capabilities

| Feature | Backend | Endpoint |
|---------|---------|----------|
| Hadith lookup & keyword search | PostgreSQL | `GET /hadiths/*` |
| Narrator profiles & biography | Neo4j | `GET /narrators/*` |
| Isnad chain visualization | Neo4j | `GET /chains/*` |
| Semantic search (by meaning) | Qdrant + BGE-M3 | `GET /search/semantic` |
| Full-text Arabic search | Elasticsearch | `GET /search/fulltext` |
| Narrator relationships | Neo4j | `GET /narrators/who-met-who` |

## Data Sources
- **~41,000 hadiths** from Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, Muwatta Malik, and more
- **Narrator knowledge graph** with biographies, teacher/student networks, places, tribes
- **1024-dim multilingual embeddings** (BGE-M3) for semantic search across Arabic/English/Urdu

## Authentication
Currently open (no auth required). API keys will be added in a future version.

## Arabic Text
All Arabic text preserves original diacritics (تشكيل). Search endpoints accept both vocalized and unvocalized Arabic.

## Example Queries
- Search for hadiths about prayer: `GET /hadiths/search/keyword?q=صلاة`
- Find narrator profile: `GET /narrators/profile/أبو هريرة`
- Semantic search: `GET /search/semantic?q=what did the prophet say about fasting`
- Who met who: `GET /narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`
""",
    # Paths for the interactive docs, ReDoc, and the raw OpenAPI schema.
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
    # Per-tag descriptions defined in TAGS_METADATA.
    openapi_tags=TAGS_METADATA,
    # Startup/shutdown hook that connects and disconnects the databases.
    lifespan=lifespan,
    license_info={
        "name": "MIT",
    },
    contact={
        "name": "Hadith Scholar API",
        "url": "https://betelgeusebytes.io",
    },
)
|
|
|
|
# CORS — allow all origins for development; tighten for production.
# Credentials are deliberately disabled: a wildcard origin must not be
# combined with `allow_credentials=True` (the CORS spec forbids it, and it
# would effectively let any site make credentialed requests). The API is
# currently unauthenticated, so no cookies or auth headers are needed.
# When auth is added, replace "*" with an explicit origin list before
# turning credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
# Mount each feature router on the application, in the same fixed order:
# hadiths, narrators, chains, search.
for _feature_module in (hadiths, narrators, chains, search):
    app.include_router(_feature_module.router)
|
|
|
|
|
|
@app.get("/", tags=["Root"])
async def root():
    # Landing payload: API name and version from settings, the docs path,
    # and a map of each top-level section to its URL prefix.
    # (Kept as comments rather than a docstring so the generated OpenAPI
    # description for this route does not change.)
    sections = ("hadiths", "narrators", "chains", "search")
    return {
        "name": settings.app_name,
        "version": settings.app_version,
        "docs": "/docs",
        "endpoints": {section: f"/{section}" for section in sections},
    }
|
|
|
|
|
|
@app.get("/health", tags=["Root"])
async def health():
    """Health check — verifies all service connections."""
    # One row per backing service, probed in this order:
    #   (report key, probe callable, falsy result means "unreachable").
    # Only the Elasticsearch ping reports failure through its return value;
    # the other probes signal failure solely by raising.
    probes = (
        ("postgresql", lambda: db.pg_query_one("SELECT 1 AS ok"), False),
        ("neo4j", lambda: db.neo4j_query_one("RETURN 1 AS ok"), False),
        ("qdrant", lambda: db.qdrant.get_collections(), False),
        ("elasticsearch", lambda: db.es.ping(), True),
    )

    services = {}
    overall = "ok"
    for label, probe, needs_truthy in probes:
        try:
            reply = probe()
        except Exception as exc:
            # A failing service is reported, never raised, so the remaining
            # services are still checked.
            outcome = f"error: {exc}"
        else:
            outcome = "unreachable" if needs_truthy and not reply else "ok"

        services[label] = outcome
        if outcome != "ok":
            overall = "degraded"

    return {"status": overall, "services": services}
|
|
|
|
|
|
@app.get("/stats", tags=["Root"])
async def stats():
    """Database statistics."""
    # Row counts of the two relational tables, fetched as a single row.
    relational_sql = """
        SELECT
            (SELECT COUNT(*) FROM hadiths) AS total_hadiths,
            (SELECT COUNT(*) FROM collections) AS total_collections
    """

    # Node counts per label plus the total relationship count; each WITH
    # carries the earlier totals forward to the final RETURN.
    graph_cypher = """
        MATCH (h:Hadith) WITH count(h) AS hadiths
        MATCH (n:Narrator) WITH hadiths, count(n) AS narrators
        MATCH (p:Place) WITH hadiths, narrators, count(p) AS places
        MATCH (t:Tribe) WITH hadiths, narrators, places, count(t) AS tribes
        MATCH ()-[r]->() WITH hadiths, narrators, places, tribes, count(r) AS relationships
        RETURN hadiths, narrators, places, tribes, relationships
    """

    # Dict entries are evaluated top to bottom, so PostgreSQL is queried
    # before Neo4j.
    return {
        "postgresql": db.pg_query_one(relational_sql),
        "neo4j_graph": db.neo4j_query_one(graph_cypher),
    }
|