# hadith-api/app/main.py
# (224 lines, 7.5 KiB, Python)

"""
Hadith Scholar API — FastAPI application.
Endpoints:
/hadiths — hadith details, search by keyword/narrator/topic/collection
/narrators — narrator profiles, interactions, who-met-who
/chains — isnad chain visualization data
/search — semantic (Qdrant) + full-text Arabic (Elasticsearch)
"""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from app.config import get_settings
from app.services.database import db
from app.routers import hadiths, narrators, chains, search
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared database connections for the application's lifetime.

    Calls ``db.connect()`` before the application starts serving and
    ``db.disconnect()`` once it stops.

    Args:
        app: The FastAPI application this lifespan is attached to (not used
            in the body).

    Yields:
        None. Control is handed back to the framework while the app runs.
    """
    await db.connect()
    try:
        yield
    finally:
        # Always release connections, even when an exception is thrown into
        # the generator at the yield point; otherwise they would be leaked.
        await db.disconnect()
# Resolve the application settings once at import time; `app_name` and
# `app_version` from this object feed the FastAPI title/version and the
# payload returned by the root endpoint.
settings = get_settings()
# OpenAPI tag metadata: one {"name", "description"} entry per tag, in the
# order the tags should be listed. Built from (name, description) pairs so
# every entry is guaranteed to have the same two keys in the same order.
TAGS_METADATA = [
    {"name": tag_name, "description": tag_description}
    for tag_name, tag_description in (
        (
            "Hadiths",
            "Access and search hadith texts from 8+ major collections "
            "(Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, "
            "Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, and more). "
            "Retrieve full hadith details including narrator chains, topic tags, "
            "and sanad/matn separation. Search by Arabic keyword, narrator name, "
            "topic, or collection.",
        ),
        (
            "Narrators",
            "Narrator (رجال الحديث) profiles and relationships. "
            "Each narrator includes biographical data from classical scholarship "
            "(Tahdhib al-Kamal, Taqrib al-Tahdhib, etc.): full nasab, kunya, nisba, "
            "generation (طبقة), reliability grading (جرح وتعديل), birth/death dates, "
            "teachers, students, places, and tribal affiliations. "
            "Use the who-met-who endpoint to explore narrator connections.",
        ),
        (
            "Isnad Chains",
            "Isnad (chain of narration / إسناد) visualization data. "
            "Returns graph-ready structures (nodes + links) for rendering "
            "narrator chains using D3.js, vis.js, Cytoscape, or any graph library. "
            "Each node includes narrator metadata; each link includes the "
            "transmission verb (حدثنا، أخبرنا، عن، سمعت).",
        ),
        (
            "Search",
            "Multi-modal hadith search. **Semantic search** uses BGE-M3 multilingual "
            "embeddings + Qdrant to find hadiths by meaning (supports Arabic, English, "
            "and cross-language queries). **Full-text search** uses Elasticsearch with "
            "Arabic morphological analysis for exact and fuzzy keyword matching. "
            "**Combined search** runs both in parallel for the best results.",
        ),
        (
            "Root",
            "Health checks, statistics, and API metadata.",
        ),
    )
]
# Markdown shown at the top of the generated API docs. Kept flush-left on
# purpose: indenting Markdown lines would change how they render.
_API_DESCRIPTION = """
# Hadith Scholar API — حَدِيثٌ
Production-grade REST API for analyzing Islamic hadith literature across 8+ major collections.
## Core Capabilities
| Feature | Backend | Endpoint |
|---------|---------|----------|
| Hadith lookup & keyword search | PostgreSQL | `GET /hadiths/*` |
| Narrator profiles & biography | Neo4j | `GET /narrators/*` |
| Isnad chain visualization | Neo4j | `GET /chains/*` |
| Semantic search (by meaning) | Qdrant + BGE-M3 | `GET /search/semantic` |
| Full-text Arabic search | Elasticsearch | `GET /search/fulltext` |
| Narrator relationships | Neo4j | `GET /narrators/who-met-who` |
## Data Sources
- **~41,000 hadiths** from Sahih Bukhari, Sahih Muslim, Sunan Abu Dawood, Jami` at-Tirmidhi, Sunan an-Nasa'i, Sunan Ibn Majah, Musnad Ahmad, Muwatta Malik, and more
- **Narrator knowledge graph** with biographies, teacher/student networks, places, tribes
- **1024-dim multilingual embeddings** (BGE-M3) for semantic search across Arabic/English/Urdu
## Authentication
Currently open (no auth required). API keys will be added in a future version.
## Arabic Text
All Arabic text preserves original diacritics (تشكيل). Search endpoints accept both vocalized and unvocalized Arabic.
## Example Queries
- Search for hadiths about prayer: `GET /hadiths/search/keyword?q=صلاة`
- Find narrator profile: `GET /narrators/profile/أبو هريرة`
- Semantic search: `GET /search/semantic?q=what did the prophet say about fasting`
- Who met who: `GET /narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس بن مالك`
"""

# The ASGI application object. Title and version come from the settings
# object; tag metadata and the lifespan handler are the ones defined above.
app = FastAPI(
    # Identity
    title=settings.app_name,
    version=settings.app_version,
    description=_API_DESCRIPTION,
    contact={"name": "Hadith Scholar API", "url": "https://betelgeusebytes.io"},
    license_info={"name": "MIT"},
    # Documentation routes
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
    openapi_tags=TAGS_METADATA,
    # Startup / shutdown handling
    lifespan=lifespan,
)
# CORS — allow all origins for development; tighten `allow_origins` for production.
# Credentials are deliberately disabled: combining a wildcard origin with
# allow_credentials=True is not a valid CORS configuration (FastAPI's docs
# require explicit origins/methods/headers whenever credentials are allowed),
# and the API currently has no cookie- or token-based authentication.
# Re-enable credentials only together with an explicit origin allow-list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Mount each feature router on the app, in the same order as before:
# hadiths, narrators, chains, search.
for _router_module in (hadiths, narrators, chains, search):
    app.include_router(_router_module.router)
@app.get("/", tags=["Root"])
async def root():
    # Landing payload: API name/version from settings, the docs path, and a
    # map of each top-level route group to its URL prefix.
    # (Plain comments rather than a docstring: FastAPI would publish a
    # docstring as this operation's description in the OpenAPI schema.)
    route_groups = ("hadiths", "narrators", "chains", "search")
    payload = {
        "name": settings.app_name,
        "version": settings.app_version,
        "docs": "/docs",
    }
    payload["endpoints"] = {group: f"/{group}" for group in route_groups}
    return payload
async def _probe_service(check, *, require_truthy=False):
    """Run one connectivity check in the thread pool and summarize the outcome.

    Args:
        check: Zero-argument callable that performs the probe. It signals
            failure by raising an exception.
        require_truthy: When true, a falsy return value from ``check`` is
            also treated as a failure and reported as ``"unreachable"``.

    Returns:
        ``"ok"``, ``"unreachable"``, or ``"error: <exception text>"``.
    """
    # Local import so this block needs no new module-level import.
    from fastapi.concurrency import run_in_threadpool

    try:
        # The db helpers are plain synchronous calls (they are not awaited),
        # so run them in a worker thread: a slow or unreachable backend must
        # not stall the event loop for every other request.
        outcome = await run_in_threadpool(check)
    except Exception as exc:
        # NOTE(review): the raw exception text is returned to the caller, as
        # before. On a public endpoint this may expose connection details;
        # consider logging it server-side and returning a generic message.
        return f"error: {exc}"
    if require_truthy and not outcome:
        return "unreachable"
    return "ok"


@app.get("/health", tags=["Root"])
async def health():
    """Health check — verifies all service connections."""
    # (name, probe, whether a falsy result also counts as a failure).
    # Each probe is a lambda so that attribute lookups such as `db.qdrant`
    # happen inside the guarded call, exactly as in a try block.
    probes = (
        ("postgresql", lambda: db.pg_query_one("SELECT 1 AS ok"), False),
        ("neo4j", lambda: db.neo4j_query_one("RETURN 1 AS ok"), False),
        ("qdrant", lambda: db.qdrant.get_collections(), False),
        # ping() reports reachability through its return value.
        ("elasticsearch", lambda: db.es.ping(), True),
    )

    services = {}
    for service_name, check, require_truthy in probes:
        services[service_name] = await _probe_service(
            check, require_truthy=require_truthy
        )

    # Any non-"ok" service downgrades the overall status.
    all_ok = all(state == "ok" for state in services.values())
    return {"status": "ok" if all_ok else "degraded", "services": services}
@app.get("/stats", tags=["Root"])
async def stats():
    """Database statistics."""
    # Row counts from the relational store.
    # NOTE(review): both db helpers are called synchronously inside an async
    # handler; if they block on network I/O they hold up the event loop.
    pg_stats = db.pg_query_one("""
        SELECT
        (SELECT COUNT(*) FROM hadiths) AS total_hadiths,
        (SELECT COUNT(*) FROM collections) AS total_collections
    """)
    # Node and relationship counts from the graph store.
    # Every stage after the first uses OPTIONAL MATCH. With a plain MATCH,
    # a label that has no nodes (or a graph with no relationships) produces
    # zero rows, and because the earlier counts are carried as grouping keys
    # the aggregation then yields no row at all, discarding every count.
    # OPTIONAL MATCH keeps the row and count() of the null binding is 0.
    neo4j_stats = db.neo4j_query_one("""
        MATCH (h:Hadith) WITH count(h) AS hadiths
        OPTIONAL MATCH (n:Narrator) WITH hadiths, count(n) AS narrators
        OPTIONAL MATCH (p:Place) WITH hadiths, narrators, count(p) AS places
        OPTIONAL MATCH (t:Tribe) WITH hadiths, narrators, places, count(t) AS tribes
        OPTIONAL MATCH ()-[r]->() WITH hadiths, narrators, places, tribes, count(r) AS relationships
        RETURN hadiths, narrators, places, tribes, relationships
    """)
    return {
        "postgresql": pg_stats,
        "neo4j_graph": neo4j_stats,
    }