# Source: hadith-api/app/routers/hadiths.py (Python; 232 lines, 9.4 KiB in the original listing)

"""
Hadith endpoints — details, listing, search by keyword/narrator/topic/place.
Free-text search queries (`q`) are Arabic-normalized for consistent matching;
collection names and IDs are passed to the database exactly as given.
All list endpoints support pagination via page + per_page.
"""
from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional
from app.services.database import db
from app.utils.arabic import normalize_query
from app.models.schemas import (
HadithDetail, HadithSummary, NarratorInChain, TopicTag,
PaginatedResponse, PaginationMeta,
)
# Endpoints registered on this router are served under the "/hadiths" prefix
# and carry the "Hadiths" tag.
router = APIRouter(prefix="/hadiths", tags=["Hadiths"])
def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
    """Build the pagination metadata attached to a list response.

    Args:
        total: Number of items matching the query across all pages.
        page: Page number that was requested.
        per_page: Page size that was requested.

    Returns:
        A ``PaginationMeta`` whose ``pages`` is ``total`` divided by
        ``per_page`` rounded up, and never less than 1.
    """
    # Adding (per_page - 1) before the floor division rounds the quotient up.
    rounded_up = (total + per_page - 1) // per_page
    page_count = rounded_up if rounded_up > 1 else 1
    return PaginationMeta(
        total=total,
        page=page,
        per_page=per_page,
        pages=page_count,
    )
# ── Single hadith by ID ────────────────────────────────────────────────────
@router.get("/{hadith_id}", response_model=HadithDetail)
async def get_hadith(hadith_id: str = Path(..., description="Hadith UUID")):
    """Return one hadith with its narrator chain and topic tags.

    The core record (texts, grade, collection name) is read from PostgreSQL.
    The narrator chain and the topics are then read from Neo4j using the same
    id; narrators come back ordered by their ``chain_order``.

    Args:
        hadith_id: Id of the hadith, compared against the textual form of
            ``hadiths.id``.

    Returns:
        A ``HadithDetail`` combining the PostgreSQL record with the Neo4j
        narrator chain and topics.

    Raises:
        HTTPException: 404 when PostgreSQL has no hadith with this id.
    """
    # Core record from PostgreSQL.
    record = db.pg_query_one("""
        SELECT h.id, c.name_english AS collection, h.hadith_number,
        h.book_number, h.grade, h.arabic_text, h.english_text,
        h.urdu_text, h.sanad, h.matn
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE h.id::text = %s
        """, (hadith_id,))
    if not record:
        raise HTTPException(status_code=404, detail="Hadith not found")

    # Narrators that appear in this hadith's chain, from Neo4j.
    narrator_rows = db.neo4j_query("""
        MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
        RETURN n.name_arabic AS name_arabic,
        n.name_transliterated AS name_transliterated,
        n.entity_type AS entity_type,
        n.generation AS generation,
        n.reliability_grade AS reliability_grade,
        r.chain_order AS order,
        r.transmission_verb AS transmission_verb
        ORDER BY r.chain_order
        """, {"hid": hadith_id})

    # Topic tags attached to this hadith, from Neo4j.
    topic_rows = db.neo4j_query("""
        MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic)
        RETURN t.topic_arabic AS topic_arabic,
        t.topic_english AS topic_english,
        t.category AS category
        """, {"hid": hadith_id})

    # Columns whose name is the same in the SQL row and in the response model.
    same_named_columns = (
        "collection",
        "hadith_number",
        "book_number",
        "grade",
        "arabic_text",
        "english_text",
        "urdu_text",
    )
    detail_fields = {"id": str(record["id"])}
    for column in same_named_columns:
        detail_fields[column] = record.get(column)
    # The sanad/matn columns are exposed under *_text names in the response.
    detail_fields["sanad_text"] = record.get("sanad")
    detail_fields["matn_text"] = record.get("matn")
    detail_fields["narrator_chain"] = [NarratorInChain(**row) for row in narrator_rows]
    detail_fields["topics"] = [TopicTag(**row) for row in topic_rows]
    return HadithDetail(**detail_fields)
# ── By collection + number ─────────────────────────────────────────────────
@router.get("/by-ref/{collection}/{number}", response_model=HadithDetail)
async def get_hadith_by_reference(
    collection: str = Path(..., description="Collection name, e.g. 'Sahih Bukhari'"),
    number: int = Path(..., description="Hadith number within the collection"),
):
    """Resolve a collection name + hadith number to the full hadith details.

    The pair is first translated to a hadith id in PostgreSQL; the response is
    then produced by ``get_hadith`` for that id.

    Args:
        collection: English collection name, matched exactly.
        number: Hadith number inside that collection.

    Returns:
        Whatever ``get_hadith`` returns for the resolved id.

    Raises:
        HTTPException: 404 when no hadith has this collection and number.
    """
    match = db.pg_query_one("""
        SELECT h.id FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s AND h.hadith_number = %s
        """, (collection, number))
    if match:
        # Reuse the by-id endpoint so both routes build the same response.
        return await get_hadith(str(match["id"]))
    raise HTTPException(
        status_code=404,
        detail=f"Hadith {collection} #{number} not found",
    )
# ── List by collection (paginated) ─────────────────────────────────────────
@router.get("/collection/{collection_name}", response_model=PaginatedResponse)
async def list_by_collection(
    collection_name: str = Path(..., description="Collection name"),
    page: int = Query(1, ge=1, description="Page number"),
    per_page: int = Query(20, ge=1, le=100, description="Items per page"),
):
    """Return one page of the hadiths that belong to a collection.

    Hadiths are ordered by hadith number and each summary carries only the
    first 300 characters of the Arabic text.

    Args:
        collection_name: English collection name, matched exactly.
        page: 1-based page number.
        per_page: Page size (1 to 100).

    Returns:
        A ``PaginatedResponse`` with the page's ``HadithSummary`` items and
        the pagination metadata.
    """
    matching = db.pg_count("""
        SELECT count(*) FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s
        """, (collection_name,))

    rows_to_skip = (page - 1) * per_page
    page_rows = db.pg_query("""
        SELECT h.id, c.name_english AS collection, h.hadith_number,
        h.grade, LEFT(h.arabic_text, 300) AS arabic_text
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s
        ORDER BY h.hadith_number
        LIMIT %s OFFSET %s
        """, (collection_name, per_page, rows_to_skip))

    summaries = []
    for row in page_rows:
        # The id is stringified explicitly; every other column is passed through.
        summary_id = str(row["id"])
        other_columns = {column: row[column] for column in row if column != "id"}
        summaries.append(HadithSummary(id=summary_id, **other_columns))

    return PaginatedResponse(
        data=summaries,
        meta=_paginate(matching, page, per_page),
    )
# ── Keyword search (paginated, normalized) ─────────────────────────────────
@router.get("/search/keyword", response_model=PaginatedResponse)
async def search_keyword(
    q: str = Query(..., min_length=2, description="Arabic keyword(s) — diacritics stripped automatically"),
    collection: Optional[str] = Query(None, description="Filter by collection"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Keyword search in Arabic hadith text.

    The query is passed through ``normalize_query`` and then matched as a
    case-insensitive substring of ``hadiths.arabic_text``. Results are ordered
    by collection name and hadith number, and each summary carries only the
    first 300 characters of the Arabic text.

    Args:
        q: Keyword(s) to look for (at least 2 characters before normalization).
        collection: Optional English collection name; when given, only hadiths
            from that collection are searched (exact match).
        page: 1-based page number.
        per_page: Page size (1 to 100).

    Returns:
        A ``PaginatedResponse`` with the page's ``HadithSummary`` items and
        the pagination metadata.
    """
    q_norm = normalize_query(q)

    # Escape LIKE metacharacters so the user's text is matched literally:
    # without this, "%" and "_" in the query act as wildcards (e.g. "__" would
    # match every hadith). Backslash is PostgreSQL's default LIKE escape
    # character, and must itself be escaped first.
    q_literal = (
        q_norm.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )

    where = "WHERE h.arabic_text ILIKE %s"
    params: list = [f"%{q_literal}%"]
    if collection:
        where += " AND c.name_english = %s"
        params.append(collection)

    # `where` only ever contains the fixed fragments above; all user input is
    # bound through placeholders.
    total = db.pg_count(
        f"SELECT count(*) FROM hadiths h JOIN collections c ON c.id = h.collection_id {where}",
        tuple(params),
    )

    offset = (page - 1) * per_page
    rows = db.pg_query(
        f"SELECT h.id, c.name_english AS collection, h.hadith_number, "
        f"h.grade, LEFT(h.arabic_text, 300) AS arabic_text "
        f"FROM hadiths h JOIN collections c ON c.id = h.collection_id "
        f"{where} ORDER BY c.name_english, h.hadith_number "
        f"LIMIT %s OFFSET %s",
        tuple(params + [per_page, offset]),
    )

    data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))
# ── Search by topic (paginated, normalized) ────────────────────────────────
@router.get("/search/topic", response_model=PaginatedResponse)
async def search_by_topic(
    q: str = Query(..., min_length=2, description="Topic keyword (Arabic or English)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Return one page of hadiths tagged with a matching topic in Neo4j.

    A topic matches when the normalized query is a case-insensitive substring
    of its Arabic or English name. Each hadith is counted and returned once,
    ordered by collection and hadith number, with the Arabic text cut to its
    first 300 characters.

    Args:
        q: Topic keyword (at least 2 characters before normalization).
        page: 1-based page number.
        per_page: Page size (1 to 100).

    Returns:
        A ``PaginatedResponse`` with the page's ``HadithSummary`` items and
        the pagination metadata.
    """
    needle = normalize_query(q)
    rows_to_skip = (page - 1) * per_page

    count_cypher = """
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
        OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
        """
    page_cypher = """
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
        OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
        h.collection AS collection,
        h.hadith_number AS hadith_number,
        h.grade AS grade,
        substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
        """

    matching = db.neo4j_count(count_cypher, {"q": needle})
    page_rows = db.neo4j_query(
        page_cypher,
        {"q": needle, "skip": rows_to_skip, "limit": per_page},
    )

    return PaginatedResponse(
        data=[HadithSummary(**row) for row in page_rows],
        meta=_paginate(matching, page, per_page),
    )
# ── Search by narrator (paginated, normalized) ─────────────────────────────
@router.get("/search/narrator", response_model=PaginatedResponse)
async def search_by_narrator(
    q: str = Query(..., min_length=2, description="Narrator name (Arabic)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Return one page of hadiths whose chain includes a matching narrator.

    A narrator matches when the normalized query is a case-insensitive
    substring of the narrator's Arabic name in Neo4j. Each hadith is counted
    and returned once, ordered by collection and hadith number, with the
    Arabic text cut to its first 300 characters.

    Args:
        q: Narrator name (at least 2 characters before normalization).
        page: 1-based page number.
        per_page: Page size (1 to 100).

    Returns:
        A ``PaginatedResponse`` with the page's ``HadithSummary`` items and
        the pagination metadata.
    """
    needle = normalize_query(q)
    rows_to_skip = (page - 1) * per_page

    count_cypher = """
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
        """
    page_cypher = """
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
        h.collection AS collection,
        h.hadith_number AS hadith_number,
        h.grade AS grade,
        substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
        """

    matching = db.neo4j_count(count_cypher, {"q": needle})
    page_rows = db.neo4j_query(
        page_cypher,
        {"q": needle, "skip": rows_to_skip, "limit": per_page},
    )

    return PaginatedResponse(
        data=[HadithSummary(**row) for row in page_rows],
        meta=_paginate(matching, page, per_page),
    )