# hadith-api/app/routers/hadiths.py

"""
Hadith endpoints — details, listing, search by keyword/narrator/topic/place.
Free-text search queries (`q`) are Arabic-normalized for consistent matching;
other parameters (e.g. collection names) are matched exactly as given.
All list endpoints support pagination via page + per_page.
"""
from fastapi import APIRouter, Query, Path, HTTPException
from typing import Optional

from app.services.database import db
from app.utils.arabic import normalize_query
from app.models.schemas import (
    HadithDetail, HadithSummary, NarratorInChain, TopicTag,
    PaginatedResponse, PaginationMeta,
)

router = APIRouter(prefix="/hadiths", tags=["Hadiths"])


def _paginate(total: int, page: int, per_page: int) -> PaginationMeta:
    """Build the pagination metadata for a list response.

    The page count is ``total`` divided by ``per_page`` rounded up, and is
    never reported as less than 1 (so an empty result still has one page).
    """
    # Integer ceiling division: adding per_page - 1 rounds any remainder up.
    rounded_up = (total + per_page - 1) // per_page
    page_count = rounded_up if rounded_up > 1 else 1
    return PaginationMeta(
        total=total,
        page=page,
        per_page=per_page,
        pages=page_count,
    )


# ── Single hadith by ID ────────────────────────────────────────────────────

@router.get("/{hadith_id}", response_model=HadithDetail)
async def get_hadith(hadith_id: str = Path(..., description="Hadith UUID")):
    """Return a single hadith with its narrator chain and topic tags.

    The hadith text and collection name are read from PostgreSQL. The narrator
    chain (sorted by ``chain_order``) and the topic tags are then read from
    Neo4j using the same ID.

    Raises:
        HTTPException: 404 when PostgreSQL has no hadith with this ID.
    """
    hadith_sql = """
        SELECT h.id, c.name_english AS collection, h.hadith_number,
               h.book_number, h.grade, h.arabic_text, h.english_text,
               h.urdu_text, h.sanad, h.matn
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE h.id::text = %s
    """
    record = db.pg_query_one(hadith_sql, (hadith_id,))
    if not record:
        raise HTTPException(status_code=404, detail="Hadith not found")

    # Graph lookups only run once the relational row is known to exist.
    chain_cypher = """
        MATCH (n:Narrator)-[r:APPEARS_IN]->(h:Hadith {id: $hid})
        RETURN n.name_arabic AS name_arabic,
               n.name_transliterated AS name_transliterated,
               n.entity_type AS entity_type,
               n.generation AS generation,
               n.reliability_grade AS reliability_grade,
               r.chain_order AS order,
               r.transmission_verb AS transmission_verb
        ORDER BY r.chain_order
    """
    chain_rows = db.neo4j_query(chain_cypher, {"hid": hadith_id})

    topic_cypher = """
        MATCH (h:Hadith {id: $hid})-[:HAS_TOPIC]->(t:Topic)
        RETURN t.topic_arabic AS topic_arabic,
               t.topic_english AS topic_english,
               t.category AS category
    """
    topic_rows = db.neo4j_query(topic_cypher, {"hid": hadith_id})

    # Columns whose name is the same in the SQL row and in the response model.
    same_named = (
        "collection",
        "hadith_number",
        "book_number",
        "grade",
        "arabic_text",
        "english_text",
        "urdu_text",
    )
    fields = {"id": str(record["id"])}
    for column in same_named:
        fields[column] = record.get(column)
    # The remaining two columns are exposed under different names.
    fields["sanad_text"] = record.get("sanad")
    fields["matn_text"] = record.get("matn")
    fields["narrator_chain"] = [NarratorInChain(**link) for link in chain_rows]
    fields["topics"] = [TopicTag(**tag) for tag in topic_rows]
    return HadithDetail(**fields)


# ── By collection + number ─────────────────────────────────────────────────

@router.get("/by-ref/{collection}/{number}", response_model=HadithDetail)
async def get_hadith_by_reference(
    collection: str = Path(..., description="Collection name, e.g. 'Sahih Bukhari'"),
    number: int = Path(..., description="Hadith number within the collection"),
):
    """Resolve a (collection name, hadith number) pair to a full hadith.

    The pair is first mapped to a hadith ID in PostgreSQL (the collection is
    compared to ``name_english`` exactly), then the detail lookup is delegated
    to ``get_hadith``.

    Raises:
        HTTPException: 404 when no hadith matches the reference.
    """
    lookup_sql = """
        SELECT h.id FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s AND h.hadith_number = %s
    """
    match = db.pg_query_one(lookup_sql, (collection, number))
    if match:
        # Reuse the by-ID endpoint so both routes return the same payload.
        return await get_hadith(str(match["id"]))
    raise HTTPException(status_code=404, detail=f"Hadith {collection} #{number} not found")


# ── List by collection (paginated) ─────────────────────────────────────────

@router.get("/collection/{collection_name}", response_model=PaginatedResponse)
async def list_by_collection(
    collection_name: str = Path(..., description="Collection name"),
    page: int = Query(1, ge=1, description="Page number"),
    per_page: int = Query(20, ge=1, le=100, description="Items per page"),
):
    """Return one page of hadith summaries for a collection.

    Rows are ordered by hadith number and the Arabic text is cut to its first
    300 characters. The collection is compared to ``name_english`` exactly.
    """
    count_sql = """
        SELECT count(*) FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s
    """
    total = db.pg_count(count_sql, (collection_name,))

    page_sql = """
        SELECT h.id, c.name_english AS collection, h.hadith_number,
               h.grade, LEFT(h.arabic_text, 300) AS arabic_text
        FROM hadiths h
        JOIN collections c ON c.id = h.collection_id
        WHERE c.name_english = %s
        ORDER BY h.hadith_number
        LIMIT %s OFFSET %s
    """
    rows_to_skip = (page - 1) * per_page
    rows = db.pg_query(page_sql, (collection_name, per_page, rows_to_skip))

    summaries = []
    for row in rows:
        # The ID is passed separately so it can be converted to a string.
        other_columns = {column: row[column] for column in row if column != "id"}
        summaries.append(HadithSummary(id=str(row["id"]), **other_columns))

    return PaginatedResponse(data=summaries, meta=_paginate(total, page, per_page))


# ── Keyword search (paginated, normalized) ─────────────────────────────────

@router.get("/search/keyword", response_model=PaginatedResponse)
async def search_keyword(
    q: str = Query(..., min_length=2, description="Arabic keyword(s) — diacritics stripped automatically"),
    collection: Optional[str] = Query(None, description="Filter by collection"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Search the Arabic hadith text for a keyword, one page at a time.

    The query is passed through ``normalize_query`` and then matched as a
    case-insensitive literal substring of ``arabic_text``. Results are ordered
    by collection name and hadith number, with the Arabic text cut to its
    first 300 characters.

    Args:
        q: Raw search text; normalized before matching.
        collection: Optional exact ``name_english`` of a collection to filter by.
        page: 1-based page number.
        per_page: Page size (1-100).

    Returns:
        A ``PaginatedResponse`` of ``HadithSummary`` items. The page is empty
        when normalization leaves no searchable text.
    """
    q_norm = normalize_query(q)

    # min_length is only enforced on the raw query. If normalization leaves
    # nothing, the pattern would be '%%' and match every hadith, so answer
    # with an empty page instead.
    if not q_norm:
        return PaginatedResponse(data=[], meta=_paginate(0, page, per_page))

    # Escape LIKE metacharacters so user input is matched literally. Backslash
    # is PostgreSQL's default LIKE escape character and must be escaped first.
    like_term = (
        q_norm.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )

    # Only fixed SQL fragments are concatenated; all user-supplied values
    # travel as bound parameters.
    where = "WHERE h.arabic_text ILIKE %s"
    params: list = [f"%{like_term}%"]
    if collection:
        where += " AND c.name_english = %s"
        params.append(collection)

    total = db.pg_count(
        f"SELECT count(*) FROM hadiths h JOIN collections c ON c.id = h.collection_id {where}",
        tuple(params),
    )
    offset = (page - 1) * per_page
    rows = db.pg_query(
        f"SELECT h.id, c.name_english AS collection, h.hadith_number, "
        f"h.grade, LEFT(h.arabic_text, 300) AS arabic_text "
        f"FROM hadiths h JOIN collections c ON c.id = h.collection_id "
        f"{where} ORDER BY c.name_english, h.hadith_number "
        f"LIMIT %s OFFSET %s",
        tuple(params + [per_page, offset]),
    )

    data = [HadithSummary(id=str(r["id"]), **{k: r[k] for k in r if k != "id"}) for r in rows]
    return PaginatedResponse(data=data, meta=_paginate(total, page, per_page))


# ── Search by topic (paginated, normalized) ────────────────────────────────

@router.get("/search/topic", response_model=PaginatedResponse)
async def search_by_topic(
    q: str = Query(..., min_length=2, description="Topic keyword (Arabic or English)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Return one page of hadiths tagged with a matching topic in Neo4j.

    The normalized query is compared, case-insensitively and as a substring,
    against both the Arabic and the English topic label. Each hadith is
    counted and returned once, ordered by collection and hadith number.
    """
    needle = normalize_query(q)
    rows_to_skip = (page - 1) * per_page

    count_cypher = """
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
           OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
    """
    total = db.neo4j_count(count_cypher, {"q": needle})

    page_cypher = """
        MATCH (t:Topic)<-[:HAS_TOPIC]-(h:Hadith)
        WHERE toLower(t.topic_arabic) CONTAINS toLower($q)
           OR toLower(t.topic_english) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
    """
    page_params = {"q": needle, "skip": rows_to_skip, "limit": per_page}
    records = db.neo4j_query(page_cypher, page_params)

    summaries = []
    for record in records:
        summaries.append(HadithSummary(**record))
    return PaginatedResponse(data=summaries, meta=_paginate(total, page, per_page))


# ── Search by narrator (paginated, normalized) ─────────────────────────────

@router.get("/search/narrator", response_model=PaginatedResponse)
async def search_by_narrator(
    q: str = Query(..., min_length=2, description="Narrator name (Arabic)"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Return one page of hadiths whose chain includes a matching narrator.

    The normalized query is compared, case-insensitively and as a substring,
    against the narrator's Arabic name in Neo4j. Each hadith is counted and
    returned once, ordered by collection and hadith number.
    """
    needle = normalize_query(q)
    rows_to_skip = (page - 1) * per_page

    count_cypher = """
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN count(DISTINCT h) AS count
    """
    total = db.neo4j_count(count_cypher, {"q": needle})

    page_cypher = """
        MATCH (n:Narrator)-[:APPEARS_IN]->(h:Hadith)
        WHERE toLower(n.name_arabic) CONTAINS toLower($q)
        RETURN DISTINCT h.id AS id,
               h.collection AS collection,
               h.hadith_number AS hadith_number,
               h.grade AS grade,
               substring(h.arabic_text, 0, 300) AS arabic_text
        ORDER BY h.collection, h.hadith_number
        SKIP $skip LIMIT $limit
    """
    page_params = {"q": needle, "skip": rows_to_skip, "limit": per_page}
    records = db.neo4j_query(page_cypher, page_params)

    summaries = []
    for record in records:
        summaries.append(HadithSummary(**record))
    return PaginatedResponse(data=summaries, meta=_paginate(total, page, per_page))