From b81d48a008ac577649be4748e2a1f66837028432 Mon Sep 17 00:00:00 2001 From: salah Date: Thu, 26 Feb 2026 22:24:39 +0100 Subject: [PATCH] fix: Update database credentials and Qdrant host in deployment configuration --- README.md | 309 ++++++++++++++++++++++++++++++++++++++++++++ k8s/deployment.yaml | 18 +-- 2 files changed, 318 insertions(+), 9 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..fe2e088 --- /dev/null +++ b/README.md @@ -0,0 +1,309 @@ +# Hadith Scholar API — حَدِيثٌ + +Production-grade REST API for analyzing Islamic hadith literature across 8+ major collections. + +Built with **FastAPI** · **PostgreSQL** · **Neo4j** · **Qdrant** · **Elasticsearch** + +--- + +## Overview + +The Hadith Scholar API provides structured access to ~41,000 hadiths from the major canonical collections, enriched with: + +- **LLM-extracted narrator chains** — structured isnad parsing with entity typing +- **Narrator knowledge graph** — biographies, teacher/student networks, places, tribes (Neo4j) +- **Multilingual semantic search** — find hadiths by meaning in Arabic, English, or Urdu (BGE-M3 + Qdrant) +- **Full-text Arabic search** — morphological analysis with stemming and root extraction (Elasticsearch) +- **Interactive API docs** — Swagger UI with Arabic examples on every endpoint + +### Collections + +| Collection | Arabic | Hadiths | +|------------|--------|---------| +| Sahih Bukhari | صحيح البخاري | 6,986 | +| Sahih Muslim | صحيح مسلم | 15,034 | +| Sunan Abu Dawood | سنن أبي داود | 5,274 | +| Jami` at-Tirmidhi | جامع الترمذي | — | +| Sunan an-Nasa'i | سنن النسائي | 5,758 | +| Sunan Ibn Majah | سنن ابن ماجه | 4,341 | +| Musnad Ahmad | مسند أحمد | — | +| Muwatta Malik | موطأ مالك | — | + +--- + +## API Endpoints + +### Hadiths (`/hadiths`) + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/hadiths/{hadith_id}` | Full hadith details with narrator chain and topics | +| `GET` 
| `/hadiths/collection/{name}` | Paginated listing by collection | +| `GET` | `/hadiths/number/{collection}/{number}` | Lookup by collection name + hadith number | +| `GET` | `/hadiths/search/keyword?q=صلاة` | Arabic keyword search with filters | +| `GET` | `/hadiths/search/topic/{topic}` | Search by topic tag | +| `GET` | `/hadiths/search/narrator/{name}` | Find hadiths by narrator | + +### Narrators (`/narrators`) + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/narrators/search?q=أبو هريرة` | Search by name (Arabic or transliterated) | +| `GET` | `/narrators/profile/{name_arabic}` | Full biography, hadiths, teachers, students, places | +| `GET` | `/narrators/by-generation/{gen}` | List narrators by طبقة (صحابي, تابعي, etc.) | +| `GET` | `/narrators/by-place/{place}` | Narrators associated with a place | +| `GET` | `/narrators/interactions/{name}` | All relationships for a narrator | +| `GET` | `/narrators/who-met-who?narrator_a=X&narrator_b=Y` | Shortest path between two narrators | + +### Isnad Chains (`/chains`) + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/chains/hadith/{hadith_id}` | Chain as graph (nodes + links) for visualization | +| `GET` | `/chains/narrator/{name}` | All chains containing a narrator | +| `GET` | `/chains/common-chains?narrator_a=X&narrator_b=Y` | Hadiths where both narrators appear | + +### Search (`/search`) + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/search/semantic?q=what did the prophet say about fasting` | Semantic search (any language) | +| `GET` | `/search/fulltext?q=الصلاة` | Arabic full-text with morphological analysis | +| `GET` | `/search/combined?q=صيام رمضان` | Both semantic + full-text in parallel | + +### System + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `GET` | `/` | API info and endpoint listing | +| `GET` | `/health` | Health check (verifies all 4 backends) 
| +| `GET` | `/stats` | Database statistics | +| `GET` | `/docs` | Swagger UI | +| `GET` | `/redoc` | ReDoc documentation | +| `GET` | `/openapi.json` | OpenAPI 3.1 spec | + +--- + +## Example Requests + +### Search for hadiths about prayer +```bash +curl "https://hadith-api.betelgeusebytes.io/hadiths/search/keyword?q=صلاة&collection=Sahih%20Bukhari&grade=Sahih" +``` + +### Get narrator profile +```bash +curl "https://hadith-api.betelgeusebytes.io/narrators/profile/أبو%20هريرة" +``` + +### Semantic search (English → Arabic results) +```bash +curl "https://hadith-api.betelgeusebytes.io/search/semantic?q=what%20is%20the%20reward%20of%20prayer" +``` + +### Check if two narrators are connected +```bash +curl "https://hadith-api.betelgeusebytes.io/narrators/who-met-who?narrator_a=الزهري&narrator_b=أنس%20بن%20مالك" +``` + +### Get isnad chain for a hadith +```bash +curl "https://hadith-api.betelgeusebytes.io/chains/hadith/{hadith_uuid}" +``` + +--- + +## Architecture + +``` + ┌──────────────────────────────┐ + │ FastAPI Application │ + │ hadith-api.betelgeusebytes.io │ + └─────────┬────────────────────┘ + │ + ┌─────────────────┼─────────────────────┐ + │ │ │ + ┌───────▼──────┐ ┌──────▼───────┐ ┌───────────▼──────────┐ + │ PostgreSQL │ │ Neo4j │ │ Qdrant + TEI │ + │ 41k hadiths │ │ Knowledge │ │ Semantic search │ + │ full text │ │ Graph │ │ 1024-dim BGE-M3 │ + └──────────────┘ │ - Narrators │ └──────────────────────┘ + │ - Chains │ + │ - Places │ ┌──────────────────────┐ + │ - Tribes │ │ Elasticsearch │ + │ - Topics │ │ Arabic full-text │ + └──────────────┘ │ morphological │ + └──────────────────────┘ +``` + +### Backend Responsibilities + +| Backend | What it stores | Used by | +|---------|---------------|---------| +| **PostgreSQL** | Raw hadith text (Arabic/English/Urdu), metadata, grades | `/hadiths/*` keyword search, collection listing | +| **Neo4j** | Narrator graph, isnad chains, topics, places, tribes | `/narrators/*`, `/chains/*`, topic search | +| **Qdrant** | 
1024-dim BGE-M3 embeddings for all 41k hadiths | `/search/semantic` | +| **Elasticsearch** | Arabic-analyzed hadith text index | `/search/fulltext` | +| **TEI** | BGE-M3 embedding inference (query → vector) | `/search/semantic` (query encoding) | + +--- + +## Knowledge Graph Model + +``` +(:Narrator)-[:APPEARS_IN {chain_order, transmission_verb}]->(:Hadith) +(:Narrator)-[:NARRATED_FROM {hadith_ids}]->(:Narrator) +(:Narrator)-[:TEACHER_OF]->(:Narrator) +(:Narrator)-[:BORN_IN|LIVED_IN|DIED_IN|TRAVELED_TO]->(:Place) +(:Narrator)-[:BELONGS_TO_TRIBE]->(:Tribe) +(:Hadith)-[:HAS_TOPIC]->(:Topic) +``` + +### Narrator Properties +- `name_arabic` / `name_transliterated` — primary identifiers +- `full_nasab` — complete lineage (فلان بن فلان بن فلان) +- `kunya` — أبو/أم names +- `nisba` — attributional (-i suffix: البخاري، المدني) +- `generation` — طبقة: صحابي، تابعي، تابع التابعين +- `reliability_grade` — جرح وتعديل: ثقة، صدوق، ضعيف +- `biography_summary_arabic` / `biography_summary_english` — bilingual bios +- `birth_year_hijri` / `death_year_hijri` — dates in Hijri calendar + +--- + +## Setup + +### Prerequisites +- Python 3.12+ +- Docker +- Access to PostgreSQL, Neo4j, Qdrant, Elasticsearch, TEI + +### Local Development + +```bash +# Clone +git clone <repository-url> +cd hadith-api + +# Configure +cp .env.example .env +# Edit .env with your credentials + +# Install +pip install -r requirements.txt + +# Run +uvicorn app.main:app --reload --port 8000 + +# Open docs +open http://localhost:8000/docs +``` + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `HADITH_PG_HOST` | PostgreSQL host | `pg.betelgeusebytes.io` | +| `HADITH_PG_PORT` | PostgreSQL port | `5432` | +| `HADITH_PG_DBNAME` | Database name | — | +| `HADITH_PG_USER` | Database user | — | +| `HADITH_PG_PASSWORD` | Database password | — | +| `HADITH_PG_SSLMODE` | SSL mode | `require` | +| `HADITH_NEO4J_URI` | Neo4j bolt URI | `neo4j+ssc://neo4j.betelgeusebytes.io:7687` | +|
`HADITH_NEO4J_USER` | Neo4j user | `neo4j` | +| `HADITH_NEO4J_PASSWORD` | Neo4j password | — | +| `HADITH_QDRANT_HOST` | Qdrant host | `qdrant.vector.svc.cluster.local` | +| `HADITH_QDRANT_PORT` | Qdrant port | `6333` | +| `HADITH_QDRANT_COLLECTION` | Qdrant collection name | `hadiths` | +| `HADITH_ES_HOST` | Elasticsearch URL | `http://elasticsearch.elastic.svc.cluster.local:9200` | +| `HADITH_ES_INDEX` | Elasticsearch index | `hadiths` | +| `HADITH_TEI_URL` | TEI embedding service | `http://tei.ml.svc.cluster.local:80` | + +--- + +## Deployment (Kubernetes) + +### Build & Push + +```bash +docker build -t axxs/hadith-api:latest . +docker push axxs/hadith-api:latest +``` + +### Deploy + +```bash +# Edit secrets in k8s/deployment.yaml first +kubectl apply -f k8s/deployment.yaml + +# Watch rollout +kubectl rollout status deployment/hadith-api -n api + +# Verify +kubectl get pods -n api -l app=hadith-api +curl https://hadith-api.betelgeusebytes.io/health +``` + +### What gets created +- **Namespace**: `api` +- **Secret**: `hadith-api-secrets` (PG + Neo4j credentials) +- **Deployment**: 2 replicas with health checks +- **Service**: ClusterIP on port 80 → container 8000 +- **Ingress**: TLS via cert-manager at `hadith-api.betelgeusebytes.io` + +### Resource Limits +- Requests: 250m CPU, 256Mi RAM per pod +- Limits: 1 CPU, 512Mi RAM per pod + +--- + +## Project Structure + +``` +hadith-api/ +├── app/ +│ ├── main.py # FastAPI app, lifespan, health, stats +│ ├── config.py # Pydantic settings (env vars) +│ ├── models/ +│ │ └── schemas.py # Response models with examples +│ ├── routers/ +│ │ ├── hadiths.py # /hadiths/* — details, search, listing +│ │ ├── narrators.py # /narrators/* — profiles, relationships +│ │ ├── chains.py # /chains/* — isnad visualization +│ │ └── search.py # /search/* — semantic + full-text +│ └── services/ +│ └── database.py # PG, Neo4j, Qdrant, ES connections +├── k8s/ +│ └── deployment.yaml # K8s namespace + secret + deploy + svc + ingress +├── 
Dockerfile +├── .dockerignore +├── .env.example +├── requirements.txt +├── deploy.sh +└── README.md +``` + +--- + +## Data Pipeline + +The API consumes data produced by the hadith extraction pipeline: + +``` + HadithAPI.com ──► PostgreSQL (41k hadiths, raw text) + │ + ├──► TEI (BGE-M3) ──► Qdrant (embeddings) + ├──► Elasticsearch (full-text index) + └──► LLM Extraction (OpenAI/Gemini) + │ + ├──► Phase A: sanad/matn split, narrator chains, entities, topics + └──► Phase B: narrator biographies from classical scholarship + │ + └──► MinIO (JSON) ──► Neo4j (knowledge graph) +``` + +--- + +## License + +MIT \ No newline at end of file diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml index 7c9dada..41e0e6d 100644 --- a/k8s/deployment.yaml +++ b/k8s/deployment.yaml @@ -10,9 +10,9 @@ metadata: namespace: api type: Opaque stringData: - PG_DBNAME: "REPLACE_ME" - PG_USER: "REPLACE_ME" - PG_PASSWORD: "REPLACE_ME" + PG_DBNAME: "hadith_db4" + PG_USER: "hadith_ingest" + PG_PASSWORD: "hadith_ingest" NEO4J_PASSWORD: "NEO4J-PASS" --- apiVersion: apps/v1 @@ -69,7 +69,7 @@ spec: name: hadith-api-secrets key: NEO4J_PASSWORD - name: HADITH_QDRANT_HOST - value: "qdrant.vector.svc.cluster.local" + value: "qdrant.db.svc.cluster.local" - name: HADITH_QDRANT_PORT value: "6333" - name: HADITH_QDRANT_COLLECTION @@ -118,16 +118,16 @@ kind: Ingress metadata: name: hadith-api-ingress namespace: api - annotations: - cert-manager.io/cluster-issuer: letsencrypt-prod - nginx.ingress.kubernetes.io/proxy-body-size: "10m" - nginx.ingress.kubernetes.io/enable-cors: "true" + # annotations: + # cert-manager.io/cluster-issuer: letsencrypt-prod + # nginx.ingress.kubernetes.io/proxy-body-size: "10m" + # nginx.ingress.kubernetes.io/enable-cors: "true" spec: ingressClassName: nginx tls: - hosts: - api.betelgeusebytes.io - secretName: hadith-api-tls + secretName: wildcard-betelgeusebytes-tls rules: - host: api.betelgeusebytes.io http: