correct hadithapi iterators
This commit is contained in:
parent
e4546cd007
commit
53cd6e2415
Binary file not shown.
|
|
@ -0,0 +1,5 @@
|
|||
# Bundle the project's text sources into combined.txt, writing a
# "=== path ===" header before each file and a blank line after it.
#
# The -name alternatives are grouped with \( ... \) so that `-type f` and the
# `!` exclusions apply to every pattern; ungrouped, `-type f` would bind only
# to "*.txt" and the exclusions only to "*.md". combined.txt is excluded
# because it matches "*.txt" and would otherwise be read while being written.
# `IFS= read -r` keeps leading whitespace and backslashes in paths intact.
find . -type f \( -name "*.txt" -o -name "production" -o -name "*.py" -o -name "*.yaml" -o -name "Dockerfile" -o -name "*.sh" -o -name "*.env" -o -name "*.md" \) ! -name "*.xls" ! -name "*.xlsx" ! -name "combined.txt" | while IFS= read -r file; do
    echo "=== $file ===" >> combined.txt
    cat "$file" >> combined.txt
    echo "" >> combined.txt
done
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,46 @@
|
|||
# One-shot Pod in the "ml" namespace that starts the hadith-ingestion image
# with database, HadithAPI and MinIO settings injected as environment variables.
apiVersion: v1
kind: Pod
metadata:
  name: hadith-ingestion-list-books
  namespace: ml
spec:
  # Never restart: the Pod is meant to be run once and inspected.
  restartPolicy: Never
  containers:
    - name: hadith-ingestion
      image: axxs/hadith-ingestion:latest
      # The book-listing entrypoint is kept here, disabled, for reference:
      # command: ["python"]
      # args: ["/app/src/main_hadithapi.py", "--list-books"]
      # The container idles instead so a shell can be attached to it.
      command:
        - "sh"
        - "-c"
        - "sleep infinity"
      env:
        # --- PostgreSQL connection ---
        - name: DATABASE_HOST
          value: "postgres.db.svc.cluster.local"
        - name: DATABASE_PORT
          value: "5432"
        - name: DATABASE_NAME
          value: "hadith_db"
        - name: DATABASE_USER
          value: "hadith_ingest"
        - name: DATABASE_PASSWORD
          valueFrom:
            secretKeyRef:
              name: hadith-db-secret
              key: password
        # --- HadithAPI credentials ---
        - name: HADITHAPI_KEY
          valueFrom:
            secretKeyRef:
              name: hadithapi-secret
              key: api-key
        # --- MinIO object storage ---
        - name: MINIO_ENDPOINT
          value: "minio.storage.svc.cluster.local:9000"
        - name: MINIO_ACCESS_KEY
          valueFrom:
            secretKeyRef:
              name: minio-secret
              key: access-key
        - name: MINIO_SECRET_KEY
          valueFrom:
            secretKeyRef:
              name: minio-secret
              key: secret-key
        # --- Logging ---
        - name: LOG_LEVEL
          value: "INFO"
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -1,7 +1,7 @@
|
|||
"""
|
||||
Client for HadithAPI.com API
|
||||
"""
|
||||
from typing import List, Dict, Any, Optional, Generator
|
||||
from typing import List, Dict, Any, Optional, Generator, Tuple
|
||||
import structlog
|
||||
from .base_client import BaseAPIClient
|
||||
from config.settings import settings
|
||||
|
|
@ -45,7 +45,8 @@ class HadithAPIClient(BaseAPIClient):
|
|||
)
|
||||
raise Exception(f"API Error: {response.get('message')}")
|
||||
|
||||
books = response.get('data', [])
|
||||
books = response.get('books', [])
|
||||
|
||||
|
||||
logger.info(
|
||||
"books_fetched",
|
||||
|
|
@ -80,7 +81,8 @@ class HadithAPIClient(BaseAPIClient):
|
|||
)
|
||||
raise Exception(f"API Error: {response.get('message')}")
|
||||
|
||||
chapters = response.get('data', [])
|
||||
chapters = response.get('chapters', [])
|
||||
|
||||
|
||||
logger.info(
|
||||
"chapters_fetched",
|
||||
|
|
@ -127,7 +129,10 @@ class HadithAPIClient(BaseAPIClient):
|
|||
)
|
||||
|
||||
response = self.get("hadiths", params=params)
|
||||
|
||||
# logger.debug(
|
||||
# "fetching_hadiths_page####",
|
||||
# response=response
|
||||
# )
|
||||
if response.get('status') != 200:
|
||||
logger.error(
|
||||
"api_error",
|
||||
|
|
@ -136,7 +141,7 @@ class HadithAPIClient(BaseAPIClient):
|
|||
)
|
||||
raise Exception(f"API Error: {response.get('message')}")
|
||||
|
||||
return response.get('data', {})
|
||||
return response.get('hadiths', {})
|
||||
|
||||
def iter_all_hadiths_in_book(
|
||||
self,
|
||||
|
|
@ -162,15 +167,21 @@ class HadithAPIClient(BaseAPIClient):
|
|||
|
||||
while True:
|
||||
response_data = self.get_hadiths_page(
|
||||
book_id=book_id,
|
||||
book_id=book_slug,
|
||||
chapter_id=chapter_id,
|
||||
page=page,
|
||||
limit=batch_size
|
||||
)
|
||||
|
||||
hadiths = response_data.get('hadiths', [])
|
||||
hadiths = response_data.get('data', [])
|
||||
pagination = response_data.get('pagination', {})
|
||||
|
||||
# logger.info(
|
||||
# "book_complete",
|
||||
# book_slug=book_slug,
|
||||
# hadiths=hadiths,
|
||||
# pagination=pagination,
|
||||
# response = response_data
|
||||
# )
|
||||
if not hadiths:
|
||||
logger.info(
|
||||
"book_complete",
|
||||
|
|
@ -190,12 +201,12 @@ class HadithAPIClient(BaseAPIClient):
|
|||
"progress",
|
||||
book_slug=book_slug,
|
||||
fetched=total_fetched,
|
||||
total=pagination.get('total', '?')
|
||||
total=response_data.get('total', '?')
|
||||
)
|
||||
|
||||
# Check if there are more pages
|
||||
current_page = pagination.get('current_page', page)
|
||||
last_page = pagination.get('last_page', 1)
|
||||
current_page = response_data.get('current_page', page)
|
||||
last_page = response_data.get('last_page', 1)
|
||||
|
||||
if current_page >= last_page:
|
||||
logger.info(
|
||||
|
|
@ -213,7 +224,7 @@ class HadithAPIClient(BaseAPIClient):
|
|||
book_id: int,
|
||||
book_slug: str,
|
||||
batch_size: int = 100
|
||||
) -> Generator[tuple[Dict[str, Any], Optional[Dict[str, Any]]], None, None]:
|
||||
) -> Generator[Tuple[Dict[str, Any], Optional[Dict[str, Any]]], None, None]:
|
||||
"""
|
||||
Iterator that yields all hadiths in a book, organized by chapter
|
||||
|
||||
|
|
|
|||
|
|
@ -151,7 +151,8 @@ class HadithAPIIngestionService:
|
|||
|
||||
book_info = book_mapping[book_slug]
|
||||
collection_id = book_info['collection_id']
|
||||
book_id = book_info['book_id']
|
||||
# book_id = book_info['book_id']
|
||||
book_id = book_slug
|
||||
|
||||
# Create ingestion job
|
||||
job_id = self.repo.create_ingestion_job(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick test script for hadithapi_client.py
|
||||
"""
|
||||
import sys
|
||||
from venv import logger
|
||||
sys.path.insert(0, '/app')
|
||||
|
||||
from src.api_clients.hadithapi_client import HadithAPIClient
|
||||
from config.settings import settings
|
||||
|
||||
def test_api_connection():
    """Smoke-test HadithAPIClient end to end.

    Runs four checks in order and prints a result line for each:
    ``get_books``, ``get_chapters``, ``get_hadiths_page`` and
    ``iter_all_hadiths_in_book`` (the last three against 'sahih-bukhari').

    Returns:
        bool: True if every check succeeded, False as soon as one fails.
    """
    # Function-scope import keeps this block self-contained. The module-level
    # ``logger`` is imported from the stdlib ``venv`` package, which is not
    # this project's logger, so a logger named after this module is used.
    import logging

    log = logging.getLogger(__name__)

    print("=== Testing HadithAPI Client ===\n")

    client = HadithAPIClient()
    # try/finally guarantees the client is closed on every early
    # ``return False`` as well as on success.
    try:
        # Test 1: Get books
        print("Test 1: Fetching available books...")
        try:
            books = client.get_books()
            print(f"✓ Success! Found {len(books)} books")
            for book in books[:3]:  # Show first 3
                print(f" - {book.get('bookName')} ({book.get('bookSlug')})")
                print(f" Hadiths: {book.get('hadiths_count')}, Chapters: {book.get('chapters_count')}")
            log.info("Fetched %d books successfully", len(books))
        except Exception as e:
            print(f"✗ Failed: {e}")
            return False

        # Test 2: Get chapters for Sahih Bukhari
        print("\nTest 2: Fetching chapters for Sahih Bukhari...")
        try:
            chapters = client.get_chapters('sahih-bukhari')
            print(f"✓ Success! Found {len(chapters)} chapters")
            if chapters:
                print(f" First chapter: {chapters[0].get('chapterEnglish')}")
        except Exception as e:
            print(f"✗ Failed: {e}")
            return False

        # Test 3: Fetch first page of hadiths
        print("\nTest 3: Fetching first page of hadiths...")
        book_id = None
        try:
            book = client.get_book_by_slug('sahih-bukhari')
            if not book:
                print("✗ Failed: Book 'sahih-bukhari' not found")
                return False
            book_id = book.get('id')
            page_data = client.get_hadiths_page('sahih-bukhari', page=1, limit=5)
            # The page object keeps its rows under 'data' -- the same key the
            # client's own iterator reads from get_hadiths_page's result.
            hadiths = page_data.get('data', [])
            print(f"✓ Success! Fetched {len(hadiths)} hadiths")
            if hadiths:
                first = hadiths[0]
                print(f" First hadith number: {first.get('hadithNumber')}")
                print(f" Arabic text (first 100 chars): {first.get('hadithArabic', '')[:100]}...")
        except Exception as e:
            print(f"✗ Failed: {e}")
            return False

        if book_id is None:
            print("✗ Failed: Book ID unavailable for iterator test")
            return False

        # Test 4: Test iterator (fetch 3 hadiths)
        print("\nTest 4: Testing hadith iterator (3 hadiths)...")
        try:
            count = 0
            for hadith in client.iter_all_hadiths_in_book(book_id='sahih-bukhari', book_slug='sahih-bukhari', batch_size=10):
                count += 1
                print(f" Hadith #{hadith.get('hadithNumber')} is {hadith.get('englishNarrator')} and is {hadith.get('status')} ")
                # Stop early: three items are enough to show the iterator works.
                if count >= 3:
                    break
            print("✓ Success! Iterator working correctly")
        except Exception as e:
            print(f"✗ Failed: {e}")
            return False

        print("\n=== All Tests Passed! ===")
        return True
    finally:
        client.close()
|
||||
|
||||
if __name__ == "__main__":
    # Process exit status mirrors the smoke-test outcome: 0 on success, 1 on failure.
    passed = test_api_connection()
    exit_code = 0 if passed else 1
    sys.exit(exit_code)
|
||||
Loading…
Reference in New Issue