update ingestion slugs

This commit is contained in:
salahangal 2025-11-17 14:18:52 +01:00
parent 079735a81b
commit 7fdcb1417d
3 changed files with 76 additions and 9 deletions

View File

@ -140,28 +140,43 @@ spec:
arguments: arguments:
parameters: parameters:
- name: book-slug - name: book-slug
value: "sunan-abu-dawood" value: "abu-dawood"
- - name: jami-at-tirmidhi - - name: jami-at-tirmidhi
template: ingest-book template: ingest-book
arguments: arguments:
parameters: parameters:
- name: book-slug - name: book-slug
value: "jami-at-tirmidhi" value: "al-tirmidhi"
- - name: sunan-an-nasai - - name: sunan-an-nasai
template: ingest-book template: ingest-book
arguments: arguments:
parameters: parameters:
- name: book-slug - name: book-slug
value: "sunan-an-nasai" value: "sunan-nasai"
- - name: sunan-ibn-e-majah - - name: sunan-ibn-e-majah
template: ingest-book template: ingest-book
arguments: arguments:
parameters: parameters:
- name: book-slug - name: book-slug
value: "sunan-ibn-e-majah" value: "ibn-e-majah"
- - name: musnad-ahmad
template: ingest-book
arguments:
parameters:
- name: book-slug
value: "musnad-ahmad"
- - name: al-silsila-sahiha
template: ingest-book
arguments:
parameters:
- name: book-slug
value: "al-silsila-sahiha"
# ======================================== # ========================================
# Book ingestion template # Book ingestion template
@ -173,7 +188,7 @@ spec:
container: container:
image: axxs/hadith-ingestion:latest image: axxs/hadith-ingestion:latest
imagePullPolicy: IfNotPresent imagePullPolicy: Always
command: [python, /app/src/main_hadithapi.py] command: [python, /app/src/main_hadithapi.py]
args: args:
- "--book-slug={{inputs.parameters.book-slug}}" - "--book-slug={{inputs.parameters.book-slug}}"

View File

@ -0,0 +1,49 @@
#!/bin/bash
# run-full-ingestion.sh
#
# Submits the HadithAPI ingestion workflow to Argo once per book, strictly one
# book at a time, then prints per-collection hadith and chapter counts from
# the Postgres database.
#
# Usage:    ./run-full-ingestion.sh        (takes no arguments)
# Requires: `argo` and `kubectl` on PATH. The workflow file path is relative,
#           so it is resolved against the current working directory.
#
# The script aborts on the first failing command, so a failed submission
# stops the remaining books from being submitted.
set -euo pipefail

readonly ARGO_NAMESPACE="argo"
readonly WORKFLOW_FILE="argo/workflows/ingest-hadithapi.yaml"
readonly DELAY_BETWEEN_BOOKS=10 # seconds to pause after each book

echo "=== Starting Full HadithAPI Ingestion ==="

# Books to ingest (in order)
BOOKS=(
  "sahih-bukhari"
  "sahih-muslim"
  "sunan-abu-dawood"
  "jami-at-tirmidhi"
  "sunan-an-nasai"
  "sunan-ibn-e-majah"
)

for BOOK in "${BOOKS[@]}"; do
  printf '\n=========================================\n'
  echo "Ingesting: $BOOK"
  echo "========================================="

  # --wait blocks until the workflow finishes and --log streams its output,
  # which is what keeps the books sequential.
  # NOTE(review): limit=0 presumably means "no limit" - confirm against the
  # workflow definition.
  argo submit -n "$ARGO_NAMESPACE" "$WORKFLOW_FILE" \
    --parameter "book-slug=${BOOK}" \
    --parameter limit=0 \
    --wait \
    --log

  echo "$BOOK completed!"

  # Optional: add delay between books
  sleep "$DELAY_BETWEEN_BOOKS"
done

printf '\n=== All Books Ingestion Complete ===\n'

# Print summary: hadith and distinct chapter counts per collection.
# No -i/-t on exec: the query is passed with -c and needs neither stdin nor a
# TTY, so the script also works when run unattended (CI, cron, pipes).
kubectl -n db exec postgres-0 -- psql -U hadith_ingest -d hadith_db -c "
SELECT
c.name_english,
c.abbreviation,
COUNT(h.id) as hadith_count,
COUNT(DISTINCT b.id) as chapter_count
FROM collections c
LEFT JOIN hadiths h ON c.id = h.collection_id
LEFT JOIN books b ON h.book_id = b.id
GROUP BY c.name_english, c.abbreviation
ORDER BY hadith_count DESC;
"

View File

@ -5,14 +5,17 @@ set -e
echo "=== Starting Full HadithAPI Ingestion ===" echo "=== Starting Full HadithAPI Ingestion ==="
# Book slug to collection abbreviation mapping
# Books to ingest (in order) # Books to ingest (in order)
BOOKS=( BOOKS=(
"sahih-bukhari" "sahih-bukhari"
"sahih-muslim" "sahih-muslim"
"sunan-abu-dawood" "abu-dawood"
"jami-at-tirmidhi" "al-tirmidhi"
"sunan-an-nasai" "ibn-e-majah"
"sunan-ibn-e-majah" "sunan-nasai"
"musnad-ahmad"
"al-silsila-sahiha"
) )
for BOOK in "${BOOKS[@]}"; do for BOOK in "${BOOKS[@]}"; do