diff --git a/hadith-ingestion/argo/workflows/ingest-hadithapi.yaml b/hadith-ingestion/argo/workflows/ingest-hadithapi.yaml index e51c6ce..b604ad3 100644 --- a/hadith-ingestion/argo/workflows/ingest-hadithapi.yaml +++ b/hadith-ingestion/argo/workflows/ingest-hadithapi.yaml @@ -140,28 +140,43 @@ spec: arguments: parameters: - name: book-slug - value: "sunan-abu-dawood" + value: "abu-dawood" - - name: jami-at-tirmidhi template: ingest-book arguments: parameters: - name: book-slug - value: "jami-at-tirmidhi" + value: "al-tirmidhi" - - name: sunan-an-nasai template: ingest-book arguments: parameters: - name: book-slug - value: "sunan-an-nasai" + value: "sunan-nasai" - - name: sunan-ibn-e-majah template: ingest-book arguments: parameters: - name: book-slug - value: "sunan-ibn-e-majah" + value: "ibn-e-majah" + + - - name: musnad-ahmad + template: ingest-book + arguments: + parameters: + - name: book-slug + value: "musnad-ahmad" + + + - - name: al-silsila-sahiha + template: ingest-book + arguments: + parameters: + - name: book-slug + value: "al-silsila-sahiha" # ======================================== # Book ingestion template @@ -173,7 +188,7 @@ spec: container: image: axxs/hadith-ingestion:latest - imagePullPolicy: IfNotPresent + imagePullPolicy: Always command: [python, /app/src/main_hadithapi.py] args: - "--book-slug={{inputs.parameters.book-slug}}" diff --git a/hadith-ingestion/full-ingestion.sh b/hadith-ingestion/full-ingestion.sh new file mode 100644 index 0000000..2906b7e --- /dev/null +++ b/hadith-ingestion/full-ingestion.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# run-full-ingestion.sh + +set -e + +echo "=== Starting Full HadithAPI Ingestion ===" + +# Books to ingest (in order) +BOOKS=( + "sahih-bukhari" + "sahih-muslim" + "sunan-abu-dawood" + "jami-at-tirmidhi" + "sunan-an-nasai" + "sunan-ibn-e-majah" +) + +for BOOK in "${BOOKS[@]}"; do + echo -e "\n=========================================" + echo "Ingesting: $BOOK" + echo "=========================================" + + argo submit -n argo argo/workflows/ingest-hadithapi.yaml \ + --parameter book-slug=$BOOK \ + --parameter limit=0 \ + --wait \ + --log + + echo "$BOOK completed!" + + # Optional: add delay between books + sleep 10 +done + +echo -e "\n=== All Books Ingestion Complete ===" + +# Print summary +kubectl -n db exec -it postgres-0 -- psql -U hadith_ingest -d hadith_db -c " +SELECT + c.name_english, + c.abbreviation, + COUNT(h.id) as hadith_count, + COUNT(DISTINCT b.id) as chapter_count +FROM collections c +LEFT JOIN hadiths h ON c.id = h.collection_id +LEFT JOIN books b ON h.book_id = b.id +GROUP BY c.name_english, c.abbreviation +ORDER BY hadith_count DESC; +" \ No newline at end of file diff --git a/hadith-ingestion/run-full-ingestion.sh b/hadith-ingestion/run-full-ingestion.sh index 2906b7e..9f41a10 100755 --- a/hadith-ingestion/run-full-ingestion.sh +++ b/hadith-ingestion/run-full-ingestion.sh @@ -5,14 +5,17 @@ set -e echo "=== Starting Full HadithAPI Ingestion ===" +# Book slug to collection abbreviation mapping # Books to ingest (in order) BOOKS=( "sahih-bukhari" "sahih-muslim" - "sunan-abu-dawood" - "jami-at-tirmidhi" - "sunan-an-nasai" - "sunan-ibn-e-majah" + "abu-dawood" + "al-tirmidhi" + "ibn-e-majah" + "sunan-nasai" + "musnad-ahmad" + "al-silsila-sahiha" ) for BOOK in "${BOOKS[@]}"; do