hadith-ingestion/hadith-ingestion/run-full-ingestion.sh

#!/bin/bash
# run-full-ingestion.sh
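# Submits the Argo ingestion workflow for each HadithAPI book slug in sequence,
# then prints a per-collection summary from the Postgres database.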
set -e
echo "=== Starting Full HadithAPI Ingestion ==="
# Books to ingest (in order)
BOOKS=(
  "sahih-bukhari"
  "sahih-muslim"
  "abu-dawood"
  "al-tirmidhi"
  "ibn-e-majah"
  "sunan-nasai"
  "musnad-ahmad"
  "al-silsila-sahiha"
)
for BOOK in "${BOOKS[@]}"; do
  echo -e "\n========================================="
  echo "Ingesting: $BOOK"
  echo "========================================="
  argo submit -n argo argo/workflows/ingest-hadithapi.yaml \
    --parameter book-slug="$BOOK" \
    --parameter limit=0 \
    --wait \
    --log
echo "$BOOK completed!"
# Optional: add delay between books
sleep 10
done
echo -e "\n=== All Books Ingestion Complete ==="
# Print summary
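# Counts hadiths and distinct books (chapters) per collection, ordered by volume.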
kubectl -n db exec -it postgres-0 -- psql -U hadith_ingest -d hadith_db -c "
  SELECT
    c.name_english,
    c.abbreviation,
    COUNT(h.id) AS hadith_count,
    COUNT(DISTINCT b.id) AS chapter_count
  FROM collections c
  LEFT JOIN hadiths h ON c.id = h.collection_id
  LEFT JOIN books b ON h.book_id = b.id
  GROUP BY c.name_english, c.abbreviation
  ORDER BY hadith_count DESC;
"