hadith-ingestion/hadith-ingestion/run-full-ingestion.sh

#!/bin/bash
# run-full-ingestion.sh
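# Submits the Argo ingestion workflow for each HadithAPI book slug in sequence,
# then prints a per-collection summary from the Postgres database.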
set -e
echo "=== Starting Full HadithAPI Ingestion ==="
# Books to ingest (in order)
BOOKS=(
  "sahih-bukhari"
  "sahih-muslim"
  "abu-dawood"
  "al-tirmidhi"
  "ibn-e-majah"
  "sunan-nasai"
  "musnad-ahmad"
  "al-silsila-sahiha"
)
for BOOK in "${BOOKS[@]}"; do
  echo -e "\n========================================="
  echo "Ingesting: $BOOK"
  echo "========================================="
  argo submit -n argo argo/workflows/ingest-hadithapi.yaml \
    --parameter book-slug="$BOOK" \
    --parameter limit=0 \
    --wait \
    --log
echo "$BOOK completed!"
# Optional: add delay between books
sleep 10
done
echo -e "\n=== All Books Ingestion Complete ==="
# Print summary
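# Counts hadiths and distinct books (chapters) per collection, ordered by volume.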
kubectl -n db exec -it postgres-0 -- psql -U hadith_ingest -d hadith_db -c "
  SELECT
    c.name_english,
    c.abbreviation,
    COUNT(h.id) AS hadith_count,
    COUNT(DISTINCT b.id) AS chapter_count
  FROM collections c
  LEFT JOIN hadiths h ON c.id = h.collection_id
  LEFT JOIN books b ON h.book_id = b.id
  GROUP BY c.name_english, c.abbreviation
  ORDER BY hadith_count DESC;
"