# Source-viewer metadata (extraction residue): 52 lines, 1.1 KiB, Bash, executable file.
#!/bin/bash
#
# run-full-ingestion.sh
#
# Submits the ingest-hadithapi Argo workflow once for every book slug listed
# in BOOKS (sequentially, waiting for each submission to return), then prints
# a per-collection summary of hadith and chapter counts from Postgres.
#
# Uses the `argo` and `kubectl` CLIs; both must be on PATH.

# -e: stop on the first failing command (e.g. a non-zero exit from argo).
# -u: treat unset variables as errors.
# -o pipefail: a pipeline fails if any stage fails.
set -euo pipefail

readonly ARGO_NAMESPACE="ml"
readonly WORKFLOW_FILE="argo/workflows/ingest-hadithapi.yaml"
readonly DELAY_SECONDS=10   # pause between consecutive books

# Fail early with a clear message if a required CLI is missing.
for tool in argo kubectl; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'error: required command not found: %s\n' "$tool" >&2
    exit 1
  fi
done

echo "=== Starting Full HadithAPI Ingestion ==="

# Books to ingest (in order). Uncomment a slug to include it in the run.
BOOKS=(
  # "sahih-bukhari"
  "sahih-muslim"
  # "abu-dawood"
  # "al-tirmidhi"
  # "ibn-e-majah"
  # "sunan-nasai"
  # "musnad-ahmad"
  # "al-silsila-sahiha"
)

total_books=${#BOOKS[@]}
processed=0

if (( total_books == 0 )); then
  # Guard explicitly: expanding an empty array under `set -u` is an error on
  # older bash versions, and there is nothing to submit anyway.
  echo "No books selected; skipping ingestion." >&2
else
  for BOOK in "${BOOKS[@]}"; do
    printf '\n=========================================\n'
    echo "Ingesting: $BOOK"
    echo "========================================="

    # limit=0 is passed through to the workflow unchanged.
    # NOTE(review): presumably 0 means "no limit" - confirm against the
    # workflow template.
    argo submit -n "$ARGO_NAMESPACE" "$WORKFLOW_FILE" \
      --parameter "book-slug=$BOOK" \
      --parameter limit=0 \
      --wait \
      --log

    echo "$BOOK completed!"

    # Delay between books only; no need to wait after the last one.
    processed=$((processed + 1))
    if (( processed < total_books )); then
      sleep "$DELAY_SECONDS"
    fi
  done
fi

printf '\n=== All Books Ingestion Complete ===\n'

# Print summary.
# No -it on exec: `psql -c` runs a single statement and exits, so neither a
# TTY nor stdin is needed, and requesting a TTY is noisy in unattended runs.
kubectl -n db exec postgres-0 -- psql -U hadith_ingest -d hadith_db -c "
  SELECT
    c.name_english,
    c.abbreviation,
    COUNT(h.id) as hadith_count,
    COUNT(DISTINCT b.id) as chapter_count
  FROM collections c
  LEFT JOIN hadiths h ON c.id = h.collection_id
  LEFT JOIN books b ON h.book_id = b.id
  GROUP BY c.name_english, c.abbreviation
  ORDER BY hadith_count DESC;
"