update ingestion slugs
This commit is contained in:
parent
079735a81b
commit
7fdcb1417d
|
|
@ -140,28 +140,43 @@ spec:
|
|||
arguments:
|
||||
parameters:
|
||||
- name: book-slug
|
||||
value: "sunan-abu-dawood"
|
||||
value: "abu-dawood"
|
||||
|
||||
- - name: jami-at-tirmidhi
|
||||
template: ingest-book
|
||||
arguments:
|
||||
parameters:
|
||||
- name: book-slug
|
||||
value: "jami-at-tirmidhi"
|
||||
value: "al-tirmidhi"
|
||||
|
||||
- - name: sunan-an-nasai
|
||||
template: ingest-book
|
||||
arguments:
|
||||
parameters:
|
||||
- name: book-slug
|
||||
value: "sunan-an-nasai"
|
||||
value: "sunan-nasai"
|
||||
|
||||
- - name: sunan-ibn-e-majah
|
||||
template: ingest-book
|
||||
arguments:
|
||||
parameters:
|
||||
- name: book-slug
|
||||
value: "sunan-ibn-e-majah"
|
||||
value: "ibn-e-majah"
|
||||
|
||||
- - name: musnad-ahmad
|
||||
template: ingest-book
|
||||
arguments:
|
||||
parameters:
|
||||
- name: book-slug
|
||||
value: "musnad-ahmad"
|
||||
|
||||
|
||||
- - name: al-silsila-sahiha
|
||||
template: ingest-book
|
||||
arguments:
|
||||
parameters:
|
||||
- name: book-slug
|
||||
value: "al-silsila-sahiha"
|
||||
|
||||
# ========================================
|
||||
# Book ingestion template
|
||||
|
|
@ -173,7 +188,7 @@ spec:
|
|||
|
||||
container:
|
||||
image: axxs/hadith-ingestion:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
imagePullPolicy: Always
|
||||
command: [python, /app/src/main_hadithapi.py]
|
||||
args:
|
||||
- "--book-slug={{inputs.parameters.book-slug}}"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
#!/bin/bash
|
||||
# run-full-ingestion.sh
|
||||
|
||||
set -e
|
||||
|
||||
echo "=== Starting Full HadithAPI Ingestion ==="
|
||||
|
||||
# Books to ingest (in order)
|
||||
BOOKS=(
|
||||
"sahih-bukhari"
|
||||
"sahih-muslim"
|
||||
"sunan-abu-dawood"
|
||||
"jami-at-tirmidhi"
|
||||
"sunan-an-nasai"
|
||||
"sunan-ibn-e-majah"
|
||||
)
|
||||
|
||||
for BOOK in "${BOOKS[@]}"; do
|
||||
echo -e "\n========================================="
|
||||
echo "Ingesting: $BOOK"
|
||||
echo "========================================="
|
||||
|
||||
argo submit -n argo argo/workflows/ingest-hadithapi.yaml \
|
||||
--parameter book-slug=$BOOK \
|
||||
--parameter limit=0 \
|
||||
--wait \
|
||||
--log
|
||||
|
||||
echo "$BOOK completed!"
|
||||
|
||||
# Optional: add delay between books
|
||||
sleep 10
|
||||
done
|
||||
|
||||
echo -e "\n=== All Books Ingestion Complete ==="
|
||||
|
||||
# Print summary
|
||||
kubectl -n db exec -it postgres-0 -- psql -U hadith_ingest -d hadith_db -c "
|
||||
SELECT
|
||||
c.name_english,
|
||||
c.abbreviation,
|
||||
COUNT(h.id) as hadith_count,
|
||||
COUNT(DISTINCT b.id) as chapter_count
|
||||
FROM collections c
|
||||
LEFT JOIN hadiths h ON c.id = h.collection_id
|
||||
LEFT JOIN books b ON h.book_id = b.id
|
||||
GROUP BY c.name_english, c.abbreviation
|
||||
ORDER BY hadith_count DESC;
|
||||
"
|
||||
|
|
@ -5,14 +5,17 @@ set -e
|
|||
|
||||
echo "=== Starting Full HadithAPI Ingestion ==="
|
||||
|
||||
# Book slug to collection abbreviation mapping
|
||||
# Books to ingest (in order)
|
||||
BOOKS=(
|
||||
"sahih-bukhari"
|
||||
"sahih-muslim"
|
||||
"sunan-abu-dawood"
|
||||
"jami-at-tirmidhi"
|
||||
"sunan-an-nasai"
|
||||
"sunan-ibn-e-majah"
|
||||
"abu-dawood"
|
||||
"al-tirmidhi"
|
||||
"ibn-e-majah"
|
||||
"sunan-nasai"
|
||||
"musnad-ahmad"
|
||||
"al-silsila-sahiha"
|
||||
)
|
||||
|
||||
for BOOK in "${BOOKS[@]}"; do
|
||||
|
|
|
|||
Loading…
Reference in New Issue