update ingestion slugs
This commit is contained in:
parent
079735a81b
commit
7fdcb1417d
|
|
@ -140,28 +140,43 @@ spec:
|
||||||
arguments:
|
arguments:
|
||||||
parameters:
|
parameters:
|
||||||
- name: book-slug
|
- name: book-slug
|
||||||
value: "sunan-abu-dawood"
|
value: "abu-dawood"
|
||||||
|
|
||||||
- - name: jami-at-tirmidhi
|
- - name: jami-at-tirmidhi
|
||||||
template: ingest-book
|
template: ingest-book
|
||||||
arguments:
|
arguments:
|
||||||
parameters:
|
parameters:
|
||||||
- name: book-slug
|
- name: book-slug
|
||||||
value: "jami-at-tirmidhi"
|
value: "al-tirmidhi"
|
||||||
|
|
||||||
- - name: sunan-an-nasai
|
- - name: sunan-an-nasai
|
||||||
template: ingest-book
|
template: ingest-book
|
||||||
arguments:
|
arguments:
|
||||||
parameters:
|
parameters:
|
||||||
- name: book-slug
|
- name: book-slug
|
||||||
value: "sunan-an-nasai"
|
value: "sunan-nasai"
|
||||||
|
|
||||||
- - name: sunan-ibn-e-majah
|
- - name: sunan-ibn-e-majah
|
||||||
template: ingest-book
|
template: ingest-book
|
||||||
arguments:
|
arguments:
|
||||||
parameters:
|
parameters:
|
||||||
- name: book-slug
|
- name: book-slug
|
||||||
value: "sunan-ibn-e-majah"
|
value: "ibn-e-majah"
|
||||||
|
|
||||||
|
- - name: musnad-ahmad
|
||||||
|
template: ingest-book
|
||||||
|
arguments:
|
||||||
|
parameters:
|
||||||
|
- name: book-slug
|
||||||
|
value: "musnad-ahmad"
|
||||||
|
|
||||||
|
|
||||||
|
- - name: al-silsila-sahiha
|
||||||
|
template: ingest-book
|
||||||
|
arguments:
|
||||||
|
parameters:
|
||||||
|
- name: book-slug
|
||||||
|
value: "al-silsila-sahiha"
|
||||||
|
|
||||||
# ========================================
|
# ========================================
|
||||||
# Book ingestion template
|
# Book ingestion template
|
||||||
|
|
@ -173,7 +188,7 @@ spec:
|
||||||
|
|
||||||
container:
|
container:
|
||||||
image: axxs/hadith-ingestion:latest
|
image: axxs/hadith-ingestion:latest
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: Always
|
||||||
command: [python, /app/src/main_hadithapi.py]
|
command: [python, /app/src/main_hadithapi.py]
|
||||||
args:
|
args:
|
||||||
- "--book-slug={{inputs.parameters.book-slug}}"
|
- "--book-slug={{inputs.parameters.book-slug}}"
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# run-full-ingestion.sh
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
echo "=== Starting Full HadithAPI Ingestion ==="
|
||||||
|
|
||||||
|
# Books to ingest (in order)
|
||||||
|
BOOKS=(
|
||||||
|
"sahih-bukhari"
|
||||||
|
"sahih-muslim"
|
||||||
|
"sunan-abu-dawood"
|
||||||
|
"jami-at-tirmidhi"
|
||||||
|
"sunan-an-nasai"
|
||||||
|
"sunan-ibn-e-majah"
|
||||||
|
)
|
||||||
|
|
||||||
|
for BOOK in "${BOOKS[@]}"; do
|
||||||
|
echo -e "\n========================================="
|
||||||
|
echo "Ingesting: $BOOK"
|
||||||
|
echo "========================================="
|
||||||
|
|
||||||
|
argo submit -n argo argo/workflows/ingest-hadithapi.yaml \
|
||||||
|
--parameter book-slug=$BOOK \
|
||||||
|
--parameter limit=0 \
|
||||||
|
--wait \
|
||||||
|
--log
|
||||||
|
|
||||||
|
echo "$BOOK completed!"
|
||||||
|
|
||||||
|
# Optional: add delay between books
|
||||||
|
sleep 10
|
||||||
|
done
|
||||||
|
|
||||||
|
echo -e "\n=== All Books Ingestion Complete ==="
|
||||||
|
|
||||||
|
# Print summary
|
||||||
|
kubectl -n db exec -it postgres-0 -- psql -U hadith_ingest -d hadith_db -c "
|
||||||
|
SELECT
|
||||||
|
c.name_english,
|
||||||
|
c.abbreviation,
|
||||||
|
COUNT(h.id) as hadith_count,
|
||||||
|
COUNT(DISTINCT b.id) as chapter_count
|
||||||
|
FROM collections c
|
||||||
|
LEFT JOIN hadiths h ON c.id = h.collection_id
|
||||||
|
LEFT JOIN books b ON h.book_id = b.id
|
||||||
|
GROUP BY c.name_english, c.abbreviation
|
||||||
|
ORDER BY hadith_count DESC;
|
||||||
|
"
|
||||||
|
|
@ -5,14 +5,17 @@ set -e
|
||||||
|
|
||||||
echo "=== Starting Full HadithAPI Ingestion ==="
|
echo "=== Starting Full HadithAPI Ingestion ==="
|
||||||
|
|
||||||
|
# Book slug to collection abbreviation mapping
|
||||||
# Books to ingest (in order)
|
# Books to ingest (in order)
|
||||||
BOOKS=(
|
BOOKS=(
|
||||||
"sahih-bukhari"
|
"sahih-bukhari"
|
||||||
"sahih-muslim"
|
"sahih-muslim"
|
||||||
"sunan-abu-dawood"
|
"abu-dawood"
|
||||||
"jami-at-tirmidhi"
|
"al-tirmidhi"
|
||||||
"sunan-an-nasai"
|
"ibn-e-majah"
|
||||||
"sunan-ibn-e-majah"
|
"sunan-nasai"
|
||||||
|
"musnad-ahmad"
|
||||||
|
"al-silsila-sahiha"
|
||||||
)
|
)
|
||||||
|
|
||||||
for BOOK in "${BOOKS[@]}"; do
|
for BOOK in "${BOOKS[@]}"; do
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue