diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 1b60f5c..d8f7671 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -91,3 +91,170 @@ Kubernetes Cluster * End-to-end ML lifecycle * Automated data pipelines * Production observability-first apps + + + +```mermaid + +flowchart TB + %% ========================= + %% BetelgeuseBytes AI Platform – Full Architecture (CPU-first, K8s) + %% ========================= + + %% ---- External / Users ---- + subgraph EXT["External Users & Clients"] + U1["Scholar / Admin User\n(Web Browser)"] + U2["API Client\n(curl / SDK / Bots)"] + U3["Annotator\n(Labeling UI)"] + end + + %% ---- DNS + TLS + Ingress ---- + subgraph EDGE["Edge: DNS → TLS → Ingress"] + DNS["DNS: betelgeusebytes.io\nA/AAAA records → Ingress IP"] + CM["cert-manager\nLet's Encrypt TLS"] + INGRESS["NGINX Ingress Controller\nHTTP(S) + SNI routing"] + TCPMAP["Ingress TCP Services\n(Postgres, Neo4j Bolt)"] + end + + %% ---- Kubernetes Cluster ---- + subgraph K8S["Kubernetes Cluster (Hetzner Dedicated • CPU-only • local SSD PVs)"] + direction TB + + subgraph NET["Networking"] + CILIUM["Cilium CNI\n(eBPF dataplane / policies)"] + WG["WireGuard\n(node mesh / private networking)"] + end + + subgraph DEVOPS["DevOps / GitOps"] + GITEA["Gitea\nGit repos"] + ARGOCD["Argo CD\nGitOps deployments"] + end + + subgraph OBS["Observability (LGTM)"] + ALLOY["Grafana Alloy\n(collect logs+traces)"] + PROM["Prometheus\n(metrics)"] + LOKI["Loki\n(logs)"] + TEMPO["Tempo\n(traces)"] + GRAF["Grafana\n(dashboards)"] + KSM["kube-state-metrics"] + NODEX["node-exporter"] + end + + subgraph DATA["Core Data Layer"] + PG["PostgreSQL\n(app DB / MLflow / Label Studio)\nNamespace: db"] + REDIS["Redis\n(cache)\nNamespace: db"] + ES["Elasticsearch\n(search/log store)\nNamespace: elastic"] + KIB["Kibana\nUI\nNamespace: elastic"] + KAFKA["Kafka\n(event bus)\nNamespace: broker"] + KAFKAUI["Kafka UI\nUI\nNamespace: broker"] + MINIO["MinIO (S3)\n(datasets & artifacts)\nNamespace: storage"] + end + + subgraph KG["Knowledge & Retrieval"] + NEO4J["Neo4j\n(knowledge graph)\nNamespace: graph"] + QDRANT["Qdrant\n(vector DB + UI)\nNamespace: vec"] + TEI["Text Embeddings Inference\n(embeddings API)\nNamespace: ai"] + end + + subgraph AI["AI / ML Services"] + LLM["LLM Server (CPU)\nOllama / llama.cpp\nNamespace: ai"] + JUP["Jupyter\n(research notebooks)\nNamespace: ml"] + LABEL["Label Studio\n(annotation UI)\nNamespace: ai"] + MLFLOW["MLflow\n(tracking + registry)\nNamespace: mlops/ml"] + end + + subgraph PIPE["Automation / Pipelines"] + ARGO_WF["Argo Workflows\n(pipelines)\nNamespace: ml/argo"] + N8N["n8n\n(automation)\nNamespace: automation"] + end + + subgraph AUTH["Authentication"] + KEYCLOAK["Keycloak\n(OIDC/SSO)\nNamespace: auth"] + end + + subgraph APPS["Custom Applications (to build)"] + ORCH["Hadith Orchestrator API\n(FastAPI)\nNamespace: hadith"] + ADMIN["Hadith Admin UI\n(Next.js)\nNamespace: hadith"] + NER["NER Service\n(custom)\nNamespace: hadith"] + RE["Relation Extraction Service\n(custom)\nNamespace: hadith"] + end + end + + %% ---- Edge wiring ---- + U1 --> DNS + U2 --> DNS + U3 --> DNS + DNS --> INGRESS + CM --> INGRESS + + %% ---- Public HTTP(S) routes ---- + INGRESS -->|hadith-admin.betelgeusebytes.io| ADMIN + INGRESS -->|hadith-api.betelgeusebytes.io| ORCH + INGRESS -->|llm.betelgeusebytes.io| LLM + INGRESS -->|embeddings.betelgeusebytes.io| TEI + INGRESS -->|vector.betelgeusebytes.io| QDRANT + INGRESS -->|neo4j.betelgeusebytes.io| NEO4J + INGRESS -->|label.betelgeusebytes.io| LABEL + INGRESS -->|mlflow.betelgeusebytes.io| MLFLOW + INGRESS -->|minio.betelgeusebytes.io| MINIO + INGRESS -->|argo.betelgeusebytes.io| ARGO_WF + INGRESS -->|auth.betelgeusebytes.io| KEYCLOAK + INGRESS -->|grafana.betelgeusebytes.io| GRAF + INGRESS -->|kibana.betelgeusebytes.io| KIB + INGRESS -->|broker.betelgeusebytes.io| KAFKAUI + + %% ---- TCP routes (optional/external) ---- + TCPMAP -.-> PG + TCPMAP -.-> NEO4J + + %% ---- GitOps flow ---- + GITEA -->|manifests + app code| ARGOCD + ARGOCD -->|sync/apply| K8S + + %% ---- Auth flows ---- + ADMIN -->|OIDC login| KEYCLOAK + ORCH -->|validate JWT / introspect| KEYCLOAK + LABEL -->|optional OIDC| KEYCLOAK + MLFLOW -->|OIDC| KEYCLOAK + + %% ---- Orchestrator runtime data flows ---- + ORCH -->|reasoning / JSON extraction| LLM + ORCH -->|embed queries/docs| TEI + ORCH -->|vector search| QDRANT + ORCH -->|graph read/write| NEO4J + ORCH -->|metadata/users/jobs| PG + ORCH -->|cache| REDIS + ORCH -->|full-text search| ES + + %% ---- NER/RE services (future) ---- + ORCH --> NER + ORCH --> RE + NER -->|entities| NEO4J + RE -->|relations| NEO4J + + %% ---- Data curation loop ---- + LABEL -->|labeled datasets| MINIO + ARGO_WF -->|training data| MINIO + ARGO_WF -->|log metrics| MLFLOW + ARGO_WF -->|publish artifacts| MINIO + MLFLOW -->|model versions| MINIO + ARGO_WF -->|deploy/update services| ARGOCD + + %% ---- Event-driven (optional) ---- + ORCH -->|events| KAFKA + ARGO_WF -->|consume triggers| KAFKA + N8N -->|integrations/alerts| KAFKA + + %% ---- Observability wiring ---- + ALLOY --> LOKI + ALLOY --> TEMPO + PROM --> GRAF + LOKI --> GRAF + TEMPO --> GRAF + KSM --> PROM + NODEX --> PROM + + %% ---- Internal networking ---- + CILIUM --- INGRESS + WG --- CILIUM +