diff --git a/ansible/inventories/prod/hosts-ha.ini b/ansible/inventories/prod/hosts-ha.ini
new file mode 100644
index 0000000..06e7207
--- /dev/null
+++ b/ansible/inventories/prod/hosts-ha.ini
@@ -0,0 +1,22 @@
+
+[k8s_control_plane]
+hetzner-1 ansible_host=95.217.89.53 public_ip=95.217.89.53 wg_address=10.66.0.11
+
+[new_control_planes]
+cp-1 ansible_host=89.167.73.124 public_ip=89.167.73.124 wg_address=10.66.0.1
+cp-2 ansible_host=46.62.155.1 public_ip=46.62.155.1 wg_address=10.66.0.2
+cp-3 ansible_host=37.27.200.183 public_ip=37.27.200.183 wg_address=10.66.0.3
+
+[k8s_workers]
+hetzner-1 ansible_host=95.217.89.53 public_ip=95.217.89.53 wg_address=10.66.0.11
+hetzner-2 ansible_host=138.201.254.97 public_ip=138.201.254.97 wg_address=10.66.0.12
+
+[k8s_nodes:children]
+k8s_control_plane
+k8s_workers
+new_control_planes
+
+[all:vars]
+ansible_user=root
+ansible_password="{{ vault_ansible_password }}"
+ansible_become=true
\ No newline at end of file
diff --git a/ansible/playbooks/bootstrap-new-cps.yaml b/ansible/playbooks/bootstrap-new-cps.yaml
new file mode 100644
index 0000000..830d259
--- /dev/null
+++ b/ansible/playbooks/bootstrap-new-cps.yaml
@@ -0,0 +1,60 @@
+---
+# Step 1: Install base packages on new CP nodes
+- hosts: new_control_planes
+  become: true
+  roles:
+    - common
+    - wireguard
+    - containerd
+    - kubernetes
+
+# Step 2: Update WireGuard on existing nodes to know about new peers
+- hosts: k8s_workers
+  become: true
+  roles:
+    - wireguard
+
+# Step 3: Get join credentials from existing CP
+- hosts: k8s_control_plane[0]
+  become: true
+  roles:
+    - kubeadm_cp_discovery
+
+# Step 4: Join new nodes as control planes
+- hosts: new_control_planes
+  become: true
+  serial: 1  # Join one at a time for safety
+  tasks:
+    - name: Join as control plane
+      command: >-
+        {{ hostvars[groups['k8s_control_plane'][0]].kubeadm_cp_join_cmd }}
+        --control-plane
+        --apiserver-advertise-address {{ wg_address }}
+      args:
+        creates: /etc/kubernetes/kubelet.conf
+
+    - name: Setup kubeconfig
+      shell: |
+        mkdir -p /root/.kube
+        cp -f /etc/kubernetes/admin.conf /root/.kube/config
+
+    - name: Update kubelet server to DNS endpoint
+      replace:
+        path: /etc/kubernetes/kubelet.conf
+        regexp: 'server: https://[0-9.]+:6443'
+        replace: 'server: https://cp.k8s.betelgeusebytes.io:6443'
+
+    - name: Update admin.conf server to DNS endpoint
+      replace:
+        path: /etc/kubernetes/admin.conf
+        regexp: 'server: https://[0-9.]+:6443'
+        replace: 'server: https://cp.k8s.betelgeusebytes.io:6443'
+
+    - name: Restart kubelet
+      service:
+        name: kubelet
+        state: restarted
+
+    - name: Taint node as control-plane only
+      command: kubectl taint nodes {{ inventory_hostname }} node-role.kubernetes.io/control-plane:NoSchedule --overwrite
+      delegate_to: "{{ groups['k8s_control_plane'][0] }}"
\ No newline at end of file
diff --git a/ansible/roles/wireguard/vars/main.yml b/ansible/roles/wireguard/vars/main.yml
index f908d00..373b271 100644
--- a/ansible/roles/wireguard/vars/main.yml
+++ b/ansible/roles/wireguard/vars/main.yml
@@ -2,5 +2,8 @@ wg_interface: wg0
 wg_port: 51820
 wg_cidr: 10.66.0.0/24
 wg_nodes:
+  cp-1: { address: 10.66.0.1, public_ip: "89.167.73.124" }
+  cp-2: { address: 10.66.0.2, public_ip: "46.62.155.1" }
+  cp-3: { address: 10.66.0.3, public_ip: "37.27.200.183" }
   hetzner-1: { address: 10.66.0.11, public_ip: "95.217.89.53" }
   hetzner-2: { address: 10.66.0.12, public_ip: "138.201.254.97" }
diff --git a/k8s/vllm/pv-pvc.yaml b/k8s/vllm/pv-pvc.yaml
new file mode 100644
index 0000000..9bbb6b6
--- /dev/null
+++ b/k8s/vllm/pv-pvc.yaml
@@ -0,0 +1,30 @@
+# PV
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: pv-vllm
+spec:
+  capacity:
+    storage: 100Gi
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: local-ssd-hetzner
+  local:
+    path: /mnt/local-ssd/vllm
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            - key: kubernetes.io/hostname
+              operator: In
+              values:
+                - hetzner-2
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata: { name: ollama-data, namespace: ml }
+spec:
+  accessModes: ["ReadWriteOnce"]
+  storageClassName: local-ssd-hetzner
+  resources: { requests: { storage: 100Gi } }
\ No newline at end of file
diff --git a/k8s/vllm/vllm.yaml b/k8s/vllm/vllm.yaml
index 1d7fb6f..1235991 100644
--- a/k8s/vllm/vllm.yaml
+++ b/k8s/vllm/vllm.yaml
@@ -5,7 +5,7 @@ metadata:
   name: pv-vllm
 spec:
   capacity:
-    storage: 50Gi
+    storage: 100Gi
   accessModes:
     - ReadWriteOnce
   persistentVolumeReclaimPolicy: Retain
@@ -50,8 +50,35 @@ spec:
             ollama serve &   # start a temp daemon
             sleep 2
             # pull one or more small, quantized models for CPU
+            echo "Pulling model pack (CPU-friendly, reliable families)..."
             ollama pull qwen2.5:3b-instruct-q4_K_M || true
             ollama pull llama3.2:3b-instruct-q4_K_M || true
+            # --- QWEN 3 (latest generation) ---
+            # fast + instruct
+            ollama pull qwen3:4b-instruct || true
+            # balanced default
+            ollama pull qwen3:8b || true
+            # higher quality (slower on CPU)
+            # ollama pull qwen3:14b || true
+
+            # --- CODING ---
+            ollama pull qwen2.5-coder:7b || true
+
+            # --- REASONING ---
+            ollama pull deepseek-r1:8b || true
+
+            # --- STRONG GENERALIST (long context) ---
+            # ollama pull mistral-nemo:latest || true
+
+            # --- SMALL + FAST FALLBACK ---
+            # ollama pull llama3.2:3b-instruct-q4_K_M || true
+            # ollama pull phi3.5:latest || true
+
+            # --- ALTERNATE GENERALIST ---
+            # ollama pull gemma2:9b || true
+
+            # --- EMBEDDINGS (for RAG) ---
+            # ollama pull qwen3-embedding:4b || true
             pkill ollama || true
           volumeMounts:
             - { name: data, mountPath: /root/.ollama }
@@ -65,8 +92,8 @@ spec:
           volumeMounts:
             - { name: data, mountPath: /root/.ollama }
           resources:
-            requests: { cpu: "2", memory: "4Gi" }
-            limits: { cpu: "4", memory: "8Gi" }
+            requests: { cpu: "4", memory: "16Gi" }
+            limits: { cpu: "8", memory: "32Gi" }
       volumes:
         - name: data
           persistentVolumeClaim: { claimName: ollama-data }
@@ -116,7 +143,7 @@ metadata: { name: ollama-data, namespace: ml }
 spec:
   accessModes: ["ReadWriteOnce"]
   storageClassName: local-ssd-hetzner
-  resources: { requests: { storage: 50Gi } }
+  resources: { requests: { storage: 100Gi } }
 # ---
 #old k8s/ai/vllm/svc-ing.yaml
 # apiVersion: v1