5959 lines
156 KiB
Plaintext
5959 lines
156 KiB
Plaintext
=== ./ansible/inventories/prod/group_vars/all.yml ===
|
|
cluster_name: prod
|
|
k8s_version: "v1.30.3"
|
|
control_plane_endpoint: "95.217.89.53:6443" # switch later to cp.k8s.betelgeusebytes.io:6443
|
|
|
|
pod_cidr: "10.244.0.0/16"
|
|
service_cidr: "10.96.0.0/12"
|
|
cilium_version: "1.15.7"
|
|
|
|
local_path_dir: "/srv/k8s"
|
|
local_sc_name: "local-ssd-hetzner"
|
|
|
|
stateful_node_label_key: "node"
|
|
stateful_node_label_val: "hetzner-2"
|
|
|
|
=== ./ansible/inventories/prod/hosts.ini ===
|
|
[k8s_control_plane]
|
|
hetzner-1 ansible_host=95.217.89.53 public_ip=95.217.89.53 wg_address=10.66.0.11
|
|
|
|
[k8s_workers]
|
|
hetzner-1 ansible_host=95.217.89.53 public_ip=95.217.89.53 wg_address=10.66.0.11
|
|
hetzner-2 ansible_host=138.201.254.97 public_ip=138.201.254.97 wg_address=10.66.0.12
|
|
|
|
[k8s_nodes:children]
|
|
k8s_control_plane
|
|
k8s_workers
|
|
|
|
# add tiny VPS control-planes here when ready
|
|
[new_control_planes]
|
|
# cp-a ansible_host=<VPS1_IP> public_ip=<VPS1_IP> wg_address=10.66.0.10
|
|
|
|
[all:vars]
ansible_user=root
# SECURITY(review): plaintext password committed to the repository. Move this
# to Ansible Vault (ansible-vault encrypt_string) or switch to SSH key auth,
# and rotate this credential since it is already exposed in VCS history.
ansible_password=3Lcd0504
ansible_become=true
|
|
|
=== ./ansible/playbooks/add-control-planes.yml ===
|
|
- hosts: k8s_control_plane[0]
|
|
become: yes
|
|
roles:
|
|
- kubeadm_cp_discovery
|
|
|
|
- hosts: new_control_planes
|
|
become: yes
|
|
roles:
|
|
- common
|
|
- wireguard
|
|
- containerd
|
|
- kubernetes
|
|
|
|
- hosts: new_control_planes
  become: yes
  roles:
    - kubeadm_join_cp
  vars:
    # Pull the join command computed by kubeadm_cp_discovery on the first
    # control-plane host. The previous expression appended
    # `| default(kubeadm_cp_join_cmd)`, i.e. the variable being defined,
    # which is a recursive template and can never supply a usable fallback.
    # The kubeadm_join_cp role already fails with a clear message when the
    # fact is missing, so no default is needed here.
    kubeadm_cp_join_cmd: "{{ hostvars[groups['k8s_control_plane'][0]].kubeadm_cp_join_cmd }}"
|
|
|
|
=== ./ansible/playbooks/site.yml ===
|
|
- hosts: k8s_nodes
|
|
become: yes
|
|
# serial: 1
|
|
roles:
|
|
# - ../roles/common
|
|
#- ../roles/wireguard
|
|
#- ../roles/containerd
|
|
#- ../roles/kubernetes
|
|
|
|
- hosts: k8s_control_plane
|
|
become: yes
|
|
roles:
|
|
- ../roles/kubeadm_init
|
|
|
|
# - hosts: k8s_workers
|
|
# become: yes
|
|
# roles:
|
|
# - ../roles/kubeadm_join
|
|
|
|
- hosts: k8s_control_plane
  become: yes
  # All addon roles are currently disabled. An explicit empty list keeps the
  # play valid (a bare `roles:` followed only by comments parses as null).
  roles: []
  # - ../roles/cilium
  # - ../roles/ingress
  # - ../roles/cert_manager
|
|
|
|
- hosts: k8s_nodes
|
|
become: yes
|
|
roles:
|
|
#- ../roles/storage_local_path
|
|
- ../roles/labels
|
|
|
|
=== ./ansible/roles/cert_manager/tasks/main.yml ===
|
|
- name: Install cert-manager
|
|
shell: kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.4/cert-manager.yaml
|
|
|
|
- name: Wait for cert-manager pods to be ready
|
|
shell: kubectl wait --for=condition=ready --timeout=300s pod -l app.kubernetes.io/instance=cert-manager -n cert-manager
|
|
|
|
- name: Wait for webhook endpoint to be ready
|
|
shell: |
|
|
for i in {1..30}; do
|
|
if kubectl get endpoints cert-manager-webhook -n cert-manager -o jsonpath='{.subsets[*].addresses[*].ip}' | grep -q .; then
|
|
echo "Webhook endpoint is ready"
|
|
exit 0
|
|
fi
|
|
echo "Waiting for webhook endpoint... attempt $i/30"
|
|
sleep 2
|
|
done
|
|
exit 1
|
|
|
|
- name: Test webhook connectivity
|
|
shell: kubectl run test-webhook --image=curlimages/curl:latest --rm -i --restart=Never -- curl -k https://cert-manager-webhook.cert-manager.svc:443/healthz
|
|
register: webhook_test
|
|
ignore_errors: yes
|
|
|
|
- name: Display webhook test result
|
|
debug:
|
|
var: webhook_test
|
|
|
|
# Render the Let's Encrypt production ClusterIssuer manifest. The previous
# version of this file contained this task twice: the first copy was
# truncated (its manifest ended at `acme:`, an invalid ClusterIssuer) and was
# immediately overwritten by the complete second copy. Only the complete task
# is kept.
- name: ClusterIssuer
  copy:
    dest: /root/cluster-issuer-prod.yaml
    content: |
      apiVersion: cert-manager.io/v1
      kind: ClusterIssuer
      metadata:
        name: letsencrypt-prod
      spec:
        acme:
          email: admin@betelgeusebytes.io
          server: https://acme-v02.api.letsencrypt.org/directory
          privateKeySecretRef:
            name: letsencrypt-prod-key
          solvers:
            - http01:
                ingress:
                  class: nginx
|
|
|
|
- name: Temporarily disable cert-manager webhook
|
|
shell: |
|
|
kubectl delete validatingwebhookconfiguration cert-manager-webhook || true
|
|
ignore_errors: yes
|
|
|
|
- name: Apply ClusterIssuer
|
|
command: kubectl apply -f /root/cluster-issuer-prod.yaml
|
|
|
|
- name: Reinstall cert-manager to restore webhook
|
|
shell: kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.4/cert-manager.yaml
|
|
|
|
=== ./ansible/roles/cilium/tasks/main.yml ===
|
|
- name: Install cilium CLI
|
|
shell: |
|
|
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/latest/download/cilium-linux-amd64.tar.gz
|
|
tar xzf cilium-linux-amd64.tar.gz -C /usr/local/bin
|
|
args: { creates: /usr/local/bin/cilium }
|
|
|
|
- name: Deploy cilium
  # "strict" is a deprecated value for kubeProxyReplacement (removed in newer
  # Cilium releases); the supported boolean form is equivalent on 1.15.x.
  # KUBECONFIG is set explicitly so the CLI can reach the API server when run
  # as root over SSH (consistent with the other kubectl-invoking roles).
  shell: |
    cilium install --version {{ cilium_version }} --set kubeProxyReplacement=true --set bpf.masquerade=true
  environment:
    KUBECONFIG: /etc/kubernetes/admin.conf
|
|
|
|
=== ./ansible/roles/common/tasks/main.yml ===
|
|
- name: Disable swap
|
|
command: swapoff -a
|
|
when: ansible_swaptotal_mb|int > 0
|
|
|
|
- name: Ensure swap disabled on boot
|
|
replace:
|
|
path: /etc/fstab
|
|
regexp: '^([^#].*\sswap\s)'
|
|
replace: '# \1'
|
|
|
|
- name: Kernel modules
|
|
copy:
|
|
dest: /etc/modules-load.d/containerd.conf
|
|
content: |
|
|
overlay
|
|
br_netfilter
|
|
|
|
- name: Load modules
|
|
command: modprobe {{ item }}
|
|
loop: [overlay, br_netfilter]
|
|
|
|
- name: Sysctl for k8s
|
|
copy:
|
|
dest: /etc/sysctl.d/99-kubernetes.conf
|
|
content: |
|
|
net.bridge.bridge-nf-call-iptables = 1
|
|
net.bridge.bridge-nf-call-ip6tables = 1
|
|
net.ipv4.ip_forward = 1
|
|
vm.max_map_count = 262144
|
|
- name: Apply sysctl
|
|
command: sysctl --system
|
|
|
|
=== ./ansible/roles/containerd/tasks/main.yml ===
|
|
- name: Install containerd
|
|
apt:
|
|
name: containerd
|
|
state: present
|
|
update_cache: yes
|
|
|
|
- name: Ensure containerd config directory
|
|
file:
|
|
path: /etc/containerd
|
|
state: directory
|
|
mode: '0755'
|
|
|
|
- name: Generate default config
|
|
shell: containerd config default > /etc/containerd/config.toml
|
|
args: { creates: /etc/containerd/config.toml }
|
|
|
|
- name: Ensure SystemdCgroup=true
|
|
replace:
|
|
path: /etc/containerd/config.toml
|
|
regexp: 'SystemdCgroup = false'
|
|
replace: 'SystemdCgroup = true'
|
|
|
|
- name: Restart containerd
|
|
service:
|
|
name: containerd
|
|
state: restarted
|
|
enabled: yes
|
|
|
|
=== ./ansible/roles/ingress/tasks/main.yml ===
|
|
- name: Deploy ingress-nginx (baremetal)
  # Pin to a released controller tag instead of the moving `main` branch so
  # reruns are reproducible; controller-v1.10.1 supports Kubernetes 1.30.
  shell: kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.10.1/deploy/static/provider/baremetal/deploy.yaml
  environment:
    KUBECONFIG: /etc/kubernetes/admin.conf
|
|
|
|
=== ./ansible/roles/kubeadm_cp_discovery/tasks/main.yml ===
|
|
- name: Upload certs and get certificate key
|
|
shell: kubeadm init phase upload-certs --upload-certs | tail -n 1
|
|
register: cert_key
|
|
|
|
- name: Compute CA cert hash
|
|
shell: |
|
|
openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | awk '{print $2}'
|
|
register: ca_hash
|
|
|
|
- name: Create short-lived token
|
|
shell: kubeadm token create --ttl 30m
|
|
register: join_token
|
|
|
|
- name: Determine control-plane endpoint
|
|
set_fact:
|
|
cp_endpoint: "{{ hostvars[inventory_hostname].control_plane_endpoint | default(ansible_host ~ ':6443') }}"
|
|
|
|
- set_fact:
|
|
kubeadm_cp_join_cmd: >-
|
|
kubeadm join {{ cp_endpoint }}
|
|
--token {{ join_token.stdout }}
|
|
--discovery-token-ca-cert-hash sha256:{{ ca_hash.stdout }}
|
|
--control-plane
|
|
--certificate-key {{ cert_key.stdout }}
|
|
|
|
=== ./ansible/roles/kubeadm_init/tasks/main.yml ===
|
|
# - name: Write kubeadm config
|
|
# template:
|
|
# src: kubeadm-config.yaml.j2
|
|
# dest: /etc/kubernetes/kubeadm-config.yaml
|
|
|
|
# - name: Pre-pull images
|
|
# command: kubeadm config images pull
|
|
|
|
# - name: Init control-plane
|
|
# command: kubeadm init --config=/etc/kubernetes/kubeadm-config.yaml
|
|
# args: { creates: /etc/kubernetes/admin.conf }
|
|
|
|
# - name: Setup kubeconfig
|
|
# shell: |
|
|
# mkdir -p $HOME/.kube
|
|
# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
|
# chown $(id -u):$(id -g) $HOME/.kube/config
|
|
|
|
- name: Save join command
|
|
shell: kubeadm token create --print-join-command
|
|
register: join_cmd
|
|
|
|
- set_fact:
|
|
kubeadm_join_command_all: "{{ join_cmd.stdout }}"
|
|
|
|
=== ./ansible/roles/kubeadm_join/tasks/main.yml ===
|
|
- name: Join node to cluster
  # Pulls the join command saved as a fact by the kubeadm_init role on the
  # first control-plane host.
  command: "{{ hostvars[groups['k8s_control_plane'][0]].kubeadm_join_command_all }} --ignore-preflight-errors=FileAvailable--etc-kubernetes-kubelet.conf,FileAvailable--etc-kubernetes-pki-ca.crt,Port-10250"
  args:
    # kubeadm join is not idempotent; skip once the node already has a
    # kubelet config (same guard the kubeadm_join_cp role uses).
    creates: /etc/kubernetes/kubelet.conf
|
|
|
|
=== ./ansible/roles/kubeadm_join_cp/tasks/main.yml ===
|
|
- name: Ensure join command provided
|
|
fail:
|
|
msg: "Set kubeadm_cp_join_cmd variable (string)"
|
|
when: kubeadm_cp_join_cmd is not defined
|
|
|
|
- name: Join node as control-plane
|
|
command: "{{ kubeadm_cp_join_cmd }}"
|
|
args:
|
|
creates: /etc/kubernetes/kubelet.conf
|
|
|
|
=== ./ansible/roles/kubernetes/tasks/main.yml ===
|
|
- name: Install Kubernetes apt key
  # /etc/apt/keyrings does not exist by default on older Debian/Ubuntu
  # releases; create it first or gpg fails to write the keyring file.
  shell: |
    install -m 0755 -d /etc/apt/keyrings
    curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
  args: { creates: /etc/apt/keyrings/kubernetes-apt-keyring.gpg }
|
|
|
|
- name: Add Kubernetes repo
|
|
apt_repository:
|
|
repo: "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.30/deb/ /"
|
|
state: present
|
|
|
|
- name: Install kubeadm, kubelet, kubectl
|
|
apt:
|
|
name: [kubeadm, kubelet, kubectl]
|
|
state: present
|
|
update_cache: yes
|
|
|
|
- name: Hold kube packages
|
|
command: apt-mark hold kubeadm kubelet kubectl
|
|
|
|
=== ./ansible/roles/labels/tasks/main.yml ===
|
|
- name: Label hetzner-2 for stateful
  # Applies the stateful-workload label used by nodeSelectors in the k8s
  # manifests (node=hetzner-2).
  command: kubectl label node hetzner-2 {{ stateful_node_label_key }}={{ stateful_node_label_val }} --overwrite
  environment:
    # kubectl run as root needs an explicit kubeconfig on the control plane
    # (consistent with the storage_local_path role).
    KUBECONFIG: /etc/kubernetes/admin.conf
  delegate_to: "{{ groups['k8s_control_plane'][0] }}"
  run_once: true
|
|
|
|
=== ./ansible/roles/storage_local_path/tasks/main.yml ===
|
|
- name: Ensure local path dir
|
|
file:
|
|
path: "{{ local_path_dir }}"
|
|
state: directory
|
|
mode: '0777'
|
|
|
|
- name: StorageClass local-ssd-hetzner
|
|
copy:
|
|
dest: /root/local-sc.yaml
|
|
content: |
|
|
apiVersion: storage.k8s.io/v1
|
|
kind: StorageClass
|
|
metadata:
|
|
name: {{ local_sc_name }}
|
|
provisioner: kubernetes.io/no-provisioner
|
|
volumeBindingMode: WaitForFirstConsumer
|
|
when: inventory_hostname in groups['k8s_control_plane']
|
|
|
|
- name: Apply SC
|
|
command: kubectl apply -f /root/local-sc.yaml
|
|
environment:
|
|
KUBECONFIG: /etc/kubernetes/admin.conf
|
|
when: inventory_hostname in groups['k8s_control_plane']
|
|
|
|
- name: Create local-path directory
|
|
file:
|
|
path: /mnt/local-ssd
|
|
state: directory
|
|
mode: '0755'
|
|
|
|
- name: Create subdirectories for each PV
|
|
file:
|
|
path: "/mnt/local-ssd/{{ item }}"
|
|
state: directory
|
|
mode: '0755'
|
|
loop:
|
|
- postgres
|
|
- prometheus
|
|
- elasticsearch
|
|
- grafana
|
|
|
|
- name: Copy PV manifest
|
|
template:
|
|
src: local-ssd-pv.yaml
|
|
dest: /tmp/local-ssd-pv.yaml
|
|
|
|
- name: Apply PV
|
|
command: kubectl apply -f /tmp/local-ssd-pv.yaml
|
|
run_once: true
|
|
delegate_to: "{{ groups['k8s_control_plane'][0] }}"
|
|
|
|
- name: Apply StorageClass manifest
  # The previous version applied /tmp/local-ssd-sc.yaml, a file no task in
  # this role ever creates (the run would always fail). The StorageClass
  # manifest is rendered to /root/local-sc.yaml earlier in this role on the
  # control-plane host, so apply that file instead. Task also renamed to
  # avoid duplicating the earlier "Apply SC" task name.
  command: kubectl apply -f /root/local-sc.yaml
  environment:
    KUBECONFIG: /etc/kubernetes/admin.conf
  run_once: true
  delegate_to: "{{ groups['k8s_control_plane'][0] }}"
|
|
|
|
=== ./ansible/roles/storage_local_path/templates/local-ssd-pv.yaml ===
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: local-ssd-postgres
|
|
spec:
|
|
capacity:
|
|
storage: 100Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/postgres
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: local-ssd-prometheus
|
|
spec:
|
|
capacity:
|
|
storage: 100Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/prometheus
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: local-ssd-elasticsearch
|
|
spec:
|
|
capacity:
|
|
storage: 300Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/elasticsearch
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
|
|
=== ./ansible/roles/wireguard/tasks/main.yml ===
|
|
- name: Install wireguard
|
|
apt:
|
|
name: [wireguard, qrencode]
|
|
state: present
|
|
update_cache: yes
|
|
|
|
- name: Ensure key dir
|
|
file: { path: /etc/wireguard/keys, state: directory, mode: '0700' }
|
|
|
|
- name: Generate private key if missing
|
|
shell: "[ -f /etc/wireguard/keys/privatekey ] || (umask 077 && wg genkey > /etc/wireguard/keys/privatekey)"
|
|
args: { creates: /etc/wireguard/keys/privatekey }
|
|
|
|
- name: Generate public key
|
|
shell: "wg pubkey < /etc/wireguard/keys/privatekey > /etc/wireguard/keys/publickey"
|
|
args: { creates: /etc/wireguard/keys/publickey }
|
|
|
|
- name: Read pubkey
|
|
slurp: { src: /etc/wireguard/keys/publickey }
|
|
register: pubkey_raw
|
|
|
|
- name: Read private key
|
|
slurp: { src: /etc/wireguard/keys/privatekey }
|
|
register: privkey_raw
|
|
|
|
- set_fact:
|
|
wg_public_key: "{{ pubkey_raw.content | b64decode | trim }}"
|
|
wg_private_key: "{{ privkey_raw.content | b64decode | trim }}"
|
|
|
|
- name: Gather facts from all hosts
|
|
setup:
|
|
delegate_to: "{{ item }}"
|
|
delegate_facts: true
|
|
loop: "{{ groups['k8s_nodes'] }}"
|
|
run_once: true
|
|
|
|
- name: Pretty print hostvars
|
|
debug:
|
|
msg: "{{ hostvars['hetzner-1']['wg_public_key'] }}"
|
|
|
|
- name: Render config
|
|
template:
|
|
src: wg0.conf.j2
|
|
dest: /etc/wireguard/wg0.conf
|
|
mode: '0600'
|
|
|
|
- name: Enable IP forward
|
|
sysctl:
|
|
name: net.ipv4.ip_forward
|
|
value: "1"
|
|
sysctl_set: yes
|
|
state: present
|
|
reload: yes
|
|
|
|
- name: Enable wg-quick
|
|
service:
|
|
name: wg-quick@wg0
|
|
enabled: yes
|
|
state: started
|
|
|
|
# The debug task below referenced `wg_show`, but nothing ever registered that
# variable, so it always printed "VARIABLE IS NOT DEFINED". Capture the
# interface status first.
- name: Show wireguard status
  command: wg show
  register: wg_show
  changed_when: false

- debug:
    var: wg_show.stdout
|
|
=== ./ansible/roles/wireguard/vars/main.yml ===
|
|
wg_interface: wg0
|
|
wg_port: 51820
|
|
wg_cidr: 10.66.0.0/24
|
|
wg_nodes:
|
|
hetzner-1: { address: 10.66.0.11, public_ip: "95.217.89.53" }
|
|
hetzner-2: { address: 10.66.0.12, public_ip: "138.201.254.97" }
|
|
|
|
=== ./DNS_RECORDS.txt ===
|
|
apps.betelgeusebytes.io. 300 IN A 95.217.89.53
|
|
apps.betelgeusebytes.io. 300 IN A 138.201.254.97
|
|
gitea.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
kibana.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
grafana.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
prometheus.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
notebook.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
broker.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
neo4j.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
otlp.betelgeusebytes.io. 300 IN CNAME apps.betelgeusebytes.io.
|
|
|
|
=== ./k8s/00-namespaces.yaml ===
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: db }
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: scm }
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: ml }
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: monitoring }
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: elastic }
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: broker }
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: graph }
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata: { name: observability }
|
|
|
|
=== ./k8s/01-secrets/basic-auth.yaml ===
|
|
# Replace each 'auth' line with a real htpasswd pair:
|
|
# htpasswd -nbBC 10 admin 'Str0ngP@ss' (copy 'admin:...' to value below)
|
|
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: basic-auth-kibana, namespace: elastic }
|
|
type: Opaque
|
|
stringData: { auth: "admin:$2y$10$MBLgALyI7xwFrQh2PHqZruX.EzaTUGagmJODwpBEvF27snFAxCBvq" }
|
|
---
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: basic-auth-grafana, namespace: monitoring }
|
|
type: Opaque
|
|
stringData: { auth: "admin:$2y$10$MBLgALyI7xwFrQh2PHqZruX.EzaTUGagmJODwpBEvF27snFAxCBvq" }
|
|
---
|
|
apiVersion: v1
kind: Secret
metadata: { name: basic-auth-prometheus, namespace: monitoring }
type: Opaque
# Username fixed from "aadmin" (typo) to "admin", matching every other
# basic-auth secret in this file; the bcrypt hash is the shared admin hash.
stringData: { auth: "admin:$2y$10$MBLgALyI7xwFrQh2PHqZruX.EzaTUGagmJODwpBEvF27snFAxCBvq" }
|
|
---
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: basic-auth-notebook, namespace: ml }
|
|
type: Opaque
|
|
stringData: { auth: "admin:$2y$10$MBLgALyI7xwFrQh2PHqZruX.EzaTUGagmJODwpBEvF27snFAxCBvq" }
|
|
---
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: basic-auth-broker, namespace: broker }
|
|
type: Opaque
|
|
stringData: { auth: "admin:$2y$10$MBLgALyI7xwFrQh2PHqZruX.EzaTUGagmJODwpBEvF27snFAxCBvq" }
|
|
---
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: basic-auth-neo4j, namespace: graph }
|
|
type: Opaque
|
|
stringData: { auth: "admin:$2y$10$MBLgALyI7xwFrQh2PHqZruX.EzaTUGagmJODwpBEvF27snFAxCBvq" }
|
|
|
|
=== ./k8s/argoflow/argo.yaml ===
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata:
|
|
name: argo-artifacts
|
|
namespace: ml
|
|
type: Opaque
|
|
stringData:
|
|
accesskey: "minioadmin" # <-- change
|
|
secretkey: "minioadmin" # <-- change
|
|
---
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: workflow-controller-configmap
|
|
namespace: ml
|
|
data:
|
|
config: |
|
|
artifactRepository:
|
|
s3:
|
|
bucket: argo-artifacts
|
|
endpoint: minio.betelgeusebytes.io # no scheme here
|
|
insecure: false # https via Ingress
|
|
accessKeySecret:
|
|
name: argo-artifacts
|
|
key: accesskey
|
|
secretKeySecret:
|
|
name: argo-artifacts
|
|
key: secretkey
|
|
keyFormat: "{{workflow.namespace}}/{{workflow.name}}/{{pod.name}}"
|
|
|
|
---
|
|
# k8s/argo/workflows/ns-rbac.yaml
|
|
apiVersion: v1
|
|
kind: ServiceAccount
|
|
metadata:
|
|
name: argo-server
|
|
namespace: ml
|
|
---
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: Role
|
|
metadata:
|
|
name: argo-namespaced
|
|
namespace: ml
|
|
rules:
|
|
- apiGroups: [""]
|
|
resources: ["pods","pods/log","secrets","configmaps","events","persistentvolumeclaims","serviceaccounts"]
|
|
verbs: ["get","list","watch","create","delete","patch","update"]
|
|
- apiGroups: ["coordination.k8s.io"]
|
|
resources: ["leases"]
|
|
verbs: ["get","list","watch","create","delete","patch","update"]
|
|
- apiGroups: ["argoproj.io"]
|
|
resources: ["workflows","workflowtemplates","cronworkflows","workfloweventbindings","sensors","eventsources","workflowtasksets","workflowartifactgctasks","workflowtaskresults"]
|
|
verbs: ["get","list","watch","create","delete","patch","update"]
|
|
---
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: RoleBinding
|
|
metadata:
|
|
name: argo-namespaced-binding
|
|
namespace: ml
|
|
subjects:
|
|
- kind: ServiceAccount
|
|
name: argo-server
|
|
namespace: ml
|
|
roleRef:
|
|
apiGroup: rbac.authorization.k8s.io
|
|
kind: Role
|
|
name: argo-namespaced
|
|
|
|
---
|
|
# k8s/argo/workflows/controller.yaml
|
|
# Argo Workflows controller, running namespaced (ml namespace only) under the
# argo-server ServiceAccount bound to the argo-namespaced Role above.
apiVersion: apps/v1
kind: Deployment
metadata: { name: workflow-controller, namespace: ml }
spec:
  replicas: 1
  selector: { matchLabels: { app: workflow-controller } }
  template:
    metadata: { labels: { app: workflow-controller } }
    spec:
      serviceAccountName: argo-server
      containers:
        - name: controller
          # NOTE(review): `latest` is an unpinned moving tag — pin a specific
          # release for reproducible deploys and safe rollbacks.
          image: quay.io/argoproj/workflow-controller:latest
          args: ["--namespaced"]
          env:
            # Unique leader-election identity per pod (pod name).
            - name: LEADER_ELECTION_IDENTITY
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
          ports: [{ containerPort: 9090 }]
          readinessProbe:
            # NOTE(review): probes assume the metrics endpoint on 9090 serves
            # HTTPS — confirm against the deployed controller version's
            # metrics configuration.
            httpGet: { path: /metrics, port: 9090, scheme: HTTPS }
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            httpGet: { path: /metrics, port: 9090, scheme: HTTPS }
            initialDelaySeconds: 20
            periodSeconds: 20
|
|
|
|
---
|
|
# k8s/argo/workflows/server.yaml
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata: { name: argo-server, namespace: ml }
|
|
spec:
|
|
replicas: 1
|
|
selector: { matchLabels: { app: argo-server } }
|
|
template:
|
|
metadata: { labels: { app: argo-server } }
|
|
spec:
|
|
serviceAccountName: argo-server
|
|
containers:
|
|
- name: server
|
|
image: quay.io/argoproj/argocli:latest
|
|
args: ["server","--auth-mode","server","--namespaced","--secure=false"]
|
|
ports: [{ containerPort: 2746 }]
|
|
readinessProbe:
|
|
httpGet: { path: /, port: 2746, scheme: HTTP }
|
|
initialDelaySeconds: 5
|
|
periodSeconds: 10
|
|
livenessProbe:
|
|
httpGet: { path: /, port: 2746, scheme: HTTP }
|
|
initialDelaySeconds: 20
|
|
periodSeconds: 20
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: argo-server, namespace: ml }
|
|
spec: { selector: { app: argo-server }, ports: [ { port: 80, targetPort: 2746 } ] }
|
|
---
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: argo
|
|
namespace: ml
|
|
annotations: { cert-manager.io/cluster-issuer: letsencrypt-prod }
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls: [{ hosts: ["argo.betelgeusebytes.io"], secretName: argo-tls }]
|
|
rules:
|
|
- host: argo.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend: { service: { name: argo-server, port: { number: 80 } } }
|
|
=== ./k8s/automation/n8n.yaml ===
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata:
|
|
name: automation
|
|
labels:
|
|
name: automation
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: n8n-pv
|
|
labels:
|
|
app: n8n
|
|
spec:
|
|
capacity:
|
|
storage: 20Gi
|
|
volumeMode: Filesystem
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd
|
|
local:
|
|
path: /mnt/local-ssd/n8n
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolumeClaim
|
|
metadata:
|
|
name: n8n-data
|
|
namespace: automation
|
|
labels:
|
|
app: n8n
|
|
spec:
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
storageClassName: local-ssd
|
|
resources:
|
|
requests:
|
|
storage: 20Gi
|
|
selector:
|
|
matchLabels:
|
|
app: n8n
|
|
---
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata:
|
|
name: n8n-secrets
|
|
namespace: automation
|
|
type: Opaque
|
|
stringData:
|
|
# Generate a strong encryption key with: openssl rand -base64 32
|
|
N8N_ENCRYPTION_KEY: "G/US0ePajEpWwRUjlchyOs6+6I/AT+0bisXmE2fugSU="
|
|
# Optional: Database connection if using PostgreSQL
|
|
DB_TYPE: "postgresdb"
|
|
DB_POSTGRESDB_HOST: "pg.betelgeusebytes.io"
|
|
DB_POSTGRESDB_PORT: "5432"
|
|
DB_POSTGRESDB_DATABASE: "n8n"
|
|
DB_POSTGRESDB_USER: "app"
|
|
DB_POSTGRESDB_PASSWORD: "pa$$word"
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: n8n
|
|
namespace: automation
|
|
spec:
|
|
serviceName: n8n
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: n8n
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: n8n
|
|
spec:
|
|
nodeSelector:
|
|
kubernetes.io/hostname: hetzner-2
|
|
containers:
|
|
- name: n8n
|
|
image: n8nio/n8n:latest
|
|
ports:
|
|
- containerPort: 5678
|
|
name: http
|
|
env:
|
|
- name: N8N_HOST
|
|
value: "n8n.betelgeusebytes.io"
|
|
- name: N8N_PORT
|
|
value: "5678"
|
|
- name: N8N_PROTOCOL
|
|
value: "https"
|
|
- name: WEBHOOK_URL
|
|
value: "https://n8n.betelgeusebytes.io/"
|
|
- name: GENERIC_TIMEZONE
|
|
value: "UTC"
|
|
- name: N8N_ENCRYPTION_KEY
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: n8n-secrets
|
|
key: N8N_ENCRYPTION_KEY
|
|
# Uncomment if using PostgreSQL
|
|
- name: DB_TYPE
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: n8n-secrets
|
|
key: DB_TYPE
|
|
- name: DB_POSTGRESDB_HOST
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: n8n-secrets
|
|
key: DB_POSTGRESDB_HOST
|
|
- name: DB_POSTGRESDB_PORT
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: n8n-secrets
|
|
key: DB_POSTGRESDB_PORT
|
|
- name: DB_POSTGRESDB_DATABASE
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: n8n-secrets
|
|
key: DB_POSTGRESDB_DATABASE
|
|
- name: DB_POSTGRESDB_USER
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: n8n-secrets
|
|
key: DB_POSTGRESDB_USER
|
|
- name: DB_POSTGRESDB_PASSWORD
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: n8n-secrets
|
|
key: DB_POSTGRESDB_PASSWORD
|
|
volumeMounts:
|
|
- name: n8n-data
|
|
mountPath: /home/node/.n8n
|
|
resources:
|
|
requests:
|
|
memory: "512Mi"
|
|
cpu: "250m"
|
|
limits:
|
|
memory: "2Gi"
|
|
cpu: "1000m"
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /healthz
|
|
port: 5678
|
|
initialDelaySeconds: 60
|
|
periodSeconds: 30
|
|
timeoutSeconds: 10
|
|
failureThreshold: 5
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /healthz
|
|
port: 5678
|
|
initialDelaySeconds: 30
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
failureThreshold: 3
|
|
volumes:
|
|
- name: n8n-data
|
|
persistentVolumeClaim:
|
|
claimName: n8n-data
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: n8n
|
|
namespace: automation
|
|
labels:
|
|
app: n8n
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- port: 5678
|
|
targetPort: 5678
|
|
protocol: TCP
|
|
name: http
|
|
selector:
|
|
app: n8n
|
|
---
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: n8n
|
|
namespace: automation
|
|
annotations:
|
|
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
|
# nginx.ingress.kubernetes.io/proxy-body-size: "50m"
|
|
# nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
|
|
# nginx.ingress.kubernetes.io/proxy-send-timeout: "300"
|
|
# Uncomment below if you want basic auth protection in addition to n8n's auth
|
|
# nginx.ingress.kubernetes.io/auth-type: basic
|
|
# nginx.ingress.kubernetes.io/auth-secret: n8n-basic-auth
|
|
# nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls:
|
|
- hosts:
|
|
- n8n.betelgeusebytes.io
|
|
secretName: wildcard-betelgeusebytes-tls
|
|
rules:
|
|
- host: n8n.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend:
|
|
service:
|
|
name: n8n
|
|
port:
|
|
number: 5678
|
|
=== ./k8s/cert-manager/cluster-issuer.yaml ===
|
|
apiVersion: cert-manager.io/v1
|
|
kind: ClusterIssuer
|
|
metadata: { name: letsencrypt-prod }
|
|
spec:
|
|
acme:
|
|
email: angal.salah@gmail.com
|
|
server: https://acme-v02.api.letsencrypt.org/directory
|
|
privateKeySecretRef: { name: letsencrypt-prod-key }
|
|
solvers:
|
|
- http01: { ingress: { class: nginx } }
|
|
|
|
=== ./k8s/elastic/elastic-pv.yaml ===
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-elasticsearch
|
|
spec:
|
|
capacity:
|
|
storage: 80Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/elasticsearch
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
=== ./k8s/elastic/elasticsearch.yaml ===
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: elasticsearch, namespace: elastic }
|
|
spec:
|
|
ports:
|
|
- { name: http, port: 9200, targetPort: 9200 }
|
|
- { name: transport, port: 9300, targetPort: 9300 }
|
|
selector: { app: elasticsearch }
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata: { name: elasticsearch, namespace: elastic }
|
|
spec:
|
|
serviceName: elasticsearch
|
|
replicas: 1
|
|
selector: { matchLabels: { app: elasticsearch } }
|
|
template:
|
|
metadata: { labels: { app: elasticsearch } }
|
|
spec:
|
|
nodeSelector: { node: hetzner-2 }
|
|
containers:
|
|
- name: es
|
|
image: docker.elastic.co/elasticsearch/elasticsearch:8.14.0
|
|
env:
|
|
- { name: discovery.type, value: single-node }
|
|
- { name: xpack.security.enabled, value: "false" }
|
|
- { name: ES_JAVA_OPTS, value: "-Xms2g -Xmx2g" }
|
|
ports:
|
|
- { containerPort: 9200 }
|
|
- { containerPort: 9300 }
|
|
volumeMounts:
|
|
- { name: data, mountPath: /usr/share/elasticsearch/data }
|
|
volumeClaimTemplates:
|
|
- metadata: { name: data }
|
|
spec:
|
|
accessModes: ["ReadWriteOnce"]
|
|
storageClassName: local-ssd-hetzner
|
|
resources: { requests: { storage: 80Gi } }
|
|
|
|
=== ./k8s/elastic/kibana.yaml ===
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: kibana, namespace: elastic }
|
|
spec:
|
|
ports: [{ port: 5601, targetPort: 5601 }]
|
|
selector: { app: kibana }
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata: { name: kibana, namespace: elastic }
|
|
spec:
|
|
replicas: 1
|
|
selector: { matchLabels: { app: kibana } }
|
|
template:
|
|
metadata: { labels: { app: kibana } }
|
|
spec:
|
|
nodeSelector: { node: hetzner-2 }
|
|
containers:
|
|
- name: kibana
|
|
image: docker.elastic.co/kibana/kibana:8.14.0
|
|
env:
|
|
- { name: ELASTICSEARCH_HOSTS, value: "http://elasticsearch.elastic.svc.cluster.local:9200" }
|
|
ports: [{ containerPort: 5601 }]
|
|
---
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: kibana
|
|
namespace: elastic
|
|
annotations:
|
|
cert-manager.io/cluster-issuer: letsencrypt-prod
|
|
# nginx.ingress.kubernetes.io/auth-type: basic
|
|
# nginx.ingress.kubernetes.io/auth-secret: basic-auth-kibana
|
|
# nginx.ingress.kubernetes.io/auth-realm: "Authentication Required"
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls: [{ hosts: ["kibana.betelgeusebytes.io"], secretName: kibana-tls }]
|
|
rules:
|
|
- host: kibana.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend: { service: { name: kibana, port: { number: 5601 } } }
|
|
|
|
=== ./k8s/gitea/gitea-pv.yaml ===
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-gitea
|
|
spec:
|
|
capacity:
|
|
storage: 50Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/gitea
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
=== ./k8s/gitea/gitea.yaml ===
|
|
# Gitea: ClusterIP Service + single-replica StatefulSet + HTTPS ingress.
apiVersion: v1
kind: Service
metadata:
  name: gitea
  namespace: scm
spec:
  ports:
    - port: 80
      targetPort: 3000
  selector:
    app: gitea
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: gitea
  namespace: scm
spec:
  serviceName: gitea
  replicas: 1
  selector:
    matchLabels:
      app: gitea
  template:
    metadata:
      labels:
        app: gitea
    spec:
      # Pin to the node that holds the local-ssd storage.
      nodeSelector:
        node: hetzner-2
      containers:
        - name: gitea
          image: gitea/gitea:1.21.11
          env:
            - name: GITEA__server__ROOT_URL
              value: "https://gitea.betelgeusebytes.io"
            - name: GITEA__database__DB_TYPE
              value: "postgres"
            - name: GITEA__database__HOST
              value: "postgres.db.svc.cluster.local:5432"
            - name: GITEA__database__NAME
              value: "gitea"
            - name: GITEA__database__USER
              value: "app"
            # NOTE(review): DB password committed in plain text; consider moving
            # it to a Secret and referencing it via valueFrom.secretKeyRef.
            - name: GITEA__database__PASSWD
              value: "pa$$word"
          ports:
            - containerPort: 3000
          volumeMounts:
            - name: data
              mountPath: /data
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: local-ssd-hetzner
        resources:
          requests:
            storage: 50Gi
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: gitea
  namespace: scm
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["gitea.betelgeusebytes.io"]
      secretName: gitea-tls
  rules:
    - host: gitea.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: gitea
                port:
                  number: 80
|
|
|
|
=== ./k8s/grafana/grafana.yaml ===
|
|
# Grafana: Service + Deployment + basic-auth-protected HTTPS ingress.
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
spec:
  ports:
    - port: 80
      targetPort: 3000
  selector:
    app: grafana
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
  template:
    metadata:
      labels:
        app: grafana
    spec:
      nodeSelector:
        node: hetzner-2
      containers:
        - name: grafana
          image: grafana/grafana:10.4.3
          env:
            - name: GF_SECURITY_ADMIN_USER
              value: admin
            # NOTE(review): admin password committed in plain text; consider a
            # Secret + valueFrom.secretKeyRef instead.
            - name: GF_SECURITY_ADMIN_PASSWORD
              value: "ADMINclaude-GRAFANA"
          ports:
            - containerPort: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: grafana
  namespace: monitoring
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/auth-type: basic
    nginx.ingress.kubernetes.io/auth-secret: basic-auth-grafana
    nginx.ingress.kubernetes.io/auth-realm: "Authentication Required"
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["grafana.betelgeusebytes.io"]
      secretName: grafana-tls
  rules:
    - host: grafana.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: grafana
                port:
                  number: 80
|
|
|
|
=== ./k8s/ingress-patch/kustomization.yaml ===
|
|
# Kustomize overlay for ingress-nginx: adds TCP passthrough for
# Postgres (5432) and Neo4j Bolt (7687).
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ingress-nginx

# Create the tcp-services ConfigMap from *quoted* literals.
configMapGenerator:
  - name: tcp-services
    literals:
      - "5432=db/postgres:5432"
      - "7687=graph/neo4j:7687"

generatorOptions:
  # Keep the stable name "tcp-services" (referenced by the controller flag).
  disableNameSuffixHash: true

# Inline JSON6902 patches.
patches:
  # 1) Add controller arg for tcp-services.
  - target:
      group: apps
      version: v1
      kind: Deployment
      name: ingress-nginx-controller
      namespace: ingress-nginx
    patch: |-
      - op: add
        path: /spec/template/spec/containers/0/args/-
        value: --tcp-services-configmap=$(POD_NAMESPACE)/tcp-services

  # 2) Expose Service ports 5432 and 7687 (keeps 80/443).
  - target:
      version: v1
      kind: Service
      name: ingress-nginx-controller
      namespace: ingress-nginx
    patch: |-
      - op: add
        path: /spec/ports/-
        value:
          name: tcp-5432
          port: 5432
          protocol: TCP
          targetPort: 5432
      - op: add
        path: /spec/ports/-
        value:
          name: tcp-7687
          port: 7687
          protocol: TCP
          targetPort: 7687
|
|
|
|
=== ./k8s/jupyter/jupyter.yaml ===
|
|
# JupyterLab: Service + Deployment + PVC + token Secret + HTTPS ingress.
apiVersion: v1
kind: Service
metadata:
  name: notebook
  namespace: ml
spec:
  selector:
    app: jupyterlab
  ports:
    - port: 80
      targetPort: 8888
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jupyterlab
  namespace: ml
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jupyterlab
  template:
    metadata:
      labels:
        app: jupyterlab
    spec:
      securityContext:
        # jovyan user / users group of the upstream Jupyter images.
        runAsUser: 1000
        fsGroup: 100
      nodeSelector:
        node: hetzner-2
      containers:
        - name: jupyter
          # NOTE(review): "latest" is not reproducible; consider pinning a tag.
          image: jupyter/base-notebook:latest
          # $(PASSWORD) is expanded by Kubernetes from the env var below.
          args: ["start-notebook.sh", "--NotebookApp.token=$(PASSWORD)"]
          env:
            - name: PASSWORD
              valueFrom:
                secretKeyRef:
                  name: jupyter-auth
                  key: PASSWORD
          ports:
            - containerPort: 8888
          volumeMounts:
            - name: work
              mountPath: /home/jovyan/work
      volumes:
        - name: work
          persistentVolumeClaim:
            claimName: jupyter-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: jupyter-pvc
  namespace: ml
spec:
  accessModes: ["ReadWriteOnce"]
  storageClassName: local-ssd-hetzner
  resources:
    requests:
      storage: 20Gi
---
apiVersion: v1
kind: Secret
metadata:
  name: jupyter-auth
  namespace: ml
type: Opaque
stringData:
  # NOTE(review): weak token committed in plain text — rotate for production.
  PASSWORD: "notebook"
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: notebook
  namespace: ml
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # Uncomment to protect the UI with HTTP basic auth:
    # nginx.ingress.kubernetes.io/auth-type: basic
    # nginx.ingress.kubernetes.io/auth-secret: basic-auth-notebook
    # nginx.ingress.kubernetes.io/auth-realm: "Authentication Required"
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["notebook.betelgeusebytes.io"]
      secretName: notebook-tls
  rules:
    - host: notebook.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: notebook
                port:
                  number: 80
|
|
|
|
=== ./k8s/kafka/kafka-pv.yaml ===
|
|
# Local PVs for Kafka and ZooKeeper data/log dirs, all pinned to hetzner-2.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-kafka
spec:
  capacity:
    storage: 50Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-ssd-hetzner
  local:
    path: /mnt/local-ssd/kafka
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-zookeeper-data
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-ssd-hetzner
  local:
    path: /mnt/local-ssd/zookeeper-data
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-zookeeper-log
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-ssd-hetzner
  local:
    path: /mnt/local-ssd/zookeeper-log
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
|
|
=== ./k8s/kafka/kafka-ui.yaml ===
|
|
# Kafka UI: Service + Deployment + HTTPS ingress at broker.betelgeusebytes.io.
apiVersion: v1
kind: Service
metadata:
  name: kafka-ui
  namespace: broker
spec:
  ports:
    - port: 80
      targetPort: 8080
  selector:
    app: kafka-ui
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kafka-ui
  namespace: broker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kafka-ui
  template:
    metadata:
      labels:
        app: kafka-ui
    spec:
      containers:
        - name: ui
          # NOTE(review): "latest" is not reproducible; consider pinning a tag.
          image: provectuslabs/kafka-ui:latest
          env:
            - name: KAFKA_CLUSTERS_0_NAME
              value: "local"
            - name: KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS
              value: "kafka.broker.svc.cluster.local:9092"
          ports:
            - containerPort: 8080
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: kafka-ui
  namespace: broker
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # Uncomment to protect the UI with HTTP basic auth:
    # nginx.ingress.kubernetes.io/auth-type: basic
    # nginx.ingress.kubernetes.io/auth-secret: basic-auth-broker
    # nginx.ingress.kubernetes.io/auth-realm: "Authentication Required"
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["broker.betelgeusebytes.io"]
      secretName: broker-tls
  rules:
    - host: broker.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: kafka-ui
                port:
                  number: 80
|
|
|
|
=== ./k8s/kafka/kafka.yaml ===
|
|
# Single-node Kafka in KRaft mode (combined broker + controller roles).
apiVersion: v1
kind: Service
metadata:
  name: kafka
  namespace: broker
spec:
  ports:
    - name: kafka
      port: 9092
      targetPort: 9092
  selector:
    app: kafka
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: kafka
  namespace: broker
spec:
  serviceName: kafka
  replicas: 1
  selector:
    matchLabels:
      app: kafka
  template:
    metadata:
      labels:
        app: kafka
    spec:
      nodeSelector:
        node: hetzner-2
      containers:
        - name: kafka
          # NOTE(review): "latest" is not reproducible; consider pinning a tag.
          image: apache/kafka:latest
          env:
            - name: KAFKA_NODE_ID
              value: "1"
            - name: KAFKA_PROCESS_ROLES
              value: "broker,controller"
            - name: KAFKA_LISTENERS
              value: "PLAINTEXT://:9092,CONTROLLER://:9093"
            # Clients inside the cluster connect via the Service DNS name.
            - name: KAFKA_ADVERTISED_LISTENERS
              value: "PLAINTEXT://kafka.broker.svc.cluster.local:9092"
            - name: KAFKA_CONTROLLER_LISTENER_NAMES
              value: "CONTROLLER"
            - name: KAFKA_LISTENER_SECURITY_PROTOCOL_MAP
              value: "CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT"
            # Single-voter quorum: this pod is the only controller.
            - name: KAFKA_CONTROLLER_QUORUM_VOTERS
              value: "1@localhost:9093"
            - name: KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR
              value: "1"
            - name: KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR
              value: "1"
            - name: KAFKA_TRANSACTION_STATE_LOG_MIN_ISR
              value: "1"
            - name: KAFKA_LOG_DIRS
              value: "/var/lib/kafka/data"
            - name: CLUSTER_ID
              value: "MkU3OEVBNTcwNTJENDM2Qk"
          ports:
            - containerPort: 9092
            - containerPort: 9093
          volumeMounts:
            - name: data
              mountPath: /var/lib/kafka/data
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: local-ssd-hetzner
        resources:
          requests:
            storage: 50Gi
|
|
|
|
=== ./k8s/label_studio/label.yaml ===
|
|
# Label Studio: Postgres/MinIO credential Secrets + Deployment + Service + ingress.
apiVersion: v1
kind: Secret
metadata:
  name: labelstudio-pg
  namespace: ml
type: Opaque
stringData:
  # NOTE(review): weak password committed in plain text — rotate for production.
  POSTGRES_PASSWORD: "admin"
---
apiVersion: v1
kind: Secret
metadata:
  name: minio-label
  namespace: ml
type: Opaque
stringData:
  accesskey: "minioadmin"
  secretkey: "minioadmin"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: label-studio
  namespace: ml
spec:
  replicas: 1
  selector:
    matchLabels:
      app: label-studio
  template:
    metadata:
      labels:
        app: label-studio
    spec:
      containers:
        - name: app
          # NOTE(review): "latest" is not reproducible; consider pinning a tag.
          image: heartexlabs/label-studio:latest
          env:
            # Label Studio's Postgres settings use the POSTGRE_* prefix.
            - name: POSTGRE_NAME
              value: "labelstudio"
            - name: POSTGRE_USER
              value: "admin"
            - name: POSTGRE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: labelstudio-pg
                  key: POSTGRES_PASSWORD
            - name: POSTGRE_HOST
              value: "postgres.db.svc.cluster.local"
            - name: POSTGRE_PORT
              value: "5432"
            # NOTE(review): this hostname's ingress routes to the MinIO console
            # (9001), not the S3 API (9000) — verify S3 calls actually work.
            - name: S3_ENDPOINT
              value: "https://minio.betelgeusebytes.io"
            - name: AWS_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: minio-label
                  key: accesskey
            - name: AWS_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: minio-label
                  key: secretkey
            - name: ALLOWED_HOSTS
              value: "label.betelgeusebytes.io"
            - name: CSRF_TRUSTED_ORIGINS
              value: "https://label.betelgeusebytes.io"
            - name: CSRF_COOKIE_SECURE
              value: "1"
            - name: SESSION_COOKIE_SECURE
              value: "1"
          ports:
            - containerPort: 8080
---
apiVersion: v1
kind: Service
metadata:
  name: label-studio
  namespace: ml
spec:
  selector:
    app: label-studio
  ports:
    - port: 80
      targetPort: 8080
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: label-studio
  namespace: ml
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["label.betelgeusebytes.io"]
      secretName: label-tls
  rules:
    - host: label.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: label-studio
                port:
                  number: 80
|
|
|
|
=== ./k8s/minio/minio.yaml ===
|
|
# MinIO: namespace, root credentials, storage, Deployment, Service, ingress, PV.
apiVersion: v1
kind: Namespace
metadata:
  name: storage
---
apiVersion: v1
kind: Secret
metadata:
  name: minio-root
  namespace: storage
type: Opaque
stringData:
  # NOTE(review): default credentials committed in plain text — rotate these.
  MINIO_ROOT_USER: "minioadmin"
  MINIO_ROOT_PASSWORD: "minioadmin"
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: minio-data
  namespace: storage
spec:
  accessModes: ["ReadWriteOnce"]
  storageClassName: local-ssd-hetzner
  resources:
    requests:
      storage: 20Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: minio
  namespace: storage
spec:
  replicas: 1
  selector:
    matchLabels:
      app: minio
  template:
    metadata:
      labels:
        app: minio
    spec:
      containers:
        - name: minio
          # NOTE(review): "latest" is not reproducible; consider pinning a tag.
          image: minio/minio:latest
          args: ["server", "/data", "--console-address", ":9001"]
          envFrom:
            - secretRef:
                name: minio-root
          ports:
            - containerPort: 9000  # S3 API
            - containerPort: 9001  # Console
          volumeMounts:
            - name: data
              mountPath: /data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: minio-data
---
apiVersion: v1
kind: Service
metadata:
  name: minio
  namespace: storage
spec:
  selector:
    app: minio
  ports:
    - name: s3
      port: 9000
      targetPort: 9000
    - name: console
      port: 9001
      targetPort: 9001
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: minio
  namespace: storage
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["minio.betelgeusebytes.io"]
      secretName: minio-tls
  rules:
    - host: minio.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: minio
                # NOTE(review): this routes the hostname to the *console* (9001),
                # but mlflow/label-studio use the same hostname as an S3 endpoint,
                # which is served on 9000 — confirm S3 traffic has a working path.
                port:
                  number: 9001
---
# Pre-provisioned local PV backing the minio-data PVC; pinned to hetzner-2.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-minio
spec:
  capacity:
    storage: 20Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-ssd-hetzner
  local:
    path: /mnt/local-ssd/minio
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
|
|
=== ./k8s/mlflow/mlflow.yaml ===
|
|
# MLflow tracking server: credential Secrets + Deployment + Service + ingress.
apiVersion: v1
kind: Secret
metadata:
  name: mlflow-pg
  namespace: ml
type: Opaque
stringData:
  POSTGRES_PASSWORD: "pa$$word"
---
apiVersion: v1
kind: Secret
metadata:
  name: mlflow-minio
  namespace: ml
type: Opaque
stringData:
  accesskey: "minioadmin"
  secretkey: "minioadmin"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mlflow
  namespace: ml
spec:
  replicas: 1
  selector:
    matchLabels:
      app: mlflow
  template:
    metadata:
      labels:
        app: mlflow
    spec:
      containers:
        - name: mlflow
          # image: ghcr.io/mlflow/mlflow:v3.6.0
          image: axxs/mlflow-pg
          env:
            # NOTE(review): credentials in this URI (admin:admin) do not match
            # the mlflow-pg secret ("pa$$word") — confirm which is correct.
            - name: MLFLOW_BACKEND_STORE_URI
              value: "postgresql://admin:admin@postgres.db.svc.cluster.local:5432/mlflow"
            # NOTE(review): this variable is not referenced by the server args;
            # presumably consumed by the image's entrypoint — verify.
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: mlflow-pg
                  key: POSTGRES_PASSWORD
            # NOTE(review): hostname's ingress routes to the MinIO console (9001),
            # not the S3 API (9000) — verify artifact uploads work.
            - name: MLFLOW_S3_ENDPOINT_URL
              value: "https://minio.betelgeusebytes.io"
            - name: AWS_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: mlflow-minio
                  key: accesskey
            - name: AWS_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: mlflow-minio
                  key: secretkey
          args: ["mlflow", "server", "--host", "0.0.0.0", "--port", "5000", "--artifacts-destination", "s3://mlflow", "--allowed-hosts", "*.betelgeusebytes.io"]
          ports:
            - containerPort: 5000
---
apiVersion: v1
kind: Service
metadata:
  name: mlflow
  namespace: ml
spec:
  selector:
    app: mlflow
  ports:
    - port: 80
      targetPort: 5000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: mlflow
  namespace: ml
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["mlflow.betelgeusebytes.io"]
      secretName: mlflow-tls
  rules:
    - host: mlflow.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: mlflow
                port:
                  number: 80
|
|
|
|
|
|
=== ./k8s/neo4j/neo4j-pv.yaml ===
|
|
# Pre-provisioned local PV for Neo4j data; pinned to hetzner-2.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-neo4j
spec:
  capacity:
    storage: 20Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-ssd-hetzner
  local:
    path: /mnt/local-ssd/neo4j
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
|
|
=== ./k8s/neo4j/neo4j.yaml ===
|
|
# Neo4j: Service (HTTP + Bolt) + StatefulSet with TLS on Bolt + auth Secret
# + HTTP ingress. Bolt (7687) itself is exposed via the ingress-nginx
# tcp-services passthrough (see k8s/ingress-patch).
apiVersion: v1
kind: Service
metadata:
  name: neo4j
  namespace: graph
spec:
  selector:
    app: neo4j
  ports:
    - name: http
      port: 7474
      targetPort: 7474
    - name: bolt
      port: 7687
      targetPort: 7687
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: neo4j
  namespace: graph
spec:
  serviceName: neo4j
  replicas: 1
  selector:
    matchLabels:
      app: neo4j
  template:
    metadata:
      labels:
        app: neo4j
    spec:
      # Avoid NEO4J_* service-link env vars colliding with Neo4j settings.
      enableServiceLinks: false
      nodeSelector:
        node: hetzner-2
      containers:
        - name: neo4j
          image: neo4j:5.20
          env:
            - name: NEO4J_AUTH
              valueFrom:
                secretKeyRef:
                  name: neo4j-auth
                  key: NEO4J_AUTH
            # TLS for the Bolt connector, using the cert-manager secret
            # mounted at /certs/bolt.
            - name: NEO4J_dbms_ssl_policy_bolt_enabled
              value: "true"
            - name: NEO4J_dbms_ssl_policy_bolt_base__directory
              value: "/certs/bolt"
            - name: NEO4J_dbms_ssl_policy_bolt_private__key
              value: "tls.key"
            - name: NEO4J_dbms_ssl_policy_bolt_public__certificate
              value: "tls.crt"
            - name: NEO4J_dbms_connector_bolt_tls__level
              value: "REQUIRED"
            # Advertise public hostname so the Browser uses the external FQDN
            # for Bolt.
            - name: NEO4J_dbms_connector_bolt_advertised__address
              value: "neo4j.betelgeusebytes.io:7687"
            # Also set a default advertised address (recommended).
            - name: NEO4J_dbms_default__advertised__address
              value: "neo4j.betelgeusebytes.io"
          ports:
            - containerPort: 7474
            - containerPort: 7687
          volumeMounts:
            - name: data
              mountPath: /data
            - name: bolt-certs
              mountPath: /certs/bolt
      volumes:
        - name: bolt-certs
          secret:
            secretName: neo4j-tls
            items:
              - key: tls.crt
                path: tls.crt
              - key: tls.key
                path: tls.key
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: local-ssd-hetzner
        resources:
          requests:
            storage: 20Gi
---
apiVersion: v1
kind: Secret
metadata:
  name: neo4j-auth
  namespace: graph
type: Opaque
stringData:
  # NOTE(review): password committed in plain text — rotate for production.
  NEO4J_AUTH: "neo4j/NEO4J-PASS"
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: neo4j-http
  namespace: graph
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # Uncomment to protect the browser with HTTP basic auth:
    # nginx.ingress.kubernetes.io/auth-type: basic
    # nginx.ingress.kubernetes.io/auth-secret: basic-auth-neo4j
    # nginx.ingress.kubernetes.io/auth-realm: "Authentication Required"
spec:
  ingressClassName: nginx
  tls:
    - hosts: ["neo4j.betelgeusebytes.io"]
      # Also mounted by the StatefulSet as the Bolt TLS material.
      secretName: neo4j-tls
  rules:
    - host: neo4j.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: neo4j
                port:
                  number: 7474

# Manual alternative to the kustomize overlay in k8s/ingress-patch:
#
# kubectl -n ingress-nginx create configmap tcp-services \
#   --from-literal="7687=graph/neo4j:7687" \
#   -o yaml --dry-run=client | kubectl apply -f -
#
# kubectl -n ingress-nginx patch deploy ingress-nginx-controller \
#   --type='json' -p='[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--tcp-services-configmap=$(POD_NAMESPACE)/tcp-services"}]'
#
# kubectl -n ingress-nginx patch deployment ingress-nginx-controller \
#   --type='json' -p='[
#     {"op":"add","path":"/spec/template/spec/containers/0/ports/-","value":{"name":"tcp-7687","containerPort":7687,"hostPort":7687,"protocol":"TCP"}}
#   ]'
|
|
=== ./k8s/observability/fluent-bit.yaml ===
|
|
# Fluent Bit log shipper: RBAC + DaemonSet tailing container logs into
# Elasticsearch (elasticsearch.elastic.svc:9200) with logstash-style indices.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: fluent-bit
  namespace: observability
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: fluent-bit-read
rules:
  - apiGroups: [""]
    resources: ["pods", "namespaces"]
    verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: fluent-bit-read
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: fluent-bit-read
subjects:
  - kind: ServiceAccount
    name: fluent-bit
    namespace: observability
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: fluent-bit
  namespace: observability
spec:
  selector:
    matchLabels:
      app: fluent-bit
  template:
    metadata:
      labels:
        app: fluent-bit
    spec:
      serviceAccountName: fluent-bit
      # Fix: without this toleration the DaemonSet never schedules on the
      # tainted control-plane node, so its container logs were not collected.
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
      containers:
        - name: fluent-bit
          image: cr.fluentbit.io/fluent/fluent-bit:2.2.2
          volumeMounts:
            - name: varlog
              mountPath: /var/log
            # NOTE(review): docker path only matters for dockershim nodes; on
            # containerd the /var/log/pods files under /var/log suffice — confirm.
            - name: containers
              mountPath: /var/lib/docker/containers
              readOnly: true
          env:
            - name: FLUENT_ELASTICSEARCH_HOST
              value: elasticsearch.elastic.svc.cluster.local
            - name: FLUENT_ELASTICSEARCH_PORT
              value: "9200"
          # Tail container logs, enrich with Kubernetes metadata, ship to ES.
          args: ["-i","tail","-p","path=/var/log/containers/*.log","-F","kubernetes","-o","es","-p","host=${FLUENT_ELASTICSEARCH_HOST}","-p","port=${FLUENT_ELASTICSEARCH_PORT}","-p","logstash_format=On","-p","logstash_prefix=k8s-logs"]
      volumes:
        - name: varlog
          hostPath:
            path: /var/log
        - name: containers
          hostPath:
            path: /var/lib/docker/containers
            type: DirectoryOrCreate
|
|
|
|
=== ./k8s/observability-stack/00-namespace.yaml ===
|
|
# Namespace for the monitoring/logging/tracing stack.
apiVersion: v1
kind: Namespace
metadata:
  name: observability
  labels:
    name: observability
    monitoring: "true"
|
|
|
|
=== ./k8s/observability-stack/01-persistent-volumes.yaml ===
|
|
---
# Local PVs for the observability stack, all pinned to hetzner-2.
# NOTE(review): these use storageClassName "local-storage" while the rest of
# the repo uses "local-ssd-hetzner" — confirm a local-storage class exists.
# Prometheus PV
apiVersion: v1
kind: PersistentVolume
metadata:
  name: prometheus-data-pv
spec:
  capacity:
    storage: 50Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  local:
    path: /mnt/local-ssd/prometheus
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2

---
# Loki PV
apiVersion: v1
kind: PersistentVolume
metadata:
  name: loki-data-pv
spec:
  capacity:
    storage: 100Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  local:
    path: /mnt/local-ssd/loki
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2

---
# Tempo PV
apiVersion: v1
kind: PersistentVolume
metadata:
  name: tempo-data-pv
spec:
  capacity:
    storage: 50Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  local:
    path: /mnt/local-ssd/tempo
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2

---
# Grafana PV
apiVersion: v1
kind: PersistentVolume
metadata:
  name: grafana-data-pv
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  local:
    path: /mnt/local-ssd/grafana
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
|
|
|
|
=== ./k8s/observability-stack/02-persistent-volume-claims.yaml ===
|
|
---
# PVCs bound to the matching local PVs in 01-persistent-volumes.yaml
# (sizes and storageClassName must match for static binding).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-data
  namespace: observability
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-storage
  resources:
    requests:
      storage: 50Gi

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: loki-data
  namespace: observability
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-storage
  resources:
    requests:
      storage: 100Gi

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: tempo-data
  namespace: observability
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-storage
  resources:
    requests:
      storage: 50Gi

---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-data
  namespace: observability
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-storage
  resources:
    requests:
      storage: 10Gi
|
|
|
|
=== ./k8s/observability-stack/03-prometheus-config.yaml ===
|
|
# Prometheus scrape configuration for the single-node observability stack.
# The embedded prometheus.yml is mounted into the Prometheus pod; it scrapes
# the API server, kubelets, cAdvisor, annotated services/pods, and the other
# stack components (kube-state-metrics, node-exporter, Loki, Tempo, Grafana).
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: observability
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
      external_labels:
        cluster: 'betelgeuse-k8s'
        environment: 'production'

    # Alerting configuration (optional - can add alertmanager later)
    alerting:
      alertmanagers:
        - static_configs:
            - targets: []

    # Rule files
    rule_files:
      - /etc/prometheus/rules/*.yml

    scrape_configs:
      # Scrape Prometheus itself
      - job_name: 'prometheus'
        static_configs:
          - targets: ['localhost:9090']

      # Kubernetes API server
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https

      # Kubernetes nodes
      - job_name: 'kubernetes-nodes'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics

      # Kubernetes nodes cadvisor
      - job_name: 'kubernetes-cadvisor'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      # Kubernetes service endpoints
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name

      # Kubernetes pods
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name

      # kube-state-metrics
      - job_name: 'kube-state-metrics'
        static_configs:
          - targets: ['kube-state-metrics.observability.svc.cluster.local:8080']

      # node-exporter
      - job_name: 'node-exporter'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_label_app]
            action: keep
            regex: node-exporter
          - source_labels: [__meta_kubernetes_pod_node_name]
            action: replace
            target_label: instance

      # Grafana Loki
      - job_name: 'loki'
        static_configs:
          - targets: ['loki.observability.svc.cluster.local:3100']

      # Grafana Tempo
      - job_name: 'tempo'
        static_configs:
          - targets: ['tempo.observability.svc.cluster.local:3200']

      # Grafana
      - job_name: 'grafana'
        static_configs:
          - targets: ['grafana.observability.svc.cluster.local:3000']
|
|
|
|
=== ./k8s/observability-stack/04-loki-config.yaml ===
|
|
# Loki configuration: single-binary, filesystem storage under /loki,
# TSDB schema v13, 7-day retention limits, analytics disabled.
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-config
  namespace: observability
data:
  loki.yaml: |
    auth_enabled: false

    server:
      http_listen_port: 3100
      grpc_listen_port: 9096
      log_level: info

    common:
      path_prefix: /loki
      storage:
        filesystem:
          chunks_directory: /loki/chunks
          rules_directory: /loki/rules
      replication_factor: 1
      ring:
        kvstore:
          store: inmemory

    schema_config:
      configs:
        - from: 2024-01-01
          store: tsdb
          object_store: filesystem
          schema: v13
          index:
            prefix: index_
            period: 24h

    storage_config:
      tsdb_shipper:
        active_index_directory: /loki/tsdb-index
        cache_location: /loki/tsdb-cache
      filesystem:
        directory: /loki/chunks

    compactor:
      working_directory: /loki/compactor
      compaction_interval: 10m
      retention_enabled: false

    limits_config:
      reject_old_samples: true
      reject_old_samples_max_age: 168h # 7 days
      retention_period: 168h # 7 days
      max_query_length: 721h # 30 days for queries
      max_query_parallelism: 32
      max_streams_per_user: 0
      max_global_streams_per_user: 0
      ingestion_rate_mb: 50
      ingestion_burst_size_mb: 100
      per_stream_rate_limit: 10MB
      per_stream_rate_limit_burst: 20MB
      split_queries_by_interval: 15m

    query_range:
      align_queries_with_step: true
      cache_results: true
      results_cache:
        cache:
          embedded_cache:
            enabled: true
            max_size_mb: 500

    frontend:
      log_queries_longer_than: 5s
      compress_responses: true

    query_scheduler:
      max_outstanding_requests_per_tenant: 2048

    ingester:
      chunk_idle_period: 30m
      chunk_block_size: 262144
      chunk_encoding: snappy
      chunk_retain_period: 1m
      max_chunk_age: 2h
      wal:
        enabled: true
        dir: /loki/wal
        flush_on_shutdown: true
        replay_memory_ceiling: 1GB

    analytics:
      reporting_enabled: false
|
|
=== ./k8s/observability-stack/05-tempo-config.yaml ===
|
|
# Tempo configuration: single-instance tracing backend with local block
# storage, Jaeger/Zipkin/OTLP receivers, 7-day retention, and a metrics
# generator remote-writing service-graph/span metrics to Prometheus.
apiVersion: v1
kind: ConfigMap
metadata:
  name: tempo-config
  namespace: observability
data:
  tempo.yaml: |
    server:
      http_listen_port: 3200
      log_level: info

    distributor:
      receivers:
        jaeger:
          protocols:
            thrift_http:
              endpoint: 0.0.0.0:14268
            grpc:
              endpoint: 0.0.0.0:14250
        zipkin:
          endpoint: 0.0.0.0:9411
        otlp:
          protocols:
            http:
              endpoint: 0.0.0.0:4318
            grpc:
              endpoint: 0.0.0.0:4317

    ingester:
      max_block_duration: 5m

    compactor:
      compaction:
        block_retention: 168h # 7 days

    metrics_generator:
      registry:
        external_labels:
          source: tempo
          cluster: betelgeuse-k8s
      storage:
        path: /var/tempo/generator/wal
        remote_write:
          - url: http://prometheus.observability.svc.cluster.local:9090/api/v1/write
            send_exemplars: true

    storage:
      trace:
        backend: local
        wal:
          path: /var/tempo/wal
        local:
          path: /var/tempo/blocks
        pool:
          max_workers: 100
          queue_depth: 10000

    # Single instance mode - no need for frontend/querier split
    query_frontend:
      search:
        duration_slo: 5s
        throughput_bytes_slo: 1.073741824e+09
      trace_by_id:
        duration_slo: 5s

    overrides:
      defaults:
        metrics_generator:
          processors: [service-graphs, span-metrics]
=== ./k8s/observability-stack/06-alloy-config.yaml ===
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: alloy-config
|
|
namespace: observability
|
|
data:
|
|
config.alloy: |
|
|
// Logging configuration
|
|
logging {
|
|
level = "info"
|
|
format = "logfmt"
|
|
}
|
|
|
|
// Discover Kubernetes pods for log collection
|
|
discovery.kubernetes "pods" {
|
|
role = "pod"
|
|
}
|
|
|
|
// Discover Kubernetes nodes
|
|
discovery.kubernetes "nodes" {
|
|
role = "node"
|
|
}
|
|
|
|
// Relabel pods for log collection
|
|
discovery.relabel "pod_logs" {
|
|
targets = discovery.kubernetes.pods.targets
|
|
|
|
// Only scrape pods with logs
|
|
rule {
|
|
source_labels = ["__meta_kubernetes_pod_container_name"]
|
|
action = "keep"
|
|
regex = ".+"
|
|
}
|
|
|
|
// Set the log path
|
|
rule {
|
|
source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
|
|
target_label = "__path__"
|
|
separator = "/"
|
|
replacement = "/var/log/pods/*$1/*.log"
|
|
}
|
|
|
|
// Set namespace label
|
|
rule {
|
|
source_labels = ["__meta_kubernetes_namespace"]
|
|
target_label = "namespace"
|
|
}
|
|
|
|
// Set pod name label
|
|
rule {
|
|
source_labels = ["__meta_kubernetes_pod_name"]
|
|
target_label = "pod"
|
|
}
|
|
|
|
// Set container name label
|
|
rule {
|
|
source_labels = ["__meta_kubernetes_pod_container_name"]
|
|
target_label = "container"
|
|
}
|
|
|
|
// Set node name label
|
|
rule {
|
|
source_labels = ["__meta_kubernetes_pod_node_name"]
|
|
target_label = "node"
|
|
}
|
|
|
|
// Copy all pod labels
|
|
rule {
|
|
action = "labelmap"
|
|
regex = "__meta_kubernetes_pod_label_(.+)"
|
|
}
|
|
}
|
|
|
|
// Read logs from discovered pods
|
|
loki.source.kubernetes "pod_logs" {
|
|
targets = discovery.relabel.pod_logs.output
|
|
forward_to = [loki.process.pod_logs.receiver]
|
|
}
|
|
|
|
// Process and enrich logs
|
|
loki.process "pod_logs" {
|
|
forward_to = [loki.write.local.receiver]
|
|
|
|
// Parse JSON logs
|
|
stage.json {
|
|
expressions = {
|
|
level = "level",
|
|
message = "message",
|
|
timestamp = "timestamp",
|
|
}
|
|
}
|
|
|
|
// Extract log level
|
|
stage.labels {
|
|
values = {
|
|
level = "",
|
|
}
|
|
}
|
|
|
|
// Add cluster label
|
|
stage.static_labels {
|
|
values = {
|
|
cluster = "betelgeuse-k8s",
|
|
}
|
|
}
|
|
}
|
|
|
|
// Write logs to Loki
|
|
loki.write "local" {
|
|
endpoint {
|
|
url = "http://loki.observability.svc.cluster.local:3100/loki/api/v1/push"
|
|
}
|
|
}
|
|
|
|
// OpenTelemetry receiver for traces
|
|
otelcol.receiver.otlp "default" {
|
|
grpc {
|
|
endpoint = "0.0.0.0:4317"
|
|
}
|
|
|
|
http {
|
|
endpoint = "0.0.0.0:4318"
|
|
}
|
|
|
|
output {
|
|
traces = [otelcol.exporter.otlp.tempo.input]
|
|
}
|
|
}
|
|
|
|
// Export traces to Tempo
|
|
otelcol.exporter.otlp "tempo" {
|
|
client {
|
|
endpoint = "tempo.observability.svc.cluster.local:4317"
|
|
tls {
|
|
insecure = true
|
|
}
|
|
}
|
|
}
|
|
|
|
// Scrape local metrics (Alloy's own metrics)
|
|
// Prometheus will scrape these via service discovery
|
|
prometheus.exporter.self "alloy" {
|
|
}
|
|
=== ./k8s/observability-stack/07-grafana-datasources.yaml ===
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: grafana-datasources
|
|
namespace: observability
|
|
data:
|
|
datasources.yaml: |
|
|
apiVersion: 1
|
|
datasources:
|
|
# Prometheus
|
|
- name: Prometheus
|
|
type: prometheus
|
|
access: proxy
|
|
url: http://prometheus.observability.svc.cluster.local:9090
|
|
isDefault: true
|
|
editable: true
|
|
jsonData:
|
|
timeInterval: 15s
|
|
queryTimeout: 60s
|
|
httpMethod: POST
|
|
|
|
# Loki
|
|
- name: Loki
|
|
type: loki
|
|
access: proxy
|
|
url: http://loki.observability.svc.cluster.local:3100
|
|
editable: true
|
|
jsonData:
|
|
maxLines: 1000
|
|
derivedFields:
|
|
- datasourceUid: tempo
|
|
matcherRegex: "traceID=(\\w+)"
|
|
name: TraceID
|
|
url: "$${__value.raw}"
|
|
|
|
# Tempo
|
|
- name: Tempo
|
|
type: tempo
|
|
access: proxy
|
|
url: http://tempo.observability.svc.cluster.local:3200
|
|
editable: true
|
|
uid: tempo
|
|
jsonData:
|
|
tracesToLogsV2:
|
|
datasourceUid: loki
|
|
spanStartTimeShift: -1h
|
|
spanEndTimeShift: 1h
|
|
filterByTraceID: true
|
|
filterBySpanID: false
|
|
customQuery: false
|
|
tracesToMetrics:
|
|
datasourceUid: prometheus
|
|
spanStartTimeShift: -1h
|
|
spanEndTimeShift: 1h
|
|
serviceMap:
|
|
datasourceUid: prometheus
|
|
nodeGraph:
|
|
enabled: true
|
|
search:
|
|
hide: false
|
|
lokiSearch:
|
|
datasourceUid: loki
|
|
|
|
=== ./k8s/observability-stack/08-rbac.yaml ===
|
|
---
|
|
# Prometheus ServiceAccount
|
|
apiVersion: v1
|
|
kind: ServiceAccount
|
|
metadata:
|
|
name: prometheus
|
|
namespace: observability
|
|
|
|
---
|
|
# Prometheus ClusterRole
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: ClusterRole
|
|
metadata:
|
|
name: prometheus
|
|
rules:
|
|
- apiGroups: [""]
|
|
resources:
|
|
- nodes
|
|
- nodes/proxy
|
|
- services
|
|
- endpoints
|
|
- pods
|
|
verbs: ["get", "list", "watch"]
|
|
- apiGroups:
|
|
- extensions
|
|
resources:
|
|
- ingresses
|
|
verbs: ["get", "list", "watch"]
|
|
- nonResourceURLs: ["/metrics"]
|
|
verbs: ["get"]
|
|
|
|
---
|
|
# Prometheus ClusterRoleBinding
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: ClusterRoleBinding
|
|
metadata:
|
|
name: prometheus
|
|
roleRef:
|
|
apiGroup: rbac.authorization.k8s.io
|
|
kind: ClusterRole
|
|
name: prometheus
|
|
subjects:
|
|
- kind: ServiceAccount
|
|
name: prometheus
|
|
namespace: observability
|
|
|
|
---
|
|
# Alloy ServiceAccount
|
|
apiVersion: v1
|
|
kind: ServiceAccount
|
|
metadata:
|
|
name: alloy
|
|
namespace: observability
|
|
|
|
---
|
|
# Alloy ClusterRole
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: ClusterRole
|
|
metadata:
|
|
name: alloy
|
|
rules:
|
|
- apiGroups: [""]
|
|
resources:
|
|
- nodes
|
|
- nodes/proxy
|
|
- services
|
|
- endpoints
|
|
- pods
|
|
- pods/log
|
|
verbs: ["get", "list", "watch"]
|
|
- apiGroups:
|
|
- extensions
|
|
resources:
|
|
- ingresses
|
|
verbs: ["get", "list", "watch"]
|
|
|
|
---
|
|
# Alloy ClusterRoleBinding
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: ClusterRoleBinding
|
|
metadata:
|
|
name: alloy
|
|
roleRef:
|
|
apiGroup: rbac.authorization.k8s.io
|
|
kind: ClusterRole
|
|
name: alloy
|
|
subjects:
|
|
- kind: ServiceAccount
|
|
name: alloy
|
|
namespace: observability
|
|
|
|
---
|
|
# kube-state-metrics ServiceAccount
|
|
apiVersion: v1
|
|
kind: ServiceAccount
|
|
metadata:
|
|
name: kube-state-metrics
|
|
namespace: observability
|
|
|
|
---
|
|
# kube-state-metrics ClusterRole
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: ClusterRole
|
|
metadata:
|
|
name: kube-state-metrics
|
|
rules:
|
|
- apiGroups: [""]
|
|
resources:
|
|
- configmaps
|
|
- secrets
|
|
- nodes
|
|
- pods
|
|
- services
|
|
- resourcequotas
|
|
- replicationcontrollers
|
|
- limitranges
|
|
- persistentvolumeclaims
|
|
- persistentvolumes
|
|
- namespaces
|
|
- endpoints
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["apps"]
|
|
resources:
|
|
- statefulsets
|
|
- daemonsets
|
|
- deployments
|
|
- replicasets
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["batch"]
|
|
resources:
|
|
- cronjobs
|
|
- jobs
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["autoscaling"]
|
|
resources:
|
|
- horizontalpodautoscalers
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["policy"]
|
|
resources:
|
|
- poddisruptionbudgets
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["certificates.k8s.io"]
|
|
resources:
|
|
- certificatesigningrequests
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["storage.k8s.io"]
|
|
resources:
|
|
- storageclasses
|
|
- volumeattachments
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["admissionregistration.k8s.io"]
|
|
resources:
|
|
- mutatingwebhookconfigurations
|
|
- validatingwebhookconfigurations
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["networking.k8s.io"]
|
|
resources:
|
|
- networkpolicies
|
|
- ingresses
|
|
verbs: ["list", "watch"]
|
|
- apiGroups: ["coordination.k8s.io"]
|
|
resources:
|
|
- leases
|
|
verbs: ["list", "watch"]
|
|
|
|
---
|
|
# kube-state-metrics ClusterRoleBinding
|
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
kind: ClusterRoleBinding
|
|
metadata:
|
|
name: kube-state-metrics
|
|
roleRef:
|
|
apiGroup: rbac.authorization.k8s.io
|
|
kind: ClusterRole
|
|
name: kube-state-metrics
|
|
subjects:
|
|
- kind: ServiceAccount
|
|
name: kube-state-metrics
|
|
namespace: observability
|
|
=== ./k8s/observability-stack/10-prometheus.yaml ===
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: prometheus
|
|
namespace: observability
|
|
labels:
|
|
app: prometheus
|
|
spec:
|
|
serviceName: prometheus
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: prometheus
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: prometheus
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "9090"
|
|
spec:
|
|
serviceAccountName: prometheus
|
|
nodeSelector:
|
|
kubernetes.io/hostname: hetzner-2
|
|
containers:
|
|
- name: prometheus
|
|
image: prom/prometheus:v2.54.1
|
|
args:
|
|
- '--config.file=/etc/prometheus/prometheus.yml'
|
|
- '--storage.tsdb.path=/prometheus'
|
|
- '--storage.tsdb.retention.time=7d'
|
|
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
|
|
- '--web.console.templates=/usr/share/prometheus/consoles'
|
|
- '--web.enable-lifecycle'
|
|
- '--web.enable-admin-api'
|
|
ports:
|
|
- name: http
|
|
containerPort: 9090
|
|
protocol: TCP
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /-/healthy
|
|
port: http
|
|
initialDelaySeconds: 30
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /-/ready
|
|
port: http
|
|
initialDelaySeconds: 30
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
resources:
|
|
requests:
|
|
cpu: 500m
|
|
memory: 2Gi
|
|
limits:
|
|
cpu: 2000m
|
|
memory: 4Gi
|
|
volumeMounts:
|
|
- name: prometheus-config
|
|
mountPath: /etc/prometheus
|
|
- name: prometheus-data
|
|
mountPath: /prometheus
|
|
volumes:
|
|
- name: prometheus-config
|
|
configMap:
|
|
name: prometheus-config
|
|
- name: prometheus-data
|
|
persistentVolumeClaim:
|
|
claimName: prometheus-data
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: prometheus
|
|
namespace: observability
|
|
labels:
|
|
app: prometheus
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- port: 9090
|
|
targetPort: http
|
|
protocol: TCP
|
|
name: http
|
|
selector:
|
|
app: prometheus
|
|
|
|
=== ./k8s/observability-stack/11-loki.yaml ===
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: loki
|
|
namespace: observability
|
|
labels:
|
|
app: loki
|
|
spec:
|
|
serviceName: loki
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: loki
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: loki
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "3100"
|
|
spec:
|
|
nodeSelector:
|
|
kubernetes.io/hostname: hetzner-2
|
|
securityContext:
|
|
fsGroup: 10001
|
|
runAsGroup: 10001
|
|
runAsNonRoot: true
|
|
runAsUser: 10001
|
|
containers:
|
|
- name: loki
|
|
image: grafana/loki:3.2.1
|
|
args:
|
|
- '-config.file=/etc/loki/loki.yaml'
|
|
- '-target=all'
|
|
ports:
|
|
- name: http
|
|
containerPort: 3100
|
|
protocol: TCP
|
|
- name: grpc
|
|
containerPort: 9096
|
|
protocol: TCP
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /ready
|
|
port: http
|
|
initialDelaySeconds: 45
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /ready
|
|
port: http
|
|
initialDelaySeconds: 45
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
resources:
|
|
requests:
|
|
cpu: 500m
|
|
memory: 1Gi
|
|
limits:
|
|
cpu: 2000m
|
|
memory: 2Gi
|
|
volumeMounts:
|
|
- name: loki-config
|
|
mountPath: /etc/loki
|
|
- name: loki-data
|
|
mountPath: /loki
|
|
volumes:
|
|
- name: loki-config
|
|
configMap:
|
|
name: loki-config
|
|
- name: loki-data
|
|
persistentVolumeClaim:
|
|
claimName: loki-data
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: loki
|
|
namespace: observability
|
|
labels:
|
|
app: loki
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- port: 3100
|
|
targetPort: http
|
|
protocol: TCP
|
|
name: http
|
|
- port: 9096
|
|
targetPort: grpc
|
|
protocol: TCP
|
|
name: grpc
|
|
selector:
|
|
app: loki
|
|
|
|
=== ./k8s/observability-stack/12-tempo.yaml ===
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: tempo
|
|
namespace: observability
|
|
labels:
|
|
app: tempo
|
|
spec:
|
|
serviceName: tempo
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: tempo
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: tempo
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "3200"
|
|
spec:
|
|
nodeSelector:
|
|
kubernetes.io/hostname: hetzner-2
|
|
securityContext:
|
|
fsGroup: 10001
|
|
runAsGroup: 10001
|
|
runAsNonRoot: true
|
|
runAsUser: 10001
|
|
containers:
|
|
- name: tempo
|
|
image: grafana/tempo:2.6.1
|
|
args:
|
|
- '-config.file=/etc/tempo/tempo.yaml'
|
|
ports:
|
|
- name: http
|
|
containerPort: 3200
|
|
protocol: TCP
|
|
- name: otlp-grpc
|
|
containerPort: 4317
|
|
protocol: TCP
|
|
- name: otlp-http
|
|
containerPort: 4318
|
|
protocol: TCP
|
|
- name: jaeger-grpc
|
|
containerPort: 14250
|
|
protocol: TCP
|
|
- name: jaeger-http
|
|
containerPort: 14268
|
|
protocol: TCP
|
|
- name: zipkin
|
|
containerPort: 9411
|
|
protocol: TCP
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /ready
|
|
port: http
|
|
initialDelaySeconds: 30
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /ready
|
|
port: http
|
|
initialDelaySeconds: 30
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
resources:
|
|
requests:
|
|
cpu: 500m
|
|
memory: 1Gi
|
|
limits:
|
|
cpu: 2000m
|
|
memory: 2Gi
|
|
volumeMounts:
|
|
- name: tempo-config
|
|
mountPath: /etc/tempo
|
|
- name: tempo-data
|
|
mountPath: /var/tempo
|
|
volumes:
|
|
- name: tempo-config
|
|
configMap:
|
|
name: tempo-config
|
|
- name: tempo-data
|
|
persistentVolumeClaim:
|
|
claimName: tempo-data
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: tempo
|
|
namespace: observability
|
|
labels:
|
|
app: tempo
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- port: 3200
|
|
targetPort: http
|
|
protocol: TCP
|
|
name: http
|
|
- port: 4317
|
|
targetPort: otlp-grpc
|
|
protocol: TCP
|
|
name: otlp-grpc
|
|
- port: 4318
|
|
targetPort: otlp-http
|
|
protocol: TCP
|
|
name: otlp-http
|
|
- port: 14250
|
|
targetPort: jaeger-grpc
|
|
protocol: TCP
|
|
name: jaeger-grpc
|
|
- port: 14268
|
|
targetPort: jaeger-http
|
|
protocol: TCP
|
|
name: jaeger-http
|
|
- port: 9411
|
|
targetPort: zipkin
|
|
protocol: TCP
|
|
name: zipkin
|
|
selector:
|
|
app: tempo
|
|
=== ./k8s/observability-stack/13-grafana.yaml ===
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: grafana
|
|
namespace: observability
|
|
labels:
|
|
app: grafana
|
|
spec:
|
|
serviceName: grafana
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: grafana
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: grafana
|
|
spec:
|
|
nodeSelector:
|
|
kubernetes.io/hostname: hetzner-2
|
|
securityContext:
|
|
fsGroup: 472
|
|
runAsGroup: 472
|
|
runAsUser: 472
|
|
containers:
|
|
- name: grafana
|
|
image: grafana/grafana:11.4.0
|
|
ports:
|
|
- name: http
|
|
containerPort: 3000
|
|
protocol: TCP
|
|
env:
|
|
- name: GF_SECURITY_ADMIN_USER
|
|
value: admin
|
|
- name: GF_SECURITY_ADMIN_PASSWORD
|
|
value: admin # FIXME: do not ship a hardcoded admin password — source it from a Secret via valueFrom.secretKeyRef
|
|
- name: GF_INSTALL_PLUGINS
|
|
value: ""
|
|
- name: GF_FEATURE_TOGGLES_ENABLE
|
|
value: "traceqlEditor,correlations"
|
|
- name: GF_AUTH_ANONYMOUS_ENABLED
|
|
value: "false"
|
|
- name: GF_ANALYTICS_REPORTING_ENABLED
|
|
value: "false"
|
|
- name: GF_ANALYTICS_CHECK_FOR_UPDATES
|
|
value: "false"
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /api/health
|
|
port: http
|
|
initialDelaySeconds: 60
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /api/health
|
|
port: http
|
|
initialDelaySeconds: 60
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
resources:
|
|
requests:
|
|
cpu: 250m
|
|
memory: 512Mi
|
|
limits:
|
|
cpu: 1000m
|
|
memory: 1Gi
|
|
volumeMounts:
|
|
- name: grafana-data
|
|
mountPath: /var/lib/grafana
|
|
- name: grafana-datasources
|
|
mountPath: /etc/grafana/provisioning/datasources
|
|
volumes:
|
|
- name: grafana-data
|
|
persistentVolumeClaim:
|
|
claimName: grafana-data
|
|
- name: grafana-datasources
|
|
configMap:
|
|
name: grafana-datasources
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: grafana
|
|
namespace: observability
|
|
labels:
|
|
app: grafana
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- port: 3000
|
|
targetPort: http
|
|
protocol: TCP
|
|
name: http
|
|
selector:
|
|
app: grafana
|
|
|
|
=== ./k8s/observability-stack/14-alloy.yaml ===
|
|
apiVersion: apps/v1
|
|
kind: DaemonSet
|
|
metadata:
|
|
name: alloy
|
|
namespace: observability
|
|
labels:
|
|
app: alloy
|
|
spec:
|
|
selector:
|
|
matchLabels:
|
|
app: alloy
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: alloy
|
|
spec:
|
|
serviceAccountName: alloy
|
|
hostNetwork: true
|
|
hostPID: true
|
|
dnsPolicy: ClusterFirstWithHostNet
|
|
containers:
|
|
- name: alloy
|
|
image: grafana/alloy:v1.5.1
|
|
args:
|
|
- run
|
|
- /etc/alloy/config.alloy
|
|
- --storage.path=/var/lib/alloy
|
|
- --server.http.listen-addr=0.0.0.0:12345
|
|
ports:
|
|
- name: http-metrics
|
|
containerPort: 12345
|
|
protocol: TCP
|
|
- name: otlp-grpc
|
|
containerPort: 4317
|
|
protocol: TCP
|
|
- name: otlp-http
|
|
containerPort: 4318
|
|
protocol: TCP
|
|
env:
|
|
- name: HOSTNAME
|
|
valueFrom:
|
|
fieldRef:
|
|
fieldPath: spec.nodeName
|
|
securityContext:
|
|
privileged: true
|
|
runAsUser: 0
|
|
resources:
|
|
requests:
|
|
cpu: 100m
|
|
memory: 256Mi
|
|
limits:
|
|
cpu: 500m
|
|
memory: 512Mi
|
|
volumeMounts:
|
|
- name: config
|
|
mountPath: /etc/alloy
|
|
- name: varlog
|
|
mountPath: /var/log
|
|
readOnly: true
|
|
- name: varlibdockercontainers
|
|
mountPath: /var/lib/docker/containers
|
|
readOnly: true
|
|
- name: etcmachineid
|
|
mountPath: /etc/machine-id
|
|
readOnly: true
|
|
tolerations:
|
|
- effect: NoSchedule
|
|
operator: Exists
|
|
volumes:
|
|
- name: config
|
|
configMap:
|
|
name: alloy-config
|
|
- name: varlog
|
|
hostPath:
|
|
path: /var/log
|
|
# NOTE(review): this is the Docker log path — the cluster appears to use containerd, so /var/lib/docker/containers may not exist; confirm it is still needed
- name: varlibdockercontainers
|
|
hostPath:
|
|
path: /var/lib/docker/containers
|
|
- name: etcmachineid
|
|
hostPath:
|
|
path: /etc/machine-id
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: alloy
|
|
namespace: observability
|
|
labels:
|
|
app: alloy
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- port: 12345
|
|
targetPort: http-metrics
|
|
protocol: TCP
|
|
name: http-metrics
|
|
- port: 4317
|
|
targetPort: otlp-grpc
|
|
protocol: TCP
|
|
name: otlp-grpc
|
|
- port: 4318
|
|
targetPort: otlp-http
|
|
protocol: TCP
|
|
name: otlp-http
|
|
selector:
|
|
app: alloy
|
|
|
|
=== ./k8s/observability-stack/15-kube-state-metrics.yaml ===
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata:
|
|
name: kube-state-metrics
|
|
namespace: observability
|
|
labels:
|
|
app: kube-state-metrics
|
|
spec:
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: kube-state-metrics
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: kube-state-metrics
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "8080"
|
|
spec:
|
|
serviceAccountName: kube-state-metrics
|
|
containers:
|
|
- name: kube-state-metrics
|
|
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.13.0
|
|
ports:
|
|
- name: http-metrics
|
|
containerPort: 8080
|
|
- name: telemetry
|
|
containerPort: 8081
|
|
livenessProbe:
|
|
httpGet:
|
|
path: /healthz
|
|
port: 8080
|
|
initialDelaySeconds: 5
|
|
timeoutSeconds: 5
|
|
readinessProbe:
|
|
httpGet:
|
|
path: /
|
|
port: 8080
|
|
initialDelaySeconds: 5
|
|
timeoutSeconds: 5
|
|
resources:
|
|
requests:
|
|
cpu: 100m
|
|
memory: 128Mi
|
|
limits:
|
|
cpu: 200m
|
|
memory: 256Mi
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: kube-state-metrics
|
|
namespace: observability
|
|
labels:
|
|
app: kube-state-metrics
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "8080"
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- name: http-metrics
|
|
port: 8080
|
|
targetPort: http-metrics
|
|
- name: telemetry
|
|
port: 8081
|
|
targetPort: telemetry
|
|
selector:
|
|
app: kube-state-metrics
|
|
|
|
=== ./k8s/observability-stack/16-node-exporter.yaml ===
|
|
apiVersion: apps/v1
|
|
kind: DaemonSet
|
|
metadata:
|
|
name: node-exporter
|
|
namespace: observability
|
|
labels:
|
|
app: node-exporter
|
|
spec:
|
|
selector:
|
|
matchLabels:
|
|
app: node-exporter
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: node-exporter
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "9100"
|
|
spec:
|
|
hostNetwork: true
|
|
hostPID: true
|
|
containers:
|
|
- name: node-exporter
|
|
image: prom/node-exporter:v1.8.2
|
|
args:
|
|
- --path.procfs=/host/proc
|
|
- --path.sysfs=/host/sys
|
|
- --path.rootfs=/host/root
|
|
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/) # NOTE: "$$" is intentional — Kubernetes arg expansion collapses it to a literal "$"
|
|
ports:
|
|
- name: metrics
|
|
containerPort: 9100
|
|
protocol: TCP
|
|
resources:
|
|
requests:
|
|
cpu: 100m
|
|
memory: 128Mi
|
|
limits:
|
|
cpu: 200m
|
|
memory: 256Mi
|
|
volumeMounts:
|
|
- name: proc
|
|
mountPath: /host/proc
|
|
readOnly: true
|
|
- name: sys
|
|
mountPath: /host/sys
|
|
readOnly: true
|
|
- name: root
|
|
mountPath: /host/root
|
|
mountPropagation: HostToContainer
|
|
readOnly: true
|
|
tolerations:
|
|
- effect: NoSchedule
|
|
operator: Exists
|
|
volumes:
|
|
- name: proc
|
|
hostPath:
|
|
path: /proc
|
|
- name: sys
|
|
hostPath:
|
|
path: /sys
|
|
- name: root
|
|
hostPath:
|
|
path: /
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: node-exporter
|
|
namespace: observability
|
|
labels:
|
|
app: node-exporter
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "9100"
|
|
spec:
|
|
type: ClusterIP
|
|
clusterIP: None
|
|
ports:
|
|
- name: metrics
|
|
port: 9100
|
|
targetPort: metrics
|
|
selector:
|
|
app: node-exporter
|
|
|
|
=== ./k8s/observability-stack/20-grafana-ingress.yaml ===
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: grafana-ingress
|
|
namespace: observability
|
|
annotations:
|
|
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
|
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
|
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls:
|
|
- hosts:
|
|
- grafana.betelgeusebytes.io
|
|
secretName: grafana-tls
|
|
rules:
|
|
- host: grafana.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend:
|
|
service:
|
|
name: grafana
|
|
port:
|
|
number: 3000
|
|
|
|
=== ./k8s/observability-stack/21-optional-ingresses.yaml ===
|
|
---
|
|
# Optional: Prometheus Ingress (for direct access to Prometheus UI)
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: prometheus-ingress
|
|
namespace: observability
|
|
annotations:
|
|
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
|
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
|
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
|
# Optional: Add basic auth for security
|
|
# nginx.ingress.kubernetes.io/auth-type: basic
|
|
# nginx.ingress.kubernetes.io/auth-secret: prometheus-basic-auth
|
|
# nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required'
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls:
|
|
- hosts:
|
|
- prometheus.betelgeusebytes.io
|
|
secretName: prometheus-tls
|
|
rules:
|
|
- host: prometheus.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend:
|
|
service:
|
|
name: prometheus
|
|
port:
|
|
number: 9090
|
|
|
|
---
|
|
# Optional: Loki Ingress (for direct API access) — WARNING: exposes the unauthenticated Loki push/query API publicly; add auth (e.g. nginx basic-auth annotations) before enabling
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: loki-ingress
|
|
namespace: observability
|
|
annotations:
|
|
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
|
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
|
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls:
|
|
- hosts:
|
|
- loki.betelgeusebytes.io
|
|
secretName: loki-tls
|
|
rules:
|
|
- host: loki.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend:
|
|
service:
|
|
name: loki
|
|
port:
|
|
number: 3100
|
|
|
|
---
|
|
# Optional: Tempo Ingress (for direct API access) — WARNING: exposes the unauthenticated Tempo API publicly; add auth (e.g. nginx basic-auth annotations) before enabling
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: tempo-ingress
|
|
namespace: observability
|
|
annotations:
|
|
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
|
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
|
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls:
|
|
- hosts:
|
|
- tempo.betelgeusebytes.io
|
|
secretName: tempo-tls
|
|
rules:
|
|
- host: tempo.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend:
|
|
service:
|
|
name: tempo
|
|
port:
|
|
number: 3200
|
|
|
|
=== ./k8s/observability-stack/demo-app.yaml ===
|
|
---
|
|
# Example instrumented application to test the observability stack
|
|
# This is a simple Python Flask app with OpenTelemetry instrumentation
|
|
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: demo-app
|
|
namespace: observability
|
|
data:
|
|
app.py: |
|
|
from flask import Flask, jsonify
|
|
import logging
|
|
import json
|
|
import time
|
|
import random
|
|
|
|
# OpenTelemetry imports
|
|
from opentelemetry import trace, metrics
|
|
from opentelemetry.sdk.trace import TracerProvider
|
|
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
from opentelemetry.sdk.metrics import MeterProvider
|
|
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
|
|
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
|
|
from opentelemetry.instrumentation.flask import FlaskInstrumentor
|
|
from opentelemetry.sdk.resources import Resource
|
|
from prometheus_flask_exporter import PrometheusMetrics
|
|
|
|
# Configure structured logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(message)s'
|
|
)
|
|
|
|
class JSONFormatter(logging.Formatter):
|
|
def format(self, record):
|
|
log_obj = {
|
|
'timestamp': self.formatTime(record, self.datefmt),
|
|
'level': record.levelname,
|
|
'message': record.getMessage(),
|
|
'logger': record.name,
|
|
}
|
|
if hasattr(record, 'trace_id'):
|
|
log_obj['trace_id'] = record.trace_id
|
|
log_obj['span_id'] = record.span_id
|
|
return json.dumps(log_obj)
|
|
|
|
handler = logging.StreamHandler()
|
|
handler.setFormatter(JSONFormatter())
|
|
logger = logging.getLogger(__name__)
|
|
logger.addHandler(handler)
|
|
logger.setLevel(logging.INFO)
|
|
|
|
# Configure OpenTelemetry
|
|
resource = Resource.create({"service.name": "demo-app"})
|
|
|
|
# Tracing
|
|
trace_provider = TracerProvider(resource=resource)
|
|
trace_provider.add_span_processor(
|
|
BatchSpanProcessor(
|
|
OTLPSpanExporter(
|
|
endpoint="http://tempo.observability.svc.cluster.local:4317",
|
|
insecure=True
|
|
)
|
|
)
|
|
)
|
|
trace.set_tracer_provider(trace_provider)
|
|
tracer = trace.get_tracer(__name__)
|
|
|
|
# Create Flask app
|
|
app = Flask(__name__)
|
|
|
|
# Prometheus metrics
|
|
metrics = PrometheusMetrics(app)
|
|
|
|
# Auto-instrument Flask
|
|
FlaskInstrumentor().instrument_app(app)
|
|
|
|
# Sample data
|
|
ITEMS = ["apple", "banana", "orange", "grape", "mango"]
|
|
|
|
@app.route('/')
|
|
def index():
|
|
span = trace.get_current_span()
|
|
trace_id = format(span.get_span_context().trace_id, '032x')
|
|
|
|
logger.info("Index page accessed", extra={
|
|
'trace_id': trace_id,
|
|
'endpoint': '/'
|
|
})
|
|
|
|
return jsonify({
|
|
'service': 'demo-app',
|
|
'status': 'healthy',
|
|
'trace_id': trace_id
|
|
})
|
|
|
|
@app.route('/items')
|
|
def get_items():
|
|
with tracer.start_as_current_span("fetch_items") as span:
|
|
# Simulate database query
|
|
time.sleep(random.uniform(0.01, 0.1))
|
|
|
|
span.set_attribute("items.count", len(ITEMS))
|
|
trace_id = format(span.get_span_context().trace_id, '032x')
|
|
|
|
logger.info("Items fetched", extra={
|
|
'trace_id': trace_id,
|
|
'count': len(ITEMS)
|
|
})
|
|
|
|
return jsonify({
|
|
'items': ITEMS,
|
|
'count': len(ITEMS),
|
|
'trace_id': trace_id
|
|
})
|
|
|
|
@app.route('/item/<int:item_id>')
|
|
def get_item(item_id):
|
|
with tracer.start_as_current_span("fetch_item") as span:
|
|
span.set_attribute("item.id", item_id)
|
|
trace_id = format(span.get_span_context().trace_id, '032x')
|
|
|
|
# Simulate processing
|
|
time.sleep(random.uniform(0.01, 0.05))
|
|
|
|
if item_id < 0 or item_id >= len(ITEMS):
|
|
logger.warning("Item not found", extra={
|
|
'trace_id': trace_id,
|
|
'item_id': item_id
|
|
})
|
|
return jsonify({'error': 'Item not found', 'trace_id': trace_id}), 404
|
|
|
|
item = ITEMS[item_id]
|
|
logger.info("Item fetched", extra={
|
|
'trace_id': trace_id,
|
|
'item_id': item_id,
|
|
'item': item
|
|
})
|
|
|
|
return jsonify({
|
|
'id': item_id,
|
|
'name': item,
|
|
'trace_id': trace_id
|
|
})
|
|
|
|
@app.route('/slow')
|
|
def slow_endpoint():
|
|
with tracer.start_as_current_span("slow_operation") as span:
|
|
trace_id = format(span.get_span_context().trace_id, '032x')
|
|
|
|
logger.info("Slow operation started", extra={'trace_id': trace_id})
|
|
|
|
# Simulate slow operation
|
|
time.sleep(random.uniform(1, 3))
|
|
|
|
logger.info("Slow operation completed", extra={'trace_id': trace_id})
|
|
|
|
return jsonify({
|
|
'message': 'Operation completed',
|
|
'trace_id': trace_id
|
|
})
|
|
|
|
@app.route('/error')
|
|
def error_endpoint():
|
|
with tracer.start_as_current_span("error_operation") as span:
|
|
trace_id = format(span.get_span_context().trace_id, '032x')
|
|
|
|
logger.error("Intentional error triggered", extra={'trace_id': trace_id})
|
|
span.set_attribute("error", True)
|
|
|
|
return jsonify({
|
|
'error': 'This is an intentional error',
|
|
'trace_id': trace_id
|
|
}), 500
|
|
|
|
if __name__ == '__main__':
|
|
app.run(host='0.0.0.0', port=8080)
|
|
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata:
|
|
name: demo-app
|
|
namespace: observability
|
|
labels:
|
|
app: demo-app
|
|
spec:
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: demo-app
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: demo-app
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "8080"
|
|
prometheus.io/path: "/metrics"
|
|
spec:
|
|
containers:
|
|
- name: demo-app
|
|
image: python:3.11-slim
|
|
command:
|
|
- /bin/bash
|
|
- -c
|
|
- |
|
|
pip install flask opentelemetry-api opentelemetry-sdk \
|
|
opentelemetry-instrumentation-flask \
|
|
opentelemetry-exporter-otlp-proto-grpc \
|
|
prometheus-flask-exporter && \
|
|
python /app/app.py
|
|
ports:
|
|
- name: http
|
|
containerPort: 8080
|
|
volumeMounts:
|
|
- name: app-code
|
|
mountPath: /app
|
|
resources:
|
|
requests:
|
|
cpu: 100m
|
|
memory: 256Mi
|
|
limits:
|
|
cpu: 500m
|
|
memory: 512Mi
|
|
volumes:
|
|
- name: app-code
|
|
configMap:
|
|
name: demo-app
|
|
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: demo-app
|
|
namespace: observability
|
|
labels:
|
|
app: demo-app
|
|
annotations:
|
|
prometheus.io/scrape: "true"
|
|
prometheus.io/port: "8080"
|
|
prometheus.io/path: "/metrics"
|
|
spec:
|
|
type: ClusterIP
|
|
ports:
|
|
- port: 8080
|
|
targetPort: http
|
|
protocol: TCP
|
|
name: http
|
|
selector:
|
|
app: demo-app
|
|
|
|
=== ./k8s/otlp/otel-collector.yaml ===
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: otel-collector, namespace: observability }
|
|
spec:
|
|
selector: { app: otel-collector }
|
|
ports:
|
|
- { name: otlp-http, port: 4318, targetPort: 4318 }
|
|
- { name: otlp-grpc, port: 4317, targetPort: 4317 }
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata: { name: otel-collector, namespace: observability }
|
|
spec:
|
|
replicas: 2
|
|
selector: { matchLabels: { app: otel-collector } }
|
|
template:
|
|
metadata: { labels: { app: otel-collector } }
|
|
spec:
|
|
nodeSelector: { node: hetzner-2 }
|
|
containers:
|
|
- name: otel-collector
|
|
image: otel/opentelemetry-collector-contrib:0.102.0
|
|
args: ["--config=/etc/otel/config.yaml"]
|
|
ports:
|
|
- { containerPort: 4318 }
|
|
- { containerPort: 4317 }
|
|
volumeMounts:
|
|
- { name: cfg, mountPath: /etc/otel }
|
|
volumes:
|
|
- { name: cfg, configMap: { name: otel-config } }
|
|
---
|
|
apiVersion: v1
kind: ConfigMap
metadata: { name: otel-config, namespace: observability }
data:
  config.yaml: |
    receivers:
      otlp:
        protocols: { http: {}, grpc: {} }
    processors: { batch: {} }
    exporters:
      # The `logging` exporter is deprecated (collector >= 0.86); `debug` is
      # its drop-in replacement in the 0.102.0 contrib image this config runs on.
      debug: {}
      elasticsearch:
        endpoints: ["http://elasticsearch.elastic.svc.cluster.local:9200"]
        logs_index: "k8s-logs"
    service:
      pipelines:
        logs: { receivers: [otlp], processors: [batch], exporters: [elasticsearch, debug] }
        traces: { receivers: [otlp], processors: [batch], exporters: [debug] }
        metrics: { receivers: [otlp], processors: [batch], exporters: [debug] }
|
|
---
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: otlp
|
|
namespace: observability
|
|
annotations: { cert-manager.io/cluster-issuer: letsencrypt-prod }
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls: [{ hosts: ["otlp.betelgeusebytes.io"], secretName: otlp-tls }]
|
|
rules:
|
|
- host: otlp.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /v1/traces
|
|
pathType: Prefix
|
|
backend: { service: { name: otel-collector, port: { number: 4318 } } }
|
|
- path: /v1/metrics
|
|
pathType: Prefix
|
|
backend: { service: { name: otel-collector, port: { number: 4318 } } }
|
|
- path: /v1/logs
|
|
pathType: Prefix
|
|
backend: { service: { name: otel-collector, port: { number: 4318 } } }
|
|
|
|
=== ./k8s/postgres/pg.yaml ===
|
|
# k8s/postgres/pg-init-sql-configmap.yaml
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: pg-init-sql
|
|
namespace: db
|
|
data:
|
|
00_extensions.sql: |
|
|
\connect gitea
|
|
CREATE EXTENSION IF NOT EXISTS postgis;
|
|
CREATE EXTENSION IF NOT EXISTS postgis_topology;
|
|
CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
|
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
CREATE EXTENSION IF NOT EXISTS hstore;
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
CREATE EXTENSION IF NOT EXISTS citext;
|
|
CREATE EXTENSION IF NOT EXISTS unaccent;
|
|
CREATE EXTENSION IF NOT EXISTS pgcrypto;
|
|
DO $$ BEGIN
|
|
CREATE EXTENSION IF NOT EXISTS plpython3u;
|
|
EXCEPTION WHEN undefined_file THEN
|
|
RAISE NOTICE 'plpython3u not available in this image';
|
|
END $$;
|
|
01_tune.sql: |
|
|
ALTER SYSTEM SET shared_buffers = '1GB';
|
|
ALTER SYSTEM SET work_mem = '32MB';
|
|
ALTER SYSTEM SET maintenance_work_mem = '512MB';
|
|
ALTER SYSTEM SET max_connections = 200;
|
|
SELECT pg_reload_conf();
|
|
---
|
|
# k8s/postgres/pg-conf.yaml
|
|
# k8s/postgres/pg-conf.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: pg-conf
  namespace: db
data:
  pg_hba.conf: |
    # Local connections
    local all all trust
    host all all 127.0.0.1/32 trust
    host all all ::1/128 trust
    # TLS-only access from ANY external IP (harden as needed)
    # scram-sha-256 replaces the deprecated md5 method; PostgreSQL stores
    # passwords as SCRAM by default since v14, so SCRAM auth is what actually
    # happens anyway — make it explicit and refuse legacy md5 exchanges.
    hostssl all all 0.0.0.0/0 scram-sha-256
    hostssl all all ::/0 scram-sha-256
|
|
---
|
|
# k8s/postgres/pg-secret.yaml
# NOTE(review): plaintext credential committed to the repo — rotate it and
# inject it from an external/sealed secret store instead of VCS.
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata:
|
|
name: pg18-secret
|
|
namespace: db
|
|
type: Opaque
|
|
stringData:
|
|
POSTGRES_PASSWORD: "pa$$word"
|
|
---
|
|
# k8s/postgres/pg-certificate.yaml
|
|
apiVersion: cert-manager.io/v1
|
|
kind: Certificate
|
|
metadata:
|
|
name: pg-tls
|
|
namespace: db
|
|
spec:
|
|
secretName: pg-tls
|
|
dnsNames:
|
|
- pg.betelgeusebytes.io
|
|
issuerRef:
|
|
kind: ClusterIssuer
|
|
name: letsencrypt-prod
|
|
---
|
|
# k8s/postgres/postgres-svc.yaml
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: postgres
|
|
namespace: db
|
|
spec:
|
|
selector:
|
|
app: postgres
|
|
ports:
|
|
- name: postgres
|
|
port: 5432
|
|
targetPort: 5432
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: postgres-hl
|
|
namespace: db
|
|
spec:
|
|
clusterIP: None
|
|
selector:
|
|
app: postgres
|
|
ports:
|
|
- name: postgres
|
|
port: 5432
|
|
targetPort: 5432
|
|
---
|
|
# k8s/postgres/postgres.yaml
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: postgres
|
|
namespace: db
|
|
spec:
|
|
serviceName: postgres-hl
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: postgres
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: postgres
|
|
spec:
|
|
securityContext:
|
|
runAsUser: 999
|
|
runAsGroup: 999
|
|
fsGroup: 999
|
|
fsGroupChangePolicy: "Always"
|
|
initContainers:
|
|
- name: install-certs
|
|
image: busybox:1.36
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
cp /in/tls.crt /out/server.crt
|
|
cp /in/tls.key /out/server.key
|
|
chown 999:999 /out/* || true
|
|
chmod 600 /out/server.key
|
|
securityContext:
|
|
runAsUser: 0
|
|
volumeMounts:
|
|
- { name: pg-tls, mountPath: /in, readOnly: true }
|
|
- { name: pg-certs, mountPath: /out }
|
|
containers:
|
|
- name: postgres
|
|
image: axxs/postgres:18-postgis-vector
|
|
imagePullPolicy: IfNotPresent
|
|
args:
|
|
- -c
|
|
- ssl=on
|
|
- -c
|
|
- ssl_cert_file=/certs/server.crt
|
|
- -c
|
|
- ssl_key_file=/certs/server.key
|
|
- -c
|
|
- hba_file=/etc/postgresql-custom/pg_hba.conf
|
|
env:
|
|
- name: POSTGRES_USER
|
|
value: "app"
|
|
- name: POSTGRES_DB
|
|
value: "gitea"
|
|
- name: POSTGRES_PASSWORD
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: pg18-secret
|
|
key: POSTGRES_PASSWORD
|
|
- name: TZ
|
|
value: "Europe/Paris"
|
|
ports:
|
|
- name: postgres
|
|
containerPort: 5432
|
|
volumeMounts:
|
|
- { name: data, mountPath: /var/lib/postgresql } # PG18 expects parent, creates /var/lib/postgresql/18/main
|
|
- { name: init, mountPath: /docker-entrypoint-initdb.d, readOnly: true }
|
|
- { name: pg-certs, mountPath: /certs }
|
|
- { name: pg-conf, mountPath: /etc/postgresql-custom }
|
|
readinessProbe:
|
|
exec: { command: ["sh","-c","pg_isready -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -h 127.0.0.1"] }
|
|
initialDelaySeconds: 5
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
failureThreshold: 6
|
|
livenessProbe:
|
|
exec: { command: ["sh","-c","pg_isready -U \"$POSTGRES_USER\" -d \"$POSTGRES_DB\" -h 127.0.0.1"] }
|
|
initialDelaySeconds: 20
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
failureThreshold: 6
|
|
resources:
|
|
requests: { cpu: "250m", memory: "512Mi" }
|
|
limits: { cpu: "1", memory: "2Gi" }
|
|
volumes:
|
|
- name: init
|
|
configMap:
|
|
name: pg-init-sql
|
|
defaultMode: 0444
|
|
- name: pg-tls
|
|
secret:
|
|
secretName: pg-tls
|
|
- name: pg-certs
|
|
emptyDir: {}
|
|
- name: pg-conf
|
|
configMap:
|
|
name: pg-conf
|
|
defaultMode: 0444
|
|
volumeClaimTemplates:
|
|
- metadata:
|
|
name: data
|
|
spec:
|
|
accessModes: ["ReadWriteOnce"]
|
|
storageClassName: local-ssd-hetzner
|
|
resources:
|
|
requests:
|
|
storage: 80Gi
|
|
|
|
|
|
# kubectl -n ingress-nginx create configmap tcp-services \
|
|
# --from-literal="5432=db/postgres:5432" \
|
|
# -o yaml --dry-run=client | kubectl apply -f -
|
|
# kubectl -n ingress-nginx patch deploy ingress-nginx-controller \
|
|
# --type='json' -p='[
|
|
# {"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--tcp-services-configmap=$(POD_NAMESPACE)/tcp-services"}
|
|
# ]'
|
|
# # controller must listen on hostPort:5432 (we already patched earlier)
|
|
=== ./k8s/postgres/postgres-ha.yaml ===
|
|
---
|
|
apiVersion: v1
|
|
kind: Namespace
|
|
metadata:
|
|
name: db
|
|
---
|
|
# Password secret (replace with your own or generate one).
# NOTE(review): the literal password below is committed to the repo — rotate it
# and source it from an external/sealed secret store instead.
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata:
|
|
name: pg18-secret
|
|
namespace: db
|
|
type: Opaque
|
|
stringData:
|
|
POSTGRES_PASSWORD: "pa$$word"
|
|
---
|
|
# Init SQL: keeps your original name and keeps enabling PostGIS + vector
|
|
apiVersion: v1
kind: ConfigMap
metadata:
  name: pg-init-sql
  namespace: db
data:
  00_extensions.sql: |
    -- enable common extensions in the default DB and template1 so future DBs inherit them
    \connect gitea
    CREATE EXTENSION IF NOT EXISTS postgis;
    CREATE EXTENSION IF NOT EXISTS vector;
    CREATE COLLATION IF NOT EXISTS arabic (provider = icu, locale = 'ar', deterministic = false);
    CREATE EXTENSION IF NOT EXISTS tablefunc;
    -- postpone pg_stat_statements CREATE to postStart (needs preload)
    CREATE EXTENSION IF NOT EXISTS postgis_topology;
    CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
    CREATE EXTENSION IF NOT EXISTS pg_trgm;
    CREATE EXTENSION IF NOT EXISTS hstore;
    CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
    CREATE EXTENSION IF NOT EXISTS citext;
    CREATE EXTENSION IF NOT EXISTS unaccent;
    CREATE EXTENSION IF NOT EXISTS pgcrypto;

    -- PL/Python (available in your image)
    DO $$ BEGIN
      CREATE EXTENSION IF NOT EXISTS plpython3u;
    EXCEPTION WHEN undefined_file THEN
      RAISE NOTICE 'plpython3u not available in this image';
    END $$;

    -- Also on template1 for new DBs (heavier, but intentional)
    \connect template1
    CREATE EXTENSION IF NOT EXISTS postgis;
    CREATE EXTENSION IF NOT EXISTS pg_trgm;
    CREATE EXTENSION IF NOT EXISTS hstore;
    CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
    CREATE EXTENSION IF NOT EXISTS citext;
    CREATE EXTENSION IF NOT EXISTS unaccent;
    CREATE EXTENSION IF NOT EXISTS pgcrypto;

    -- Arabic-friendly ICU collation, non-deterministic for case/diacritics
    DO $$
    BEGIN
      PERFORM 1 FROM pg_collation WHERE collname='arabic';
      IF NOT FOUND THEN
        CREATE COLLATION arabic (provider = icu, locale = 'ar', deterministic = false);
      END IF;
    END$$;

  01_tune.sql: |
    -- NOTE(review): the previous version wrapped ALTER SYSTEM in a DO block.
    -- That fails twice over: ALTER SYSTEM cannot run inside a transaction
    -- block (a DO body always runs in one), and the body nested a second $$
    -- dollar-quote inside the outer $$-quoted string — a hard syntax error.
    -- psql executes each init-script statement in autocommit mode, so a
    -- top-level ALTER SYSTEM works; it takes effect on the next restart.
    ALTER SYSTEM SET shared_preload_libraries = 'pg_stat_statements';

    -- Optional tuning (adjust to your limits)
    ALTER SYSTEM SET shared_buffers = '1GB';
    ALTER SYSTEM SET work_mem = '32MB';
    ALTER SYSTEM SET maintenance_work_mem = '512MB';
    ALTER SYSTEM SET max_connections = 200;

    -- Reload applies some settings immediately; others need restart (OK after init completes)
    SELECT pg_reload_conf();
    ALTER SYSTEM SET pg_stat_statements.max = 10000;
    ALTER SYSTEM SET pg_stat_statements.track = 'all';
    ALTER SYSTEM SET pg_stat_statements.save = on;
  pg_hba.conf: |
    # Allow loopback
    local all all trust
    host all all 127.0.0.1/32 trust
    host all all ::1/128 trust
    # Allow TLS connections from your IP(s) only
    # scram-sha-256 replaces deprecated md5 (passwords are stored as SCRAM
    # by default since PostgreSQL 14)
    hostssl all all YOUR_PUBLIC_IP/32 scram-sha-256
    # (Optional) Add more CIDRs or a private network range here:
    # hostssl all all 10.0.0.0/8 scram-sha-256
|
|
---
|
|
# Headless service required by StatefulSet for stable network IDs
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: postgres-hl
|
|
namespace: db
|
|
spec:
|
|
clusterIP: None
|
|
selector:
|
|
app: postgres
|
|
ports:
|
|
- name: postgres
|
|
port: 5432
|
|
targetPort: 5432
|
|
---
|
|
# Regular ClusterIP service for clients (keeps your original name)
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata:
|
|
name: postgres
|
|
namespace: db
|
|
spec:
|
|
selector:
|
|
app: postgres
|
|
ports:
|
|
- name: postgres
|
|
port: 5432
|
|
targetPort: 5432
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: postgres
|
|
namespace: db
|
|
spec:
|
|
serviceName: postgres-hl
|
|
replicas: 1
|
|
selector:
|
|
matchLabels:
|
|
app: postgres
|
|
template:
|
|
metadata:
|
|
labels:
|
|
app: postgres
|
|
spec:
|
|
securityContext:
|
|
runAsUser: 999
|
|
runAsGroup: 999
|
|
fsGroup: 999
|
|
fsGroupChangePolicy: "Always"
|
|
initContainers:
|
|
# Copy cert-manager certs to a writable path with correct perms for Postgres
|
|
- name: install-certs
|
|
image: busybox:1.36
|
|
command:
|
|
- sh
|
|
- -c
|
|
- |
|
|
cp /in/tls.crt /out/server.crt
|
|
cp /in/tls.key /out/server.key
|
|
cp /in/ca.crt /out/ca.crt || true
|
|
chown 999:999 /out/* || true
|
|
chmod 600 /out/server.key
|
|
securityContext:
|
|
runAsUser: 0
|
|
volumeMounts:
|
|
- { name: pg-tls, mountPath: /in, readOnly: true }
|
|
- { name: pg-certs, mountPath: /out }
|
|
containers:
|
|
- name: postgres
|
|
image: axxs/postgres:18-postgis-vector
|
|
imagePullPolicy: IfNotPresent
|
|
args:
|
|
- -c
|
|
- ssl=on
|
|
- -c
|
|
- ssl_cert_file=/certs/server.crt
|
|
- -c
|
|
- ssl_key_file=/certs/server.key
|
|
- -c
|
|
- ssl_ca_file=/certs/ca.crt
|
|
- -c
|
|
- hba_file=/etc/postgresql-custom/pg_hba.conf
|
|
lifecycle:
|
|
postStart:
|
|
exec:
|
|
command:
|
|
- /bin/sh
|
|
- -c
|
|
- |
|
|
set -e
|
|
# Wait until server accepts connections
|
|
for i in $(seq 1 30); do
|
|
pg_isready -h 127.0.0.1 -U "$POSTGRES_USER" -d "$POSTGRES_DB" && break
|
|
sleep 1
|
|
done
|
|
psql -v ON_ERROR_STOP=1 -U "$POSTGRES_USER" -d "$POSTGRES_DB" -c "CREATE EXTENSION IF NOT EXISTS pg_stat_statements;"
|
|
env:
|
|
- name: POSTGRES_USER
|
|
value: "app"
|
|
- name: POSTGRES_DB
|
|
value: "gitea" # matches your \connect gitea
|
|
- name: POSTGRES_PASSWORD
|
|
valueFrom:
|
|
secretKeyRef:
|
|
name: pg18-secret
|
|
key: POSTGRES_PASSWORD
|
|
- name: TZ
|
|
value: "Europe/Paris"
|
|
ports:
|
|
- name: postgres
|
|
containerPort: 5432
|
|
volumeMounts:
|
|
# ✅ PG 18 requires this parent path; it will create /var/lib/postgresql/18/main
|
|
- name: data
|
|
mountPath: /var/lib/postgresql
|
|
# your init scripts ConfigMap
|
|
- name: init
|
|
mountPath: /docker-entrypoint-initdb.d
|
|
readOnly: true
|
|
- name: pg-certs
|
|
mountPath: /certs
|
|
# pg_hba.conf
|
|
- name: pg-conf
|
|
mountPath: /etc/postgresql-custom
|
|
readinessProbe:
|
|
exec:
|
|
command:
|
|
- /bin/sh
|
|
- -c
|
|
- pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB" -h 127.0.0.1
|
|
initialDelaySeconds: 5
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
failureThreshold: 6
|
|
livenessProbe:
|
|
exec:
|
|
command:
|
|
- /bin/sh
|
|
- -c
|
|
- pg_isready -U "$POSTGRES_USER" -d "$POSTGRES_DB" -h 127.0.0.1
|
|
initialDelaySeconds: 20
|
|
periodSeconds: 10
|
|
timeoutSeconds: 5
|
|
failureThreshold: 6
|
|
resources:
|
|
requests:
|
|
cpu: "250m"
|
|
memory: "512Mi"
|
|
limits:
|
|
cpu: "1"
|
|
memory: "2Gi"
|
|
volumes:
|
|
- name: init
|
|
configMap:
|
|
name: pg-init-sql
|
|
defaultMode: 0444
|
|
- name: pg-tls
|
|
secret:
|
|
secretName: pg-tls
|
|
- name: pg-certs
|
|
emptyDir: {}
|
|
- name: pg-conf
|
|
configMap:
|
|
name: pg-conf
|
|
defaultMode: 0444
|
|
volumeClaimTemplates:
|
|
- metadata:
|
|
name: data
|
|
spec:
|
|
accessModes: ["ReadWriteOnce"]
|
|
resources:
|
|
requests:
|
|
storage: 10Gi
|
|
# storageClassName: <your-storageclass> # optionally pin this
|
|
|
|
=== ./k8s/postgres/postgres.yaml ===
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: postgres, namespace: db }
|
|
spec:
|
|
ports: [{ port: 5432, targetPort: 5432 }]
|
|
selector: { app: postgres }
|
|
---
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata: { name: pg-init-sql, namespace: db }
|
|
data:
|
|
00_extensions.sql: |
|
|
-- enable common extensions in the default DB and template1 so future DBs inherit them
|
|
\connect gitea
|
|
CREATE EXTENSION IF NOT EXISTS postgis;
|
|
CREATE EXTENSION IF NOT EXISTS vector;
|
|
CREATE COLLATION IF NOT EXISTS arabic (provider = icu, locale = 'ar', deterministic = false);
|
|
CREATE EXTENSION IF NOT EXISTS tablefunc;
|
|
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
|
|
|
|
CREATE EXTENSION IF NOT EXISTS postgis_topology;
|
|
CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
|
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
CREATE EXTENSION IF NOT EXISTS hstore;
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
CREATE EXTENSION IF NOT EXISTS citext;
|
|
CREATE EXTENSION IF NOT EXISTS unaccent;
|
|
CREATE EXTENSION IF NOT EXISTS pgcrypto;
|
|
-- PL/Python (optional; requires image with plpython3u, postgis image has it)
|
|
DO $$ BEGIN
|
|
CREATE EXTENSION IF NOT EXISTS plpython3u;
|
|
EXCEPTION WHEN undefined_file THEN
|
|
RAISE NOTICE 'plpython3u not available in this image';
|
|
END $$;
|
|
|
|
-- Also on template1 for new DBs:
|
|
\connect template1
|
|
CREATE EXTENSION IF NOT EXISTS postgis;
|
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
CREATE EXTENSION IF NOT EXISTS hstore;
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
CREATE EXTENSION IF NOT EXISTS citext;
|
|
CREATE EXTENSION IF NOT EXISTS unaccent;
|
|
CREATE EXTENSION IF NOT EXISTS pgcrypto;
|
|
|
|
-- Arabic-friendly ICU collation (PostgreSQL >= 13)
|
|
-- Non-deterministic collation helps proper case/diacritics comparisons
|
|
DO $$
|
|
BEGIN
|
|
PERFORM 1 FROM pg_collation WHERE collname='arabic';
|
|
IF NOT FOUND THEN
|
|
CREATE COLLATION arabic (provider = icu, locale = 'ar', deterministic = false);
|
|
END IF;
|
|
END$$;
|
|
|
|
-- Example: ensure gitea DB uses UTF8; Arabic text search often needs unaccent + custom dictionaries.
|
|
-- You can create additional DBs with: CREATE DATABASE mydb TEMPLATE template1 ENCODING 'UTF8';
|
|
|
|
01_tune.sql: |
|
|
-- small safe defaults; adjust later
|
|
ALTER SYSTEM SET shared_buffers = '1GB';
|
|
ALTER SYSTEM SET work_mem = '32MB';
|
|
ALTER SYSTEM SET maintenance_work_mem = '512MB';
|
|
ALTER SYSTEM SET max_connections = 200;
|
|
SELECT pg_reload_conf();
|
|
---
|
|
apiVersion: apps/v1
kind: StatefulSet
metadata: { name: postgres, namespace: db }
spec:
  serviceName: postgres
  replicas: 1
  selector: { matchLabels: { app: postgres } }
  template:
    metadata: { labels: { app: postgres } }
    spec:
      nodeSelector:
        node: hetzner-2
      securityContext:
        fsGroup: 999 # Debian postgres user/group in postgis image
        fsGroupChangePolicy: OnRootMismatch
      initContainers:
        - name: fix-perms
          image: busybox:1.36
          command: ["sh","-c","chown -R 999:999 /var/lib/postgresql/data || true"]
          securityContext: { runAsUser: 0 }
          volumeMounts: [{ name: data, mountPath: /var/lib/postgresql/data }]
      containers:
        - name: postgres
          # FIX: "postgres:16-3.4" is not a published tag and the vanilla
          # postgres image has no PostGIS, which 00_extensions.sql requires.
          # 16-3.4 is the PostGIS distribution image's tag scheme (PG16 + 3.4).
          image: postgis/postgis:16-3.4
          env:
            - name: POSTGRES_PASSWORD
              valueFrom: { secretKeyRef: { name: postgres-auth, key: POSTGRES_PASSWORD } }
            - { name: POSTGRES_USER, value: gitea }
            - { name: POSTGRES_DB, value: gitea }
            - name: POSTGRES_INITDB_ARGS
              value: "--encoding=UTF8 --locale=C.UTF-8"
          ports: [{ containerPort: 5432 }]
          volumeMounts:
            - { name: data, mountPath: /var/lib/postgresql/data }
            - { name: init, mountPath: /docker-entrypoint-initdb.d }
      volumes:
        # FIX: the "init" volume referenced by volumeMounts above was only
        # supplied by a trailing partial StatefulSet doc with the same name,
        # which would replace this object with an invalid containerless spec
        # when applied. Define the volume here so this manifest is complete.
        - name: init
          configMap:
            name: pg-init-sql
            defaultMode: 0444
  volumeClaimTemplates:
    - metadata: { name: data }
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: local-ssd-hetzner
        resources: { requests: { storage: 80Gi } }
|
|
---
|
|
# Mount the init scripts
# NOTE(review): this partial StatefulSet duplicates metadata.name "postgres"
# in namespace "db". `kubectl apply` does not merge documents — applying this
# fragment REPLACES the full StatefulSet above with a spec that has no
# containers (invalid). The "init" volume belongs in the main manifest; this
# fragment should be removed once that is done.
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata:
|
|
name: postgres
|
|
namespace: db
|
|
spec:
|
|
template:
|
|
spec:
|
|
volumes:
|
|
- name: init
|
|
configMap:
|
|
name: pg-init-sql
|
|
defaultMode: 0444
|
|
|
|
=== ./k8s/postgres/secret.yaml ===
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: postgres-auth, namespace: db }
|
|
type: Opaque
|
|
stringData:
|
|
POSTGRES_PASSWORD: "PG-ADM1N"
|
|
GITEA_DB_PASSWORD: "G1TEA"
|
|
|
|
=== ./k8s/prometheus/prometheus-config.yaml ===
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata: { name: prometheus-config, namespace: monitoring }
|
|
data:
|
|
prometheus.yml: |
|
|
global: { scrape_interval: 15s }
|
|
scrape_configs:
|
|
- job_name: 'kubernetes-pods'
|
|
kubernetes_sd_configs: [ { role: pod } ]
|
|
relabel_configs:
|
|
- action: keep
|
|
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
|
regex: 'true'
|
|
|
|
=== ./k8s/prometheus/prometheus.yaml ===
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: prometheus, namespace: monitoring }
|
|
spec:
|
|
ports: [{ port: 9090, targetPort: 9090 }]
|
|
selector: { app: prometheus }
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata: { name: prometheus, namespace: monitoring }
|
|
spec:
|
|
serviceName: prometheus
|
|
replicas: 1
|
|
selector: { matchLabels: { app: prometheus } }
|
|
template:
|
|
metadata: { labels: { app: prometheus } }
|
|
spec:
|
|
nodeSelector: { node: hetzner-2 }
|
|
containers:
|
|
- name: prometheus
|
|
image: prom/prometheus:v2.53.0
|
|
args: ["--config.file=/etc/prometheus/prometheus.yml","--storage.tsdb.path=/prometheus"]
|
|
ports: [{ containerPort: 9090 }]
|
|
volumeMounts:
|
|
- { name: data, mountPath: /prometheus }
|
|
- { name: config, mountPath: /etc/prometheus }
|
|
volumes:
|
|
- { name: config, configMap: { name: prometheus-config } }
|
|
volumeClaimTemplates:
|
|
- metadata: { name: data }
|
|
spec:
|
|
accessModes: ["ReadWriteOnce"]
|
|
storageClassName: local-ssd-hetzner
|
|
resources: { requests: { storage: 50Gi } }
|
|
---
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: prometheus
|
|
namespace: monitoring
|
|
annotations:
|
|
cert-manager.io/cluster-issuer: letsencrypt-prod
|
|
nginx.ingress.kubernetes.io/auth-type: basic
|
|
nginx.ingress.kubernetes.io/auth-secret: basic-auth-prometheus
|
|
nginx.ingress.kubernetes.io/auth-realm: "Authentication Required"
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls: [{ hosts: ["prometheus.betelgeusebytes.io"], secretName: prometheus-tls }]
|
|
rules:
|
|
- host: prometheus.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend: { service: { name: prometheus, port: { number: 9090 } } }
|
|
|
|
=== ./k8s/redis/redis-pv.yaml ===
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-redis
|
|
spec:
|
|
capacity:
|
|
storage: 10Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/redis
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
=== ./k8s/redis/redis.yaml ===
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: redis, namespace: db }
|
|
spec:
|
|
ports: [{ port: 6379, targetPort: 6379 }]
|
|
selector: { app: redis }
|
|
---
|
|
apiVersion: apps/v1
|
|
kind: StatefulSet
|
|
metadata: { name: redis, namespace: db }
|
|
spec:
|
|
serviceName: redis
|
|
replicas: 1
|
|
selector: { matchLabels: { app: redis } }
|
|
template:
|
|
metadata: { labels: { app: redis } }
|
|
spec:
|
|
nodeSelector: { node: hetzner-2 }
|
|
containers:
|
|
- name: redis
|
|
image: redis:7
|
|
args: ["--requirepass", "$(REDIS_PASSWORD)"]
|
|
env:
|
|
- name: REDIS_PASSWORD
|
|
valueFrom: { secretKeyRef: { name: redis-auth, key: REDIS_PASSWORD } }
|
|
ports: [{ containerPort: 6379 }]
|
|
volumeMounts:
|
|
- { name: data, mountPath: /data }
|
|
volumeClaimTemplates:
|
|
- metadata: { name: data }
|
|
spec:
|
|
accessModes: ["ReadWriteOnce"]
|
|
storageClassName: local-ssd-hetzner
|
|
resources: { requests: { storage: 10Gi } }
|
|
---
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: redis-auth, namespace: db }
|
|
type: Opaque
|
|
stringData: { REDIS_PASSWORD: "RED1S" }
|
|
|
|
=== ./k8s/sso/sso.yaml ===
|
|
# PV
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-auth
|
|
spec:
|
|
capacity:
|
|
storage: 10Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/auth
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
# k8s/auth/keycloak/secret.yaml
# NOTE(review): default admin/admin credentials committed to the repo — change
# them before exposing auth.betelgeusebytes.io and move the secret out of VCS.
|
|
apiVersion: v1
|
|
kind: Secret
|
|
metadata: { name: keycloak-admin, namespace: db }
|
|
type: Opaque
|
|
stringData: { KEYCLOAK_ADMIN: "admin", KEYCLOAK_ADMIN_PASSWORD: "admin" }
|
|
|
|
---
|
|
# k8s/auth/keycloak/pvc.yaml
|
|
apiVersion: v1
|
|
kind: PersistentVolumeClaim
|
|
metadata: { name: keycloak-data, namespace: db }
|
|
spec:
|
|
accessModes: ["ReadWriteOnce"]
|
|
storageClassName: local-ssd-hetzner
|
|
resources: { requests: { storage: 10Gi } }
|
|
|
|
---
|
|
# k8s/auth/keycloak/deploy.yaml
|
|
apiVersion: apps/v1
kind: Deployment
metadata: { name: keycloak, namespace: db }
spec:
  replicas: 1
  selector: { matchLabels: { app: keycloak } }
  template:
    metadata: { labels: { app: keycloak } }
    spec:
      # Ensure the PV is owned by the Keycloak UID/GID
      securityContext:
        fsGroup: 1000
      initContainers:
        - name: fix-permissions
          image: busybox:1.36
          command: ['sh', '-c', 'chown -R 1000:1000 /opt/keycloak/data && chmod -R 755 /opt/keycloak/data']
          volumeMounts:
            - name: data
              mountPath: /opt/keycloak/data
      containers:
        - name: keycloak
          # FIX: pinned from ":latest". Keycloak 26 renamed KEYCLOAK_ADMIN* to
          # KC_BOOTSTRAP_ADMIN_*, so an unpinned image would silently stop
          # honoring the env vars below on the next pull. 24.x still supports
          # KEYCLOAK_ADMIN* and the --proxy-headers flag used here.
          image: quay.io/keycloak/keycloak:24.0
          args: ["start","--http-enabled=true","--proxy-headers=xforwarded","--hostname-strict=false"]
          env:
            - { name: KEYCLOAK_ADMIN, valueFrom: { secretKeyRef: { name: keycloak-admin, key: KEYCLOAK_ADMIN } } }
            - { name: KEYCLOAK_ADMIN_PASSWORD, valueFrom: { secretKeyRef: { name: keycloak-admin, key: KEYCLOAK_ADMIN_PASSWORD } } }
          ports: [{ containerPort: 8080 }]
          volumeMounts: [{ name: data, mountPath: /opt/keycloak/data }]
          securityContext:
            runAsUser: 1000
            runAsGroup: 1000
      volumes:
        - name: data
          persistentVolumeClaim: { claimName: keycloak-data }
|
|
---
|
|
apiVersion: v1
|
|
kind: Service
|
|
metadata: { name: keycloak, namespace: db }
|
|
spec: { selector: { app: keycloak }, ports: [ { port: 80, targetPort: 8080 } ] }
|
|
---
|
|
apiVersion: networking.k8s.io/v1
|
|
kind: Ingress
|
|
metadata:
|
|
name: keycloak
|
|
namespace: db
|
|
annotations: { cert-manager.io/cluster-issuer: letsencrypt-prod }
|
|
spec:
|
|
ingressClassName: nginx
|
|
tls: [{ hosts: ["auth.betelgeusebytes.io"], secretName: keycloak-tls }]
|
|
rules:
|
|
- host: auth.betelgeusebytes.io
|
|
http:
|
|
paths:
|
|
- path: /
|
|
pathType: Prefix
|
|
backend: { service: { name: keycloak, port: { number: 80 } } }
|
|
|
|
=== ./k8s/storage/persistent-volumes.yaml ===
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-postgres
|
|
spec:
|
|
capacity:
|
|
storage: 80Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/postgres
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-elasticsearch
|
|
spec:
|
|
capacity:
|
|
storage: 300Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/elasticsearch
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-gitea
|
|
spec:
|
|
capacity:
|
|
storage: 50Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/gitea
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-jupyter
|
|
spec:
|
|
capacity:
|
|
storage: 20Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/jupyter
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-kafka
|
|
spec:
|
|
capacity:
|
|
storage: 50Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/kafka
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-zookeeper-data
|
|
spec:
|
|
capacity:
|
|
storage: 10Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/zookeeper-data
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-zookeeper-log
|
|
spec:
|
|
capacity:
|
|
storage: 10Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/zookeeper-log
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
---
|
|
apiVersion: v1
|
|
kind: PersistentVolume
|
|
metadata:
|
|
name: pv-prometheus
|
|
spec:
|
|
capacity:
|
|
storage: 50Gi
|
|
accessModes:
|
|
- ReadWriteOnce
|
|
persistentVolumeReclaimPolicy: Retain
|
|
storageClassName: local-ssd-hetzner
|
|
local:
|
|
path: /mnt/local-ssd/prometheus
|
|
nodeAffinity:
|
|
required:
|
|
nodeSelectorTerms:
|
|
- matchExpressions:
|
|
- key: kubernetes.io/hostname
|
|
operator: In
|
|
values:
|
|
- hetzner-2
|
|
|
|
=== ./k8s/storage/storageclass.yaml ===
|
|
apiVersion: storage.k8s.io/v1
|
|
kind: StorageClass
|
|
metadata:
|
|
name: local-ssd-hetzner
|
|
provisioner: kubernetes.io/no-provisioner
|
|
volumeBindingMode: WaitForFirstConsumer
|
|
|
|
=== ./k8s/tei/tei.yaml ===
|
|
# k8s/ai/tei/deploy.yaml
# NOTE(review): "cpu-latest" is a floating tag — deployments are not
# reproducible and a pull can change behavior; pin a released TEI version.
|
|
apiVersion: apps/v1
|
|
kind: Deployment
|
|
metadata: { name: tei, namespace: ml }
|
|
spec:
|
|
replicas: 1
|
|
selector: { matchLabels: { app: tei } }
|
|
template:
|
|
metadata: { labels: { app: tei } }
|
|
spec:
|
|
containers:
|
|
- name: tei
|
|
image: ghcr.io/huggingface/text-embeddings-inference:cpu-latest
|
|
env: [{ name: MODEL_ID, value: "mixedbread-ai/mxbai-embed-large-v1" }]
|
|
ports: [{ containerPort: 80 }]
|
|
---
|
|
# ClusterIP service in front of the TEI deployment (HTTP on port 80).
apiVersion: v1
kind: Service
metadata:
  name: tei
  namespace: ml
spec:
  selector:
    app: tei
  ports:
    - port: 80
      targetPort: 80
---
|
|
# Public HTTPS entry point for the embeddings API (cert via cert-manager).
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: tei
  namespace: ml
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - embeddings.betelgeusebytes.io
      secretName: tei-tls
  rules:
    - host: embeddings.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: tei
                port:
                  number: 80
=== ./k8s/trading/ib-gateway.yaml ===
|
|
# Namespace for all trading infrastructure workloads.
apiVersion: v1
kind: Namespace
metadata:
  name: trading
  labels:
    name: trading
    environment: production
---
|
|
# OPTIONAL: persists IB Gateway settings/logs across pod restarts. For most
# use cases this is NOT needed — IB Gateway is mostly stateless and the
# credentials live in Secrets.
#
# Only create this PV/PVC if you need to persist:
#   - TWS session data
#   - custom workspace layouts
#   - historical API usage logs
#
# NOTE(review): storageClassName here is "local-storage", not the
# "local-ssd-hetzner" class used elsewhere in this repo — confirm intentional.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ib-gateway-data
  labels:
    type: local
    app: ib-gateway
spec:
  capacity:
    storage: 5Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-storage
  local:
    path: /mnt/local-ssd/ib-gateway  # Adjust to your local SSD path
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
---
|
|
# Claim bound to the ib-gateway-data PV above (matched via the app label).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ib-gateway-data
  namespace: trading
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
  storageClassName: local-storage
  selector:
    matchLabels:
      app: ib-gateway

# To use this PVC, add to the Deployment's volumeMounts:
#   - name: data
#     mountPath: /root/Jts
# and to volumes:
#   - name: data
#     persistentVolumeClaim:
#       claimName: ib-gateway-data
---
|
|
apiVersion: v1
kind: Secret
metadata:
  name: ib-credentials
  namespace: trading
type: Opaque
stringData:
  # SECURITY(review): real-looking credentials are committed here in plaintext.
  # Rotate them and source this Secret from a secret manager (SOPS,
  # SealedSecrets, external-secrets) instead of the repo.
  # For paper trading, use your paper trading account.
  username: "saladin85"
  password: "3Lcd@05041985"
  # Trading mode: "paper" or "live"
  trading-mode: "paper"

  # IB Gateway config (jts.ini equivalent): enables headless mode and
  # configures the API ports.
  ibgateway.conf: |
    [IBGateway]
    TradingMode=paper
    ApiOnly=true
    ReadOnlyApi=false
    TrustedIPs=127.0.0.1

    [IBGatewayAPI]
    ApiPortNumber=4002

    [Logon]
    UseRemoteSettings=no
    Locale=en
    ColorPaletteName=dark

    [Display]
    ShowSplashScreen=no
---
|
|
apiVersion: v1
kind: ConfigMap
metadata:
  name: ib-gateway-config
  namespace: trading
data:
  # Startup script to configure IB Gateway for headless operation.
  startup.sh: |
    #!/bin/bash
    set -e

    echo "Starting IB Gateway in headless mode..."
    echo "Trading Mode: ${TRADING_MODE}"

    # Select the API port from the trading mode. This overrides any
    # TWS_PORT inherited from the pod environment.
    if [ "${TRADING_MODE}" == "live" ]; then
      export TWS_PORT=4001
      echo "⚠️  LIVE TRADING MODE - USE WITH CAUTION ⚠️"
    else
      export TWS_PORT=4002
      echo "📝 Paper Trading Mode (Safe)"
    fi
    # FIX: log the port AFTER it has been chosen (it was previously logged
    # before the mode check, printing a possibly-stale value).
    echo "Port: ${TWS_PORT}"

    # IMPORTANT: use the env vars provided by the Deployment
    export IB_USERNAME="${TWS_USERID}"
    export IB_PASSWORD="${TWS_PASSWORD}"

    # Start IB Gateway
    exec /opt/ibgateway/ibgateway-latest-standalone-linux-x64.sh \
      --tws-path=/root/Jts \
      --tws-settings-path=/root \
      --user="${IB_USERNAME}" \
      --pw="${IB_PASSWORD}" \
      --mode="${TRADING_MODE}" \
      --port="${TWS_PORT}"

  # Health check: pure-python TCP probe of the API port (no nc required).
  healthcheck.sh: |
    #!/bin/sh
    # FIX: export PORT so the Python child process can read it; the previous
    # unexported shell assignment was invisible to the heredoc interpreter,
    # making the os.environ fallback dead code.
    export PORT="${TWS_PORT:-4002}"
    python - <<'PY'
    import os, socket, sys
    port = int(os.environ.get("TWS_PORT", os.environ.get("PORT", "4002")))
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(2)
    try:
        s.connect(("127.0.0.1", port))
        sys.exit(0)
    except Exception:
        sys.exit(1)
    finally:
        s.close()
    PY
---
|
|
# apiVersion: apps/v1
|
|
# kind: Deployment
|
|
# metadata:
|
|
# name: ib-gateway
|
|
# namespace: trading
|
|
# labels:
|
|
# app: ib-gateway
|
|
# component: trading-infrastructure
|
|
# spec:
|
|
# replicas: 1 # IB Gateway should only have 1 instance per account
|
|
# strategy:
|
|
# type: Recreate # Avoid multiple simultaneous logins
|
|
# selector:
|
|
# matchLabels:
|
|
# app: ib-gateway
|
|
# template:
|
|
# metadata:
|
|
# labels:
|
|
# app: ib-gateway
|
|
# annotations:
|
|
# prometheus.io/scrape: "false" # No metrics endpoint by default
|
|
# spec:
|
|
# # Pin to hetzner-2 (matches your existing pattern)
|
|
# nodeSelector:
|
|
# kubernetes.io/hostname: hetzner-2
|
|
|
|
# # Security context
|
|
# securityContext:
|
|
# runAsNonRoot: false # IB Gateway requires root for VNC (even if unused)
|
|
# fsGroup: 1000
|
|
|
|
# containers:
|
|
# - name: ib-gateway
|
|
# # Using community-maintained IB Gateway image
|
|
# # Alternative: waytrade/ib-gateway:latest
|
|
# image: ghcr.io/gnzsnz/ib-gateway:stable
|
|
# imagePullPolicy: IfNotPresent
|
|
|
|
# env:
|
|
# - name: TWS_USERID
|
|
# valueFrom:
|
|
# secretKeyRef:
|
|
# name: ib-credentials
|
|
# key: username
|
|
# - name: TWS_PASSWORD
|
|
# valueFrom:
|
|
# secretKeyRef:
|
|
# name: ib-credentials
|
|
# key: password
|
|
# - name: TRADING_MODE
|
|
# valueFrom:
|
|
# secretKeyRef:
|
|
# name: ib-credentials
|
|
# key: trading-mode
|
|
# - name: TWS_PORT
|
|
# value: "4002" # Default to paper trading
|
|
# - name: READ_ONLY_API
|
|
# value: "no"
|
|
|
|
# # Ports
|
|
# ports:
|
|
# - name: paper-trading
|
|
# containerPort: 4002
|
|
# protocol: TCP
|
|
# - name: live-trading
|
|
# containerPort: 4001
|
|
# protocol: TCP
|
|
# - name: vnc
|
|
# containerPort: 5900
|
|
# protocol: TCP # VNC (not exposed externally)
|
|
|
|
# # Resource limits
|
|
# resources:
|
|
# requests:
|
|
# memory: "1Gi"
|
|
# cpu: "500m"
|
|
# limits:
|
|
# memory: "2Gi"
|
|
# cpu: "1000m"
|
|
|
|
# # Liveness probe (check if API port is responsive)
|
|
# startupProbe:
|
|
# tcpSocket:
|
|
# port: 4002
|
|
# initialDelaySeconds: 60 # Wait 60s before first check
|
|
# periodSeconds: 10 # Check every 10s
|
|
# timeoutSeconds: 5
|
|
# failureThreshold: 18 # 60s + (10s * 18) = 240s total startup time
|
|
|
|
# livenessProbe:
|
|
# tcpSocket:
|
|
# port: 4002
|
|
# initialDelaySeconds: 0 # IB Gateway takes time to start
|
|
# periodSeconds: 60
|
|
# timeoutSeconds: 5
|
|
# failureThreshold: 3
|
|
|
|
# # Readiness probe
|
|
# readinessProbe:
|
|
# tcpSocket:
|
|
# port: 4002
|
|
# initialDelaySeconds: 0
|
|
# periodSeconds: 10
|
|
# timeoutSeconds: 5
|
|
# failureThreshold: 2
|
|
|
|
# # Volume mounts for config
|
|
# volumeMounts:
|
|
# - name: ib-config
|
|
# mountPath: /root/Jts/jts.ini
|
|
# subPath: ibgateway.conf
|
|
# - name: startup-script
|
|
# mountPath: /startup.sh
|
|
# subPath: startup.sh
|
|
# - name: data
|
|
# mountPath: /root/Jts
|
|
|
|
# # Logging to stdout (Fluent Bit will collect)
|
|
# # IB Gateway logs go to /root/Jts/log by default
|
|
# lifecycle:
|
|
# postStart:
|
|
# exec:
|
|
# command:
|
|
# - /bin/sh
|
|
# - -c
|
|
# - |
|
|
# mkdir -p /root/Jts/log
|
|
# ln -sf /dev/stdout /root/Jts/log/ibgateway.log || true
|
|
|
|
# volumes:
|
|
# - name: ib-config
|
|
# secret:
|
|
# secretName: ib-credentials
|
|
# defaultMode: 0644
|
|
# - name: startup-script
|
|
# configMap:
|
|
# name: ib-gateway-config
|
|
# defaultMode: 0755
|
|
# - name: data
|
|
# persistentVolumeClaim:
|
|
# claimName: ib-gateway-data
|
|
|
|
# # Restart policy
|
|
# restartPolicy: Always
|
|
|
|
# # DNS policy for internal cluster resolution
|
|
# dnsPolicy: ClusterFirst
|
|
# IB Gateway with a socat sidecar: the gateway binds its API to localhost only,
# so the sidecar relays cluster traffic (4003) to the gateway (127.0.0.1:4002).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ib-gateway
  namespace: trading
  labels:
    app: ib-gateway
    component: trading-infrastructure
spec:
  replicas: 1
  strategy:
    type: Recreate  # never run two simultaneous logins for one IB account
  selector:
    matchLabels:
      app: ib-gateway
  template:
    metadata:
      labels:
        app: ib-gateway
      annotations:
        prometheus.io/scrape: "false"
    spec:
      nodeSelector:
        kubernetes.io/hostname: hetzner-2
      securityContext:
        runAsNonRoot: false
        fsGroup: 1000
      # Seed a writable jts.ini into the PVC exactly once.
      initContainers:
        - name: seed-jts-config
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              set -e
              mkdir -p /data
              if [ ! -f /data/jts.ini ]; then
                echo "Seeding jts.ini into PVC"
                cp /config/ibgateway.conf /data/jts.ini
                chmod 644 /data/jts.ini
              else
                echo "jts.ini already exists in PVC"
              fi
          volumeMounts:
            - name: ib-config
              mountPath: /config
              readOnly: true
            - name: data
              mountPath: /data
      containers:
        # --------------------------------------------------------------
        # IB Gateway
        # --------------------------------------------------------------
        - name: ib-gateway
          image: ghcr.io/gnzsnz/ib-gateway:stable
          imagePullPolicy: IfNotPresent
          env:
            - name: TWS_USERID
              valueFrom:
                secretKeyRef:
                  name: ib-credentials
                  key: username
            - name: TWS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: ib-credentials
                  key: password
            - name: TRADING_MODE
              valueFrom:
                secretKeyRef:
                  name: ib-credentials
                  key: trading-mode
            - name: TWS_PORT
              value: "4002"
            - name: READ_ONLY_API
              value: "no"
          ports:
            - name: ib-api-local
              containerPort: 4002
              protocol: TCP
            - name: live-trading
              containerPort: 4001
              protocol: TCP
            - name: vnc
              containerPort: 5900
              protocol: TCP
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # IMPORTANT: probes check the local IB port (4002), not the proxy.
          startupProbe:
            tcpSocket:
              port: 4002
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 18
          livenessProbe:
            tcpSocket:
              port: 4002
            periodSeconds: 60
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            tcpSocket:
              port: 4002
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 2
          volumeMounts:
            - name: data
              mountPath: /root/Jts
          lifecycle:
            postStart:
              exec:
                command:
                  - sh
                  - -c
                  - |
                    mkdir -p /root/Jts/log
                    ln -sf /dev/stdout /root/Jts/log/ibgateway.log || true
        # --------------------------------------------------------------
        # Sidecar TCP proxy: accepts cluster traffic, forwards to localhost:4002
        # --------------------------------------------------------------
        - name: ib-api-proxy
          image: alpine/socat:1.8.0.0
          imagePullPolicy: IfNotPresent
          args:
            - "-d"
            - "-d"
            - "TCP-LISTEN:4003,fork,reuseaddr"
            - "TCP:127.0.0.1:4002"
          ports:
            - name: ib-api
              containerPort: 4003
              protocol: TCP
          resources:
            requests:
              memory: "32Mi"
              cpu: "10m"
            limits:
              memory: "128Mi"
              cpu: "100m"
          # Basic probe: is the proxy listening?
          readinessProbe:
            tcpSocket:
              port: 4003
            periodSeconds: 5
            timeoutSeconds: 2
            failureThreshold: 3
      volumes:
        - name: ib-config
          secret:
            secretName: ib-credentials
            defaultMode: 0644
        - name: data
          persistentVolumeClaim:
            claimName: ib-gateway-data
      restartPolicy: Always
      dnsPolicy: ClusterFirst
---
|
|
# apiVersion: v1
|
|
# kind: Service
|
|
# metadata:
|
|
# name: ib-gateway
|
|
# namespace: trading
|
|
# labels:
|
|
# app: ib-gateway
|
|
# spec:
|
|
# type: ClusterIP # Internal-only, not exposed publicly
|
|
# clusterIP: None # Headless service (optional, remove if you want a stable ClusterIP)
|
|
# selector:
|
|
# app: ib-gateway
|
|
# ports:
|
|
# - name: paper-trading
|
|
# port: 4002
|
|
# targetPort: 4002
|
|
# protocol: TCP
|
|
# - name: live-trading
|
|
# port: 4001
|
|
# targetPort: 4001
|
|
# protocol: TCP
|
|
# sessionAffinity: ClientIP # Stick to same pod (important for stateful TWS sessions)
|
|
# sessionAffinityConfig:
|
|
# clientIP:
|
|
# timeoutSeconds: 3600 # 1 hour session stickiness
|
|
|
|
# Internal-only service for the gateway; paper traffic goes through the
# socat sidecar (4003), live traffic goes straight to the gateway (4001).
apiVersion: v1
kind: Service
metadata:
  name: ib-gateway
  namespace: trading
  labels:
    app: ib-gateway
spec:
  type: ClusterIP
  selector:
    app: ib-gateway
  ports:
    - name: paper-trading
      port: 4002
      targetPort: 4003  # <-- proxy sidecar, not the gateway directly
      protocol: TCP
    - name: live-trading
      port: 4001
      targetPort: 4001
      protocol: TCP
  # Stick clients to the same pod — important for stateful TWS sessions.
  sessionAffinity: ClientIP
  sessionAffinityConfig:
    clientIP:
      timeoutSeconds: 3600
=== ./k8s/trading/ib-gateway2.yaml ===
|
|
# Namespace for trading workloads (duplicated in ib-gateway.yaml — applying
# both files is safe, the definitions are identical).
apiVersion: v1
kind: Namespace
metadata:
  name: trading
  labels:
    name: trading
    environment: production
---
|
|
apiVersion: v1
kind: Secret
metadata:
  name: ib-credentials
  namespace: trading
type: Opaque
stringData:
  # SECURITY(review): plaintext credentials committed to the repo — rotate
  # them and move this Secret to a secret manager (SOPS / SealedSecrets /
  # external-secrets).
  username: "saladin85"
  password: "3Lcd@05041985"
  trading-mode: "paper"
---
|
|
# Variant deployment relying on the gnzsnz/ib-gateway image's built-in socat
# relay (SOCAT_PORT) instead of a sidecar container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ib-gateway
  namespace: trading
  labels:
    app: ib-gateway
    component: trading-infrastructure
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: ib-gateway
  template:
    metadata:
      labels:
        app: ib-gateway
      annotations:
        prometheus.io/scrape: "false"
    spec:
      nodeSelector:
        kubernetes.io/hostname: hetzner-2
      # Keep the original security context.
      securityContext:
        runAsNonRoot: false
        fsGroup: 1000
      containers:
        - name: ib-gateway
          image: ghcr.io/gnzsnz/ib-gateway:stable
          imagePullPolicy: IfNotPresent
          # IMPORTANT: use the env vars this image expects.
          env:
            - name: TWS_USERID
              valueFrom:
                secretKeyRef:
                  name: ib-credentials
                  key: username
            - name: TWS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: ib-credentials
                  key: password
            - name: TRADING_MODE
              valueFrom:
                secretKeyRef:
                  name: ib-credentials
                  key: trading-mode
            - name: READ_ONLY_API
              value: "no"
            # These two match what the container log shows the image uses.
            - name: API_PORT
              value: "4002"
            - name: SOCAT_PORT
              value: "4004"
            # Optional but nice to have.
            - name: TIME_ZONE
              value: "Etc/UTC"
            - name: TWOFA_TIMEOUT_ACTION
              value: "exit"
          ports:
            # IB API ports (inside container / localhost use).
            - name: api-paper
              containerPort: 4002
              protocol: TCP
            - name: api-live
              containerPort: 4001
              protocol: TCP
            # socat relay port for non-localhost clients (exposed via Service).
            - name: api-socat
              containerPort: 4004
              protocol: TCP
            # Optional UI/VNC.
            - name: vnc
              containerPort: 5900
              protocol: TCP
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          # Probe the socat port — it represents remote connectivity.
          startupProbe:
            tcpSocket:
              port: 4004
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 18
          readinessProbe:
            tcpSocket:
              port: 4004
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 2
          livenessProbe:
            tcpSocket:
              port: 4004
            periodSeconds: 60
            timeoutSeconds: 5
            failureThreshold: 3
      restartPolicy: Always
      dnsPolicy: ClusterFirst
---
|
|
apiVersion: v1
kind: Service
metadata:
  name: ib-gateway
  namespace: trading
  labels:
    app: ib-gateway
spec:
  type: ClusterIP
  selector:
    app: ib-gateway
  ports:
    # Clients connect to 4002, but traffic is forwarded to SOCAT_PORT=4004.
    - name: paper-trading
      port: 4002
      targetPort: 4004
      protocol: TCP
    # If live trading is truly needed, relay it via another socat port too.
    # For now it is direct (or remove it entirely for safety).
    - name: live-trading
      port: 4001
      targetPort: 4001
      protocol: TCP
  sessionAffinity: ClientIP
  sessionAffinityConfig:
    clientIP:
      timeoutSeconds: 3600
=== ./k8s/vector/qdrant.yaml ===
|
|
# k8s/vec/qdrant/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: qdrant-data
  namespace: db
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-ssd-hetzner
  resources:
    requests:
      storage: 20Gi
---
|
|
# k8s/vec/qdrant/deploy.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: qdrant
  namespace: db
spec:
  replicas: 1
  selector:
    matchLabels:
      app: qdrant
  template:
    metadata:
      labels:
        app: qdrant
    spec:
      containers:
        - name: qdrant
          # NOTE(review): "latest" is a mutable tag — pin a version for
          # reproducible deploys.
          image: qdrant/qdrant:latest
          ports:
            - containerPort: 6333  # HTTP + Web UI
            - containerPort: 6334  # gRPC
          volumeMounts:
            - name: data
              mountPath: /qdrant/storage
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: qdrant-data
---
|
|
apiVersion: v1
kind: Service
metadata:
  name: qdrant
  namespace: db
spec:
  selector:
    app: qdrant
  ports:
    - name: http
      port: 80
      targetPort: 6333
    - name: grpc
      port: 6334
      targetPort: 6334
---
|
|
# Public HTTPS entry point for Qdrant's HTTP API / Web UI.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: qdrant
  namespace: db
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - vector.betelgeusebytes.io
      secretName: qdrant-tls
  rules:
    - host: vector.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: qdrant
                port:
                  number: 80
---
|
|
# Static local-SSD PV backing the qdrant-data claim, pinned to hetzner-2.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-qdrant
spec:
  capacity:
    storage: 20Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-ssd-hetzner
  local:
    path: /mnt/local-ssd/qdrant
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
=== ./k8s/vllm/vllm.yaml ===
|
|
# Static local-SSD PV for model storage (bound by ollama-data), pinned to
# hetzner-2.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-vllm
spec:
  capacity:
    storage: 50Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: local-ssd-hetzner
  local:
    path: /mnt/local-ssd/vllm
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - hetzner-2
---
|
|
# k8s/ai/vllm/secret.yaml
# API key for the (currently disabled) vLLM deployment — replace before use.
apiVersion: v1
kind: Secret
metadata:
  name: vllm-auth
  namespace: ml
type: Opaque
stringData:
  API_KEY: "replace_me"
---
|
|
# k8s/ai/ollama/deploy.yaml
# Ollama (CPU) with an init container that pre-pulls small quantized models
# into the shared PVC so first requests don't block on downloads.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama
  namespace: ml
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama
  template:
    metadata:
      labels:
        app: ollama
    spec:
      securityContext:
        runAsUser: 0  # needed so the init can write into /root/.ollama
      initContainers:
        - name: warm-models
          image: ollama/ollama:latest
          command: ["/bin/sh", "-c"]
          args:
            - |
              ollama serve & # start a temp daemon
              sleep 2
              # pull one or more small, quantized models for CPU
              ollama pull qwen2.5:3b-instruct-q4_K_M || true
              ollama pull llama3.2:3b-instruct-q4_K_M || true
              pkill ollama || true
          volumeMounts:
            - name: data
              mountPath: /root/.ollama
      containers:
        - name: ollama
          image: ollama/ollama:latest
          env:
            - name: OLLAMA_ORIGINS
              value: "*"  # CORS if you call from browser
          ports:
            - containerPort: 11434
          volumeMounts:
            - name: data
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "2"
              memory: "4Gi"
            limits:
              cpu: "4"
              memory: "8Gi"
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: ollama-data
---
|
|
# k8s/ai/ollama/svc-ing.yaml
|
|
# k8s/ai/ollama/svc-ing.yaml
# Service exposing the Ollama API (port 80 -> container 11434).
apiVersion: v1
kind: Service
metadata:
  name: ollama
  namespace: ml
spec:
  selector:
    app: ollama
  ports:
    - name: http
      port: 80
      targetPort: 11434
|
# ---
|
|
# # old k8s/ai/vllm/deploy.yaml
|
|
# apiVersion: apps/v1
|
|
# kind: Deployment
|
|
# metadata: { name: vllm, namespace: ml }
|
|
# spec:
|
|
# replicas: 1
|
|
# selector: { matchLabels: { app: vllm } }
|
|
# template:
|
|
# metadata: { labels: { app: vllm } }
|
|
# spec:
|
|
# containers:
|
|
# - name: vllm
|
|
# image: vllm/vllm-openai:latest
|
|
# args: ["--model","Qwen/Qwen2.5-7B-Instruct","--max-model-len","8192","--port","8000","--host","0.0.0.0"]
|
|
# env:
|
|
# - name: VLLM_API_KEY
|
|
# valueFrom: { secretKeyRef: { name: vllm-auth, key: API_KEY } }
|
|
# ports: [{ containerPort: 8000 }]
|
|
# resources:
|
|
# limits:
|
|
# nvidia.com/gpu: 1
|
|
# requests:
|
|
# nvidia.com/gpu: 1
|
|
# volumeMounts:
|
|
# - { name: cache, mountPath: /root/.cache/huggingface }
|
|
# volumes:
|
|
# - name: cache
|
|
# persistentVolumeClaim: { claimName: vllm-cache-pvc }
|
|
---
|
|
# Claim for Ollama model storage; satisfied by the static pv-vllm above.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-data
  namespace: ml
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: local-ssd-hetzner
  resources:
    requests:
      storage: 50Gi
# ---
|
|
#old k8s/ai/vllm/svc-ing.yaml
|
|
# apiVersion: v1
|
|
# kind: Service
|
|
# metadata: { name: vllm, namespace: ml }
|
|
# spec: { selector: { app: vllm }, ports: [ { port: 80, targetPort: 8000 } ] }
|
|
---
|
|
# Public HTTPS entry point for the LLM API at llm.betelgeusebytes.io.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: vllm
  namespace: ml
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
spec:
  ingressClassName: nginx
  tls:
    - hosts:
        - llm.betelgeusebytes.io
      secretName: vllm-tls
  rules:
    - host: llm.betelgeusebytes.io
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                # FIX: this previously pointed at Service "vllm", which is
                # commented out in this file — every request would 503.
                # Route to the live "ollama" Service (port 80 -> 11434)
                # that replaced the vLLM deployment.
                name: ollama
                port:
                  number: 80