#!/bin/bash
#
# Interactively removes an existing Kubernetes monitoring stack: namespaces,
# Helm releases, standalone workloads, ConfigMaps, cluster RBAC, PVCs/PVs,
# Ingresses and Prometheus Operator CRDs. Each destructive step prompts for an
# explicit "yes" before anything is deleted.
#
# Requires kubectl on PATH; helm is used only when it is installed.

# Abort on the first unhandled command failure.
set -e

echo "=========================================================="
echo "Removing Existing Monitoring Stack"
echo "=========================================================="
echo ""

# ANSI colour escape sequences; they are interpreted by `echo -e`.
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m' # No Color

echo -e "${YELLOW}This script will remove common monitoring deployments including:${NC}"
echo " - Prometheus (standalone or operator)"
echo " - Grafana"
echo " - Fluent Bit"
echo " - Vector"
echo " - Loki"
echo " - Tempo"
echo " - Node exporters"
echo " - kube-state-metrics"
echo " - Any monitoring/prometheus/grafana namespaces"
echo ""
echo -e "${RED}WARNING: This will delete all existing monitoring data!${NC}"
echo ""

# -r keeps backslashes literal. `read` returns non-zero on EOF, which would
# abort silently under `set -e`; treat a closed stdin as "no" instead.
read -r -p "Are you sure you want to continue? (yes/no): " confirm || confirm=""

# Only the exact word "yes" proceeds; anything else cancels cleanly.
if [ "$confirm" != "yes" ]; then
    echo "Cleanup cancelled."
    exit 0
fi

echo ""
echo -e "${YELLOW}Step 1: Checking for existing monitoring namespaces...${NC}"

# Common namespace names for monitoring
NAMESPACES=("monitoring" "prometheus" "grafana" "loki" "tempo" "logging")

for ns in "${NAMESPACES[@]}"; do
    # Skip namespaces that do not exist (or cannot be queried).
    kubectl get namespace "$ns" &> /dev/null || continue

    echo -e "${GREEN}Found namespace: $ns${NC}"

    # Show (at most 20 lines of) what's in the namespace before asking.
    echo " Resources in $ns:"
    kubectl get all -n "$ns" 2>/dev/null | head -20 || true
    echo ""

    # A closed stdin counts as "no" rather than aborting under `set -e`.
    read -r -p " Delete namespace '$ns'? (yes/no): " delete_ns || delete_ns=""
    [ "$delete_ns" = "yes" ] || continue

    echo " Deleting namespace $ns..."
    if ! kubectl delete namespace "$ns" --timeout=120s; then
        # kubectl exits non-zero for any failure, not only a timeout.
        echo -e "${YELLOW} Warning: Namespace deletion failed or timed out, forcing...${NC}"
        # --wait=false returns once the request is submitted, so no
        # unattended background job is left behind; the failure is reported
        # instead of being lost.
        kubectl delete namespace "$ns" --grace-period=0 --force --wait=false \
            || echo -e "${YELLOW} Warning: Forced deletion of namespace $ns failed${NC}"
    fi
done

echo ""
echo -e "${YELLOW}Step 2: Removing common monitoring Helm releases...${NC}"

# Reads `helm list -A` table output on stdin and prints, one per line, the
# NAMESPACE column of every row whose NAME column equals $1 exactly.
# Exact matching keeps "prometheus" from also selecting
# "kube-prometheus-stack" or a row whose chart name merely contains it.
helm_release_namespaces() {
    awk -v name="$1" '$1 == name { print $2 }'
}

# Check if helm is available
if command -v helm &> /dev/null; then
    echo "Checking for Helm releases..."

    # Common Helm release names
    RELEASES=("prometheus" "grafana" "loki" "tempo" "fluent-bit" "prometheus-operator" "kube-prometheus-stack")

    # List once and reuse the output for every release name. A failing
    # `helm list` is treated as "no releases" instead of aborting.
    helm_listing=$(helm list -A) || helm_listing=""

    for release in "${RELEASES[@]}"; do
        # The same release name may exist in several namespaces; handle each.
        # Namespaces arrive on fd 3 so the prompt below can still read stdin.
        while IFS= read -r ns <&3; do
            echo -e "${GREEN}Found Helm release: $release in namespace $ns${NC}"
            read -r -p " Uninstall Helm release '$release'? (yes/no): " uninstall || uninstall=""
            if [ "$uninstall" = "yes" ]; then
                echo " Uninstalling $release..."
                helm uninstall "$release" -n "$ns" || echo -e "${YELLOW} Warning: Failed to uninstall $release${NC}"
            fi
        done 3< <(helm_release_namespaces "$release" <<< "$helm_listing")
    done
else
    echo "Helm not found, skipping Helm releases check"
fi

echo ""
echo -e "${YELLOW}Step 3: Removing standalone monitoring components...${NC}"

# remove_monitoring_workloads KIND LABEL APP...
#
# For each of the kube-system and default namespaces: lists resources of
# KIND whose `kubectl get` row matches any APP (extended-regex alternation),
# asks for confirmation, then deletes
#   1. everything labelled app=<APP>, and
#   2. every resource that was listed, by name,
# so that what the prompt shows is what gets removed.
#
# Arguments: $1 - kubectl resource kind (e.g. daemonset)
#            $2 - plural label used in messages (e.g. DaemonSets)
#            $3... - component names used both as regex and as app= labels
# Returns:   0; individual delete failures are reported as warnings so one
#            failure does not abort the whole script under `set -e`.
remove_monitoring_workloads() {
    local kind=$1 label=$2
    shift 2

    # Join the component names into an alternation: a|b|c
    local pattern
    pattern=$(IFS='|'; printf '%s' "$*")

    local ns rows answer app name
    local -a names
    for ns in kube-system default; do
        # grep exits non-zero when nothing matches: nothing to do here.
        rows=$(kubectl get "$kind" -n "$ns" --no-headers 2>/dev/null \
            | grep -E -- "$pattern") || continue

        echo -e "${GREEN}Found monitoring ${label} in $ns${NC}"
        printf '%s\n' "$rows"
        read -r -p " Delete these ${label}? (yes/no): " answer || answer=""
        [ "$answer" = "yes" ] || continue

        for app in "$@"; do
            kubectl delete "$kind" -n "$ns" -l "app=$app" --ignore-not-found \
                || echo -e "${YELLOW} Warning: Failed to delete ${label} labelled app=$app in $ns${NC}"
        done

        # First column of each listed row is the resource name.
        names=()
        while IFS= read -r name; do
            names+=("$name")
        done < <(awk '{ print $1 }' <<< "$rows")
        kubectl delete "$kind" -n "$ns" --ignore-not-found "${names[@]}" \
            || echo -e "${YELLOW} Warning: Failed to delete some ${label} in $ns${NC}"
    done
}

# Remove common DaemonSets in kube-system or default
echo "Checking for monitoring DaemonSets..."
remove_monitoring_workloads daemonset DaemonSets node-exporter fluent-bit fluentd vector

# Remove common Deployments
echo ""
echo "Checking for monitoring Deployments..."
remove_monitoring_workloads deployment Deployments prometheus grafana kube-state-metrics loki tempo

# Remove common StatefulSets
echo ""
echo "Checking for monitoring StatefulSets..."
remove_monitoring_workloads statefulset StatefulSets prometheus grafana loki tempo

echo ""
echo -e "${YELLOW}Step 4: Removing monitoring ConfigMaps...${NC}"

# Ask before removing ConfigMaps (they might contain important configs)
echo "Checking for monitoring ConfigMaps..."
for ns in kube-system default monitoring prometheus grafana; do
    # Skip namespaces that do not exist (or cannot be queried).
    kubectl get namespace "$ns" &> /dev/null || continue

    # grep exits non-zero when no row matches: nothing to do in this namespace.
    cm_rows=$(kubectl get configmap -n "$ns" --no-headers 2>/dev/null \
        | grep -E "prometheus|grafana|loki|tempo|fluent") || continue

    echo -e "${GREEN}Found monitoring ConfigMaps in $ns${NC}"
    printf '%s\n' "$cm_rows"
    read -r -p " Delete these ConfigMaps? (yes/no): " delete_cm || delete_cm=""
    [ "$delete_cm" = "yes" ] || continue

    # Delete by label first (tempo included, since it is detected above) ...
    for app in prometheus grafana loki tempo fluent-bit; do
        kubectl delete configmap -n "$ns" -l "app=$app" --ignore-not-found \
            || echo -e "${YELLOW} Warning: Failed to delete ConfigMaps labelled app=$app in $ns${NC}"
    done

    # ... then delete every ConfigMap that was listed, by name (first column),
    # so unlabelled ones shown in the prompt are removed as well.
    printf '%s\n' "$cm_rows" | awk '{ print $1 }' \
        | xargs -r kubectl delete configmap -n "$ns" --ignore-not-found \
        || echo -e "${YELLOW} Warning: Failed to delete some ConfigMaps in $ns${NC}"
done

echo ""
echo -e "${YELLOW}Step 5: Removing ClusterRoles and ClusterRoleBindings...${NC}"

# remove_monitoring_cluster_rbac KIND LABEL
#
# Lists cluster-scoped resources of KIND whose `kubectl get` row mentions a
# monitoring component, asks for confirmation, and deletes exactly the listed
# resources by name (so e.g. "prometheus-k8s" is removed, not only a role
# literally named "prometheus").
#
# Arguments: $1 - kubectl resource kind (clusterrole / clusterrolebinding)
#            $2 - plural label used in messages
# Returns:   0; a failed delete is reported as a warning so it does not abort
#            the script under `set -e`.
remove_monitoring_cluster_rbac() {
    local kind=$1 label=$2
    local rows answer name
    local -a names=()

    # grep exits non-zero when nothing matches: nothing to remove.
    rows=$(kubectl get "$kind" --no-headers 2>/dev/null \
        | grep -E "prometheus|grafana|kube-state-metrics|fluent-bit|node-exporter") || return 0

    echo -e "${GREEN}Found monitoring ${label}${NC}"
    printf '%s\n' "$rows"
    read -r -p " Delete these ${label}? (yes/no): " answer || answer=""
    [ "$answer" = "yes" ] || return 0

    # First column of each listed row is the resource name.
    while IFS= read -r name; do
        names+=("$name")
    done < <(awk '{ print $1 }' <<< "$rows")
    kubectl delete "$kind" --ignore-not-found "${names[@]}" \
        || echo -e "${YELLOW} Warning: Failed to delete some ${label}${NC}"
}

# Remove monitoring-related RBAC
echo "Checking for monitoring ClusterRoles..."
remove_monitoring_cluster_rbac clusterrole ClusterRoles

echo "Checking for monitoring ClusterRoleBindings..."
remove_monitoring_cluster_rbac clusterrolebinding ClusterRoleBindings

echo ""
echo -e "${YELLOW}Step 6: Removing PVCs and PVs...${NC}"

# Reads `kubectl get pv --no-headers` rows on stdin and prints the NAME
# (first column) of every row that mentions a monitoring component anywhere
# in the row. Matching the whole row means a volume with a generated name is
# still selected when a later column, such as its claim, names a monitoring
# component.
monitoring_pv_names() {
    awk '/prometheus|grafana|loki|tempo|monitoring/ { print $1 }'
}

# Check for monitoring PVCs
echo "Checking for monitoring PersistentVolumeClaims..."
for ns in kube-system default monitoring prometheus grafana; do
    # Skip namespaces that do not exist (or cannot be queried).
    kubectl get namespace "$ns" &> /dev/null || continue

    # grep exits non-zero when no row matches: nothing to do in this namespace.
    pvc_rows=$(kubectl get pvc -n "$ns" --no-headers 2>/dev/null \
        | grep -E "prometheus|grafana|loki|tempo") || continue

    echo -e "${GREEN}Found monitoring PVCs in $ns${NC}"
    printf '%s\n' "$pvc_rows"
    echo -e "${RED} WARNING: Deleting PVCs will delete all stored data!${NC}"
    read -r -p " Delete these PVCs? (yes/no): " delete_pvc || delete_pvc=""
    [ "$delete_pvc" = "yes" ] || continue

    # A failed delete is reported instead of aborting the script (`set -e`).
    for app in prometheus grafana loki tempo; do
        kubectl delete pvc -n "$ns" -l "app=$app" --ignore-not-found \
            || echo -e "${YELLOW} Warning: Failed to delete PVCs labelled app=$app in $ns${NC}"
    done

    # Also try by name patterns
    kubectl get pvc -n "$ns" -o name | grep -E "prometheus|grafana|loki|tempo" | xargs -r kubectl delete -n "$ns" || true
done

# Check for monitoring PVs
echo ""
echo "Checking for monitoring PersistentVolumes..."
pv_rows=$(kubectl get pv --no-headers 2>/dev/null \
    | grep -E "prometheus|grafana|loki|tempo|monitoring") || pv_rows=""
if [ -n "$pv_rows" ]; then
    echo -e "${GREEN}Found monitoring PVs${NC}"
    printf '%s\n' "$pv_rows"
    echo -e "${RED} WARNING: Deleting PVs may delete data on disk!${NC}"
    read -r -p " Delete these PVs? (yes/no): " delete_pv || delete_pv=""
    if [ "$delete_pv" = "yes" ]; then
        # Delete exactly the volumes that were listed above.
        monitoring_pv_names <<< "$pv_rows" \
            | xargs -r kubectl delete pv --ignore-not-found \
            || echo -e "${YELLOW} Warning: Failed to delete some PVs${NC}"
    fi
fi

echo ""
echo -e "${YELLOW}Step 7: Checking for monitoring Ingresses...${NC}"

for ns in kube-system default monitoring prometheus grafana; do
    # Skip namespaces that do not exist (or cannot be queried).
    kubectl get namespace "$ns" &> /dev/null || continue

    # grep exits non-zero when no row matches: nothing to do in this namespace.
    ing_rows=$(kubectl get ingress -n "$ns" --no-headers 2>/dev/null \
        | grep -E "prometheus|grafana|loki") || continue

    echo -e "${GREEN}Found monitoring Ingresses in $ns${NC}"
    printf '%s\n' "$ing_rows"
    read -r -p " Delete these Ingresses? (yes/no): " delete_ing || delete_ing=""
    [ "$delete_ing" = "yes" ] || continue

    # A failed delete is reported instead of aborting the script (`set -e`).
    for app in prometheus grafana; do
        kubectl delete ingress -n "$ns" -l "app=$app" --ignore-not-found \
            || echo -e "${YELLOW} Warning: Failed to delete Ingresses labelled app=$app in $ns${NC}"
    done

    # Delete every Ingress that was listed, by name (first column). This
    # covers prometheus-ingress / grafana-ingress as well as loki and any
    # other listed Ingress that carries no app= label.
    printf '%s\n' "$ing_rows" | awk '{ print $1 }' \
        | xargs -r kubectl delete ingress -n "$ns" --ignore-not-found \
        || echo -e "${YELLOW} Warning: Failed to delete some Ingresses in $ns${NC}"
done

echo ""
echo -e "${YELLOW}Step 8: Checking for Prometheus Operator CRDs...${NC}"

# Reads `kubectl get crd --no-headers` rows on stdin and prints the rows whose
# NAME (first column) ends in ".monitoring.coreos.com". The dots are escaped
# so they match literally rather than as regex wildcards.
prometheus_operator_crd_rows() {
    awk '$1 ~ /\.monitoring\.coreos\.com$/'
}

# Check for Prometheus Operator CRDs
crd_rows=$(kubectl get crd --no-headers 2>/dev/null | prometheus_operator_crd_rows)
if [ -n "$crd_rows" ]; then
    echo -e "${GREEN}Found Prometheus Operator CRDs${NC}"
    printf '%s\n' "$crd_rows"
    echo ""
    echo -e "${RED}WARNING: Deleting these CRDs will remove ALL Prometheus Operator resources cluster-wide!${NC}"
    read -r -p " Delete Prometheus Operator CRDs? (yes/no): " delete_crd || delete_crd=""
    if [ "$delete_crd" = "yes" ]; then
        # Delete every CRD that was listed rather than a fixed set of names,
        # so CRDs in the group that a hard-coded list would miss are removed
        # too. A failure is reported instead of aborting under `set -e`.
        printf '%s\n' "$crd_rows" | awk '{ print $1 }' \
            | xargs -r kubectl delete crd --ignore-not-found \
            || echo -e "${YELLOW} Warning: Failed to delete some Prometheus Operator CRDs${NC}"
    fi
fi

echo ""
echo -e "${YELLOW}Step 9: Optional - Clean up data directories on nodes...${NC}"
echo ""

# This step only prints instructions; nothing on the nodes is touched here.
echo "You may have monitoring data stored on your nodes at:"
echo " - /mnt/local-ssd/prometheus"
echo " - /mnt/local-ssd/grafana"
echo " - /mnt/local-ssd/loki"
echo " - /mnt/local-ssd/tempo"
echo " - /var/lib/prometheus"
echo " - /var/lib/grafana"
echo ""
echo "To remove these, SSH to each node and run:"
echo " sudo rm -rf /mnt/local-ssd/{prometheus,grafana,loki,tempo}"
echo " sudo rm -rf /var/lib/{prometheus,grafana,loki,tempo}"
echo ""

# A closed stdin counts as "no" so the summary below is still printed instead
# of the script aborting under `set -e`.
read -r -p "Have you cleaned up the data directories? (yes to continue, no to skip): " cleanup_dirs || cleanup_dirs=""
if [ "$cleanup_dirs" != "yes" ]; then
    echo -e "${YELLOW}Node data directories were not confirmed as cleaned; see the commands above.${NC}"
fi

echo ""
echo -e "${GREEN}=========================================================="
echo "Existing Monitoring Stack Cleanup Complete!"
# -e is required here so ${NC} is emitted as an escape sequence and the
# terminal colour is actually reset.
echo -e "==========================================================${NC}"
echo ""
echo "Summary of actions taken:"
echo " - Removed monitoring namespaces (if confirmed)"
echo " - Uninstalled Helm releases (if found and confirmed)"
echo " - Removed standalone monitoring components"
echo " - Removed monitoring ConfigMaps"
echo " - Removed RBAC resources"
echo " - Removed PVCs and PVs (if confirmed)"
echo " - Removed Ingresses"
echo " - Removed Prometheus Operator CRDs (if confirmed)"
echo ""
echo -e "${YELLOW}Next Steps:${NC}"
echo "1. Verify cleanup: kubectl get all -A | grep -E 'prometheus|grafana|loki|tempo|monitoring'"
echo "2. Clean up node data directories (see above)"
echo "3. Deploy new observability stack: ./deploy.sh"
echo ""