#!/bin/bash set -e echo "==========================================================" echo "Removing Existing Monitoring Stack" echo "==========================================================" echo "" RED='\033[0;31m' YELLOW='\033[1;33m' GREEN='\033[0;32m' NC='\033[0m' # No Color echo -e "${YELLOW}This script will remove common monitoring deployments including:${NC}" echo " - Prometheus (standalone or operator)" echo " - Grafana" echo " - Fluent Bit" echo " - Vector" echo " - Loki" echo " - Tempo" echo " - Node exporters" echo " - kube-state-metrics" echo " - Any monitoring/prometheus/grafana namespaces" echo "" echo -e "${RED}WARNING: This will delete all existing monitoring data!${NC}" echo "" read -p "Are you sure you want to continue? (yes/no): " confirm if [ "$confirm" != "yes" ]; then echo "Cleanup cancelled." exit 0 fi echo "" echo -e "${YELLOW}Step 1: Checking for existing monitoring namespaces...${NC}" # Common namespace names for monitoring NAMESPACES=("monitoring" "prometheus" "grafana" "loki" "tempo" "logging") for ns in "${NAMESPACES[@]}"; do if kubectl get namespace "$ns" &> /dev/null; then echo -e "${GREEN}Found namespace: $ns${NC}" # Show what's in the namespace echo " Resources in $ns:" kubectl get all -n "$ns" 2>/dev/null | head -20 || true echo "" read -p " Delete namespace '$ns'? (yes/no): " delete_ns if [ "$delete_ns" = "yes" ]; then echo " Deleting namespace $ns..." kubectl delete namespace "$ns" --timeout=120s || { echo -e "${YELLOW} Warning: Namespace deletion timed out, forcing...${NC}" kubectl delete namespace "$ns" --grace-period=0 --force & } fi fi done echo "" echo -e "${YELLOW}Step 2: Removing common monitoring Helm releases...${NC}" # Check if helm is available if command -v helm &> /dev/null; then echo "Checking for Helm releases..." # Common Helm release names RELEASES=("prometheus" "grafana" "loki" "tempo" "fluent-bit" "prometheus-operator" "kube-prometheus-stack") for release in "${RELEASES[@]}"; do # Check all namespaces for the release if helm list -A | grep -q "$release"; then ns=$(helm list -A | grep "$release" | awk '{print $2}') echo -e "${GREEN}Found Helm release: $release in namespace $ns${NC}" read -p " Uninstall Helm release '$release'? (yes/no): " uninstall if [ "$uninstall" = "yes" ]; then echo " Uninstalling $release..." helm uninstall "$release" -n "$ns" || echo -e "${YELLOW} Warning: Failed to uninstall $release${NC}" fi fi done else echo "Helm not found, skipping Helm releases check" fi echo "" echo -e "${YELLOW}Step 3: Removing standalone monitoring components...${NC}" # Remove common DaemonSets in kube-system or default echo "Checking for monitoring DaemonSets..." for ns in kube-system default; do if kubectl get daemonset -n "$ns" 2>/dev/null | grep -q "node-exporter\|fluent-bit\|fluentd\|vector"; then echo -e "${GREEN}Found monitoring DaemonSets in $ns${NC}" kubectl get daemonset -n "$ns" | grep -E "node-exporter|fluent-bit|fluentd|vector" read -p " Delete these DaemonSets? (yes/no): " delete_ds if [ "$delete_ds" = "yes" ]; then kubectl delete daemonset -n "$ns" -l app=node-exporter --ignore-not-found kubectl delete daemonset -n "$ns" -l app=fluent-bit --ignore-not-found kubectl delete daemonset -n "$ns" -l app=fluentd --ignore-not-found kubectl delete daemonset -n "$ns" -l app=vector --ignore-not-found kubectl delete daemonset -n "$ns" node-exporter --ignore-not-found kubectl delete daemonset -n "$ns" fluent-bit --ignore-not-found kubectl delete daemonset -n "$ns" fluentd --ignore-not-found kubectl delete daemonset -n "$ns" vector --ignore-not-found fi fi done # Remove common Deployments echo "" echo "Checking for monitoring Deployments..." for ns in kube-system default; do if kubectl get deployment -n "$ns" 2>/dev/null | grep -q "prometheus\|grafana\|kube-state-metrics\|loki\|tempo"; then echo -e "${GREEN}Found monitoring Deployments in $ns${NC}" kubectl get deployment -n "$ns" | grep -E "prometheus|grafana|kube-state-metrics|loki|tempo" read -p " Delete these Deployments? (yes/no): " delete_deploy if [ "$delete_deploy" = "yes" ]; then kubectl delete deployment -n "$ns" -l app=prometheus --ignore-not-found kubectl delete deployment -n "$ns" -l app=grafana --ignore-not-found kubectl delete deployment -n "$ns" -l app=kube-state-metrics --ignore-not-found kubectl delete deployment -n "$ns" -l app=loki --ignore-not-found kubectl delete deployment -n "$ns" -l app=tempo --ignore-not-found kubectl delete deployment -n "$ns" prometheus --ignore-not-found kubectl delete deployment -n "$ns" grafana --ignore-not-found kubectl delete deployment -n "$ns" kube-state-metrics --ignore-not-found fi fi done # Remove common StatefulSets echo "" echo "Checking for monitoring StatefulSets..." for ns in kube-system default; do if kubectl get statefulset -n "$ns" 2>/dev/null | grep -q "prometheus\|grafana\|loki\|tempo"; then echo -e "${GREEN}Found monitoring StatefulSets in $ns${NC}" kubectl get statefulset -n "$ns" | grep -E "prometheus|grafana|loki|tempo" read -p " Delete these StatefulSets? (yes/no): " delete_sts if [ "$delete_sts" = "yes" ]; then kubectl delete statefulset -n "$ns" -l app=prometheus --ignore-not-found kubectl delete statefulset -n "$ns" -l app=grafana --ignore-not-found kubectl delete statefulset -n "$ns" -l app=loki --ignore-not-found kubectl delete statefulset -n "$ns" -l app=tempo --ignore-not-found kubectl delete statefulset -n "$ns" prometheus --ignore-not-found kubectl delete statefulset -n "$ns" grafana --ignore-not-found kubectl delete statefulset -n "$ns" loki --ignore-not-found kubectl delete statefulset -n "$ns" tempo --ignore-not-found fi fi done echo "" echo -e "${YELLOW}Step 4: Removing monitoring ConfigMaps...${NC}" # Ask before removing ConfigMaps (they might contain important configs) echo "Checking for monitoring ConfigMaps..." for ns in kube-system default monitoring prometheus grafana; do if kubectl get namespace "$ns" &> /dev/null; then if kubectl get configmap -n "$ns" 2>/dev/null | grep -q "prometheus\|grafana\|loki\|tempo\|fluent"; then echo -e "${GREEN}Found monitoring ConfigMaps in $ns${NC}" kubectl get configmap -n "$ns" | grep -E "prometheus|grafana|loki|tempo|fluent" read -p " Delete these ConfigMaps? (yes/no): " delete_cm if [ "$delete_cm" = "yes" ]; then kubectl delete configmap -n "$ns" -l app=prometheus --ignore-not-found kubectl delete configmap -n "$ns" -l app=grafana --ignore-not-found kubectl delete configmap -n "$ns" -l app=loki --ignore-not-found kubectl delete configmap -n "$ns" -l app=fluent-bit --ignore-not-found fi fi fi done echo "" echo -e "${YELLOW}Step 5: Removing ClusterRoles and ClusterRoleBindings...${NC}" # Remove monitoring-related RBAC echo "Checking for monitoring ClusterRoles..." if kubectl get clusterrole 2>/dev/null | grep -q "prometheus\|grafana\|kube-state-metrics\|fluent-bit\|node-exporter"; then echo -e "${GREEN}Found monitoring ClusterRoles${NC}" kubectl get clusterrole | grep -E "prometheus|grafana|kube-state-metrics|fluent-bit|node-exporter" read -p " Delete these ClusterRoles? (yes/no): " delete_cr if [ "$delete_cr" = "yes" ]; then kubectl delete clusterrole prometheus --ignore-not-found kubectl delete clusterrole grafana --ignore-not-found kubectl delete clusterrole kube-state-metrics --ignore-not-found kubectl delete clusterrole fluent-bit --ignore-not-found kubectl delete clusterrole node-exporter --ignore-not-found fi fi echo "Checking for monitoring ClusterRoleBindings..." if kubectl get clusterrolebinding 2>/dev/null | grep -q "prometheus\|grafana\|kube-state-metrics\|fluent-bit\|node-exporter"; then echo -e "${GREEN}Found monitoring ClusterRoleBindings${NC}" kubectl get clusterrolebinding | grep -E "prometheus|grafana|kube-state-metrics|fluent-bit|node-exporter" read -p " Delete these ClusterRoleBindings? (yes/no): " delete_crb if [ "$delete_crb" = "yes" ]; then kubectl delete clusterrolebinding prometheus --ignore-not-found kubectl delete clusterrolebinding grafana --ignore-not-found kubectl delete clusterrolebinding kube-state-metrics --ignore-not-found kubectl delete clusterrolebinding fluent-bit --ignore-not-found kubectl delete clusterrolebinding node-exporter --ignore-not-found fi fi echo "" echo -e "${YELLOW}Step 6: Removing PVCs and PVs...${NC}" # Check for monitoring PVCs echo "Checking for monitoring PersistentVolumeClaims..." for ns in kube-system default monitoring prometheus grafana; do if kubectl get namespace "$ns" &> /dev/null; then if kubectl get pvc -n "$ns" 2>/dev/null | grep -q "prometheus\|grafana\|loki\|tempo"; then echo -e "${GREEN}Found monitoring PVCs in $ns${NC}" kubectl get pvc -n "$ns" | grep -E "prometheus|grafana|loki|tempo" echo -e "${RED} WARNING: Deleting PVCs will delete all stored data!${NC}" read -p " Delete these PVCs? (yes/no): " delete_pvc if [ "$delete_pvc" = "yes" ]; then kubectl delete pvc -n "$ns" -l app=prometheus --ignore-not-found kubectl delete pvc -n "$ns" -l app=grafana --ignore-not-found kubectl delete pvc -n "$ns" -l app=loki --ignore-not-found kubectl delete pvc -n "$ns" -l app=tempo --ignore-not-found # Also try by name patterns kubectl get pvc -n "$ns" -o name | grep -E "prometheus|grafana|loki|tempo" | xargs -r kubectl delete -n "$ns" || true fi fi fi done # Check for monitoring PVs echo "" echo "Checking for monitoring PersistentVolumes..." if kubectl get pv 2>/dev/null | grep -q "prometheus\|grafana\|loki\|tempo\|monitoring"; then echo -e "${GREEN}Found monitoring PVs${NC}" kubectl get pv | grep -E "prometheus|grafana|loki|tempo|monitoring" echo -e "${RED} WARNING: Deleting PVs may delete data on disk!${NC}" read -p " Delete these PVs? (yes/no): " delete_pv if [ "$delete_pv" = "yes" ]; then kubectl get pv -o name | grep -E "prometheus|grafana|loki|tempo|monitoring" | xargs -r kubectl delete || true fi fi echo "" echo -e "${YELLOW}Step 7: Checking for monitoring Ingresses...${NC}" for ns in kube-system default monitoring prometheus grafana; do if kubectl get namespace "$ns" &> /dev/null; then if kubectl get ingress -n "$ns" 2>/dev/null | grep -q "prometheus\|grafana\|loki"; then echo -e "${GREEN}Found monitoring Ingresses in $ns${NC}" kubectl get ingress -n "$ns" | grep -E "prometheus|grafana|loki" read -p " Delete these Ingresses? (yes/no): " delete_ing if [ "$delete_ing" = "yes" ]; then kubectl delete ingress -n "$ns" -l app=prometheus --ignore-not-found kubectl delete ingress -n "$ns" -l app=grafana --ignore-not-found kubectl delete ingress -n "$ns" prometheus-ingress --ignore-not-found kubectl delete ingress -n "$ns" grafana-ingress --ignore-not-found fi fi fi done echo "" echo -e "${YELLOW}Step 8: Checking for Prometheus Operator CRDs...${NC}" # Check for Prometheus Operator CRDs if kubectl get crd 2>/dev/null | grep -q "monitoring.coreos.com"; then echo -e "${GREEN}Found Prometheus Operator CRDs${NC}" kubectl get crd | grep "monitoring.coreos.com" echo "" echo -e "${RED}WARNING: Deleting these CRDs will remove ALL Prometheus Operator resources cluster-wide!${NC}" read -p " Delete Prometheus Operator CRDs? (yes/no): " delete_crd if [ "$delete_crd" = "yes" ]; then kubectl delete crd prometheuses.monitoring.coreos.com --ignore-not-found kubectl delete crd prometheusrules.monitoring.coreos.com --ignore-not-found kubectl delete crd servicemonitors.monitoring.coreos.com --ignore-not-found kubectl delete crd podmonitors.monitoring.coreos.com --ignore-not-found kubectl delete crd alertmanagers.monitoring.coreos.com --ignore-not-found kubectl delete crd alertmanagerconfigs.monitoring.coreos.com --ignore-not-found kubectl delete crd probes.monitoring.coreos.com --ignore-not-found kubectl delete crd thanosrulers.monitoring.coreos.com --ignore-not-found fi fi echo "" echo -e "${YELLOW}Step 9: Optional - Clean up data directories on nodes...${NC}" echo "" echo "You may have monitoring data stored on your nodes at:" echo " - /mnt/local-ssd/prometheus" echo " - /mnt/local-ssd/grafana" echo " - /mnt/local-ssd/loki" echo " - /mnt/local-ssd/tempo" echo " - /var/lib/prometheus" echo " - /var/lib/grafana" echo "" echo "To remove these, SSH to each node and run:" echo " sudo rm -rf /mnt/local-ssd/{prometheus,grafana,loki,tempo}" echo " sudo rm -rf /var/lib/{prometheus,grafana,loki,tempo}" echo "" read -p "Have you cleaned up the data directories? (yes to continue, no to skip): " cleanup_dirs echo "" echo -e "${GREEN}==========================================================" echo "Existing Monitoring Stack Cleanup Complete!" echo "==========================================================${NC}" echo "" echo "Summary of actions taken:" echo " - Removed monitoring namespaces (if confirmed)" echo " - Uninstalled Helm releases (if found and confirmed)" echo " - Removed standalone monitoring components" echo " - Removed monitoring ConfigMaps" echo " - Removed RBAC resources" echo " - Removed PVCs and PVs (if confirmed)" echo " - Removed Ingresses" echo " - Removed Prometheus Operator CRDs (if confirmed)" echo "" echo -e "${YELLOW}Next Steps:${NC}" echo "1. Verify cleanup: kubectl get all -A | grep -E 'prometheus|grafana|loki|tempo|monitoring'" echo "2. Clean up node data directories (see above)" echo "3. Deploy new observability stack: ./deploy.sh" echo ""