betelgeusebytes/k8s/observability-stack/test-loki-logs.sh

158 lines
5.5 KiB
Bash

#!/bin/bash
# Loki log collection smoke test: prints a banner, then initialises the
# PASS / FAIL tallies that the numbered checks below increment.
set -e  # abort on any unhandled command failure

# ANSI colour escapes; they are expanded when printed via `echo -e` / `printf %b`.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Opening banner, followed by one blank line.
printf '%b\n' \
  "${BLUE}========================================${NC}" \
  "${BLUE} Loki Log Collection Test${NC}" \
  "${BLUE}========================================${NC}" \
  ""

# Running tallies of passed and failed checks.
PASS=0
FAIL=0
# Test 1: Check Alloy DaemonSet
# Passes when at least one pod labelled app=alloy in the observability
# namespace has "Running" in its `kubectl get pods` listing line.
echo -e "${YELLOW}Test 1: Checking Alloy DaemonSet...${NC}"
# Query the API once and reuse the count for both the decision and the message.
# `grep -c` exits 1 when it counts zero lines; `|| true` keeps that (and a
# failing kubectl) from aborting the script under `set -e`.
ALLOY_COUNT=$(kubectl get pods -n observability -l app=alloy --no-headers 2>/dev/null | grep -c "Running" || true)
if [ "${ALLOY_COUNT:-0}" -gt 0 ]; then
  echo -e "${GREEN}✓ Alloy is running ($ALLOY_COUNT pod(s))${NC}"
  PASS=$((PASS+1))
else
  echo -e "${RED}✗ Alloy is not running${NC}"
  FAIL=$((FAIL+1))
fi
echo ""
# Test 2: Check Loki pod
# Passes when any pod labelled app=loki in the observability namespace has
# "Running" in its `kubectl get pods` listing line.
echo -e "${YELLOW}Test 2: Checking Loki pod...${NC}"
if ! kubectl get pods -n observability -l app=loki --no-headers 2>/dev/null | grep -q "Running"; then
  # No matching line (or kubectl itself failed; its stderr is discarded).
  echo -e "${RED}✗ Loki is not running${NC}"
  FAIL=$((FAIL+1))
else
  echo -e "${GREEN}✓ Loki is running${NC}"
  PASS=$((PASS+1))
fi
echo ""
# Test 3: Test Loki readiness endpoint
# Starts a throwaway curl pod (removed again via --rm) and fetches Loki's
# /ready endpoint through the in-cluster service DNS name.
echo -e "${YELLOW}Test 3: Testing Loki readiness endpoint...${NC}"
# --quiet asks kubectl to suppress its own prompt messages so that stdout
# carries only the curl output. Any kubectl/curl failure yields "failed".
READY=$(kubectl run "test-loki-ready-$RANDOM" --rm -i --quiet --restart=Never --image=curlimages/curl:latest -- \
  curl -s -m 5 http://loki.observability.svc.cluster.local:3100/ready 2>/dev/null || echo "failed")
# Depending on the kubectl version, a `pod "..." deleted` notice may still be
# appended to stdout after the response, so an exact comparison with "ready"
# would reject a healthy Loki. Accept any output line that starts with "ready".
if grep -q '^ready' <<<"$READY"; then
  echo -e "${GREEN}✓ Loki is ready${NC}"
  PASS=$((PASS+1))
else
  echo -e "${RED}✗ Loki is not ready (response: $READY)${NC}"
  FAIL=$((FAIL+1))
fi
echo ""
# Test 4: Check Alloy can connect to Loki
# Scans the last 50 log lines of the app=alloy pods for lines matching
# "error.*loki" (case-insensitive); the check passes when there are none.
echo -e "${YELLOW}Test 4: Checking Alloy → Loki connectivity...${NC}"
# Fetch the logs once so that the count and the lines shown on failure come
# from the same snapshot. A failing kubectl leaves the snapshot empty.
ALLOY_LOGS=$(kubectl logs -n observability -l app=alloy --tail=50 2>/dev/null || true)
# `grep -c` exits 1 on a zero count; `|| true` keeps `set -e` from aborting.
ALLOY_ERRORS=$(grep -ci "error.*loki" <<<"$ALLOY_LOGS" || true)
if [ "${ALLOY_ERRORS:-0}" -eq 0 ]; then
  echo -e "${GREEN}✓ No Alloy → Loki connection errors${NC}"
  PASS=$((PASS+1))
else
  echo -e "${RED}✗ Found $ALLOY_ERRORS error(s) in Alloy logs${NC}"
  # Show exactly the lines that were counted. The guard ensures a non-matching
  # grep can never terminate the script before the summary is printed.
  grep -i "error.*loki" <<<"$ALLOY_LOGS" || true
  FAIL=$((FAIL+1))
fi
echo ""
# Test 5: Create test pod and verify logs
# Starts a busybox pod that prints ten "LOKI-TEST-LOG" lines, waits for them to
# be collected, then queries Loki's query_range API for that pod's logs.
# Sets START_TIME / END_TIME (nanosecond epoch strings), which are also read by
# the query in Test 6.
echo -e "${YELLOW}Test 5: Creating test pod and verifying log collection...${NC}"
# Clean up any existing test pod. Best-effort: a failure here must not abort
# the whole run under `set -e`, and kubectl's own output is not interesting.
kubectl delete pod test-logger-verify --ignore-not-found >/dev/null 2>&1 || true
# Create test pod
echo " Creating test pod that logs every second..."
TEST_POD_CREATED=0
if kubectl run test-logger-verify --image=busybox --restart=Never -- sh -c \
  'for i in 1 2 3 4 5 6 7 8 9 10; do echo "LOKI-TEST-LOG: Message number $i at $(date)"; sleep 1; done' \
  >/dev/null 2>&1; then
  TEST_POD_CREATED=1
  # Wait for pod to start and generate logs
  echo " Waiting 15 seconds for logs to be collected..."
  sleep 15
fi
# Query window: the last two minutes, as nanosecond epoch strings. Computed
# with shell arithmetic instead of GNU-only `date -d '2 minutes ago'`, so it
# also works with BSD/macOS date. Taken after the wait so the window covers
# the lines the test pod just wrote.
NOW_EPOCH=$(date -u +%s)
START_TIME="$((NOW_EPOCH - 120))000000000"
END_TIME="${NOW_EPOCH}000000000"
if [ "$TEST_POD_CREATED" -eq 1 ]; then
  # Query Loki API for test logs
  echo " Querying Loki for test logs..."
  QUERY_RESULT=$(kubectl run "test-loki-query-$RANDOM" --rm -i --restart=Never --image=curlimages/curl:latest -- \
    curl -s -m 10 "http://loki.observability.svc.cluster.local:3100/loki/api/v1/query_range" \
    --data-urlencode 'query={pod="test-logger-verify"}' \
    --data-urlencode "start=$START_TIME" \
    --data-urlencode "end=$END_TIME" 2>/dev/null || echo "failed")
else
  # Without the test pod there is nothing to look for; report why instead of
  # exiting silently or matching leftovers from an earlier run.
  QUERY_RESULT="test pod test-logger-verify could not be created"
fi
if echo "$QUERY_RESULT" | grep -q "LOKI-TEST-LOG"; then
  # Count occurrences rather than lines: `grep -o` emits one line per match.
  LOG_COUNT=$(echo "$QUERY_RESULT" | grep -o "LOKI-TEST-LOG" | wc -l)
  echo -e "${GREEN}✓ Found $LOG_COUNT test log messages in Loki${NC}"
  PASS=$((PASS+1))
else
  echo -e "${RED}✗ Test logs not found in Loki${NC}"
  # Only the first 200 characters of the response are shown.
  echo " Response: ${QUERY_RESULT:0:200}"
  FAIL=$((FAIL+1))
fi
# Clean up test pod (best-effort, same reasoning as the initial delete)
kubectl delete pod test-logger-verify --ignore-not-found >/dev/null 2>&1 || true
echo ""
# Test 6: Check observability namespace logs
# Asks Loki (via a throwaway curl pod) for up to 10 entries with
# namespace="observability" inside the START_TIME..END_TIME window, and passes
# when the response contains a non-empty "values" array.
echo -e "${YELLOW}Test 6: Checking for observability namespace logs...${NC}"
OBS_QUERY=$(
  kubectl run test-loki-obs-$RANDOM --rm -i --restart=Never --image=curlimages/curl:latest -- \
    curl -s -m 10 "http://loki.observability.svc.cluster.local:3100/loki/api/v1/query_range" \
    --data-urlencode 'query={namespace="observability"}' \
    --data-urlencode "start=$START_TIME" \
    --data-urlencode "end=$END_TIME" \
    --data-urlencode "limit=10" 2>/dev/null \
    || echo "failed"
)
# `"values":[[` only appears when at least one stream carries an entry.
if ! grep -q '"values":\[\[' <<<"$OBS_QUERY"; then
  echo -e "${RED}✗ No logs found for observability namespace${NC}"
  FAIL=$((FAIL+1))
else
  echo -e "${GREEN}✓ Observability namespace logs found in Loki${NC}"
  PASS=$((PASS+1))
fi
echo ""
# Summary: print the pass/fail tallies, then either troubleshooting hints
# (exiting with status 1) or next steps when every check passed.
printf '%b\n' \
  "${BLUE}========================================${NC}" \
  "${BLUE} Test Results${NC}" \
  "${BLUE}========================================${NC}" \
  ""
TOTAL=$((PASS+FAIL))
printf '%b\n' \
  "Passed: ${GREEN}$PASS${NC} / $TOTAL" \
  "Failed: ${RED}$FAIL${NC} / $TOTAL" \
  ""

# Failure path first: anything other than a zero FAIL count ends the script
# with a non-zero exit status.
if ! [ "$FAIL" -eq 0 ]; then
  printf '%b\n' "${RED}✗✗✗ Some tests failed. Check the output above for details. ✗✗✗${NC}"
  printf '%s\n' \
    "" \
    "Troubleshooting:" \
    " - Check Alloy logs: kubectl logs -n observability -l app=alloy" \
    " - Check Loki logs: kubectl logs -n observability loki-0" \
    " - Verify services: kubectl get svc -n observability" \
    " - See full guide: VERIFY-LOKI-LOGS.md" \
    ""
  exit 1
fi

# Success path.
printf '%b\n' "${GREEN}✓✓✓ All tests passed! Logs are flowing to Loki! ✓✓✓${NC}"
printf '%s\n' \
  "" \
  "Next steps:" \
  " 1. Open Grafana: https://grafana.betelgeusebytes.io" \
  " 2. Go to Explore → Loki" \
  ' 3. Query: {namespace="observability"}' \
  ""