unbound: fix healthcheck logging + added fail tolerance to checks (#6004)

* unbound: fix healthcheck logging to stdout + rewrote healthcheck logic

* compose: bump unbound tag

* unbound: fixed healthcheck logic
This commit is contained in:
Niklas Meyer
2024-08-13 15:59:57 +02:00
committed by GitHub
parent b1c1e403d2
commit b26ccc2019
6 changed files with 152 additions and 56 deletions
+77 -51
View File
@@ -1,76 +1,102 @@
#!/bin/bash
# Skip Unbound (DNS Resolver) Healthchecks (NOT Recommended!)
if [[ "${SKIP_UNBOUND_HEALTHCHECK}" =~ ^([yY][eE][sS]|[yY])+$ ]]; then
SKIP_UNBOUND_HEALTHCHECK=y
fi
STATUS_FILE="/tmp/healthcheck_status"
RUNS=0
# Reset logfile
echo "$(date +"%Y-%m-%d %H:%M:%S"): Starting health check - logs can be found in /var/log/healthcheck.log"
echo "$(date +"%Y-%m-%d %H:%M:%S"): Starting health check" > /var/log/healthcheck.log
# Declare log function for logfile inside container
function log_to_file() {
echo "$(date +"%Y-%m-%d %H:%M:%S"): $1" >> /var/log/healthcheck.log
# Declare log function for logfile to stdout
function log_to_stdout() {
echo "$(date +"%Y-%m-%d %H:%M:%S"): $1"
}
# General Ping function to check general pingability
function check_ping() {
declare -a ipstoping=("1.1.1.1" "8.8.8.8" "9.9.9.9")
declare -a ipstoping=("1.1.1.1" "8.8.8.8" "9.9.9.9")
local fail_tolerance=1
local failures=0
for ip in "${ipstoping[@]}" ; do
ping -q -c 3 -w 5 "$ip"
if [ $? -ne 0 ]; then
log_to_file "Healthcheck: Couldn't ping $ip for 5 seconds... Gave up!"
log_to_file "Please check your internet connection or firewall rules to fix this error, because a simple ping test should always go through from the unbound container!"
return 1
fi
for ip in "${ipstoping[@]}" ; do
success=false
for ((i=1; i<=3; i++)); do
ping -q -c 3 -w 5 "$ip" > /dev/null
if [ $? -eq 0 ]; then
success=true
break
else
log_to_stdout "Healthcheck: Failed to ping $ip on attempt $i. Trying again..."
fi
done
if [ "$success" = false ]; then
log_to_stdout "Healthcheck: Couldn't ping $ip after 3 attempts. Marking this IP as failed."
((failures++))
fi
done
if [ $failures -gt $fail_tolerance ]; then
log_to_stdout "Healthcheck: Too many ping failures ($fail_tolerance failures allowed, you got $failures failures), marking Healthcheck as unhealthy..."
return 1
fi
return 0
log_to_file "Healthcheck: Ping Checks WORKING properly!"
return 0
}
# General DNS Resolve Check against Unbound Resolver himself
function check_dns() {
declare -a domains=("mailcow.email" "github.com" "hub.docker.com")
declare -a domains=("fuzzy.mailcow.email" "github.com" "hub.docker.com")
local fail_tolerance=1
local failures=0
for domain in "${domains[@]}" ; do
for ((i=1; i<=3; i++)); do
dig +short +timeout=2 +tries=1 "$domain" @127.0.0.1 > /dev/null
if [ $? -ne 0 ]; then
log_to_file "Healthcheck: DNS Resolution Failed on $i attempt! Trying again..."
if [ $i -eq 3 ]; then
log_to_file "Healthcheck: DNS Resolution not possible after $i attempts... Gave up!"
log_to_file "Maybe check your outbound firewall, as it needs to resolve DNS over TCP AND UDP!"
return 1
fi
for domain in "${domains[@]}" ; do
success=false
for ((i=1; i<=3; i++)); do
dig_output=$(dig +short +timeout=2 +tries=1 "$domain" @127.0.0.1 2>/dev/null)
dig_rc=$?
if [ $dig_rc -ne 0 ] || [ -z "$dig_output" ]; then
log_to_stdout "Healthcheck: DNS Resolution Failed on attempt $i for $domain! Trying again..."
else
success=true
break
fi
done
done
log_to_file "Healthcheck: DNS Resolver WORKING properly!"
return 0
if [ "$success" = false ]; then
log_to_stdout "Healthcheck: DNS Resolution not possible after 3 attempts for $domain... Gave up!"
((failures++))
fi
done
if [ $failures -gt $fail_tolerance ]; then
log_to_stdout "Healthcheck: Too many DNS failures ($fail_tolerance failures allowed, you got $failures failures), marking Healthcheck as unhealthy..."
return 1
fi
return 0
}
if [[ ${SKIP_UNBOUND_HEALTHCHECK} == "y" ]]; then
log_to_file "Healthcheck: ALL CHECKS WERE SKIPPED! Unbound is healthy!"
exit 0
fi
while true; do
# run checks, if check is not returning 0 (return value if check is ok), healthcheck will exit with 1 (marked in docker as unhealthy)
check_ping
if [[ ${SKIP_UNBOUND_HEALTHCHECK} == "y" ]]; then
log_to_stdout "Healthcheck: ALL CHECKS WERE SKIPPED! Unbound is healthy!"
echo "0" > $STATUS_FILE
sleep 365d
fi
if [ $? -ne 0 ]; then
exit 1
fi
# run checks, if check is not returning 0 (return value if check is ok), healthcheck will exit with 1 (marked in docker as unhealthy)
check_ping
PING_STATUS=$?
check_dns
check_dns
DNS_STATUS=$?
if [ $? -ne 0 ]; then
exit 1
fi
if [ $PING_STATUS -ne 0 ] || [ $DNS_STATUS -ne 0 ]; then
echo "1" > $STATUS_FILE
log_to_file "Healthcheck: ALL CHECKS WERE SUCCESSFUL! Unbound is healthy!"
exit 0
else
echo "0" > $STATUS_FILE
fi
sleep 30
done