healthcheck checks for cpu overload

This commit is contained in:
Moritz 2024-05-14 22:29:02 +02:00
parent 51b24791a4
commit 41de881966
3 changed files with 94 additions and 30 deletions

View File

@ -57,16 +57,16 @@ services:
depends_on:
- db
healthcheck:
test: node /build/healthcheck.js # no curl for healthcheck in wekan container
test: bash /build/healthcheck.sh
interval: 30s
timeout: 10s
retries: 10
start_period: 1m
start_period: 3m
secrets:
- oauth2_secret
configs:
- source: healthcheck_js
target: /build/healthcheck.js
- source: healthcheck_sh
target: /build/healthcheck.sh
mode: 0555
- source: entrypoint
target: /home/wekan/app/docker-entrypoint.sh
@ -95,9 +95,9 @@ networks:
internal:
configs:
healthcheck_js:
name: ${STACK_NAME}_healthcheck_js
file: healthcheck.js
healthcheck_sh:
name: ${STACK_NAME}_healthcheck_sh
file: healthcheck.sh
entrypoint:
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
file: entrypoint.sh.tmpl

View File

@ -1,23 +0,0 @@
// Container health check: sends an HTTP GET to the web service listening on
// port 8080 and exits with code 0 when it answers with status 200, or with
// code 1 on any other status, on a connection error, or on a timeout.
const http = require('http');

const options = {
  host: '0.0.0.0',
  port: 8080,
  // Socket idle timeout in milliseconds. Node only emits a 'timeout' event
  // when it elapses; the request is not aborted automatically.
  timeout: 2000,
};

const healthCheck = http.request(options, (res) => {
  console.log(`HEALTHCHECK STATUS: ${res.statusCode}`);
  process.exit(res.statusCode === 200 ? 0 : 1);
});

// Without this handler a server that accepts the connection but never
// responds would keep the check hanging. Destroying the request with an
// error routes the failure through the 'error' handler below.
healthCheck.on('timeout', () => {
  healthCheck.destroy(new Error('request timed out'));
});

healthCheck.on('error', (err) => {
  // Include the reason so the container's health log is actionable.
  console.error('ERROR', err.message);
  process.exit(1);
});

healthCheck.end();

87
healthcheck.sh Normal file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env bash
# Health check script. Exits 1 when:
#   - a TCP connection to 127.0.0.1:8080 cannot be opened,
#   - no process named "node" is found, or
#   - the node process used more than CPU_USAGE_THRESHOLD percent of one CPU
#     between the previous run of this script and the current one.
# CPU usage is derived from the counters in /proc. The previous sample is kept
# in a state file under /tmp, so a run without a previous sample only records
# data and reports success.
# Requires bash: /dev/tcp redirections are a bash feature, not POSIX sh.

CPU_USAGE_THRESHOLD=100.0
# Default to "healthy" so that runs which cannot compute a usage value (first
# run, or no elapsed CPU time) reach the final check with a defined status.
EXIT_STATUS=0

# Redirecting to /dev/tcp/<host>/<port> makes bash open a TCP connection; the
# command fails when the connection cannot be established. The response is
# not read, so this only verifies that the port accepts connections.
if ! printf "GET / HTTP/1.1\n\n" > /dev/tcp/127.0.0.1/8080; then
    echo "Webservice not accessible"
    exit 1
fi

PID=$(pidof node)
if [ -z "$PID" ]; then
    echo "Node.js process not running."
    exit 1
fi
# pidof prints all matching PIDs separated by spaces; keep only the first one
# so the /proc and state-file paths below stay valid.
PID=${PID%% *}

STATE_FILE="/tmp/cpu_usage_state_$PID"

# Print the sum of all time counters on the aggregate "cpu" line of /proc/stat.
# printf "%.0f" keeps large sums in plain integer form for shell arithmetic.
get_total_cpu_time() {
    awk '/^cpu / {sum = 0; for (i = 2; i <= NF; i++) sum += $i; printf "%.0f\n", sum}' /proc/stat
}

# Print fields 14 + 15 (user and system time) of /proc/$PID/stat.
get_process_cpu_time() {
    awk '{printf "%.0f\n", $14 + $15}' "/proc/$PID/stat"
}

# Print the number of "processor" entries in /proc/cpuinfo.
get_cpu_count() {
    grep -c '^processor' /proc/cpuinfo
}

# The process may have exited between the pidof call and this point.
if [ ! -d "/proc/$PID" ]; then
    echo "Process with PID $PID does not exist."
    exit 1
fi

# Take the current sample.
current_total=$(get_total_cpu_time)
current_process=$(get_process_cpu_time)

if [ -f "$STATE_FILE" ]; then
    # Previous sample: "<total> <process>" on a single line.
    read -r prev_total prev_process < "$STATE_FILE"

    delta_total=$((current_total - prev_total))
    delta_process=$((current_process - prev_process))
    CPU_COUNT=$(get_cpu_count)

    # Guard against division by zero when no CPU time has elapsed.
    if [ "$delta_total" -gt 0 ]; then
        # The process share of total CPU time is scaled by the CPU count, so
        # 100 corresponds to one fully used CPU.
        CPU_USAGE=$(awk -v delta_process="$delta_process" -v delta_total="$delta_total" -v cpu_count="$CPU_COUNT" \
            'BEGIN {printf "%.2f", (delta_process / delta_total) * 100 * cpu_count}')

        # awk does the floating-point comparison; its exit code carries the verdict.
        awk -v usage="$CPU_USAGE" -v threshold="$CPU_USAGE_THRESHOLD" '
        BEGIN {
            if (usage > threshold) {
                print "High CPU usage: " usage "% (Threshold: " threshold "%)";
                exit 1; # Exit with status 1 for high usage
            } else {
                print "CPU usage within limits: " usage "%";
                exit 0; # Normal exit status
            }
        }'
        EXIT_STATUS=$?
    else
        echo "No enough data to calculate CPU usage. Please try again."
    fi
else
    echo "No previous data found. Run the script later to get CPU usage."
fi

# Save the current sample before exiting so the next run measures from here.
echo "$current_total $current_process" > "$STATE_FILE"

if [ "$EXIT_STATUS" -ne 0 ]; then
    echo "Exiting due to high CPU usage."
    exit "$EXIT_STATUS"
fi