healthcheck checks for cpu overload
This commit is contained in:
parent
51b24791a4
commit
41de881966
14
compose.yml
14
compose.yml
|
@ -57,16 +57,16 @@ services:
|
|||
depends_on:
|
||||
- db
|
||||
healthcheck:
|
||||
test: node /build/healthcheck.js # no curl for healthcheck in wekan container
|
||||
test: bash /build/healthcheck.sh
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
start_period: 1m
|
||||
start_period: 3m
|
||||
secrets:
|
||||
- oauth2_secret
|
||||
configs:
|
||||
- source: healthcheck_js
|
||||
target: /build/healthcheck.js
|
||||
- source: healthcheck_sh
|
||||
target: /build/healthcheck.sh
|
||||
mode: 0555
|
||||
- source: entrypoint
|
||||
target: /home/wekan/app/docker-entrypoint.sh
|
||||
|
@ -95,9 +95,9 @@ networks:
|
|||
internal:
|
||||
|
||||
configs:
|
||||
healthcheck_js:
|
||||
name: ${STACK_NAME}_healthcheck_js
|
||||
file: healthcheck.js
|
||||
healthcheck_sh:
|
||||
name: ${STACK_NAME}_healthcheck_sh
|
||||
file: healthcheck.sh
|
||||
entrypoint:
|
||||
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
|
||||
file: entrypoint.sh.tmpl
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
// Container healthcheck: sends one HTTP GET to the local web service and
// exits with status 0 when it answers 200, or 1 on any other status, on a
// connection error, or when the service does not answer in time.
const http = require('http');

// Maximum time (in milliseconds) the socket may stay idle before giving up.
const TIMEOUT_MS = 2000;

const options = {
  host: '0.0.0.0',
  port: 8080,
  timeout: TIMEOUT_MS,
};

const healthCheck = http.request(options, (res) => {
  console.log(`HEALTHCHECK STATUS: ${res.statusCode}`);
  process.exit(res.statusCode === 200 ? 0 : 1);
});

// The `timeout` option alone only makes Node emit a 'timeout' event; the
// request has to be destroyed explicitly, otherwise the probe keeps waiting.
// Destroying it with an error routes the failure through the 'error' handler.
healthCheck.on('timeout', () => {
  healthCheck.destroy(new Error(`no response within ${TIMEOUT_MS} ms`));
});

healthCheck.on('error', (err) => {
  // Include the reason so the healthcheck log shows why the probe failed.
  console.error(`ERROR: ${err.message}`);
  process.exit(1);
});

healthCheck.end();
|
|
@ -0,0 +1,87 @@
|
|||
#!/bin/bash
#
# Container healthcheck for the node web service.
#
# Exits non-zero when:
#   * nothing accepts TCP connections on 127.0.0.1:8080,
#   * no node process can be found or its CPU counters cannot be read, or
#   * the node process used more CPU than CPU_USAGE_THRESHOLD since the
#     previous run of this script.
#
# CPU usage is computed from the difference between two consecutive runs
# (the previous counters are kept in a state file under /tmp), so the first
# run only records a baseline and succeeds.
#
# NOTE: /dev/tcp redirection is a bash feature, so this must run under bash.

# Only proves that the port accepts a connection; the response is not read.
if ! printf "GET / HTTP/1.1\n\n" > /dev/tcp/127.0.0.1/8080; then
    echo "Webservice not accessible"
    exit 1
fi

# Usage limit in percent of ONE core (100.0 == one fully busy core).
# May be overridden through the environment.
CPU_USAGE_THRESHOLD="${CPU_USAGE_THRESHOLD:-100.0}"

# Stays 0 unless the CPU comparison below reports a failure.
EXIT_STATUS=0

# pidof prints every matching PID separated by spaces; keep only the first
# one so the paths built from $PID below stay valid.
PID=$(pidof node)
PID=${PID%% *}
if [ -z "$PID" ]; then
    echo "Node.js process not running."
    exit 1
fi
STATE_FILE="/tmp/cpu_usage_state_$PID"

# Succeeds when $1 is a non-empty string of decimal digits.
is_uint() {
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

# Prints the sum of all counters on the aggregate "cpu" line of /proc/stat.
get_total_cpu_time() {
    # printf "%.0f" keeps the result in plain integer form (no exponent).
    awk '/^cpu / { sum = 0; for (i = 2; i <= NF; i++) sum += $i; printf "%.0f\n", sum; exit }' /proc/stat
}

# Prints user + system CPU time of the process whose PID is $1.
get_process_cpu_time() {
    # Field 2 of /proc/<pid>/stat is the parenthesised command name, which may
    # contain spaces. Strip everything up to the last ") " first; the original
    # fields 14 and 15 (user and system time) then become fields 12 and 13.
    awk '{ sub(/^.*\) /, ""); printf "%.0f\n", $12 + $13 }' "/proc/$1/stat"
}

# Prints the number of "processor" entries in /proc/cpuinfo.
get_cpu_count() {
    grep -c '^processor' /proc/cpuinfo
}

# Main script

# Check if the process directory exists
if [ ! -d "/proc/$PID" ]; then
    echo "Process with PID $PID does not exist."
    exit 1
fi

# Get current CPU times
current_total=$(get_total_cpu_time)
current_process=$(get_process_cpu_time "$PID")

# The process may disappear between the checks above and the reads; refuse to
# do arithmetic on (or persist) anything that is not a plain integer.
if ! is_uint "$current_total" || ! is_uint "$current_process"; then
    echo "Unable to read CPU counters."
    exit 1
fi

# Load the counters saved by the previous run, if any.
prev_total=""
prev_process=""
if [ -f "$STATE_FILE" ]; then
    read -r prev_total prev_process < "$STATE_FILE"
fi

if is_uint "$prev_total" && is_uint "$prev_process"; then
    # Calculate the CPU usage
    delta_total=$((current_total - prev_total))
    delta_process=$((current_process - prev_process))
    CPU_COUNT=$(get_cpu_count)

    # Normalize CPU usage across all CPUs and calculate percentage
    if [ "$delta_total" -gt 0 ]; then # To avoid division by zero
        CPU_USAGE=$(awk -v delta_process="$delta_process" -v delta_total="$delta_total" -v cpu_count="$CPU_COUNT" \
            'BEGIN {printf "%.2f", (delta_process / delta_total) * 100 * cpu_count}')
        # "+ 0" forces a numeric (not string) comparison.
        awk -v usage="$CPU_USAGE" -v threshold="$CPU_USAGE_THRESHOLD" '
        BEGIN {
            if (usage + 0 > threshold + 0) {
                print "High CPU usage: " usage "% (Threshold: " threshold "%)";
                exit 1; # Exit with status 1 for high usage
            } else {
                print "CPU usage within limits: " usage "%";
                exit 0; # Normal exit status
            }
        }'
        EXIT_STATUS=$?
    else
        echo "No enough data to calculate CPU usage. Please try again."
    fi
else
    # Missing or unreadable state file: this run only records a baseline.
    echo "No previous data found. Run the script later to get CPU usage."
fi

# Save current state
echo "$current_total $current_process" > "$STATE_FILE"

if [ "$EXIT_STATUS" -ne 0 ]; then
    echo "Exiting due to high CPU usage."
fi
exit "$EXIT_STATUS"
|
Loading…
Reference in New Issue