From 6ae8b9543dea0245d4a9a747da91d649d99b6f7d Mon Sep 17 00:00:00 2001
From: Moritz
Date: Tue, 20 Aug 2024 17:36:56 +0200
Subject: [PATCH] Add healthchecks

Add healthchecks to the elasticsearch, memcached, app, postgresql,
railsserver, redis, scheduler and websocket services.

Replace the service-level `restart: on-failure` with
`deploy.restart_policy.condition: on-failure`.

Drop the frameOptions Traefik middleware together with the
X_FRAME_OPTIONS_ALLOW_FROM sample variable.

---
Reviewer notes (text in this section is ignored by `git am`):

- zammad-elasticsearch: `restart_policy` is added under the service's
  existing `deploy:` mapping (next to `resources:`) instead of as a second
  `deploy:` key, which would have been a duplicate mapping key.
- Line breaks, indentation and blank context lines were reconstructed from
  a whitespace-collapsed copy of this patch using the hunk line counts.
  The blank context line inside the zammad-postgresql `volumes:` list is a
  guess; check the context against compose.yml before applying.
- The `index` hashes below are those of the original commit.

 .env.sample |  1 -
 compose.yml | 76 ++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/.env.sample b/.env.sample
index 6a3cb60..b419429 100644
--- a/.env.sample
+++ b/.env.sample
@@ -10,4 +10,3 @@ LETS_ENCRYPT_ENV=production
 SECRET_DB_PASSWORD_VERSION=v1
 
 #RAILS_TRUSTED_PROXIES=['127.0.0.1', '::1', 'your-traefik_app']
-X_FRAME_OPTIONS_ALLOW_FROM=none
diff --git a/compose.yml b/compose.yml
index 0690e93..184fe11 100644
--- a/compose.yml
+++ b/compose.yml
@@ -38,7 +38,9 @@ x-shared:
       ZAMMAD_PROCESS_SCHEDULED:
       ZAMMAD_PROCESS_DELAYED_JOBS_WORKERS:
     image: ghcr.io/zammad/zammad:6.3.1-95
-    restart: on-failure
+    deploy:
+      restart_policy:
+        condition: on-failure
     volumes:
       - zammad-storage:/opt/zammad/storage
       #old: - zammad-data:/opt/zammad
@@ -70,11 +72,18 @@ services:
 
   zammad-elasticsearch:
     image: bitnami/elasticsearch:8.14.3
-    restart: on-failure
     volumes:
       - elasticsearch-data:/bitnami/elasticsearch/data
     environment:
       - discovery.type=single-node
+    healthcheck:
+      test: "/opt/bitnami/scripts/elasticsearch/healthcheck.sh"
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 5m
     deploy:
+      restart_policy:
+        condition: on-failure
       resources:
         limits:
@@ -87,16 +96,20 @@ services:
     command: ["zammad-init"]
     depends_on:
       - zammad-postgresql
-    restart: on-failure
     user: 0:0
-    #deploy:
-    #  restart_policy:
-    #    condition: on-failure
 
   zammad-memcached:
     command: memcached -m 256M
     image: memcached:1.6.29-alpine
-    restart: on-failure
+    healthcheck:
+      test: 'echo "version" | nc -vn -w 1 127.0.0.1 11211'
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 5m
+    deploy:
+      restart_policy:
+        condition: on-failure
 
   app:
     <<: *zammad-service
@@ -118,13 +131,17 @@ services:
         - "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`${EXTRA_DOMAINS})"
         - "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure"
         - "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}"
-        - "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect,${STACK_NAME}-frameOptions"
+        - "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect"
         - "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true"
         - "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}"
-        - "traefik.http.middlewares.${STACK_NAME}-frameOptions.headers.customFrameOptionsValue=SAMEORIGIN"
-        - "traefik.http.middlewares.${STACK_NAME}-frameOptions.headers.contentSecurityPolicy=frame-ancestors ${X_FRAME_OPTIONS_ALLOW_FROM}"
         - "coop-cloud.${STACK_NAME}.version=1.0.0+6.3.1-95"
         - "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-120}"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8080"]
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 5m
 
   zammad-postgresql:
     image: postgres:15.7-alpine
@@ -132,32 +149,66 @@ services:
       POSTGRES_DB: zammad_production
       POSTGRES_USER: zammad
       POSTGRES_PASSWORD_FILE: /run/secrets/db_password
-    restart: on-failure
+    deploy:
+      restart_policy:
+        condition: on-failure
     volumes:
       - postgresql-data:/var/lib/postgresql/data
 
       # Backup Restore
       #- zammad-backup:/var/tmp/zammad:ro
     secrets:
       - db_password
+    healthcheck:
+      test: ["CMD", "pg_isready", "-U", "zammad"]
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 2m
 
   zammad-railsserver:
     <<: *zammad-service
     command: ["zammad-railsserver"]
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:3000"]
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 5m
 
   zammad-redis:
     image: redis:7.2.5-alpine
-    restart: on-failure
+    deploy:
+      restart_policy:
+        condition: on-failure
     volumes:
       - redis-data:/data
+    healthcheck:
+      test: ["CMD-SHELL", "redis-cli ping | grep PONG"]
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 1m
 
   zammad-scheduler:
     <<: *zammad-service
     command: ["zammad-scheduler"]
+    healthcheck:
+      test: 'ps x | grep "[b]ackground-worker.rb"'
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 5m
 
   zammad-websocket:
     <<: *zammad-service
     command: ["zammad-websocket"]
+    healthcheck:
+      test: 'ruby -rsocket -e "s = TCPSocket.new(''localhost'', 6042); s.close"'
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 5m
 
 volumes:
   elasticsearch-data:
@@ -165,7 +216,6 @@ volumes:
   redis-data:
   zammad-backup:
   zammad-storage:
-  #zammad-data:
 
 networks:
   default: