commit 4cbd229c160ab362fe18680cad11f89448ad0933
Author: decentral1se
Date:   Thu Mar 31 14:26:41 2022 +0200

    init

diff --git a/.env.sample b/.env.sample
new file mode 100644
index 0000000..acff59a
--- /dev/null
+++ b/.env.sample
@@ -0,0 +1,38 @@
+TYPE=monitoring
+STACK_NAME=gp_monitoring
+LETS_ENCRYPT_ENV=production
+
+GRAFANA_DOMAIN=g.monitor.autonomic.zone
+GRAFANA_CUSTOM_INI_VERSION=v3
+GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
+SECRET_GRAFANA_ADMIN_PASSWORD_VERSION=v1
+SECRET_GRAFANA_OAUTH_CLIENT_SECRET_VERSION=v1
+
+PROMETHEUS_DOMAIN=p.monitor.autonomic.zone
+PROMETHEUS_YML_VERSION=v10
+PROMETHEUS_WEB_YML_VERSION=v2
+SECRET_PROMETHEUS_ADMIN_PASSWORD_VERSION=v1
+SECRET_PROMETHEUS_ADMIN_PASSWORD_HASHED_VERSION=v1
+
+LOKI_DOMAIN=l.monitor.autonomic.zone
+LOKI_AWS_ENDPOINT=https://minio.autonomic.zone
+LOKI_AWS_REGION=eu-west-1
+LOKI_ACCESS_KEY_ID=bush-debrief-approval-robust-scraggly-molecule
+LOKI_BUCKET_NAMES=loki
+LOKI_YML_VERSION=v7
+SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION=v1
+SECRET_LOKI_ADMIN_PASSWORD_HASHED_VERSION=v1
+
+ALERTMANAGER_CONFIG_VERSION=v2
+
+NGINX_CONFIG_VERSION=v5
+HTPASSWD_CONFIG_VERSION=v1
+
+KEYCLOAK_AUTH_URL="https://id.autonomic.zone/auth/realms/autonomic/protocol/openid-connect/auth"
+KEYCLOAK_API_URL="https://id.autonomic.zone/auth/realms/autonomic/protocol/openid-connect/userinfo"
+KEYCLOAK_TOKEN_URL="https://id.autonomic.zone/auth/realms/autonomic/protocol/openid-connect/token"
+
+ALERTMANAGER_SMTP_FROM=noreply@autonomic.zone
+ALERTMANAGER_SMTP_HOST=mail.gandi.net:587
+ALERTMANAGER_SMTP_TO=kaboom@autonomic.zone
+SECRET_ALERTMANAGER_SMTP_PASSWORD_VERSION=v1
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..37b52cc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/.envrc
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..68fcccc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+## monitoring
+
+> WIP
diff --git a/alertmanager.yml.tmpl b/alertmanager.yml.tmpl
new file mode 100644
index 0000000..9f3d795
--- /dev/null
+++ b/alertmanager.yml.tmpl
@@ -0,0 +1,13 @@
+global:
+  smtp_from: {{ env "ALERTMANAGER_SMTP_FROM" }}
+  smtp_smarthost: {{ env "ALERTMANAGER_SMTP_HOST" }}
+  smtp_auth_username: {{ env "ALERTMANAGER_SMTP_FROM" }}
+  smtp_auth_password: {{ secret "alertmanager_smtp_password" }}
+
+route:
+  receiver: "kaboom-mailer"
+
+receivers:
+  - name: "kaboom-mailer"
+    email_configs:
+      - to: {{ env "ALERTMANAGER_SMTP_TO" }}
diff --git a/compose.yml b/compose.yml
new file mode 100644
index 0000000..6a80dbb
--- /dev/null
+++ b/compose.yml
@@ -0,0 +1,201 @@
+---
+# Monitoring stack: Grafana (app), Prometheus, Alertmanager, Loki, and an
+# nginx front end (web) that puts basic auth in front of Loki.
+version: "3.8"
+
+services:
+  app:
+    image: grafana/grafana:8.4.4
+    volumes:
+      - grafana-data:/var/lib/grafana:rw
+    secrets:
+      - grafana_admin_password
+      - grafana_oauth_client_secret
+    configs:
+      - source: grafana_custom_ini
+        target: /etc/grafana/grafana.ini
+    networks:
+      - proxy
+      - internal
+    environment:
+      - GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
+      - GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password
+      - KEYCLOAK_API_URL
+      - KEYCLOAK_AUTH_URL
+      - KEYCLOAK_TOKEN_URL
+    deploy:
+      labels:
+        - "traefik.enable=true"
+        - "traefik.http.services.${STACK_NAME}-grafana.loadbalancer.server.port=3000"
+        - "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN}`)"
+        - "traefik.http.routers.${STACK_NAME}-grafana.entrypoints=web-secure"
+        - "traefik.http.routers.${STACK_NAME}-grafana.tls=true"
+        - "traefik.http.routers.${STACK_NAME}-grafana.tls.certresolver=${LETS_ENCRYPT_ENV}"
+    healthcheck:
+      test: "wget -q http://localhost:3000/ -O/dev/null"
+      interval: 5s
+      timeout: 10s
+      retries: 3
+      start_period: 10s
+
+  prometheus:
+    image: prom/prometheus:v2.34.0
+    secrets:
+      - prometheus_admin_password
+      - prometheus_admin_password_hashed
+    volumes:
+      - prometheus-data:/prometheus:rw
+    configs:
+      - source: prometheus_yml
+        target: /etc/prometheus/prometheus.yml
+      - source: prometheus_web_yml
+        target: /etc/prometheus/prometheus_web.yml
+    command:
+      - "--config.file=/etc/prometheus/prometheus.yml"
+      - "--web.config.file=/etc/prometheus/prometheus_web.yml"
+      - "--storage.tsdb.path=/prometheus"
+      - "--web.console.libraries=/usr/share/prometheus/console_libraries"
+      - "--web.console.templates=/usr/share/prometheus/consoles"
+    networks:
+      - proxy
+      - internal
+    deploy:
+      restart_policy:
+        condition: on-failure
+      labels:
+        - "traefik.enable=true"
+        # Service name follows the "<stack>-<service>" pattern of the routers.
+        - "traefik.http.services.${STACK_NAME}-prometheus.loadbalancer.server.port=9090"
+        - "traefik.http.routers.${STACK_NAME}-prometheus.rule=Host(`${PROMETHEUS_DOMAIN}`)"
+        - "traefik.http.routers.${STACK_NAME}-prometheus.entrypoints=web-secure"
+        - "traefik.http.routers.${STACK_NAME}-prometheus.tls=true"
+        - "traefik.http.routers.${STACK_NAME}-prometheus.tls.certresolver=${LETS_ENCRYPT_ENV}"
+
+  alertmanager:
+    image: prom/alertmanager:v0.23.0
+    volumes:
+      # Mounted at the --storage.path directory so runtime data is kept in
+      # the named volume; the config file arrives via the swarm config.
+      - alertmanager-data:/alertmanager
+    command:
+      - "--config.file=/etc/alertmanager/config.yml"
+      - "--storage.path=/alertmanager"
+    networks:
+      - internal
+    secrets:
+      - alertmanager_smtp_password
+    configs:
+      - source: alertmanager_config
+        target: /etc/alertmanager/config.yml
+    environment:
+      - ALERTMANAGER_SMTP_FROM
+      - ALERTMANAGER_SMTP_HOST
+      - ALERTMANAGER_SMTP_TO
+
+  web:
+    image: nginx:1.20.0
+    networks:
+      - proxy
+      - internal
+    environment:
+      - LOKI_DOMAIN
+      - STACK_NAME
+    configs:
+      - source: nginx_config
+        target: /etc/nginx/nginx.conf
+      - source: htpasswd_conf
+        target: /etc/nginx/conf.d/loki.htpasswd
+    secrets:
+      - loki_admin_password_hashed
+    deploy:
+      restart_policy:
+        condition: on-failure
+      labels:
+        - "traefik.enable=true"
+        - "traefik.http.services.${STACK_NAME}-web.loadbalancer.server.port=80"
+        - "traefik.http.routers.${STACK_NAME}-web.rule=Host(`${LOKI_DOMAIN}`)"
+        - "traefik.http.routers.${STACK_NAME}-web.entrypoints=web-secure"
+        - "traefik.http.routers.${STACK_NAME}-web.tls.certresolver=${LETS_ENCRYPT_ENV}"
+
+  loki:
+    image: grafana/loki:2.0.0
+    command: -config.file=/etc/loki/local-config.yaml
+    networks:
+      - internal
+    configs:
+      - source: loki_yml
+        target: /etc/loki/local-config.yaml
+    volumes:
+      - loki-data:/loki
+    secrets:
+      - loki_aws_secret_access_key
+    environment:
+      - LOKI_ACCESS_KEY_ID
+      - LOKI_AWS_ENDPOINT
+      - LOKI_AWS_REGION
+      - LOKI_BUCKET_NAMES
+      - STACK_NAME
+
+configs:
+  grafana_custom_ini:
+    template_driver: golang
+    name: ${STACK_NAME}_grafana_custom_ini_${GRAFANA_CUSTOM_INI_VERSION}
+    file: grafana_custom.ini
+  prometheus_yml:
+    template_driver: golang
+    name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION}
+    file: prometheus.yml.tmpl
+  prometheus_web_yml:
+    template_driver: golang
+    name: ${STACK_NAME}_prometheus_web_yml_${PROMETHEUS_WEB_YML_VERSION}
+    file: prometheus_web.yml.tmpl
+  loki_yml:
+    template_driver: golang
+    name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION}
+    file: loki.yml.tmpl
+  alertmanager_config:
+    template_driver: golang
+    name: ${STACK_NAME}_alertmanager_config_${ALERTMANAGER_CONFIG_VERSION}
+    file: ./alertmanager.yml.tmpl
+  nginx_config:
+    template_driver: golang
+    name: ${STACK_NAME}_nginx_config_${NGINX_CONFIG_VERSION}
+    file: nginx.conf.tmpl
+  htpasswd_conf:
+    template_driver: golang
+    name: ${STACK_NAME}_htpasswd_${HTPASSWD_CONFIG_VERSION}
+    file: loki.htpasswd.tmpl
+
+volumes:
+  prometheus-data:
+  grafana-data:
+  loki-data:
+  alertmanager-data:
+
+networks:
+  proxy:
+    external: true
+  internal:
+
+secrets:
+  loki_aws_secret_access_key:
+    external: true
+    name: ${STACK_NAME}_loki_aws_secret_access_key_${SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION}
+  grafana_admin_password:
+    external: true
+    name: ${STACK_NAME}_grafana_admin_password_${SECRET_GRAFANA_ADMIN_PASSWORD_VERSION}
+  grafana_oauth_client_secret:
+    external: true
+    name: ${STACK_NAME}_grafana_oauth_client_secret_${SECRET_GRAFANA_OAUTH_CLIENT_SECRET_VERSION}
+  prometheus_admin_password_hashed:
+    external: true
+    name: ${STACK_NAME}_prometheus_admin_password_hashed_${SECRET_PROMETHEUS_ADMIN_PASSWORD_HASHED_VERSION}
+  prometheus_admin_password:
+    external: true
+    name: ${STACK_NAME}_prometheus_admin_password_${SECRET_PROMETHEUS_ADMIN_PASSWORD_VERSION}
+  alertmanager_smtp_password:
+    external: true
+    name: ${STACK_NAME}_alertmanager_smtp_password_${SECRET_ALERTMANAGER_SMTP_PASSWORD_VERSION}
+  loki_admin_password_hashed:
+    external: true
+    name: ${STACK_NAME}_loki_admin_password_hashed_${SECRET_LOKI_ADMIN_PASSWORD_HASHED_VERSION}
diff --git a/grafana_custom.ini b/grafana_custom.ini
new file mode 100644
index 0000000..cd1c546
--- /dev/null
+++ b/grafana_custom.ini
@@ -0,0 +1,31 @@
+[analytics]
+reporting_enabled = false
+
+[snapshots]
+external_enabled = false
+
+[users]
+; NOTE(review): with allow_sign_up below, new OAuth users get Admin - confirm.
+auto_assign_org_role = Admin
+
+[auth]
+disable_login_form = true
+
+[auth.generic_oauth]
+enabled = true
+scopes = openid email profile
+name = id.autonomic.zone
+icon = signin
+tls_skip_verify_insecure = false
+allow_sign_up = true
+client_id = grafana
+client_secret = {{ secret "grafana_oauth_client_secret" }}
+auth_url = {{ env "KEYCLOAK_AUTH_URL" }}
+token_url = {{ env "KEYCLOAK_TOKEN_URL" }}
+api_url = {{ env "KEYCLOAK_API_URL" }}
+
+[auth.basic]
+enabled = false
+
+[plugins]
+enable_alpha = true
diff --git a/loki.htpasswd.tmpl b/loki.htpasswd.tmpl
new file mode 100644
index 0000000..74f33cc
--- /dev/null
+++ b/loki.htpasswd.tmpl
@@ -0,0 +1 @@
+loki:{{ secret "loki_admin_password_hashed" }}
diff --git a/loki.yml.tmpl b/loki.yml.tmpl
new file mode 100644
index 0000000..1f5d1f9
--- /dev/null
+++ b/loki.yml.tmpl
@@ -0,0 +1,77 @@
+auth_enabled: false
+
+ruler:
+  storage:
+    type: local
+    local:
+      directory: /loki/rules
+  rule_path: /loki/scratch
+  alertmanager_url: http://alertmanager:9093
+  enable_api: true
+  enable_alertmanager_v2: true
+  ring:
+    kvstore:
+      store: inmemory
+
+server:
+  http_listen_port: 3100
+
+distributor:
+  ring:
+    kvstore:
+      store: memberlist
+
+ingester:
+  lifecycler:
+    ring:
+      kvstore:
+        store: memberlist
+      replication_factor: 1
+    final_sleep: 0s
+  chunk_idle_period: 5m
+  chunk_retain_period: 30s
+
+memberlist:
+  abort_if_cluster_join_fails: false
+  bind_port: 7946
+  join_members:
+    - {{ env "STACK_NAME" }}_loki:7946
+  max_join_backoff: 1m
+  max_join_retries: 10
+  min_join_backoff: 1s
+
+schema_config:
+  configs:
+    - from: 2020-11-25
+      store: boltdb-shipper
+      object_store: aws
+      schema: v11
+      index:
+        prefix: index_
+        period: 24h
+
+storage_config:
+  boltdb_shipper:
+    active_index_directory: /loki/index
+    cache_location: /loki/index_cache
+    resync_interval: 5s
+    shared_store: aws
+
+  aws:
+    endpoint: {{ env "LOKI_AWS_ENDPOINT" }}
+    region: {{ env "LOKI_AWS_REGION" }}
+    access_key_id: {{ env "LOKI_ACCESS_KEY_ID" }}
+    secret_access_key: {{ secret "loki_aws_secret_access_key" }}
+    bucketnames: {{ env "LOKI_BUCKET_NAMES" }}
+    insecure: false
+    sse_encryption: false
+    http_config:
+      idle_conn_timeout: 90s
+      response_header_timeout: 0s
+      insecure_skip_verify: false
+    s3forcepathstyle: true
+
+limits_config:
+  enforce_metric_name: false
+  reject_old_samples: true
+  reject_old_samples_max_age: 168h
diff --git a/nginx.conf.tmpl b/nginx.conf.tmpl
new file mode 100644
index 0000000..b8af0ba
--- /dev/null
+++ b/nginx.conf.tmpl
@@ -0,0 +1,43 @@
+user www-data;
+
+events {
+    worker_connections 768;
+}
+
+http {
+    include /etc/nginx/mime.types;
+
+    map $http_upgrade $connection_upgrade {
+        default upgrade;
+        '' close;
+    }
+
+    server {
+        listen 80;
+        server_name {{ env "LOKI_DOMAIN" }};
+
+        auth_basic "loki";
+        auth_basic_user_file /etc/nginx/conf.d/loki.htpasswd;
+
+        location / {
+            proxy_read_timeout 1800s;
+            proxy_connect_timeout 1600s;
+            proxy_pass http://{{ env "STACK_NAME" }}_loki:3100;
+            proxy_http_version 1.1;
+            proxy_set_header Upgrade $http_upgrade;
+            # Connection is set once, from the $connection_upgrade map above.
+            proxy_set_header Connection $connection_upgrade;
+            proxy_set_header Proxy-Connection "Keep-Alive";
+            proxy_redirect off;
+        }
+
+        location /ready {
+            proxy_pass http://{{ env "STACK_NAME" }}_loki:3100;
+            proxy_http_version 1.1;
+            proxy_set_header Connection "Keep-Alive";
+            proxy_set_header Proxy-Connection "Keep-Alive";
+            proxy_redirect off;
+            auth_basic "off";
+        }
+    }
+}
diff --git a/prometheus.yml.tmpl b/prometheus.yml.tmpl
new file mode 100644
index 0000000..03656f2
--- /dev/null
+++ b/prometheus.yml.tmpl
@@ -0,0 +1,15 @@
+global:
+  scrape_interval: 30s
+  evaluation_interval: 30s
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+            - alertmanager:9093
+
+scrape_configs:
+  - job_name: "default"
+    file_sd_configs:
+      - files:
+          - /prometheus/scrape_configs/*.yml
diff --git a/prometheus_web.yml.tmpl b/prometheus_web.yml.tmpl
new file mode 100644
index 0000000..38c005a
--- /dev/null
+++ b/prometheus_web.yml.tmpl
@@ -0,0 +1,2 @@
+basic_auth_users:
+  admin: {{ secret "prometheus_admin_password_hashed" }}