17 Commits

Author SHA1 Message Date
0352a393de feat: Adds dashboard and alerts for backupbot 2024-12-30 14:43:30 +01:00
92e7bbc730 chore: publish 1.3.0+v1.8.1 release 2024-12-28 13:55:05 +01:00
5bf3d31c0f docs: Improves documentation for pushgateway 2024-12-28 13:53:31 +01:00
a14cb575a2 fix: shorten names for dashboard files 2024-12-21 14:37:10 +01:00
1a59dfac7f add pushgateway 2024-12-21 14:23:50 +01:00
a9b76dff65 update backupbot label 2024-10-24 17:32:29 +02:00
0401de1d16 chore: publish 1.2.0+v1.8.1 release 2024-07-17 13:36:04 +02:00
aa133fcfea add backup label 2024-04-17 16:02:38 +02:00
3750e75439 chore: publish 1.1.0+v1.7.0 release 2024-04-16 16:45:42 +02:00
5d0b6d88fc fix default DOMAIN naming scheme 2024-01-19 12:30:58 +01:00
c06dda7205 chore: publish 1.0.0+v1.7.0 release 2023-12-21 02:24:47 +01:00
b497b4ce4e shorten config names 2023-12-21 01:43:44 +01:00
29b3fcd7d1 update grafana dashboards 2023-12-21 01:43:44 +01:00
d4c6bd4c12 shorten basic_auth secret 2023-12-20 22:46:27 +01:00
42c3695bf3 add diskIO and process metrics 2023-12-20 17:34:41 +01:00
74498b70fe update grafana dashboards 2023-07-31 16:28:44 +02:00
961a17f1ad chore: publish 0.3.0+v1.6.0 release 2023-07-11 14:59:37 +02:00
20 changed files with 3559 additions and 876 deletions

View File

@ -3,9 +3,10 @@ LETS_ENCRYPT_ENV=production
COMPOSE_FILE=compose.yml COMPOSE_FILE=compose.yml
DOMAIN=monitoring.example.com DOMAIN=monitoring.example.com
TIMEOUT=120 TIMEOUT=120
ENABLE_BACKUPS=true
## Enable this secret for Promtail / Prometheus ## Enable this secret for Promtail / Prometheus
# SECRET_BASIC_AUTH_ADMIN_PASSWORD_VERSION=v1 # SECRET_BASIC_AUTH_VERSION=v1
# #
# Promtail (Gathering Logs) # Promtail (Gathering Logs)
# COMPOSE_FILE="$COMPOSE_FILE:compose.promtail.yml" # COMPOSE_FILE="$COMPOSE_FILE:compose.promtail.yml"
@ -16,6 +17,10 @@ TIMEOUT=120
## Prometheus ## Prometheus
# COMPOSE_FILE="$COMPOSE_FILE:compose.prometheus.yml" # COMPOSE_FILE="$COMPOSE_FILE:compose.prometheus.yml"
# PROMETHEUS_RETENTION_TIME=1y # PROMETHEUS_RETENTION_TIME=1y
#
## Prometheus Pushgateway
# COMPOSE_FILE="$COMPOSE_FILE:compose.pushgateway.yml"
#
## Loki ## Loki
# Loki Server # Loki Server
# #
@ -36,16 +41,16 @@ TIMEOUT=120
## Grafana ## Grafana
# #
# COMPOSE_FILE="$COMPOSE_FILE:compose.grafana.yml" # COMPOSE_FILE="$COMPOSE_FILE:compose.grafana.yml"
# GF_SERVER_ROOT_URL=https://${DOMAIN} # GF_SERVER_ROOT_URL=https://monitoring.example.com
# SECRET_GRAFANA_ADMIN_PASSWORD_VERSION=v1 # SECRET_GRAFANA_ADMIN_PASSWORD_VERSION=v1
# #
## Single-Sign-On with OIDC ## Single-Sign-On with OIDC
# OIDC_ENABLED=1 # OIDC_ENABLED=1
# SECRET_GRAFANA_OIDC_CLIENT_SECRET_VERSION=v1 # SECRET_GRAFANA_OIDC_CLIENT_SECRET_VERSION=v1
# OIDC_CLIENT_ID=grafana # OIDC_CLIENT_ID=grafana
# OIDC_AUTH_URL="https://sso.example.com/auth/realms/autonomic/protocol/openid-connect/auth" # OIDC_AUTH_URL="https://authentik.example.com/application/o/authorize/"
# OIDC_API_URL="https://sso.example.com/auth/realms/autonomic/protocol/openid-connect/userinfo" # OIDC_API_URL="https://authentik.example.com/application/o/userinfo/"
# OIDC_TOKEN_URL="https://sso.example.com/auth/realms/autonomic/protocol/openid-connect/token" # OIDC_TOKEN_URL="https://authentik.example.com/application/o/token/"
# #
## Additional grafana settings (unlikely to require editing) ## Additional grafana settings (unlikely to require editing)
# GF_SECURITY_ALLOW_EMBEDDING=1 # GF_SECURITY_ALLOW_EMBEDDING=1
@ -59,3 +64,10 @@ TIMEOUT=120
# GF_SMTP_SKIP_VERIFY=false # GF_SMTP_SKIP_VERIFY=false
# SECRET_GRAFANA_SMTP_PASSWORD_VERSION=v1 # SECRET_GRAFANA_SMTP_PASSWORD_VERSION=v1
# #
# Alerts
#ALERT_BACKUP_FAILED_ENABLED=true
#ALERT_BACKUP_MISSING_ENABLED=true
#ALERT_BACKUP_NOT_SUCCESSFULL_ENABLED=true
#ALERT_NODE_DISK_SPACE_ENABLED=true
#ALERT_NODE_MEMORY_USAGE_ENABLED=true

View File

@ -54,15 +54,15 @@ Where gathering.org is the node you want to gather metrics from.
- monitoring.example.org - monitoring.example.org
- prometheus.monitoring.example.org - prometheus.monitoring.example.org
- loki.monitoring.example.org - loki.monitoring.example.org
1. Setup monitoring stack 2. Setup monitoring stack
- `abra app new monitoring-ng` - `abra app new monitoring-ng`
- `abra app config monitoring.example.org` - `abra app config monitoring.example.org`
Uncomment all the stuff Uncomment all the stuff
- `abra app secret insert monitoring.example.org basic_auth_admin_password v1 <secret>` - `abra app secret insert monitoring.example.org basic_auth v1 <secret>`
this needs the plaintext traefik basic-auth secret, not the hashed one! this needs the plaintext traefik basic-auth secret, not the hashed one!
- `abra app secret ls monitoring.example.org` - `abra app secret ls monitoring.example.org`
- `abra app deploy monitoring.example.org` - `abra app deploy monitoring.example.org`
1. add scrape config to prometheus 3. Add scrape config to prometheus
- `abra app cmd monitoring.example.org prometheus gathering.org` - `abra app cmd monitoring.example.org prometheus gathering.org`
- or manually - or manually
``` ```
@ -101,8 +101,18 @@ $ echo '{
$ systemctl restart docker.service $ systemctl restart docker.service
``` ```
## Setup Push Gateway
1. Enable in the env file by uncommenting the following lines:
```
## Prometheus Pushgateway
# COMPOSE_FILE="$COMPOSE_FILE:compose.pushgateway.yml"
```
2. `abra app deploy monitoring.example.org`
This will expose the pushgateway at `https://pushgateway.${DOMAIN}`.
It is secured behind the same basic auth as the other services.
After that, you need to add `pushgateway.${DOMAIN}` to the Prometheus scrape config.
## Post-setup guide ## Post-setup guide
@ -119,3 +129,12 @@ $ systemctl restart docker.service
--- ---
THX to the previous work of @decentral1se @knooflok @3wc @cellarspoon @mirsal THX to the previous work of @decentral1se @knooflok @3wc @cellarspoon @mirsal
## alerts
It is possible to enable the following alerts by setting the corresponding env variable to `true`:
- backupbot failed: `ALERT_BACKUP_FAILED_ENABLED`
- backupbot missing: `ALERT_BACKUP_MISSING_ENABLED`
- backupbot not successful: `ALERT_BACKUP_NOT_SUCCESSFULL_ENABLED`
- node disk space: `ALERT_NODE_DISK_SPACE_ENABLED`
- node memory usage: `ALERT_NODE_MEMORY_USAGE_ENABLED`

18
abra.sh
View File

@ -1,13 +1,15 @@
export NODE_EXPORTER_ENTRYPOINT_VERSION=v1 export ENTRYPOINT_VERSION=v1
export GRAFANA_DATASOURCES_YML_VERSION=v1 export GRAFANA_DATASOURCES_YML_VERSION=v1
export GRAFANA_DASHBOARDS_YML_VERSION=v1 export GRAFANA_DASHBOARDS_YML_VERSION=v2
export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1 export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v2
export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1 export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v2
export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1 export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v2
export GRAFANA_CUSTOM_INI_VERSION=v2 export GRAFANA_BACKUP_DASHBOARD_JSON_VERSION=v1
export PROMTAIL_YML_VERSION=v2 export GRAFANA_ALERTS_JSON_VERSION=v3
export GRAFANA_CUSTOM_INI_VERSION=v4
export PROMTAIL_YML_VERSION=v3
export LOKI_YML_VERSION=v2 export LOKI_YML_VERSION=v2
export PROMETHEUS_YML_VERSION=v1 export PROMETHEUS_YML_VERSION=v2
# creates a default prometheus scrape config for a given node # creates a default prometheus scrape config for a given node
add_node(){ add_node(){

View File

@ -2,7 +2,7 @@ version: '3.8'
services: services:
grafana: grafana:
image: grafana/grafana:9.5.2 image: grafana/grafana:10.4.14
volumes: volumes:
- grafana-data:/var/lib/grafana:rw - grafana-data:/var/lib/grafana:rw
secrets: secrets:
@ -22,6 +22,10 @@ services:
target: /var/lib/grafana/dashboards/docker-swarm-stacks.json target: /var/lib/grafana/dashboards/docker-swarm-stacks.json
- source: grafana_traefik_dashboard_json - source: grafana_traefik_dashboard_json
target: /var/lib/grafana/dashboards/traefik.json target: /var/lib/grafana/dashboards/traefik.json
- source: grafana_backup_dashboard_json
target: /var/lib/grafana/dashboards/backup.json
- source: grafana_alerts_json
target: /var/lib/grafana/alerts/alerts.json
networks: networks:
- proxy - proxy
- internal - internal
@ -62,20 +66,27 @@ configs:
name: ${STACK_NAME}_grafana_custom_ini_${GRAFANA_CUSTOM_INI_VERSION} name: ${STACK_NAME}_grafana_custom_ini_${GRAFANA_CUSTOM_INI_VERSION}
file: grafana_custom.ini file: grafana_custom.ini
grafana_datasources_yml: grafana_datasources_yml:
name: ${STACK_NAME}_grafana_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION} name: ${STACK_NAME}_g_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION}
file: grafana-datasources.yml file: grafana-datasources.yml
grafana_dashboards_yml: grafana_dashboards_yml:
name: ${STACK_NAME}_grafana_dashboards_yml_${GRAFANA_DASHBOARDS_YML_VERSION} name: ${STACK_NAME}_g_dashboards_yml_${GRAFANA_DASHBOARDS_YML_VERSION}
file: grafana-dashboards.yml file: grafana-dashboards.yml
grafana_swarm_dashboard_json: grafana_swarm_dashboard_json:
name: ${STACK_NAME}_grafana_swarm_dashboard_json_${GRAFANA_SWARM_DASHBOARD_JSON_VERSION} name: ${STACK_NAME}_g_swarm_dashboard_json_${GRAFANA_SWARM_DASHBOARD_JSON_VERSION}
file: grafana-swarm-dashboard.json file: grafana-swarm-dashboard.json
grafana_stacks_dashboard_json: grafana_stacks_dashboard_json:
name: ${STACK_NAME}_grafana_stacks_dashboard_json_${GRAFANA_STACKS_DASHBOARD_JSON_VERSION} name: ${STACK_NAME}_g_stacks_dashboard_json_${GRAFANA_STACKS_DASHBOARD_JSON_VERSION}
file: grafana-stacks-dashboard.json file: grafana-stacks-dashboard.json
grafana_traefik_dashboard_json: grafana_traefik_dashboard_json:
name: ${STACK_NAME}_grafana_traefik_dashboard_json_${GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION} name: ${STACK_NAME}_g_traefik_dashboard_json_${GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION}
file: grafana-traefik-dashboard.json file: grafana-traefik-dashboard.json
grafana_backup_dashboard_json:
name: ${STACK_NAME}_g_backup_dashboard_json_${GRAFANA_BACKUP_DASHBOARD_JSON_VERSION}
file: grafana-backup-dashboard.json
grafana_alerts_json:
template_driver: golang
name: ${STACK_NAME}_g_alerts_json_${GRAFANA_ALERTS_JSON_VERSION}
file: grafana-alerts.json.tmpl
volumes: volumes:
grafana-data: grafana-data:

View File

@ -2,7 +2,7 @@ version: '3.8'
services: services:
loki: loki:
image: grafana/loki:2.8.2 image: grafana/loki:2.9.11
command: -config.file=/etc/loki/local-config.yaml command: -config.file=/etc/loki/local-config.yaml
networks: networks:
- proxy - proxy

View File

@ -2,9 +2,9 @@ version: '3.8'
services: services:
prometheus: prometheus:
image: prom/prometheus:v2.44.0 image: prom/prometheus:v2.55.1
secrets: secrets:
- basic_auth_admin_password - basic_auth
volumes: volumes:
- prometheus-data:/prometheus:rw - prometheus-data:/prometheus:rw
configs: configs:

View File

@ -2,7 +2,7 @@ version: "3.8"
services: services:
promtail: promtail:
image: grafana/promtail:2.8.2 image: grafana/promtail:2.9.11
volumes: volumes:
- /var/log:/var/log:ro - /var/log:/var/log:ro
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
@ -13,7 +13,7 @@ services:
networks: networks:
- internal - internal
secrets: secrets:
- basic_auth_admin_password - basic_auth
environment: environment:
- DOMAIN - DOMAIN
- LOKI_PUSH_URL - LOKI_PUSH_URL
@ -25,6 +25,6 @@ configs:
template_driver: golang template_driver: golang
secrets: secrets:
basic_auth_admin_password: basic_auth:
external: true external: true
name: ${STACK_NAME}_basic_auth_admin_password_${SECRET_BASIC_AUTH_ADMIN_PASSWORD_VERSION} name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}

25
compose.pushgateway.yml Normal file
View File

@ -0,0 +1,25 @@
# Prometheus Pushgateway service, layered onto the base stack via COMPOSE_FILE.
# It is reachable only through Traefik at https://pushgateway.${DOMAIN}, where
# the shared `basicauth@file` middleware protects it.
version: '3.8'

services:
  pushgateway:
    image: prom/pushgateway:v1.10.0
    command:
      - '--web.listen-address=:9191'
      - '--push.disable-consistency-check'
      # NOTE(review): no --persistence.file is set, so pushed metrics appear to
      # be kept in memory only and this interval has no effect; confirm whether
      # a persistence file on a volume is wanted.
      - '--persistence.interval=5m'
    # Port 9191 is intentionally NOT published. A `ports:` mapping would expose
    # the pushgateway on the swarm ingress of every node without authentication,
    # bypassing the Traefik basic-auth middleware configured below.
    networks:
      - internal
      - proxy
    deploy:
      restart_policy:
        condition: on-failure
      labels:
        - "traefik.enable=true"
        - "traefik.http.services.${STACK_NAME}-pushgateway.loadbalancer.server.port=9191"
        - "traefik.http.routers.${STACK_NAME}-pushgateway.rule=Host(`pushgateway.${DOMAIN}`)"
        - "traefik.http.routers.${STACK_NAME}-pushgateway.entrypoints=web-secure"
        - "traefik.http.routers.${STACK_NAME}-pushgateway.tls=true"
        - "traefik.http.routers.${STACK_NAME}-pushgateway.tls.certresolver=${LETS_ENCRYPT_ENV}"
        - "traefik.http.routers.${STACK_NAME}-pushgateway.middlewares=basicauth@file"

View File

@ -3,7 +3,7 @@ version: "3.8"
services: services:
app: app:
image: prom/node-exporter:v1.5.0 image: prom/node-exporter:v1.8.1
user: root user: root
environment: environment:
- NODE_ID={{.Node.ID}} - NODE_ID={{.Node.ID}}
@ -20,7 +20,7 @@ services:
- "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)" - "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)"
- "--no-collector.ipvs" - "--no-collector.ipvs"
configs: configs:
- source: node_exporter_entrypoint_sh - source: entrypoint
target: /entrypoint.sh target: /entrypoint.sh
networks: networks:
- internal - internal
@ -30,6 +30,7 @@ services:
restart_policy: restart_policy:
condition: on-failure condition: on-failure
labels: labels:
- "backupbot.backup=${ENABLE_BACKUPS:-true}"
- "traefik.enable=true" - "traefik.enable=true"
- "traefik.http.services.${STACK_NAME}-node.loadbalancer.server.port=9100" - "traefik.http.services.${STACK_NAME}-node.loadbalancer.server.port=9100"
- "traefik.http.routers.${STACK_NAME}-node.rule=Host(`node.${DOMAIN}`)" - "traefik.http.routers.${STACK_NAME}-node.rule=Host(`node.${DOMAIN}`)"
@ -37,14 +38,14 @@ services:
- "traefik.http.routers.${STACK_NAME}-node.tls=true" - "traefik.http.routers.${STACK_NAME}-node.tls=true"
- "traefik.http.routers.${STACK_NAME}-node.tls.certresolver=${LETS_ENCRYPT_ENV}" - "traefik.http.routers.${STACK_NAME}-node.tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.routers.${STACK_NAME}-node.middlewares=basicauth@file" - "traefik.http.routers.${STACK_NAME}-node.middlewares=basicauth@file"
- "coop-cloud.${STACK_NAME}.version=0.2.0+v1.5.0" - "coop-cloud.${STACK_NAME}.version=1.3.0+v1.8.1"
- "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-120}" - "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-120}"
cadvisor: cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.47.1 image: gcr.io/cadvisor/cadvisor:v0.49.2
command: command:
- "-logtostderr" - "-logtostderr"
- "--enable_metrics=cpu,cpuLoad,disk,memory,network" - "--enable_metrics=cpu,cpuLoad,disk,diskIO,process,memory,network"
# all possible metrics: advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp. # all possible metrics: advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp.
- "--housekeeping_interval=120s" - "--housekeeping_interval=120s"
- "--docker_only=true" - "--docker_only=true"
@ -76,8 +77,8 @@ services:
start_period: 30s start_period: 30s
configs: configs:
node_exporter_entrypoint_sh: entrypoint:
name: ${STACK_NAME}_node_exporter_entrypoint_${NODE_EXPORTER_ENTRYPOINT_VERSION} name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
file: node-exporter-entrypoint.sh file: node-exporter-entrypoint.sh

315
grafana-alerts.json.tmpl Normal file
View File

@ -0,0 +1,315 @@
{
"apiVersion": 1,
"groups": [
{
"orgId": 1,
"name": "backupbot",
"folder": "node",
"interval": "1m",
"rules": [
{{ if eq (env "ALERT_BACKUP_FAILED_ENABLED") "true" }}
{
"uid": "de8e5xxup7t34a",
"title": "Backup Failed",
"condition": "C",
"data": [
{
"refId": "A",
"relativeTimeRange": { "from": 600, "to": 0 },
"datasourceUid": "PBFA97CFB590B2093",
"model": {
"disableTextWrap": false,
"editorMode": "builder",
"expr": "backup",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": true,
"intervalMs": 1000,
"legendFormat": "__auto",
"maxDataPoints": 43200,
"range": false,
"refId": "A",
"useBackend": false
}
},
{
"refId": "C",
"relativeTimeRange": { "from": 600, "to": 0 },
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": { "params": [0], "type": "lt" },
"operator": { "type": "and" },
"query": { "params": ["C"] },
"reducer": { "params": [], "type": "last" },
"type": "query"
}
],
"datasource": { "type": "__expr__", "uid": "__expr__" },
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"noDataState": "NoData",
"execErrState": "Error",
"for": "1m",
"isPaused": false
},
{{ end }}
{{ if eq (env "ALERT_BACKUP_MISSING_ENABLED") "true" }}
{
"uid": "ce8e65uddcwe8d",
"title": "Backup Missing",
"condition": "B",
"data": [
{
"refId": "A",
"relativeTimeRange": { "from": 600, "to": 0 },
"datasourceUid": "PBFA97CFB590B2093",
"model": {
"disableTextWrap": false,
"editorMode": "builder",
"expr": "rate(backup[24h])",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": true,
"intervalMs": 1000,
"legendFormat": "__auto",
"maxDataPoints": 43200,
"range": false,
"refId": "A",
"useBackend": false
}
},
{
"refId": "B",
"relativeTimeRange": { "from": 600, "to": 0 },
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": { "params": [0, 0], "type": "within_range" },
"operator": { "type": "and" },
"query": { "params": ["C"] },
"reducer": { "params": [], "type": "last" },
"type": "query"
}
],
"datasource": { "type": "__expr__", "uid": "__expr__" },
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "B",
"type": "threshold"
}
}
],
"noDataState": "NoData",
"execErrState": "Error",
"for": "5m",
"isPaused": false
},
{{ end }}
{{ if eq (env "ALERT_BACKUP_NOT_SUCCESSFULL_ENABLED") "true" }}
{
"uid": "de8e6bc92a8lcc",
"title": "Backup Not Successfull",
"condition": "B",
"data": [
{
"refId": "A",
"relativeTimeRange": {
"from": 60,
"to": 0
},
"datasourceUid": "PBFA97CFB590B2093",
"model": {
"disableTextWrap": false,
"editorMode": "builder",
"expr": "backup",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": true,
"intervalMs": 1000,
"legendFormat": "__auto",
"maxDataPoints": 43200,
"range": false,
"refId": "A",
"useBackend": false
}
},
{
"refId": "B",
"relativeTimeRange": {
"from": 60,
"to": 0
},
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "B",
"type": "threshold"
}
}
],
"noDataState": "NoData",
"execErrState": "Error",
"for": "20m",
"annotations": {
"summary": "Backup did not finish within 20 minutes"
},
"labels": {},
"isPaused": false
}
{{ end }}
]
},
{
"orgId": 1,
"name": "node",
"folder": "node",
"interval": "5m",
"rules": [
{{ if eq (env "ALERT_NODE_DISK_SPACE_ENABLED") "true" }}
{
"uid": "bds8bhxu97pxca",
"title": "Node Disk Space",
"condition": "C",
"data": [
{
"refId": "A",
"relativeTimeRange": { "from": 600, "to": 0 },
"datasourceUid": "PBFA97CFB590B2093",
"model": {
"editorMode": "code",
"expr": "(node_filesystem_free_bytes{fstype=\"ext4\",mountpoint=~\"(/$)|(/media.*)\"} / node_filesystem_size_bytes{fstype=\"ext4\",mountpoint=~\"(/$)|(/media.*)\"}) * 100",
"instant": true,
"intervalMs": 1000,
"legendFormat": "__auto",
"maxDataPoints": 43200,
"range": false,
"refId": "A"
}
},
{
"refId": "C",
"relativeTimeRange": { "from": 600, "to": 0 },
"datasourceUid": "__expr__",
"model": {
"conditions": [
{
"evaluator": { "params": [10], "type": "lt" },
"operator": { "type": "and" },
"query": { "params": ["C"] },
"reducer": { "params": [], "type": "last" },
"type": "query"
}
],
"datasource": { "type": "__expr__", "uid": "__expr__" },
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
}
}
],
"noDataState": "NoData",
"execErrState": "Error",
"for": "5m",
"annotations": {},
"labels": {},
"isPaused": false
},
{{ end }}
{{ if eq (env "ALERT_NODE_MEMORY_USAGE_ENABLED") "true" }}
{{/* Node Memory Usage: fires when USED memory stays above 90% of total for 5m.
     Query A yields the used-memory percentage (100% minus the available share);
     expression C applies the "gt 90" threshold to A. */}}
{
  "uid": "ads8cswmly96oa",
  "title": "Node Memory Usage",
  "condition": "C",
  "data": [
    {
      "refId": "A",
      "relativeTimeRange": { "from": 600, "to": 0 },
      "datasourceUid": "PBFA97CFB590B2093",
      "model": {
        "editorMode": "code",
        "expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
        "instant": true,
        "intervalMs": 1000,
        "legendFormat": "__auto",
        "maxDataPoints": 43200,
        "range": false,
        "refId": "A"
      }
    },
    {
      "refId": "C",
      "relativeTimeRange": { "from": 600, "to": 0 },
      "datasourceUid": "__expr__",
      "model": {
        "conditions": [
          {
            "evaluator": { "params": [90], "type": "gt" },
            "operator": { "type": "and" },
            "query": { "params": ["C"] },
            "reducer": { "params": [], "type": "last" },
            "type": "query"
          }
        ],
        "datasource": { "type": "__expr__", "uid": "__expr__" },
        "expression": "A",
        "intervalMs": 1000,
        "maxDataPoints": 43200,
        "refId": "C",
        "type": "threshold"
      }
    }
  ],
  "noDataState": "NoData",
  "execErrState": "Error",
  "for": "5m",
  "annotations": {},
  "labels": {},
  "isPaused": false
}
{{ end }}
]
}
]
}

View File

@ -0,0 +1,228 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 6,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 2,
"axisSoftMin": -2,
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [
{
"options": {
"0": {
"color": "dark-green",
"index": 0
},
"1": {
"color": "dark-yellow",
"index": 1,
"text": "Running"
},
"-1": {
"index": 2,
"text": "Fail"
}
},
"type": "value"
}
],
"max": 1,
"min": -1,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "string"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "backup",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Backup Status",
"type": "timeseries"
},
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"gridPos": {
"h": 11,
"w": 24,
"x": 0,
"y": 7
},
"id": 2,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "builder",
"expr": "{service_name=\"$ServiceName\"} |= ``",
"queryType": "range",
"refId": "A"
}
],
"title": "Backupbot Logs",
"type": "logs"
}
],
"refresh": "auto",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": "backup_marx_klasse-methode_it_app",
"value": "backup_marx_klasse-methode_it_app"
},
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"definition": "",
"hide": 0,
"includeAll": false,
"label": "Backupbot Service",
"multi": false,
"name": "ServiceName",
"options": [],
"query": {
"label": "service_name",
"refId": "LokiVariableQueryEditor-VariableQuery",
"stream": "",
"type": 1
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "backupbot-two",
"uid": "be8e2xeofw4xsa",
"version": 3,
"weekStart": ""
}

View File

@ -11,3 +11,13 @@ providers:
options: options:
path: /var/lib/grafana/dashboards path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true foldersFromFilesStructure: true
- name: 'default-alert-provider'
orgId: 1
folder: 'default-alerts'
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /var/lib/grafana/alerts
foldersFromFilesStructure: true

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,10 @@
"list": [ "list": [
{ {
"builtIn": 1, "builtIn": 1,
"datasource": "-- Grafana --", "datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true, "enable": true,
"hide": true, "hide": true,
"iconColor": "rgba(0, 211, 255, 1)", "iconColor": "rgba(0, 211, 255, 1)",
@ -18,17 +21,35 @@
} }
] ]
}, },
"description": "Simple dashboard for Traefik 2", "description": "Dashboards for Traefik Reverse Proxy",
"editable": true, "editable": true,
"fiscalYearStartMonth": 0, "fiscalYearStartMonth": 0,
"gnetId": 11462, "gnetId": 11462,
"graphTooltip": 0, "graphTooltip": 0,
"id": 3, "id": 3,
"iteration": 1684839198931, "links": [
"links": [], {
"asDropdown": false,
"icon": "external link",
"includeVars": false,
"keepTime": false,
"tags": [
"menu"
],
"targetBlank": false,
"title": "dashboards",
"tooltip": "",
"type": "dashboards",
"url": ""
}
],
"liveNow": false, "liveNow": false,
"panels": [ "panels": [
{ {
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"decimals": 0, "decimals": 0,
@ -87,7 +108,7 @@
}, },
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
@ -108,6 +129,10 @@
"type": "stat" "type": "stat"
}, },
{ {
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"decimals": 0, "decimals": 0,
@ -167,7 +192,7 @@
}, },
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
@ -185,6 +210,10 @@
"type": "stat" "type": "stat"
}, },
{ {
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"decimals": 0, "decimals": 0,
@ -243,7 +272,7 @@
}, },
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
@ -265,6 +294,10 @@
"type": "stat" "type": "stat"
}, },
{ {
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "", "description": "",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -324,7 +357,7 @@
}, },
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
@ -352,6 +385,10 @@
"label": "Others", "label": "Others",
"threshold": 0 "threshold": 0
}, },
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "", "description": "",
"fontSize": "80%", "fontSize": "80%",
"format": "short", "format": "short",
@ -397,6 +434,10 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"decimals": 0, "decimals": 0,
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -433,7 +474,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -493,6 +534,10 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "", "description": "",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -530,7 +575,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -587,6 +632,10 @@
} }
}, },
{ {
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"mappings": [ "mappings": [
@ -652,7 +701,7 @@
}, },
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
@ -676,6 +725,10 @@
"bars": true, "bars": true,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"links": [] "links": []
@ -712,7 +765,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -726,12 +779,14 @@
"type": "prometheus", "type": "prometheus",
"uid": "PBFA97CFB590B2093" "uid": "PBFA97CFB590B2093"
}, },
"editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "sum(delta(traefik_service_requests_total{instance=\"$instance\"}[$interval]))", "expr": "sum(delta(traefik_service_requests_total{instance=\"${instance:raw}\"}[$interval]))",
"format": "time_series", "format": "time_series",
"interval": "", "interval": "",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Total requests", "legendFormat": "Total requests",
"range": true,
"refId": "A" "refId": "A"
} }
], ],
@ -769,6 +824,10 @@
} }
}, },
{ {
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"links": [], "links": [],
@ -812,7 +871,7 @@
}, },
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"targets": [ "targets": [
{ {
"datasource": { "datasource": {
@ -832,6 +891,10 @@
"type": "stat" "type": "stat"
}, },
{ {
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "", "description": "",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -839,6 +902,8 @@
"mode": "palette-classic" "mode": "palette-classic"
}, },
"custom": { "custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "", "axisLabel": "",
"axisPlacement": "auto", "axisPlacement": "auto",
"barAlignment": 0, "barAlignment": 0,
@ -899,8 +964,11 @@
"lastNotNull", "lastNotNull",
"max" "max"
], ],
"displayMode": "list", "displayMode": "table",
"placement": "right" "placement": "right",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
}, },
"tooltip": { "tooltip": {
"mode": "single", "mode": "single",
@ -914,12 +982,14 @@
"type": "prometheus", "type": "prometheus",
"uid": "PBFA97CFB590B2093" "uid": "PBFA97CFB590B2093"
}, },
"editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "rate(traefik_service_request_duration_seconds_sum{ instance=\"$instance\" }[5m])", "expr": "sum(rate(traefik_service_request_duration_seconds_sum{ instance=\"$instance\" }[5m])) by(service)",
"format": "time_series", "format": "time_series",
"interval": "", "interval": "",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{ service }}", "legendFormat": "{{ service }}",
"range": true,
"refId": "A" "refId": "A"
} }
], ],
@ -931,6 +1001,10 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"links": [] "links": []
@ -964,7 +1038,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1023,6 +1097,10 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"decimals": 0, "decimals": 0,
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -1061,7 +1139,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1121,6 +1199,10 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"links": [] "links": []
@ -1158,7 +1240,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1217,6 +1299,10 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"links": [] "links": []
@ -1254,7 +1340,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1313,6 +1399,10 @@
"bars": false, "bars": false,
"dashLength": 10, "dashLength": 10,
"dashes": false, "dashes": false,
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"decimals": 0, "decimals": 0,
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -1349,7 +1439,7 @@
"alertThreshold": true "alertThreshold": true
}, },
"percentage": false, "percentage": false,
"pluginVersion": "8.4.4", "pluginVersion": "10.0.2",
"pointradius": 5, "pointradius": 5,
"points": false, "points": false,
"renderer": "flot", "renderer": "flot",
@ -1413,13 +1503,14 @@
} }
], ],
"refresh": "", "refresh": "",
"schemaVersion": 35, "schemaVersion": 38,
"style": "dark", "style": "dark",
"tags": [ "tags": [
"traefik", "traefik",
"load-balancer", "load-balancer",
"docker", "docker",
"prometheus" "prometheus",
"menu"
], ],
"templating": { "templating": {
"list": [ "list": [
@ -1455,7 +1546,7 @@
}, },
{ {
"current": { "current": {
"selected": false, "selected": true,
"text": [ "text": [
"All" "All"
], ],
@ -1492,7 +1583,7 @@
"auto_count": 30, "auto_count": 30,
"auto_min": "10s", "auto_min": "10s",
"current": { "current": {
"selected": true, "selected": false,
"text": "5m", "text": "5m",
"value": "5m" "value": "5m"
}, },
@ -1562,17 +1653,12 @@
} }
], ],
"query": "1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d", "query": "1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"queryValue": "5m", "queryValue": "",
"refresh": 2, "refresh": 2,
"skipUrlSync": false, "skipUrlSync": false,
"type": "interval" "type": "interval"
}, },
{ {
"current": {
"selected": true,
"text": "demo.local-it.cloud:8082",
"value": "demo.local-it.cloud:8082"
},
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "PBFA97CFB590B2093" "uid": "PBFA97CFB590B2093"
@ -1586,12 +1672,12 @@
"options": [], "options": [],
"query": { "query": {
"query": "label_values(instance)", "query": "label_values(instance)",
"refId": "StandardVariableQuery" "refId": "PrometheusVariableQueryEditor-VariableQuery"
}, },
"refresh": 1, "refresh": 1,
"regex": ".*8082.*", "regex": ".*8082",
"skipUrlSync": false, "skipUrlSync": false,
"sort": 0, "sort": 1,
"tagValuesQuery": "", "tagValuesQuery": "",
"tagsQuery": "", "tagsQuery": "",
"type": "query", "type": "query",
@ -1600,7 +1686,7 @@
] ]
}, },
"time": { "time": {
"from": "now-15m", "from": "now-2d",
"to": "now" "to": "now"
}, },
"timepicker": { "timepicker": {
@ -1629,8 +1715,8 @@
] ]
}, },
"timezone": "", "timezone": "",
"title": "Traefik 2", "title": "Traefik Reverse Proxy",
"uid": "3ipsWfViz", "uid": "3ipsWfViz",
"version": 5, "version": 9,
"weekStart": "" "weekStart": ""
} }

View File

@ -10,6 +10,7 @@ auto_assign_org_role = Admin
{{ if eq (env "OIDC_ENABLED") "1" }} {{ if eq (env "OIDC_ENABLED") "1" }}
[auth] [auth]
disable_login_form = true disable_login_form = true
oauth_allow_insecure_email_lookup=true # https://github.com/grafana/grafana/issues/70203
[auth.generic_oauth] [auth.generic_oauth]
enabled = true enabled = true
@ -31,3 +32,7 @@ enabled = false
[plugins] [plugins]
enable_alpha = true enable_alpha = true
[database]
type = sqlite3
wal = true

View File

@ -17,4 +17,4 @@ scrape_configs:
- /prometheus/scrape_configs/*.yml - /prometheus/scrape_configs/*.yml
basic_auth: basic_auth:
username: admin username: admin
password: {{ secret "basic_auth_admin_password" }} password: {{ secret "basic_auth" }}

View File

@ -9,7 +9,7 @@ clients:
- url: {{ env "LOKI_PUSH_URL" }} - url: {{ env "LOKI_PUSH_URL" }}
basic_auth: basic_auth:
username: admin username: admin
password: {{ secret "basic_auth_admin_password" }} password: {{ secret "basic_auth" }}
external_labels: external_labels:
hostname: {{ env "DOMAIN" }} hostname: {{ env "DOMAIN" }}

4
release/1.0.0+v1.7.0 Normal file
View File

@ -0,0 +1,4 @@
Breaking change: the secret `basic_auth_admin_password` was renamed to `basic_auth`. Insert the secret under its new name before upgrading, and change BASIC_AUTH_ADMIN_PASSWORD to BASIC_AUTH in the app's env file:
abra app secret insert monitoring.example.com basic_auth v1 $(abra app run monitoring.example.com promtail cat /var/run/secrets/basic_auth_admin_password)
sed -i ~/.abra/servers/example.com/monitoring.example.com.env -e 's/BASIC_AUTH_ADMIN_PASSWORD/BASIC_AUTH/'