forked from coop-cloud/monitoring-ng
Compare commits
2 Commits
shorter-na
...
restic-exp
| Author | SHA1 | Date | |
|---|---|---|---|
| 1b05bf992c | |||
| 757fcc08c5 |
@ -44,10 +44,10 @@ ENABLE_BACKUPS=true
|
||||
## Grafana
|
||||
#
|
||||
# COMPOSE_FILE="$COMPOSE_FILE:compose.grafana.yml"
|
||||
# GF_SERVER_ROOT_URL=https://monitoring.example.com
|
||||
## GRAFANA_DOMAIN needs to be set. Change it to use a different domain
|
||||
#GRAFANA_DOMAIN=$DOMAIN
|
||||
# GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
|
||||
# SECRET_GRAFANA_ADMIN_PASSWORD_VERSION=v1
|
||||
## Separate domain for Grafana
|
||||
#GRAFANA_DOMAIN=grafana.example.com
|
||||
#
|
||||
## Single-Sign-On with OIDC
|
||||
# COMPOSE_FILE="$COMPOSE_FILE:compose.grafana-oidc.yml"
|
||||
@ -85,3 +85,5 @@ ENABLE_BACKUPS=true
|
||||
#ALERT_BACKUP_NOT_SUCCESSFULL_ENABLED=true
|
||||
#ALERT_NODE_DISK_SPACE_ENABLED=true
|
||||
#ALERT_NODE_MEMORY_USAGE_ENABLED=true
|
||||
#ALERT_RESTIC_CHECK_FAILED_ENABLED=true
|
||||
#ALERT_RESTIC_OUTDATED_BACKUP_ENABLED=true
|
||||
1
abra.sh
1
abra.sh
@ -12,6 +12,7 @@ export PROMETHEUS_YML_VERSION=v2
|
||||
export MATRIX_ALERTMANAGER_CONFIG_VERSION=e
|
||||
export MATRIX_ALERTMANAGER_ENTRYPOINT_VERSION=a
|
||||
export GRAFANA_ALERTS_NODE_VERSION=v1c
|
||||
export GRAFANA_ALERTS_RESTIC_VERSION=v2
|
||||
|
||||
# creates a default prometheus scrape config for a given node
|
||||
add_node(){
|
||||
|
||||
191
alerts/restic.yml.tmpl
Normal file
191
alerts/restic.yml.tmpl
Normal file
@ -0,0 +1,191 @@
|
||||
apiVersion: 1
|
||||
|
||||
deleteRules:
|
||||
{{ if ne (env "ALERT_RESTIC_CHECK_FAILED_ENABLED") "true" }}
|
||||
- orgId: 1
|
||||
uid: ffglj6egxy8e8c
|
||||
{{ end }}
|
||||
{{ if ne (env "ALERT_RESTIC_OUTDATED_BACKUP_ENABLED") "true" }}
|
||||
- orgId: 1
|
||||
uid: ffgljntkp9ce8b
|
||||
{{ end }}
|
||||
|
||||
groups:
|
||||
- orgId: 1
|
||||
name: restic
|
||||
folder: restic
|
||||
interval: 5m
|
||||
rules:
|
||||
{{ if eq (env "ALERT_RESTIC_CHECK_FAILED_ENABLED") "true" }}
|
||||
- uid: ffglj6egxy8e8c
|
||||
title: Restic Check Failed
|
||||
condition: C
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: prometheus
|
||||
model:
|
||||
disableTextWrap: false
|
||||
editorMode: builder
|
||||
expr: restic_check_success
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: A
|
||||
useBackend: false
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params: []
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params:
|
||||
- B
|
||||
reducer:
|
||||
params: []
|
||||
type: last
|
||||
type: query
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
reducer: last
|
||||
refId: B
|
||||
type: reduce
|
||||
- refId: C
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 1
|
||||
- 0
|
||||
type: lt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params:
|
||||
- C
|
||||
reducer:
|
||||
params: []
|
||||
type: last
|
||||
type: query
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: B
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: C
|
||||
type: threshold
|
||||
noDataState: Alerting
|
||||
execErrState: Error
|
||||
for: 5m
|
||||
annotations: {}
|
||||
labels: {}
|
||||
isPaused: false
|
||||
{{ end }}
|
||||
{{ if eq (env "ALERT_RESTIC_OUTDATED_BACKUP_ENABLED") "true" }}
|
||||
- uid: ffgljntkp9ce8b
|
||||
title: Restic Outdated Backup
|
||||
condition: C
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: prometheus
|
||||
model:
|
||||
disableTextWrap: false
|
||||
editorMode: builder
|
||||
expr: time() - max by(instance) (restic_backup_timestamp)
|
||||
fullMetaSearch: false
|
||||
includeNullMetadata: true
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
legendFormat: __auto
|
||||
maxDataPoints: 43200
|
||||
range: false
|
||||
refId: A
|
||||
useBackend: false
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params: []
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params:
|
||||
- B
|
||||
reducer:
|
||||
params: []
|
||||
type: last
|
||||
type: query
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
reducer: last
|
||||
refId: B
|
||||
type: reduce
|
||||
- refId: C
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 93600
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params:
|
||||
- C
|
||||
reducer:
|
||||
params: []
|
||||
type: last
|
||||
type: query
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: B
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: C
|
||||
type: threshold
|
||||
noDataState: NoData
|
||||
execErrState: Error
|
||||
for: 5m
|
||||
annotations: {}
|
||||
labels: {}
|
||||
isPaused: false
|
||||
{{ end }}
|
||||
@ -24,6 +24,8 @@ services:
|
||||
target: /var/lib/grafana/dashboards/backup.json
|
||||
- source: gf_alerts_node
|
||||
target: /etc/grafana/provisioning/alerting/node.yml
|
||||
- source: gf_alerts_restic
|
||||
target: /etc/grafana/provisioning/alerting/restic.yml
|
||||
networks:
|
||||
- proxy
|
||||
- internal
|
||||
@ -34,12 +36,15 @@ services:
|
||||
- GF_INSTALL_PLUGINS
|
||||
- ALERT_NODE_DISK_SPACE_ENABLED
|
||||
- ALERT_NODE_MEMORY_USAGE_ENABLED
|
||||
- ALERT_RESTIC_CHECK_FAILED_ENABLED
|
||||
- ALERT_RESTIC_OUTDATED_BACKUP_ENABLED
|
||||
- DOMAIN
|
||||
deploy:
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.docker.network=proxy"
|
||||
- "traefik.http.services.${STACK_NAME}-grafana.loadbalancer.server.port=3000"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN:-$DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.tls=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
@ -77,6 +82,10 @@ configs:
|
||||
template_driver: golang
|
||||
name: ${STACK_NAME}_gf_alerts_node_${GRAFANA_ALERTS_NODE_VERSION}
|
||||
file: alerts/node.yml.tmpl
|
||||
gf_alerts_restic:
|
||||
template_driver: golang
|
||||
name: ${STACK_NAME}_gf_alerts_restiv_${GRAFANA_ALERTS_RESTIC_VERSION}
|
||||
file: alerts/restic.yml.tmpl
|
||||
|
||||
volumes:
|
||||
grafana-data:
|
||||
|
||||
@ -39,3 +39,8 @@ configs:
|
||||
|
||||
volumes:
|
||||
prometheus-data:
|
||||
|
||||
secrets:
|
||||
basic_auth:
|
||||
external: true
|
||||
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}
|
||||
@ -27,4 +27,4 @@ configs:
|
||||
secrets:
|
||||
basic_auth:
|
||||
external: true
|
||||
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}
|
||||
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}
|
||||
@ -11,13 +11,3 @@ providers:
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: true
|
||||
- name: 'default-alert-provider'
|
||||
orgId: 1
|
||||
folder: 'default-alerts'
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/alerts
|
||||
foldersFromFilesStructure: true
|
||||
|
||||
@ -3,6 +3,7 @@ apiVersion: 1
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://prometheus:9090
|
||||
@ -10,6 +11,7 @@ datasources:
|
||||
editable: false
|
||||
- name: Loki
|
||||
type: loki
|
||||
uid: loki
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://loki:3100
|
||||
|
||||
Reference in New Issue
Block a user