forked from coop-cloud/monitoring-ng
Note that I did not copy the backupbot alert, since that one is due for a rework soon. Reviewed-on: coop-cloud/monitoring-ng#16. Co-authored-by: p4u1 <p4u1_f4u1@riseup.net>. Co-committed-by: p4u1 <p4u1_f4u1@riseup.net>.
132 lines
3.6 KiB
Cheetah
132 lines
3.6 KiB
Cheetah
apiVersion: 1

# Alert rule UIDs that should be deleted. An entry is rendered only when the
# matching ALERT_*_ENABLED environment variable is anything other than the
# string "true", i.e. exactly when the rule itself is not rendered under
# `groups`.
deleteRules:
{{ if ne (env "ALERT_NODE_DISK_SPACE_ENABLED") "true" }}
  # UID of the "Node Disk Space" rule.
  - orgId: 1
    uid: bds8bhxu97pxca
{{ end }}
{{ if ne (env "ALERT_NODE_MEMORY_USAGE_ENABLED") "true" }}
  # UID of the "Node Memory Usage" rule.
  - orgId: 1
    uid: ads8cswmly96oa
{{ end }}
|
|
|
|
# Rule groups. There is a single group, "node", stored in the "node" folder
# with an interval of 5m; its individual rules follow and are each wrapped in
# an environment-variable guard.
groups:
  - orgId: 1
    name: node
    folder: node
    interval: 5m
    rules:
|
|
{{ if eq (env "ALERT_NODE_DISK_SPACE_ENABLED") "true" }}
      # Node Disk Space: condition C is true when query A (free space as a
      # percentage of filesystem size, ext4 only) is below 10; the rule uses a
      # pending period of 5m.
      - uid: bds8bhxu97pxca
        title: Node Disk Space
        condition: C
        data:
          # A: instant query, percentage of free bytes per ext4 filesystem.
          - refId: A
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: PBFA97CFB590B2093
            model:
              editorMode: code
              expr: (node_filesystem_free_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"}) * 100
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: A
          # C: threshold expression over A (evaluator: lt 10).
          - refId: C
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 10
                    type: lt
                  operator:
                    type: and
                  query:
                    params:
                      - C
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 5m
        annotations:
          description: ""
          runbook_url: ""
          # The backtick raw strings make this file's own template pass emit
          # the inner template text verbatim, so it is expanded later when the
          # alert is rendered. The value is formatted to two decimals, matching
          # the Node Memory Usage summary.
          summary: Less than 10% disk space left on {{`{{ $labels.instance }}`}} ({{`{{ printf "%.2f" (index $values "A").Value }}`}}% left)
        # No custom labels. An explicit empty mapping replaces the previous
        # entry whose label name and value were both empty strings.
        labels: {}
        isPaused: false
{{ end }}
|
|
{{ if eq (env "ALERT_NODE_MEMORY_USAGE_ENABLED") "true" }}
      # Node Memory Usage: condition C is true when query A (used memory as a
      # percentage of total memory) is above 85; the rule uses a pending period
      # of 5m.
      - uid: ads8cswmly96oa
        title: Node Memory Usage
        condition: C
        data:
          # A: instant query, percentage of memory in use.
          # MemAvailable / MemTotal is the fraction that is still available,
          # so it is subtracted from 1 to obtain usage. Without the inversion
          # the "gt 85" threshold would fire when more than 85% of memory is
          # free, which contradicts the rule title and summary.
          - refId: A
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: PBFA97CFB590B2093
            model:
              editorMode: code
              expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
              instant: true
              intervalMs: 1000
              legendFormat: __auto
              maxDataPoints: 43200
              range: false
              refId: A
          # C: threshold expression over A (evaluator: gt 85).
          - refId: C
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 85
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - C
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 5m
        annotations:
          # The backtick raw strings make this file's own template pass emit
          # the inner template text verbatim, so it is expanded later when the
          # alert is rendered.
          summary: Memory usage is above 85% on {{`{{ $labels.instance }}`}} ({{`{{ printf "%.2f" (index $values "A").Value }}`}}% usage)
        isPaused: false
{{ end }}
|