10 Commits

Author SHA1 Message Date
961a17f1ad chore: publish 0.3.0+v1.6.0 release 2023-07-11 14:59:37 +02:00
d549c33fe5 chore: publish 0.2.0+v1.5.0 release 2023-06-26 11:44:10 +02:00
9a791ef794 feat: add hostname label to promtail logs 2023-06-26 11:43:26 +02:00
7a0292f902 fix: grafana smtp stuff 2023-06-20 16:04:40 +02:00
705934abc1 fix: increase loki limits 2023-06-20 16:03:57 +02:00
b091bf1951 add process to cadvisor metrics 2023-06-14 14:02:35 +02:00
de1819521b remove prometheus alertmanager
Grafana has basic alerting mechanisms included, which might be enough for most
use cases?
2023-06-01 15:10:53 +02:00
8c82943289 fix: config version 2023-05-24 12:28:35 +02:00
39071fa512 fix: grafana envs 2023-05-24 12:26:49 +02:00
3398e1d312 chore: remove junk 2023-05-24 12:05:50 +02:00
11 changed files with 385 additions and 71 deletions

View File

@ -4,11 +4,6 @@ COMPOSE_FILE=compose.yml
DOMAIN=monitoring.example.com
TIMEOUT=120
# Monitoring Client
#
## Node Exporter, Cadvisor (Gathering Metrics)
# COMPOSE_FILE="$COMPOSE_FILE:compose.metrics.yml"
#
## Enable this secret for Promtail / Prometheus
# SECRET_BASIC_AUTH_ADMIN_PASSWORD_VERSION=v1
#
@ -18,12 +13,8 @@ TIMEOUT=120
# Monitoring Server
#
## Prometheus, Alertmanager
## Prometheus
# COMPOSE_FILE="$COMPOSE_FILE:compose.prometheus.yml"
# ALERTMANAGER_SMTP_FROM=noreply@autonomic.zone
# ALERTMANAGER_SMTP_HOST=mail.gandi.net:587
# ALERTMANAGER_SMTP_TO=kaboom@autonomic.zone
# SECRET_ALERTMANAGER_SMTP_PASSWORD_VERSION=v1
# PROMETHEUS_RETENTION_TIME=1y
## Loki
# Loki Server
@ -62,6 +53,9 @@ TIMEOUT=120
#
## grafana SMTP configuration (optional)
# GF_SMTP_HOST=changeme
# GF_SMTP_ENABLED=1
# GF_SMTP_USER=changeme
# GF_SMTP_ENABLED=true
# GF_SMTP_FROM_ADDRESS=grafana@example.com
# GF_SMTP_SKIP_VERIFY=1
# GF_SMTP_SKIP_VERIFY=false
# SECRET_GRAFANA_SMTP_PASSWORD_VERSION=v1
#

View File

@ -4,11 +4,10 @@ export GRAFANA_DASHBOARDS_YML_VERSION=v1
export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1
export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1
export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1
export GRAFANA_CUSTOM_INI_VERSION=v1
export PROMTAIL_YML_VERSION=v1
export LOKI_YML_VERSION=v1
export GRAFANA_CUSTOM_INI_VERSION=v3
export PROMTAIL_YML_VERSION=v2
export LOKI_YML_VERSION=v2
export PROMETHEUS_YML_VERSION=v1
export ALERTMANAGER_CONFIG_VERSION=v1
# creates a default prometheus scrape config for a given node
add_node(){

View File

@ -1,13 +0,0 @@
global:
smtp_from: {{ env "ALERTMANAGER_SMTP_FROM" }}
smtp_smarthost: {{ env "ALERTMANAGER_SMTP_HOST" }}
smtp_auth_username: {{ env "ALERTMANAGER_SMTP_FROM" }}
smtp_auth_password: {{ secret "alertmanager_smtp_password" }}
route:
receiver: "kaboom-mailer"
receivers:
- name: "kaboom-mailer"
email_configs:
- to: {{ env "ALERTMANAGER_SMTP_TO" }}

View File

@ -2,12 +2,13 @@ version: '3.8'
services:
grafana:
image: grafana/grafana:9.5.2
image: grafana/grafana:10.0.2
volumes:
- grafana-data:/var/lib/grafana:rw
secrets:
- grafana_admin_password
- grafana_oidc_client_secret
- grafana_smtp_password
configs:
- source: grafana_custom_ini
target: /etc/grafana/grafana.ini
@ -25,14 +26,16 @@ services:
- proxy
- internal
environment:
- GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
- GF_SERVER_ROOT_URL
- GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password
- GF_SMTP_HOST
- GF_SMTP_USER
- GF_SMTP_PASSWORD__FILE=/run/secrets/grafana_smtp_password
- GF_SMTP_ENABLED
- GF_SMTP_FROM_ADDRESS
- GF_SMTP_SKIP_VERIFY
- GF_SECURITY_ALLOW_EMBEDDING
- GF_INSTALL_PLUGINS=grafana-piechart-panel
- GF_INSTALL_PLUGINS
- OIDC_API_URL
- OIDC_AUTH_URL
- OIDC_CLIENT_ID
@ -85,3 +88,6 @@ secrets:
grafana_oidc_client_secret:
external: true
name: ${STACK_NAME}_grafana_oidc_client_secret_${SECRET_GRAFANA_OIDC_CLIENT_SECRET_VERSION}
grafana_smtp_password:
external: true
name: ${STACK_NAME}_grafana_smtp_password_${SECRET_GRAFANA_SMTP_PASSWORD_VERSION}

View File

@ -2,7 +2,7 @@ version: '3.8'
services:
prometheus:
image: prom/prometheus:v2.44.0
image: prom/prometheus:v2.45.0
secrets:
- basic_auth_admin_password
volumes:
@ -31,42 +31,11 @@ services:
- "traefik.http.routers.${STACK_NAME}-prometheus.tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.routers.${STACK_NAME}-prometheus.middlewares=basicauth@file"
alertmanager:
image: prom/alertmanager:v0.25.0
volumes:
- alertmanager-data:/etc/alertmanager
command:
- "--config.file=/etc/alertmanager/config.yml"
- "--storage.path=/alertmanager"
networks:
- internal
secrets:
- alertmanager_smtp_password
configs:
- source: alertmanager_config
target: /etc/alertmanager/config.yml
environment:
- ALERTMANAGER_SMTP_FROM
- ALERTMANAGER_SMTP_HOST
- ALERTMANAGER_SMTP_TO
configs:
prometheus_yml:
template_driver: golang
name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION}
file: prometheus.yml.tmpl
alertmanager_config:
template_driver: golang
name: ${STACK_NAME}_alertmanager_config_${ALERTMANAGER_CONFIG_VERSION}
file: ./alertmanager.yml.tmpl
volumes:
prometheus-data:
alertmanager-data:
secrets:
alertmanager_smtp_password:
external: true
name: ${STACK_NAME}_alertmanager_smtp_password_${SECRET_ALERTMANAGER_SMTP_PASSWORD_VERSION}
prometheus-data:

View File

@ -15,6 +15,7 @@ services:
secrets:
- basic_auth_admin_password
environment:
- DOMAIN
- LOKI_PUSH_URL
configs:

View File

@ -3,7 +3,7 @@ version: "3.8"
services:
app:
image: prom/node-exporter:v1.5.0
image: prom/node-exporter:v1.6.0
user: root
environment:
- NODE_ID={{.Node.ID}}
@ -37,18 +37,17 @@ services:
- "traefik.http.routers.${STACK_NAME}-node.tls=true"
- "traefik.http.routers.${STACK_NAME}-node.tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.routers.${STACK_NAME}-node.middlewares=basicauth@file"
- "coop-cloud.${STACK_NAME}.version=0.1.0+v1.5.0"
- "coop-cloud.${STACK_NAME}.version=0.3.0+v1.6.0"
- "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-120}"
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.47.1
image: gcr.io/cadvisor/cadvisor:v0.47.2
command:
- "-logtostderr"
- "--enable_metrics=cpu,cpuLoad,disk,memory,network"
# all possible metrics: advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp.
- "--housekeeping_interval=120s"
- "--docker_only=true"
volumes:
- /var/lib/docker/:/var/lib/docker:ro
- /dev/disk/:/dev/disk:ro

344
grafana-logs-dashboard.json Normal file
View File

@ -0,0 +1,344 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 4,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": []
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 10,
"x": 0,
"y": 0
},
"id": 5,
"options": {
"displayLabels": [
"value"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "asc"
}
},
"pluginVersion": "9.5.2",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "code",
"expr": "sum by (event)(\n count_over_time(\n {service_name=~\"sso_.+_app|login_.+_app\"} \n | json \n | level=~\"warning|error\" \n | line_format \"{{.level}} {{ .stack_namespace }} {{.remote}} {{.event}} {{ .error}}\" \n [$__interval])\n)",
"hide": false,
"legendFormat": "{{ event }}",
"queryType": "range",
"refId": "B"
}
],
"title": "Authentik Warnings & Errors",
"type": "piechart"
},
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"gridPos": {
"h": 11,
"w": 14,
"x": 10,
"y": 0
},
"id": 6,
"options": {
"dedupStrategy": "numbers",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"pluginVersion": "9.5.2",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "code",
"expr": "{service_name=~\"sso_.+_app|login_.+_app\"} \n | json \n | level=~\"warning|error\" \n | line_format \"{{.level}} {{ .stack_namespace }} {{.remote}} {{.event}} {{ .error}}\" ",
"hide": false,
"legendFormat": "{{ event }}",
"queryType": "range",
"refId": "B"
}
],
"title": "Authentik Warnings & Errors",
"type": "logs"
},
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 11
},
"id": 4,
"options": {
"dedupStrategy": "numbers",
"enableLogDetails": true,
"prettifyLogMessage": true,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": true
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "code",
"expr": "{stack_namespace=~\".+\"} |= `[pool www] server reached pm.max_children setting (5), consider raising it`",
"hide": false,
"queryType": "range",
"refId": "B"
}
],
"title": "server reached pm.max_children setting",
"type": "logs"
},
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 17
},
"id": 3,
"options": {
"dedupStrategy": "signature",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": true,
"showLabels": false,
"showTime": false,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "builder",
"expr": "{stack_namespace=~\"cloud_.*\"} |= `error`",
"key": "Q-309efd6a-66a9-4125-8bb2-ee64ea453563-0",
"queryType": "range",
"refId": "A"
}
],
"title": "Nextcloud Errors",
"type": "logs"
},
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"description": "",
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 25
},
"id": 1,
"options": {
"dedupStrategy": "numbers",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": true,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "code",
"expr": "{service_name=~\"$service\"} |= ``",
"key": "Q-185a8078-9643-4008-a994-52a67249a6ec-0",
"legendFormat": "",
"queryType": "range",
"refId": "A"
}
],
"title": "Docker Logs",
"type": "logs"
},
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"description": "",
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 37
},
"id": 2,
"options": {
"dedupStrategy": "exact",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": true,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"editorMode": "code",
"expr": "{job = \"varlogs\"} |= ``",
"key": "Q-185a8078-9643-4008-a994-52a67249a6ec-0",
"legendFormat": "",
"queryType": "range",
"refId": "A"
}
],
"title": "Server Logs",
"type": "logs"
}
],
"refresh": "",
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": true,
"text": [
"All"
],
"value": [
"$__all"
]
},
"datasource": {
"type": "loki",
"uid": "P8E80F9AEF21F6940"
},
"definition": "",
"hide": 0,
"includeAll": true,
"label": "service",
"multi": true,
"name": "service",
"options": [],
"query": {
"label": "service_name",
"refId": "LokiVariableQueryEditor-VariableQuery",
"stream": "",
"type": 1
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Logs",
"uid": "c87e167e-9271-4b52-922e-d9c963bfd805",
"version": 15,
"weekStart": ""
}

View File

@ -10,6 +10,7 @@ auto_assign_org_role = Admin
{{ if eq (env "OIDC_ENABLED") "1" }}
[auth]
disable_login_form = true
# Background for the setting below: https://github.com/grafana/grafana/issues/70203
# The reference is kept on its own line so the URL can never be read as part of the value.
oauth_allow_insecure_email_lookup = true
[auth.generic_oauth]
enabled = true
@ -18,6 +19,7 @@ name = oauth
icon = signin
tls_skip_verify_insecure = false
allow_sign_up = true
auto_login = true
client_id = {{ env "OIDC_CLIENT_ID" }}
client_secret = {{ secret "grafana_oidc_client_secret" }}
auth_url = {{ env "OIDC_AUTH_URL" }}
@ -28,6 +30,9 @@ api_url = {{ env "OIDC_API_URL" }}
enabled = false
{{ end }}
[plugins]
enable_alpha = true
enable_alpha = true
[database]
type = sqlite3
wal = true

View File

@ -114,6 +114,14 @@ limits_config:
reject_old_samples: true
reject_old_samples_max_age: 168h
retention_period: {{ env "LOKI_RETENTION_PERIOD" }}
split_queries_by_interval: 24h
max_query_parallelism: 100
query_scheduler:
max_outstanding_requests_per_tenant: 4096
frontend:
max_outstanding_per_tenant: 4096
chunk_store_config:
max_look_back_period: 0s

View File

@ -10,6 +10,8 @@ clients:
basic_auth:
username: admin
password: {{ secret "basic_auth_admin_password" }}
external_labels:
hostname: {{ env "DOMAIN" }}
scrape_configs:
- job_name: system