2 Commits

20 changed files with 289 additions and 188 deletions

View File

@ -39,20 +39,20 @@ ENABLE_BACKUPS=true
# LOKI_AWS_REGION=eu-west-1
# LOKI_ACCESS_KEY_ID=bush-debrief-approval-robust-scraggly-molecule
# LOKI_BUCKET_NAMES=loki
# SECRET_LOKI_AWS_KEY_VERSION=v1
# SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION=v1
#
## Grafana
#
# COMPOSE_FILE="$COMPOSE_FILE:compose.grafana.yml"
# GF_SERVER_ROOT_URL=https://monitoring.example.com
# SECRET_GF_ADMINPASSWD_VERSION=v1
## Seperate domain for Grafana
#GRAFANA_DOMAIN=grafana.example.com
## GRAFANA_DOMAIN needs to be set. change it for a different domain
#GRAFANA_DOMAIN=$DOMAIN
# GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
# SECRET_GRAFANA_ADMIN_PASSWORD_VERSION=v1
#
## Single-Sign-On with OIDC
# COMPOSE_FILE="$COMPOSE_FILE:compose.grafana-oidc.yml"
# OIDC_ENABLED=1
# SECRET_GF_OIDC_SECRET_VERSION=v1
# SECRET_GRAFANA_OIDC_CLIENT_SECRET_VERSION=v1
# OIDC_CLIENT_ID=grafana
# OIDC_AUTH_URL="https://authentik.example.com/application/o/authorize/"
# OIDC_API_URL="https://authentik.example.com/application/o/userinfo/"
@ -69,12 +69,12 @@ ENABLE_BACKUPS=true
# GF_SMTP_ENABLED=true
# GF_SMTP_FROM_ADDRESS=grafana@example.com
# GF_SMTP_SKIP_VERIFY=false
# SECRET_GF_SMTP_PASSWD_VERSION=v1
# SECRET_GRAFANA_SMTP_PASSWORD_VERSION=v1
#
## Grafana Matrix Contact Point (optional)
#COMPOSE_FILE="$COMPOSE_FILE:compose.matrix-alertmanager-receiver.yml"
#SECRET_MATRIX_TOKEN_VERSION=v1
#SECRET_MATRIX_ACCESS_TOKEN_VERSION=v1
#GF_MATRIX_USER_ID="<user-id>"
#GF_MATRIX_ROOM_ID="<room-id>"
#GF_MATRIX_HOMESERVER_URL="<homeserver-url>"
@ -85,3 +85,5 @@ ENABLE_BACKUPS=true
#ALERT_BACKUP_NOT_SUCCESSFULL_ENABLED=true
#ALERT_NODE_DISK_SPACE_ENABLED=true
#ALERT_NODE_MEMORY_USAGE_ENABLED=true
#ALERT_RESTIC_CHECK_FAILED_ENABLED=true
#ALERT_RESTIC_OUTDATED_BACKUP_ENABLED=true

View File

@ -145,7 +145,7 @@ COMPOSE_FILE="$COMPOSE_FILE:compose.matrix-alertmanager-receiver.yml"
2. Insert the matrix access token secret:
```
abra app secret insert monitoring.marx.klasse-methode.it matrix_token v1
abra app secret insert monitoring.marx.klasse-methode.it matrix_access_token v1
```
3. Set required configurations:

114
abra.sh
View File

@ -1,122 +1,28 @@
export ENTRYPOINT_VERSION=v1
export GF_DATASOURCES_VERSION=v1
export GF_DASHBOARDS_VERSION=v2
export GF_SWARM_DASH_VERSION=v2
export GF_STACKS_DASH_VERSION=v2
export GF_TRAEFIK_DASH_VERSION=v2
export GF_BACKUP_DASH_VERSION=v1
export GF_CUSTOM_INI_VERSION=v4
export GRAFANA_DATASOURCES_YML_VERSION=v1
export GRAFANA_DASHBOARDS_YML_VERSION=v2
export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v2
export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v2
export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v2
export GRAFANA_BACKUP_DASHBOARD_JSON_VERSION=v1
export GRAFANA_CUSTOM_INI_VERSION=v4
export PROMTAIL_YML_VERSION=v3
export LOKI_YML_VERSION=v3
export LOKI_YML_VERSION=v2
export PROMETHEUS_YML_VERSION=v2
export MATRIX_ALERTMANAGER_CONFIG_VERSION=e
export MATRIX_ALERTMANAGER_ENTRYPOINT_VERSION=a
export GRAFANA_ALERTS_NODE_VERSION=v1c
export GRAFANA_ALERTS_RESTIC_VERSION=v2
# creates a default prometheus scrape config for a given node
add_node(){
name=$1
add_domain "$name" "metrics.traefik.$name"
add_domain "$name" "$name:8082"
add_domain "$name" "node.monitoring.$name"
add_domain "$name" "cadvisor.monitoring.$name"
cat "/prometheus/scrape_configs/$name.yml"
}
# migrates secrets from old names to new names by reading values from the
# running containers on the server and re-inserting them under the new names.
# preview changes: abra app cmd --local <app> migrate_secret_names
# execute changes: abra app cmd --local <app> migrate_secret_names execute
migrate_secret_names() {
if ! command -v jq &> /dev/null; then
echo "jq is required on your local machine to migrate secret names"
echo "It could not be found in your PATH, please install jq to proceed."
echo "For example: On a debian/ubuntu system, run `apt install jq`"
exit 1
fi
# Hardcoded migration mappings: old_secret_name|new_secret_name
MIGRATIONS="
grafana_admin_password|gf_adminpasswd
grafana_smtp_password|gf_smtp_passwd
grafana_oidc_client_secret|gf_oidc_secret
matrix_access_token|matrix_token
loki_aws_secret_access_key|loki_aws_key
"
# Determine which server the app is deployed on
SERVER=$(abra app ls -m | jq -r --arg domain "$APP_NAME" '[.[].apps[] | select(.domain == $domain) | .server] | first' 2>/dev/null)
if [ -z "$SERVER" ]; then
echo "Error: could not determine server for app '$APP_NAME'"
exit 1
fi
# Build a lookup table of all secrets currently mounted in this stack.
# Each line: <secretID> <containerID> <secretName>
LOOKUP=$(ssh "$SERVER" "
docker stack services ${STACK_NAME} --format '{{.Name}}' | while read svc; do
CID=\$(docker ps --no-trunc -q --filter \"name=\${svc}\" | head -1)
docker service inspect \"\$svc\" --format '{{json .Spec.TaskTemplate.ContainerSpec.Secrets}}' | \
jq -r --arg cid \"\$CID\" '.[]? | .SecretID + \" \" + \$cid + \" \" + .SecretName'
done | sort -k3 -r
" 2>/dev/null)
echo "Secret migration plan for: $APP_NAME (server: $SERVER)"
echo ""
printf " %-24s %-8s %s\n" "OLD NAME" "FOUND" "ACTION"
printf " %-24s %-8s %s\n" "--------" "-----" "------"
# Check each old name against the lookup table and display the plan
ANY_FOUND=false
while IFS='|' read -r OLD_NAME NEW_NAME; do
[ -z "$OLD_NAME" ] && continue
MATCH=$(echo "$LOOKUP" | grep " ${STACK_NAME}_${OLD_NAME}_" | head -1)
if [ -n "$MATCH" ]; then
printf " %-24s %-8s %s\n" "$OLD_NAME" "yes" "recreate as '$NEW_NAME' version V1"
ANY_FOUND=true
else
printf " %-24s %-8s %s\n" "$OLD_NAME" "no" "nothing (not found on server)"
fi
done <<< "$MIGRATIONS"
echo ""
if [ "$ANY_FOUND" = false ]; then
echo "No old secrets found on server. Nothing to migrate."
return 0
fi
if [ "$1" != "execute" ]; then
echo "To apply the above changes, run:"
echo " abra app cmd --local $APP_NAME migrate_secret_names execute"
return 0
fi
# read each found secret from its container and re-insert with the new name
while IFS='|' read -r OLD_NAME NEW_NAME; do
[ -z "$OLD_NAME" ] && continue
MATCH=$(echo "$LOOKUP" | grep " ${STACK_NAME}_${OLD_NAME}_" | head -1)
[ -z "$MATCH" ] && continue
SECRET_ID=$(echo "$MATCH" | awk '{print $1}')
CID=$(echo "$MATCH" | awk '{print $2}')
SECRET_VALUE=$(ssh "$SERVER" "cat /var/lib/docker/containers/${CID}/mounts/secrets/${SECRET_ID} 2>/dev/null || sudo cat /var/lib/docker/containers/${CID}/mounts/secrets/${SECRET_ID} 2>/dev/null")
if [ -z "$SECRET_VALUE" ]; then
echo "Error: could not read value for '$OLD_NAME', skipping"
continue
fi
echo "Migrating: '$OLD_NAME' -> '$NEW_NAME' (v1)"
printf '%s' "$SECRET_VALUE" | abra app secret insert -C "$APP_NAME" "$NEW_NAME" v1
done <<< "$MIGRATIONS"
echo ""
echo "Done."
}
# adds a domain to a scrape config or creates a new one
add_domain(){
name=$1

View File

@ -12,7 +12,7 @@ http:
matrix:
homeserver-url: "{{ env "GF_MATRIX_HOMESERVER_URL" }}"
user-id: "{{ env "GF_MATRIX_USER_ID" }}"
access-token: "{{ secret "matrix_token" }}"
access-token: "{{ secret "matrix_access_token" }}"
room-mapping:
matrixroom: "{{ env "GF_MATRIX_ROOM_ID" }}"

191
alerts/restic.yml.tmpl Normal file
View File

@ -0,0 +1,191 @@
apiVersion: 1
deleteRules:
{{ if ne (env "ALERT_RESTIC_CHECK_FAILED_ENABLED") "true" }}
- orgId: 1
uid: ffglj6egxy8e8c
{{ end }}
{{ if ne (env "ALERT_RESTIC_OUTDATED_BACKUP_ENABLED") "true" }}
- orgId: 1
uid: ffgljntkp9ce8b
{{ end }}
groups:
- orgId: 1
name: restic
folder: restic
interval: 5m
rules:
{{ if eq (env "ALERT_RESTIC_CHECK_FAILED_ENABLED") "true" }}
- uid: ffglj6egxy8e8c
title: Restic Check Failed
condition: C
data:
- refId: A
relativeTimeRange:
from: 600
to: 0
datasourceUid: prometheus
model:
disableTextWrap: false
editorMode: builder
expr: restic_check_success
fullMetaSearch: false
includeNullMetadata: true
instant: true
intervalMs: 1000
legendFormat: __auto
maxDataPoints: 43200
range: false
refId: A
useBackend: false
- refId: B
relativeTimeRange:
from: 600
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: []
type: gt
operator:
type: and
query:
params:
- B
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
reducer: last
refId: B
type: reduce
- refId: C
relativeTimeRange:
from: 600
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 1
- 0
type: lt
operator:
type: and
query:
params:
- C
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
intervalMs: 1000
maxDataPoints: 43200
refId: C
type: threshold
noDataState: Alerting
execErrState: Error
for: 5m
annotations: {}
labels: {}
isPaused: false
{{ end }}
{{ if eq (env "ALERT_RESTIC_OUTDATED_BACKUP_ENABLED") "true" }}
- uid: ffgljntkp9ce8b
title: Restic Outdated Backup
condition: C
data:
- refId: A
relativeTimeRange:
from: 600
to: 0
datasourceUid: prometheus
model:
disableTextWrap: false
editorMode: builder
expr: time() - max by(instance) (restic_backup_timestamp)
fullMetaSearch: false
includeNullMetadata: true
instant: true
intervalMs: 1000
legendFormat: __auto
maxDataPoints: 43200
range: false
refId: A
useBackend: false
- refId: B
relativeTimeRange:
from: 600
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params: []
type: gt
operator:
type: and
query:
params:
- B
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: A
intervalMs: 1000
maxDataPoints: 43200
reducer: last
refId: B
type: reduce
- refId: C
relativeTimeRange:
from: 600
to: 0
datasourceUid: __expr__
model:
conditions:
- evaluator:
params:
- 93600
type: gt
operator:
type: and
query:
params:
- C
reducer:
params: []
type: last
type: query
datasource:
type: __expr__
uid: __expr__
expression: B
intervalMs: 1000
maxDataPoints: 43200
refId: C
type: threshold
noDataState: NoData
execErrState: Error
for: 5m
annotations: {}
labels: {}
isPaused: false
{{ end }}

View File

@ -3,7 +3,7 @@ version: '3.8'
services:
grafana:
secrets:
- gf_oidc_secret
- grafana_oidc_client_secret
environment:
- OIDC_API_URL
- OIDC_AUTH_URL
@ -12,6 +12,6 @@ services:
- OIDC_TOKEN_URL
secrets:
gf_oidc_secret:
grafana_oidc_client_secret:
external: true
name: ${STACK_NAME}_gf_oidc_secret_${SECRET_GF_OIDC_SECRET_VERSION}
name: ${STACK_NAME}_grafana_oidc_client_secret_${SECRET_GRAFANA_OIDC_CLIENT_SECRET_VERSION}

View File

@ -3,16 +3,16 @@ version: '3.8'
services:
grafana:
secrets:
- gf_smtp_passwd
- grafana_smtp_password
environment:
- GF_SMTP_HOST
- GF_SMTP_USER
- GF_SMTP_PASSWORD__FILE=/run/secrets/gf_smtp_passwd
- GF_SMTP_PASSWORD__FILE=/run/secrets/grafana_smtp_password
- GF_SMTP_ENABLED
- GF_SMTP_FROM_ADDRESS
- GF_SMTP_SKIP_VERIFY
secrets:
gf_smtp_passwd:
grafana_smtp_password:
external: true
name: ${STACK_NAME}_gf_smtp_passwd_${SECRET_GF_SMTP_PASSWD_VERSION}
name: ${STACK_NAME}_grafana_smtp_password_${SECRET_GRAFANA_SMTP_PASSWORD_VERSION}

View File

@ -2,44 +2,49 @@ version: '3.8'
services:
grafana:
image: grafana/grafana:12.4.0
image: grafana/grafana:10.4.14
volumes:
- grafana-data:/var/lib/grafana:rw
secrets:
- gf_adminpasswd
- grafana_admin_password
configs:
- source: gf_custom_ini
- source: grafana_custom_ini
target: /etc/grafana/grafana.ini
- source: gf_datasources
- source: grafana_datasources_yml
target: /etc/grafana/provisioning/datasources/datasources.yml
- source: gf_dashboards
- source: grafana_dashboards_yml
target: /etc/grafana/provisioning/dashboards/dashboards.yml
- source: gf_swarm_dash
- source: grafana_swarm_dashboard_json
target: /var/lib/grafana/dashboards/docker-swarm-nodes.json
- source: gf_stacks_dash
- source: grafana_stacks_dashboard_json
target: /var/lib/grafana/dashboards/docker-swarm-stacks.json
- source: gf_traefik_dash
- source: grafana_traefik_dashboard_json
target: /var/lib/grafana/dashboards/traefik.json
- source: gf_backup_dash
- source: grafana_backup_dashboard_json
target: /var/lib/grafana/dashboards/backup.json
- source: gf_alerts_node
target: /etc/grafana/provisioning/alerting/node.yml
- source: gf_alerts_restic
target: /etc/grafana/provisioning/alerting/restic.yml
networks:
- proxy
- internal
environment:
- GF_SERVER_ROOT_URL
- GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/gf_adminpasswd
- GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password
- GF_SECURITY_ALLOW_EMBEDDING
- GF_INSTALL_PLUGINS
- ALERT_NODE_DISK_SPACE_ENABLED
- ALERT_NODE_MEMORY_USAGE_ENABLED
- ALERT_RESTIC_CHECK_FAILED_ENABLED
- ALERT_RESTIC_OUTDATED_BACKUP_ENABLED
- DOMAIN
deploy:
labels:
- "traefik.enable=true"
- "traefik.docker.network=proxy"
- "traefik.http.services.${STACK_NAME}-grafana.loadbalancer.server.port=3000"
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN:-$DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-grafana.entrypoints=web-secure"
- "traefik.http.routers.${STACK_NAME}-grafana.tls=true"
- "traefik.http.routers.${STACK_NAME}-grafana.tls.certresolver=${LETS_ENCRYPT_ENV}"
@ -51,38 +56,42 @@ services:
start_period: 10s
configs:
gf_custom_ini:
grafana_custom_ini:
template_driver: golang
name: ${STACK_NAME}_gf_custom_ini_${GF_CUSTOM_INI_VERSION}
name: ${STACK_NAME}_grafana_custom_ini_${GRAFANA_CUSTOM_INI_VERSION}
file: grafana_custom.ini
gf_datasources:
name: ${STACK_NAME}_gf_datasources_${GF_DATASOURCES_VERSION}
grafana_datasources_yml:
name: ${STACK_NAME}_g_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION}
file: grafana-datasources.yml
gf_dashboards:
name: ${STACK_NAME}_gf_dashboards_${GF_DASHBOARDS_VERSION}
grafana_dashboards_yml:
name: ${STACK_NAME}_g_dashboards_yml_${GRAFANA_DASHBOARDS_YML_VERSION}
file: grafana-dashboards.yml
gf_swarm_dash:
name: ${STACK_NAME}_gf_swarm_dash_${GF_SWARM_DASH_VERSION}
grafana_swarm_dashboard_json:
name: ${STACK_NAME}_g_swarm_dashboard_json_${GRAFANA_SWARM_DASHBOARD_JSON_VERSION}
file: grafana-swarm-dashboard.json
gf_stacks_dash:
name: ${STACK_NAME}_gf_stacks_dash_${GF_STACKS_DASH_VERSION}
grafana_stacks_dashboard_json:
name: ${STACK_NAME}_g_stacks_dashboard_json_${GRAFANA_STACKS_DASHBOARD_JSON_VERSION}
file: grafana-stacks-dashboard.json
gf_traefik_dash:
name: ${STACK_NAME}_gf_traefik_dash_${GF_TRAEFIK_DASH_VERSION}
grafana_traefik_dashboard_json:
name: ${STACK_NAME}_g_traefik_dashboard_json_${GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION}
file: grafana-traefik-dashboard.json
gf_backup_dash:
name: ${STACK_NAME}_gf_backup_dash_${GF_BACKUP_DASH_VERSION}
grafana_backup_dashboard_json:
name: ${STACK_NAME}_g_backup_dashboard_json_${GRAFANA_BACKUP_DASHBOARD_JSON_VERSION}
file: grafana-backup-dashboard.json
gf_alerts_node:
template_driver: golang
name: ${STACK_NAME}_gf_alerts_node_${GRAFANA_ALERTS_NODE_VERSION}
file: alerts/node.yml.tmpl
gf_alerts_restic:
template_driver: golang
name: ${STACK_NAME}_gf_alerts_restiv_${GRAFANA_ALERTS_RESTIC_VERSION}
file: alerts/restic.yml.tmpl
volumes:
grafana-data:
secrets:
gf_adminpasswd:
grafana_admin_password:
external: true
name: ${STACK_NAME}_gf_adminpasswd_${SECRET_GF_ADMINPASSWD_VERSION}
name: ${STACK_NAME}_grafana_admin_password_${SECRET_GRAFANA_ADMIN_PASSWORD_VERSION}

View File

@ -2,7 +2,7 @@ version: '3.8'
services:
loki:
image: grafana/loki:3.6.7
image: grafana/loki:2.9.11
command: -config.file=/etc/loki/local-config.yaml
networks:
- proxy
@ -12,7 +12,7 @@ services:
volumes:
- loki-data:/loki
# secrets:
# - loki_aws_key
# - loki_aws_secret_access_key
environment:
- LOKI_ACCESS_KEY_ID
- LOKI_AWS_ENDPOINT
@ -47,6 +47,6 @@ volumes:
loki-data:
# secrets:
# loki_aws_key:
# loki_aws_secret_access_key:
# external: true
# name: ${STACK_NAME}_loki_aws_key_${SECRET_LOKI_AWS_KEY_VERSION}
# name: ${STACK_NAME}_loki_aws_secret_access_key_${SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION}

View File

@ -2,9 +2,9 @@ version: '3.8'
services:
matrix-alertmanager-receiver:
image: metio/matrix-alertmanager-receiver:2026.2.25
image: metio/matrix-alertmanager-receiver:2025.2.9
secrets:
- matrix_token
- matrix_access_token
configs:
- source: matrix-alertmanager-receiver-config
target: /etc/matrix-alertmanager-receiver/config.yml
@ -23,6 +23,6 @@ configs:
file: alertmanager-matrix-config.yml.tmpl
secrets:
matrix_token:
matrix_access_token:
external: true
name: ${STACK_NAME}_matrix_token_${SECRET_MATRIX_TOKEN_VERSION}
name: ${STACK_NAME}_matrix_access_token_${SECRET_MATRIX_ACCESS_TOKEN_VERSION}

View File

@ -2,7 +2,7 @@ version: '3.8'
services:
prometheus:
image: prom/prometheus:v3.10.0
image: prom/prometheus:v2.55.1
secrets:
- basic_auth
volumes:
@ -39,3 +39,8 @@ configs:
volumes:
prometheus-data:
secrets:
basic_auth:
external: true
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}

View File

@ -2,7 +2,7 @@ version: "3.8"
services:
promtail:
image: grafana/promtail:3.6.7
image: grafana/promtail:2.9.11
volumes:
- /var/log:/var/log:ro
- /var/run/docker.sock:/var/run/docker.sock
@ -27,4 +27,4 @@ configs:
secrets:
basic_auth:
external: true
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}

View File

@ -2,7 +2,7 @@ version: '3.8'
services:
pushgateway:
image: prom/pushgateway:v1.11.2
image: prom/pushgateway:v1.10.0
command:
- '--web.listen-address=:9191'
- '--push.disable-consistency-check'

View File

@ -3,7 +3,7 @@ version: "3.8"
services:
app:
image: prom/node-exporter:v1.10.2
image: prom/node-exporter:v1.8.1
user: root
environment:
- NODE_ID={{.Node.ID}}
@ -43,7 +43,7 @@ services:
- "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT}"
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.55.1
image: gcr.io/cadvisor/cadvisor:v0.49.2
command:
- "-logtostderr"
- "--enable_metrics=cpu,cpuLoad,disk,diskIO,process,memory,network"

View File

@ -11,13 +11,3 @@ providers:
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true
- name: 'default-alert-provider'
orgId: 1
folder: 'default-alerts'
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /var/lib/grafana/alerts
foldersFromFilesStructure: true

View File

@ -3,6 +3,7 @@ apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
orgId: 1
url: http://prometheus:9090
@ -10,6 +11,7 @@ datasources:
editable: false
- name: Loki
type: loki
uid: loki
access: proxy
orgId: 1
url: http://loki:3100

View File

@ -21,7 +21,7 @@ tls_skip_verify_insecure = false
allow_sign_up = true
auto_login = true
client_id = {{ env "OIDC_CLIENT_ID" }}
client_secret = {{ secret "gf_oidc_secret" }}
client_secret = {{ secret "grafana_oidc_client_secret" }}
auth_url = {{ env "OIDC_AUTH_URL" }}
token_url = {{ env "OIDC_TOKEN_URL" }}
api_url = {{ env "OIDC_API_URL" }}

View File

@ -34,6 +34,7 @@ ingester:
max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h
chunk_target_size: 1048576 # Loki will attempt to build chunks up to 1.5MB, flushing first if chunk_idle_period or max_chunk_age is reached first
chunk_retain_period: 30s # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
max_transfer_retries: 0 # Chunk transfers disabled
wal:
dir: "/tmp/wal"
@ -52,7 +53,7 @@ schema_config:
- from: 2020-10-24
store: boltdb-shipper
object_store: filesystem
schema: v13
schema: v11
index:
prefix: index_
period: 24h
@ -62,6 +63,7 @@ storage_config:
active_index_directory: /loki/boltdb-shipper-active
cache_location: /loki/boltdb-shipper-cache
cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space
shared_store: filesystem
filesystem:
directory: /loki/chunks
{{ end }}
@ -70,6 +72,7 @@ schema_config:
configs:
- from: 2020-11-25
store: boltdb-shipper
object_store: aws
schema: v11
index:
prefix: index_
@ -86,7 +89,7 @@ storage_config:
endpoint: {{ env "LOKI_AWS_ENDPOINT" }}
region: {{ env "LOKI_AWS_REGION" }}
access_key_id: {{ env "LOKI_ACCESS_KEY_ID" }}
secret_access_key: {{ secret "loki_aws_key" }}
secret_access_key: {{ secret "loki_aws_secret_access_key" }}
bucketnames: {{ env "LOKI_BUCKET_NAMES" }}
insecure: false
sse_encryption: false
@ -100,24 +103,19 @@ storage_config:
compactor:
working_directory: /loki/boltdb-shipper-compactor
shared_store: filesystem
compaction_interval: 10m
retention_enabled: true
retention_delete_delay: 2h
retention_delete_worker_count: 150
{{ if eq (env "LOKI_STORAGE_FILESYSTEM") "1" }}
delete_request_store: filesystem
{{ end }}
{{ if eq (env "LOKI_STORAGE_S3") "1" }}
delete_request_store: aws
{{ end }}
limits_config:
enforce_metric_name: false
reject_old_samples: true
reject_old_samples_max_age: 168h
retention_period: {{ env "LOKI_RETENTION_PERIOD" }}
split_queries_by_interval: 24h
max_query_parallelism: 100
allow_structured_metadata: false
query_scheduler:
max_outstanding_requests_per_tenant: 4096
@ -125,6 +123,9 @@ query_scheduler:
frontend:
max_outstanding_per_tenant: 4096
chunk_store_config:
max_look_back_period: 0s
table_manager:
retention_deletes_enabled: false
retention_period: 0s

View File

@ -5,8 +5,3 @@ COMPOSE_FILE="$COMPOSE_FILE:compose.grafana-oidc.yml"
2. SMTP was moved into a seperate compose file. If you have smtp configured you need to add the following line to you .env file:
COMPOSE_FILE="$COMPOSE_FILE:compose.grafana-smtp.yml"
3. The scrape-config.example.yml file and add_node() command were updated to use a secure endpoint for the traefik metrics instead of http. This requires an updated Traefik recipe that publishes the metrics on https.
4. Secret and config names were shortened to max 14 characters to prevent going over Docker's 64 character limit when STACK_NAME and VERSION are added to it.
When upgrading, you need to reinsert the secrets with their shorter names. Run `abra app secret list <domain>` to see which secrets aren't created on the server (because their name was shortened) and run `abra app secret insert <domain> <secret_name> v1 <value>` to reinsert them with the shorter name. Or you can use the migrate_secret_names function in abra.sh to reinsert all existing secrets with their shorter name automatically: `abra app cmd --local <domain> migrate_secret_names`

View File

@ -1,4 +1,4 @@
- targets:
- 'metrics.traefik.example.org'
- 'example.org:8082'
- 'node.monitoring.example.org'
- 'cadvisor.monitoring.example.org'