1 Commits

Author SHA1 Message Date
d269ee8084 some small changes 2024-02-13 16:43:28 +01:00
10 changed files with 22 additions and 65 deletions

View File

@ -1,9 +1,8 @@
TYPE=monitoring-ng
LETS_ENCRYPT_ENV=production
COMPOSE_FILE=compose.yml
DOMAIN=monitoring.example.com
DOMAIN=monitoring-ng.example.com
TIMEOUT=120
ENABLE_BACKUPS=true
## Enable this secret for Promtail / Prometheus
# SECRET_BASIC_AUTH_VERSION=v1
@ -17,10 +16,6 @@ ENABLE_BACKUPS=true
## Prometheus
# COMPOSE_FILE="$COMPOSE_FILE:compose.prometheus.yml"
# PROMETHEUS_RETENTION_TIME=1y
#
## Prometheus Pushgateway
# COMPOSE_FILE="$COMPOSE_FILE:compose.pushgateway.yml"
#
## Loki
# Loki Server
#
@ -41,16 +36,16 @@ ENABLE_BACKUPS=true
## Grafana
#
# COMPOSE_FILE="$COMPOSE_FILE:compose.grafana.yml"
# GF_SERVER_ROOT_URL=https://monitoring.example.com
# GF_SERVER_ROOT_URL=https://${DOMAIN}
# SECRET_GRAFANA_ADMIN_PASSWORD_VERSION=v1
#
## Single-Sign-On with OIDC
# OIDC_ENABLED=1
# SECRET_GRAFANA_OIDC_CLIENT_SECRET_VERSION=v1
# OIDC_CLIENT_ID=grafana
# OIDC_AUTH_URL="https://authentik.example.com/application/o/authorize/"
# OIDC_API_URL="https://authentik.example.com/application/o/userinfo/"
# OIDC_TOKEN_URL="https://authentik.example.com/application/o/token/"
# OIDC_AUTH_URL="https://sso.example.com/auth/realms/autonomic/protocol/openid-connect/auth"
# OIDC_API_URL="https://sso.example.com/auth/realms/autonomic/protocol/openid-connect/userinfo"
# OIDC_TOKEN_URL="https://sso.example.com/auth/realms/autonomic/protocol/openid-connect/token"
#
## Additional grafana settings (unlikely to require editing)
# GF_SECURITY_ALLOW_EMBEDDING=1

View File

@ -36,7 +36,7 @@ Where gathering.org is the node you want to gather metrics from.
SECRET_USERSFILE_VERSION=v1
```
- Generate userslist with htpasswd hashed password
`abra app secret insert traefik.gathering.org userslist v1 'admin:<hashed-secret>'`
`abra app secret insert traefik.gathering.org usersfile v1 'admin:<hashed-secret>'`
Make sure there is no whitespace anywhere inside `admin:<hashed-secret>`; it seems to break stuff...
- `abra app deploy -f traefik`
1. `abra app new monitoring-ng`
@ -54,16 +54,16 @@ Where gathering.org is the node you want to gather metrics from.
- monitoring.example.org
- prometheus.monitoring.example.org
- loki.monitoring.example.org
2. Setup monitoring stack
1. Setup monitoring stack
- `abra app new monitoring-ng`
- `abra app config monitoring.example.org`
Uncomment all the stuff
- `abra app secret insert monitoring.example.org basic_auth v1 <secret>`
- `abra app secret insert monitoring.example.org basic_auth_admin_password v1 <secret>`
this needs the plaintext traefik basic-auth secret, not the hashed one!
- `abra app secret ls monitoring.example.org`
- `abra app deploy monitoring.example.org`
3. Add scrape config to prometheus
- `abra app cmd monitoring.example.org prometheus gathering.org`
1. add scrape config to prometheus
- `abra app cmd monitoring.example.org prometheus add_node gathering.org`
- or manually
```
cp scrape-config.example.yml gathering.org.yml
@ -101,18 +101,8 @@ $ echo '{
$ systemctl restart docker.service
```
## Setup Push Gateway
1. Enable it in the env file by uncommenting the following lines:
```
## Prometheus Pushgateway
# COMPOSE_FILE="$COMPOSE_FILE:compose.pushgateway.yml"
```
2. `abra app deploy monitoring.example.org`
This will expose the pushgateway at `https://pushgateway.${DOMAIN}`.
It is secured behind the same basic auth as the other services.
After that you need to add `pushgateway.${DOMAIN}` to the scrape config.
## Post-setup guide

View File

@ -12,7 +12,6 @@ export PROMETHEUS_YML_VERSION=v2
# creates a default prometheus scrape config for a given node
add_node(){
name=$1
add_domain "$name" "$name:8082"
add_domain "$name" "node.monitoring.$name"
add_domain "$name" "cadvisor.monitoring.$name"
cat "/prometheus/scrape_configs/$name.yml"

View File

@ -2,7 +2,7 @@ version: '3.8'
services:
grafana:
image: grafana/grafana:10.4.14
image: grafana/grafana:10.2.3
volumes:
- grafana-data:/var/lib/grafana:rw
secrets:
@ -62,19 +62,19 @@ configs:
name: ${STACK_NAME}_grafana_custom_ini_${GRAFANA_CUSTOM_INI_VERSION}
file: grafana_custom.ini
grafana_datasources_yml:
name: ${STACK_NAME}_g_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION}
name: ${STACK_NAME}_grafana_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION}
file: grafana-datasources.yml
grafana_dashboards_yml:
name: ${STACK_NAME}_g_dashboards_yml_${GRAFANA_DASHBOARDS_YML_VERSION}
name: ${STACK_NAME}_grafana_dashboards_yml_${GRAFANA_DASHBOARDS_YML_VERSION}
file: grafana-dashboards.yml
grafana_swarm_dashboard_json:
name: ${STACK_NAME}_g_swarm_dashboard_json_${GRAFANA_SWARM_DASHBOARD_JSON_VERSION}
name: ${STACK_NAME}_grafana_swarm_dashboard_json_${GRAFANA_SWARM_DASHBOARD_JSON_VERSION}
file: grafana-swarm-dashboard.json
grafana_stacks_dashboard_json:
name: ${STACK_NAME}_g_stacks_dashboard_json_${GRAFANA_STACKS_DASHBOARD_JSON_VERSION}
name: ${STACK_NAME}_grafana_stacks_dashboard_json_${GRAFANA_STACKS_DASHBOARD_JSON_VERSION}
file: grafana-stacks-dashboard.json
grafana_traefik_dashboard_json:
name: ${STACK_NAME}_g_traefik_dashboard_json_${GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION}
name: ${STACK_NAME}_grafana_traefik_dashboard_json_${GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION}
file: grafana-traefik-dashboard.json
volumes:

View File

@ -2,7 +2,7 @@ version: '3.8'
services:
loki:
image: grafana/loki:2.9.11
image: grafana/loki:2.9.3
command: -config.file=/etc/loki/local-config.yaml
networks:
- proxy

View File

@ -2,7 +2,7 @@ version: '3.8'
services:
prometheus:
image: prom/prometheus:v2.55.1
image: prom/prometheus:v2.48.1
secrets:
- basic_auth
volumes:

View File

@ -2,7 +2,7 @@ version: "3.8"
services:
promtail:
image: grafana/promtail:2.9.11
image: grafana/promtail:2.9.3
volumes:
- /var/log:/var/log:ro
- /var/run/docker.sock:/var/run/docker.sock

View File

@ -1,25 +0,0 @@
version: '3.8'
services:
pushgateway:
image: prom/pushgateway:v1.10.0
command:
- '--web.listen-address=:9191'
- '--push.disable-consistency-check'
- '--persistence.interval=5m'
ports:
- 9191:9191
networks:
- internal
- proxy
deploy:
restart_policy:
condition: on-failure
labels:
- "traefik.enable=true"
- "traefik.http.services.${STACK_NAME}-pushgateway.loadbalancer.server.port=9191"
- "traefik.http.routers.${STACK_NAME}-pushgateway.rule=Host(`pushgateway.${DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-pushgateway.entrypoints=web-secure"
- "traefik.http.routers.${STACK_NAME}-pushgateway.tls=true"
- "traefik.http.routers.${STACK_NAME}-pushgateway.tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.routers.${STACK_NAME}-pushgateway.middlewares=basicauth@file"

View File

@ -3,7 +3,7 @@ version: "3.8"
services:
app:
image: prom/node-exporter:v1.8.1
image: prom/node-exporter:v1.7.0
user: root
environment:
- NODE_ID={{.Node.ID}}
@ -30,7 +30,6 @@ services:
restart_policy:
condition: on-failure
labels:
- "backupbot.backup=${ENABLE_BACKUPS:-true}"
- "traefik.enable=true"
- "traefik.http.services.${STACK_NAME}-node.loadbalancer.server.port=9100"
- "traefik.http.routers.${STACK_NAME}-node.rule=Host(`node.${DOMAIN}`)"
@ -38,11 +37,11 @@ services:
- "traefik.http.routers.${STACK_NAME}-node.tls=true"
- "traefik.http.routers.${STACK_NAME}-node.tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.routers.${STACK_NAME}-node.middlewares=basicauth@file"
- "coop-cloud.${STACK_NAME}.version=1.3.0+v1.8.1"
- "coop-cloud.${STACK_NAME}.version=1.0.0+v1.7.0"
- "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-120}"
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.49.2
image: gcr.io/cadvisor/cadvisor:v0.47.2
command:
- "-logtostderr"
- "--enable_metrics=cpu,cpuLoad,disk,diskIO,process,memory,network"

View File

@ -1,4 +1,3 @@
- targets:
- 'example.org:8082'
- 'node.monitoring.example.org'
- 'cadvisor.monitoring.example.org'