WIP: BREAKING CHANGES: replace promtail and cadvisor with alloy #21
@ -19,6 +19,7 @@ ENABLE_BACKUPS=true
|
||||
#
|
||||
## Prometheus
|
||||
# COMPOSE_FILE="$COMPOSE_FILE:compose.prometheus.yml"
|
||||
# PROMETHEUS_REMOTE_WRITE_URL=https://prometheus.$DOMAIN/api/v1/write
|
||||
# PROMETHEUS_RETENTION_TIME=1y
|
||||
#
|
||||
## Prometheus Pushgateway
|
||||
|
||||
1
abra.sh
1
abra.sh
@ -12,6 +12,7 @@ export PROMETHEUS_YML_VERSION=v2
|
||||
export MATRIX_ALERTMANAGER_CONFIG_VERSION=e
|
||||
export MATRIX_ALERTMANAGER_ENTRYPOINT_VERSION=a
|
||||
export GRAFANA_ALERTS_NODE_VERSION=v1c
|
||||
export CONFIG_ALLOY_VERSION=v5
|
||||
|
||||
# creates a default prometheus scrape config for a given node
|
||||
add_node(){
|
||||
|
||||
@ -16,6 +16,8 @@ services:
|
||||
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
|
||||
- "--web.console.templates=/usr/share/prometheus/consoles"
|
||||
- "--storage.tsdb.retention.time=${PROMETHEUS_RETENTION_TIME}"
|
||||
- "--enable-feature=remote-write-receiver"
|
||||
- "--web.enable-remote-write-receiver"
|
||||
networks:
|
||||
- proxy
|
||||
- internal
|
||||
|
||||
96
compose.yml
96
compose.yml
@ -3,89 +3,41 @@ version: "3.8"
|
||||
|
||||
services:
|
||||
app:
|
||||
image: prom/node-exporter:v1.10.2
|
||||
user: root
|
||||
environment:
|
||||
- NODE_ID={{.Node.ID}}
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
- /etc/hostname:/etc/nodename:ro
|
||||
command:
|
||||
- "--path.sysfs=/host/sys"
|
||||
- "--path.procfs=/host/proc"
|
||||
- "--path.rootfs=/rootfs"
|
||||
- "--collector.textfile.directory=/etc/node-exporter/"
|
||||
- "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)"
|
||||
- "--no-collector.ipvs"
|
||||
image: grafana/alloy:v1.16.1
|
||||
hostname: "${DOMAIN}"
|
||||
configs:
|
||||
- source: entrypoint
|
||||
target: /entrypoint.sh
|
||||
- source: config_alloy
|
||||
target: /etc/alloy/config.alloy
|
||||
volumes:
|
||||
- alloy-data:/var/lib/alloy/data
|
||||
- /:/rootfs:ro
|
||||
command:
|
||||
- "run"
|
||||
- "--storage.path=/var/lib/alloy/data"
|
||||
- "/etc/alloy/config.alloy"
|
||||
networks:
|
||||
- internal
|
||||
- proxy
|
||||
entrypoint: [ "/bin/sh", "-e", "/entrypoint.sh" ]
|
||||
secrets:
|
||||
- basic_auth
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
labels:
|
||||
- "backupbot.backup=${ENABLE_BACKUPS:-true}"
|
||||
- "traefik.enable=true"
|
||||
- "traefik.docker.network=proxy"
|
||||
- "traefik.http.services.${STACK_NAME}-node.loadbalancer.server.port=9100"
|
||||
- "traefik.http.routers.${STACK_NAME}-node.rule=Host(`node.${DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-node.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}-node.tls=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-node.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- "traefik.http.routers.${STACK_NAME}-node.middlewares=basicauth@file"
|
||||
- "traefik.enable=false"
|
||||
- "coop-cloud.${STACK_NAME}.version=1.6.0+v1.8.1"
|
||||
- "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT}"
|
||||
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:v0.55.1
|
||||
command:
|
||||
- "-logtostderr"
|
||||
- "--enable_metrics=cpu,cpuLoad,disk,diskIO,process,memory,network"
|
||||
# all possible metrics: advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp.
|
||||
- "--housekeeping_interval=120s"
|
||||
- "--docker_only=true"
|
||||
volumes:
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
- /dev/disk/:/dev/disk:ro
|
||||
- /sys:/sys:ro
|
||||
- /var/run:/var/run:ro
|
||||
- /:/rootfs:ro
|
||||
networks:
|
||||
- internal
|
||||
- proxy
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.docker.network=proxy"
|
||||
- "traefik.http.services.${STACK_NAME}-cadvisor.loadbalancer.server.port=8080"
|
||||
- "traefik.http.routers.${STACK_NAME}-cadvisor.rule=Host(`cadvisor.${DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-cadvisor.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}-cadvisor.tls=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-cadvisor.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- "traefik.http.routers.${STACK_NAME}-cadvisor.middlewares=basicauth@file"
|
||||
healthcheck:
|
||||
test: wget --quiet --tries=1 --spider http://localhost:8080/healthz || exit 1
|
||||
interval: 15s
|
||||
timeout: 15s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
|
||||
configs:
|
||||
entrypoint:
|
||||
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
|
||||
file: node-exporter-entrypoint.sh
|
||||
|
||||
|
||||
|
||||
config_alloy:
|
||||
template_driver: golang
|
||||
name: ${STACK_NAME}_config_alloy_${CONFIG_ALLOY_VERSION}
|
||||
file: config.alloy.tmpl
|
||||
networks:
|
||||
proxy:
|
||||
external: true
|
||||
internal:
|
||||
volumes:
|
||||
alloy-data:
|
||||
secrets:
|
||||
basic_auth:
|
||||
external: true
|
||||
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}
|
||||
|
||||
36
config.alloy.tmpl
Normal file
36
config.alloy.tmpl
Normal file
@ -0,0 +1,36 @@
|
||||
// Alloy's own log output: info-level messages, written in logfmt.
logging {
  format = "logfmt"
  level  = "info"
}
|
||||
|
||||
// Embedded cAdvisor exporter, labelled "docker". No arguments are set, so
// every option keeps its component default.
// NOTE(review): the standalone cadvisor service in compose.yml was started
// with --docker_only=true and an explicit --enable_metrics list; confirm that
// dropping those restrictions here is intended.
// NOTE(review): the compose service shown mounts only the host root at /rootfs
// and the alloy-data volume; verify this exporter can still reach the Docker
// socket and cgroup hierarchy it needs.
prometheus.exporter.cadvisor "docker" { }
|
||||
|
||||
// Host-level (node_exporter-style) metrics.
//
// The compose service bind-mounts the host's / read-only at /rootfs, so all
// three path arguments point below that mount. Leaving procfs_path and
// sysfs_path unset would make the exporter read the container's own /proc and
// /sys, whereas the node-exporter it replaces was pointed at the host's
// procfs and sysfs via --path.procfs / --path.sysfs.
// NOTE(review): this relies on the bind mount of / being recursive, so that
// the host's /proc and /sys submounts are visible under /rootfs; confirm on
// the target Docker version.
prometheus.exporter.unix "default" {
  // Also expose metrics about the exporter itself.
  include_exporter_metrics = true

  rootfs_path = "/rootfs"
  procfs_path = "/rootfs/proc"
  sysfs_path  = "/rootfs/sys"
}
|
||||
|
||||
// Scrapes three target sets and hands every sample to the remote_write
// component declared in this file:
//   1. a static target with job "alloy" at 127.0.0.1:12345 (presumably Alloy's
//      own metrics endpoint; the run command sets no listen address, so this
//      assumes the default),
//   2. the targets exported by the unix exporter,
//   3. the targets exported by the cAdvisor exporter.
prometheus.scrape "default" {
  forward_to = [prometheus.remote_write.prometheus.receiver]

  targets = array.concat(
    [{__address__ = "127.0.0.1:12345", job = "alloy"}],
    prometheus.exporter.unix.default.targets,
    prometheus.exporter.cadvisor.docker.targets,
  )
}
|
||||
|
||||
// Ships all scraped samples to the central Prometheus remote-write endpoint.
//
// The endpoint URL is filled in by Docker's config templating from the
// PROMETHEUS_REMOTE_WRITE_URL environment variable.
// NOTE(review): that variable must be present in the service's environment
// when the config is rendered; confirm it is passed through in compose.yml.
prometheus.remote_write "prometheus" {
  endpoint {
    url = "{{ env "PROMETHEUS_REMOTE_WRITE_URL" }}"

    basic_auth {
      username = "admin"

      // Read the password from the mounted Docker secret instead of
      // templating it into a quoted string: a quote, backslash or newline in
      // the secret would otherwise produce an invalid config, and the
      // plaintext would be embedded in the rendered file.
      password_file = "/run/secrets/basic_auth"
    }
  }
}
|
||||
Reference in New Issue
Block a user