WIP: BREAKING CHANGES: replace promtail and cadvisor with alloy #21

Draft
fauno wants to merge 8 commits from alloy into main
5 changed files with 64 additions and 72 deletions

View File

@ -19,6 +19,7 @@ ENABLE_BACKUPS=true
#
## Prometheus
# COMPOSE_FILE="$COMPOSE_FILE:compose.prometheus.yml"
# PROMETHEUS_REMOTE_WRITE_URL=https://prometheus.$DOMAIN/api/v1/write
# PROMETHEUS_RETENTION_TIME=1y
#
## Prometheus Pushgateway

View File

@ -12,6 +12,7 @@ export PROMETHEUS_YML_VERSION=v2
export MATRIX_ALERTMANAGER_CONFIG_VERSION=e
export MATRIX_ALERTMANAGER_ENTRYPOINT_VERSION=a
export GRAFANA_ALERTS_NODE_VERSION=v1c
export CONFIG_ALLOY_VERSION=v5
# creates a default prometheus scrape config for a given node
add_node(){

View File

@ -16,6 +16,8 @@ services:
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
- "--web.console.templates=/usr/share/prometheus/consoles"
- "--storage.tsdb.retention.time=${PROMETHEUS_RETENTION_TIME}"
- "--enable-feature=remote-write-receiver"
- "--web.enable-remote-write-receiver"
networks:
- proxy
- internal

View File

@ -3,89 +3,41 @@ version: "3.8"
services:
app:
image: prom/node-exporter:v1.10.2
user: root
environment:
- NODE_ID={{.Node.ID}}
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
- /etc/hostname:/etc/nodename:ro
command:
- "--path.sysfs=/host/sys"
- "--path.procfs=/host/proc"
- "--path.rootfs=/rootfs"
- "--collector.textfile.directory=/etc/node-exporter/"
- "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)"
- "--no-collector.ipvs"
image: grafana/alloy:v1.16.1
hostname: "${DOMAIN}"
configs:
- source: entrypoint
target: /entrypoint.sh
- source: config_alloy
target: /etc/alloy/config.alloy
volumes:
- alloy-data:/var/lib/alloy/data
- /:/rootfs:ro
command:
- "run"
- "--storage.path=/var/lib/alloy/data"
- "/etc/alloy/config.alloy"
networks:
- internal
- proxy
entrypoint: [ "/bin/sh", "-e", "/entrypoint.sh" ]
secrets:
- basic_auth
deploy:
restart_policy:
condition: on-failure
labels:
- "backupbot.backup=${ENABLE_BACKUPS:-true}"
- "traefik.enable=true"
- "traefik.docker.network=proxy"
- "traefik.http.services.${STACK_NAME}-node.loadbalancer.server.port=9100"
- "traefik.http.routers.${STACK_NAME}-node.rule=Host(`node.${DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-node.entrypoints=web-secure"
- "traefik.http.routers.${STACK_NAME}-node.tls=true"
- "traefik.http.routers.${STACK_NAME}-node.tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.routers.${STACK_NAME}-node.middlewares=basicauth@file"
- "traefik.enable=false"
- "coop-cloud.${STACK_NAME}.version=1.6.0+v1.8.1"
- "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT}"
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.55.1
command:
- "-logtostderr"
- "--enable_metrics=cpu,cpuLoad,disk,diskIO,process,memory,network"
# all possible metrics: advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp.
- "--housekeeping_interval=120s"
- "--docker_only=true"
volumes:
- /var/lib/docker/:/var/lib/docker:ro
- /dev/disk/:/dev/disk:ro
- /sys:/sys:ro
- /var/run:/var/run:ro
- /:/rootfs:ro
networks:
- internal
- proxy
deploy:
restart_policy:
condition: on-failure
labels:
- "traefik.enable=true"
- "traefik.docker.network=proxy"
- "traefik.http.services.${STACK_NAME}-cadvisor.loadbalancer.server.port=8080"
- "traefik.http.routers.${STACK_NAME}-cadvisor.rule=Host(`cadvisor.${DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-cadvisor.entrypoints=web-secure"
- "traefik.http.routers.${STACK_NAME}-cadvisor.tls=true"
- "traefik.http.routers.${STACK_NAME}-cadvisor.tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.routers.${STACK_NAME}-cadvisor.middlewares=basicauth@file"
healthcheck:
test: wget --quiet --tries=1 --spider http://localhost:8080/healthz || exit 1
interval: 15s
timeout: 15s
retries: 5
start_period: 30s
configs:
entrypoint:
name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
file: node-exporter-entrypoint.sh
config_alloy:
template_driver: golang
name: ${STACK_NAME}_config_alloy_${CONFIG_ALLOY_VERSION}
file: config.alloy.tmpl
networks:
proxy:
external: true
internal:
volumes:
alloy-data:
secrets:
basic_auth:
external: true
name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}

36
config.alloy.tmpl Normal file
View File

@ -0,0 +1,36 @@
// Grafana Alloy pipeline: collect host and container metrics locally and
// push them to a remote Prometheus via remote_write.
//
// This file contains Go template actions (env, secret) that must be rendered
// before Alloy parses it; the rendered values end up inside double-quoted
// Alloy strings.

// Alloy's own log output: info level, logfmt encoding.
logging {
level = "info"
format = "logfmt"
}
// Container metrics exporter. No arguments are set, so the component's
// defaults apply.
// NOTE(review): confirm the container running Alloy can reach whatever the
// defaults expect (container runtime socket, cgroup/sysfs paths).
prometheus.exporter.cadvisor "docker" {
}
// Host metrics exporter (node_exporter equivalent).
prometheus.exporter.unix "default" {
// Also expose metrics about the exporter itself.
include_exporter_metrics = true
// Filesystem metrics are read relative to /rootfs — presumably the host root
// bind-mounted into the container; verify against the service's volumes.
rootfs_path = "/rootfs"
}
// Single scrape job covering three target sets, all forwarded to the
// remote_write component below.
prometheus.scrape "default" {
targets = array.concat(
// Static target labelled job="alloy" — presumably Alloy's own HTTP
// endpoint on its default listen address; TODO confirm.
[{
job = "alloy",
__address__ = "127.0.0.1:12345",
}],
prometheus.exporter.unix.default.targets,
prometheus.exporter.cadvisor.docker.targets,
)
forward_to = [prometheus.remote_write.prometheus.receiver]
}
// Ships every scraped sample to the remote Prometheus endpoint.
prometheus.remote_write "prometheus" {
endpoint {
// Filled in at template-render time from PROMETHEUS_REMOTE_WRITE_URL.
url = "{{ env "PROMETHEUS_REMOTE_WRITE_URL" }}"
basic_auth {
username = "admin"
// Filled in at template-render time from the basic_auth secret.
// NOTE(review): the value is inserted verbatim into a quoted string, so a
// secret containing a double quote or backslash would likely break
// parsing — confirm how the secret is generated.
password = "{{ secret "basic_auth" }}"
}
}
}