diff --git a/.env.sample b/.env.sample
index 11f6563..3850f0f 100644
--- a/.env.sample
+++ b/.env.sample
@@ -19,6 +19,7 @@ ENABLE_BACKUPS=true
 
 # ## Prometheus
 # COMPOSE_FILE="$COMPOSE_FILE:compose.prometheus.yml"
+# PROMETHEUS_REMOTE_WRITE_URL=https://prometheus.$DOMAIN/api/v1/write
 # PROMETHEUS_RETENTION_TIME=1y
 
 # ## Prometheus Pushgateway
diff --git a/abra.sh b/abra.sh
index 173b6d9..6bf0f1a 100644
--- a/abra.sh
+++ b/abra.sh
@@ -12,6 +12,7 @@ export PROMETHEUS_YML_VERSION=v2
 export MATRIX_ALERTMANAGER_CONFIG_VERSION=e
 export MATRIX_ALERTMANAGER_ENTRYPOINT_VERSION=a
 export GRAFANA_ALERTS_NODE_VERSION=v1c
+export CONFIG_ALLOY_VERSION=v5
 
 # creates a default prometheus scrape config for a given node
 add_node(){
diff --git a/compose.prometheus.yml b/compose.prometheus.yml
index eb3f5ce..db94c60 100644
--- a/compose.prometheus.yml
+++ b/compose.prometheus.yml
@@ -16,6 +16,7 @@ services:
       - "--web.console.libraries=/usr/share/prometheus/console_libraries"
       - "--web.console.templates=/usr/share/prometheus/consoles"
       - "--storage.tsdb.retention.time=${PROMETHEUS_RETENTION_TIME}"
+      - "--web.enable-remote-write-receiver"
     networks:
       - proxy
       - internal
diff --git a/compose.yml b/compose.yml
index 3383d91..1b6007f 100644
--- a/compose.yml
+++ b/compose.yml
@@ -3,89 +3,45 @@ version: "3.8"
 
 services:
   app:
-    image: prom/node-exporter:v1.10.2
-    user: root
-    environment:
-      - NODE_ID={{.Node.ID}}
-    volumes:
-      - /proc:/host/proc:ro
-      - /sys:/host/sys:ro
-      - /:/rootfs:ro
-      - /etc/hostname:/etc/nodename:ro
-    command:
-      - "--path.sysfs=/host/sys"
-      - "--path.procfs=/host/proc"
-      - "--path.rootfs=/rootfs"
-      - "--collector.textfile.directory=/etc/node-exporter/"
-      - "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)"
-      - "--no-collector.ipvs"
+    image: grafana/alloy:v1.16.1
+    hostname: "${DOMAIN}"
+    # config.alloy.tmpl reads this via {{ env "..." }}, which is resolved from the service environment.
+    environment:
+      - PROMETHEUS_REMOTE_WRITE_URL
     configs:
-      - source: entrypoint
-        target: /entrypoint.sh
+      - source: config_alloy
+        target: /etc/alloy/config.alloy
+    volumes:
+      # NOTE(review): prometheus.exporter.cadvisor presumably also needs the Docker socket, /sys and /var/lib/docker mounted to see containers - confirm.
+      - alloy-data:/var/lib/alloy/data
+      - /:/rootfs:ro
+    command:
+      - "run"
+      - "--storage.path=/var/lib/alloy/data"
+      - "/etc/alloy/config.alloy"
     networks:
       - internal
-      - proxy
-    entrypoint: [ "/bin/sh", "-e", "/entrypoint.sh" ]
+    secrets:
+      - basic_auth
     deploy:
       restart_policy:
         condition: on-failure
       labels:
         - "backupbot.backup=${ENABLE_BACKUPS:-true}"
-        - "traefik.enable=true"
-        - "traefik.docker.network=proxy"
-        - "traefik.http.services.${STACK_NAME}-node.loadbalancer.server.port=9100"
-        - "traefik.http.routers.${STACK_NAME}-node.rule=Host(`node.${DOMAIN}`)"
-        - "traefik.http.routers.${STACK_NAME}-node.entrypoints=web-secure"
-        - "traefik.http.routers.${STACK_NAME}-node.tls=true"
-        - "traefik.http.routers.${STACK_NAME}-node.tls.certresolver=${LETS_ENCRYPT_ENV}"
-        - "traefik.http.routers.${STACK_NAME}-node.middlewares=basicauth@file"
+        - "traefik.enable=false"
         - "coop-cloud.${STACK_NAME}.version=1.6.0+v1.8.1"
-        - "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT}"
-
-  cadvisor:
-    image: gcr.io/cadvisor/cadvisor:v0.55.1
-    command:
-      - "-logtostderr"
-      - "--enable_metrics=cpu,cpuLoad,disk,diskIO,process,memory,network"
-      # all possible metrics: advtcp,app,cpu,cpuLoad,cpu_topology,cpuset,disk,diskIO,hugetlb,memory,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp.
-      - "--housekeeping_interval=120s"
-      - "--docker_only=true"
-    volumes:
-      - /var/lib/docker/:/var/lib/docker:ro
-      - /dev/disk/:/dev/disk:ro
-      - /sys:/sys:ro
-      - /var/run:/var/run:ro
-      - /:/rootfs:ro
-    networks:
-      - internal
-      - proxy
-    deploy:
-      restart_policy:
-        condition: on-failure
-      labels:
-        - "traefik.enable=true"
-        - "traefik.docker.network=proxy"
-        - "traefik.http.services.${STACK_NAME}-cadvisor.loadbalancer.server.port=8080"
-        - "traefik.http.routers.${STACK_NAME}-cadvisor.rule=Host(`cadvisor.${DOMAIN}`)"
-        - "traefik.http.routers.${STACK_NAME}-cadvisor.entrypoints=web-secure"
-        - "traefik.http.routers.${STACK_NAME}-cadvisor.tls=true"
-        - "traefik.http.routers.${STACK_NAME}-cadvisor.tls.certresolver=${LETS_ENCRYPT_ENV}"
-        - "traefik.http.routers.${STACK_NAME}-cadvisor.middlewares=basicauth@file"
-    healthcheck:
-      test: wget --quiet --tries=1 --spider http://localhost:8080/healthz || exit 1
-      interval: 15s
-      timeout: 15s
-      retries: 5
-      start_period: 30s
-
 configs:
-  entrypoint:
-    name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION}
-    file: node-exporter-entrypoint.sh
-
-
-
+  config_alloy:
+    template_driver: golang
+    name: ${STACK_NAME}_config_alloy_${CONFIG_ALLOY_VERSION}
+    file: config.alloy.tmpl
 networks:
   proxy:
     external: true
   internal:
+volumes:
+  alloy-data:
+secrets:
+  basic_auth:
+    external: true
+    name: ${STACK_NAME}_basic_auth_${SECRET_BASIC_AUTH_VERSION}
diff --git a/config.alloy.tmpl b/config.alloy.tmpl
new file mode 100644
index 0000000..cc14836
--- /dev/null
+++ b/config.alloy.tmpl
@@ -0,0 +1,36 @@
+logging {
+  level = "info"
+  format = "logfmt"
+}
+
+prometheus.exporter.cadvisor "docker" {
+}
+
+prometheus.exporter.unix "default" {
+  include_exporter_metrics = true
+  rootfs_path = "/rootfs"
+}
+
+prometheus.scrape "default" {
+  targets = array.concat(
+    [{
+      job = "alloy",
+      __address__ = "127.0.0.1:12345",
+    }],
+    prometheus.exporter.unix.default.targets,
+    prometheus.exporter.cadvisor.docker.targets,
+  )
+
+  forward_to = [prometheus.remote_write.prometheus.receiver]
+}
+
+prometheus.remote_write "prometheus" {
+  endpoint {
+    url = "{{ env "PROMETHEUS_REMOTE_WRITE_URL" }}"
+
+    basic_auth {
+      username = "admin"
+      password = "{{ secret "basic_auth" }}"
+    }
+  }
+}