Review notes (text placed before the first "diff --git" header is not part of any hunk):
- config.alloy.tmpl: livedebugging.enabled now always renders a literal true/false.
  Previously it rendered `enabled = ` (invalid Alloy syntax) whenever
  LIVE_DEBUGGING was unset or empty for the service; only "true" enables it.
- config.alloy.tmpl: the opt-in comment now matches the relabel rules below it
  (Swarm service labels, "proxy" network only, basic and bearer auth).
- NOTE(review): prometheus.scrape "default" also receives
  discovery.docker.containers.targets, which do not pass through the
  prometheus.io/scrape opt-in filter -- confirm this is intended.
- NOTE(review): leading whitespace and blank context lines were reconstructed
  from a whitespace-collapsed copy of this patch; verify context before applying.

diff --git a/.env.sample b/.env.sample
index b68f7c4..6689006 100644
--- a/.env.sample
+++ b/.env.sample
@@ -6,13 +6,23 @@ DOMAIN=monitoring-ng.example.com
 ENABLE_BACKUPS=true
 SECRET_BASIC_AUTH_VERSION=v1
 
+# Enable Alloy live debugging (true/false)
+LIVE_DEBUGGING=false
 # Enable this to send logs to a Loki server, adapt DOMAIN if server is
 # remote
 # LOKI_PUSH_URL=https://loki.$DOMAIN/loki/api/v1/push
 
-# Enable this on SystemD hosts to read logs
+# Enable on systemd hosts to read logs from the journal
 # JOURNALD=1
-# Enable this on syslogd hosts and configure the syslogd to send logs to
-# Alloy on port 514/tcp
+#
+# Enable on non-systemd hosts (Alpine, older Debian/Ubuntu) to tail
+# /var/log/*log files (syslog, auth.log, kern.log, etc.) that a local
+# syslogd writes. No syslogd reconfiguration needed.
+# SYSLOG_FILES=1
+#
+# Enable to receive syslog messages over the network on port 514/tcp.
+# Use for remote devices that push syslog to this host, or for a
+# local syslogd configured to forward over the network.
+# Not needed if you just want to read local log files — use SYSLOG_FILES instead.
 # SYSLOG=1
 # COMPOSE_FILE="$COMPOSE_FILE:compose.syslog.yml"
diff --git a/abra.sh b/abra.sh
index cd2c005..86c77c9 100644
--- a/abra.sh
+++ b/abra.sh
@@ -11,7 +11,7 @@ export PROMETHEUS_YML_VERSION=v2
 export MATRIX_ALERTMANAGER_CONFIG_VERSION=e
 export MATRIX_ALERTMANAGER_ENTRYPOINT_VERSION=a
 export GRAFANA_ALERTS_NODE_VERSION=v1c
-export CONFIG_ALLOY_VERSION=v9
+export CONFIG_ALLOY_VERSION=v10
 
 # creates a default prometheus scrape config for a given node
 add_node(){
diff --git a/compose.yml b/compose.yml
index b5f4fa1..5e77608 100644
--- a/compose.yml
+++ b/compose.yml
@@ -10,17 +10,17 @@ services:
         target: /etc/alloy/config.alloy
     volumes:
       - /:/rootfs:ro
-      - /var/run:/var/run:rw
-      - /var/run/docker.sock:/var/run/docker.sock
+      - /var/run/docker.sock:/var/run/docker.sock:ro
       - /sys:/sys:ro
       - /var/lib/docker:/var/lib/docker:ro
-      - /dev:/dev:ro
       - alloy-data:/var/lib/alloy/data
     command:
       - "run"
       - "--storage.path=/var/lib/alloy/data"
+      - "--server.http.listen-addr=0.0.0.0:12345"
       - "/etc/alloy/config.alloy"
     networks:
+      - proxy
       - internal
     secrets:
       - basic_auth
@@ -29,8 +29,15 @@ services:
         condition: on-failure
       labels:
         - "backupbot.backup=${ENABLE_BACKUPS:-true}"
-        - "traefik.enable=false"
         - "coop-cloud.${STACK_NAME}.version=1.6.0+v1.8.1"
+        - "traefik.enable=true"
+        - "traefik.swarm.network=proxy"
+        - "traefik.http.services.${STACK_NAME}-alloy.loadbalancer.server.port=12345"
+        - "traefik.http.routers.${STACK_NAME}-alloy.rule=Host(`alloy.${DOMAIN}`)"
+        - "traefik.http.routers.${STACK_NAME}-alloy.entrypoints=web-secure"
+        - "traefik.http.routers.${STACK_NAME}-alloy.tls=true"
+        - "traefik.http.routers.${STACK_NAME}-alloy.tls.certresolver=${LETS_ENCRYPT_ENV}"
+        - "traefik.http.routers.${STACK_NAME}-alloy.middlewares=basicauth@file"
 configs:
   config_alloy:
     template_driver: golang
diff --git a/config.alloy.tmpl b/config.alloy.tmpl
index 2083ce6..33a8258 100644
--- a/config.alloy.tmpl
+++ b/config.alloy.tmpl
@@ -3,27 +3,48 @@ logging {
   format = "logfmt"
 }
 
+livedebugging {
+  enabled = {{ if eq (env "LIVE_DEBUGGING") "true" }}true{{ else }}false{{ end }}
+}
+
 discovery.docker "linux" {
   host = "unix:///var/run/docker.sock"
 }
 
 {{ if ne (env "PROMETHEUS_REMOTE_WRITE_URL") "" }}
 prometheus.exporter.cadvisor "docker" {
+  docker_only = true
+  enabled_metrics = ["cpu", "cpuLoad", "disk", "diskIO", "memory", "network", "process"]
 }
 
 prometheus.exporter.unix "default" {
   include_exporter_metrics = true
   rootfs_path = "/rootfs"
+  procfs_path = "/rootfs/proc"
+  sysfs_path = "/rootfs/sys"
+
+  disable_collectors = ["ipvs"]
+
+  filesystem {
+    fs_types_exclude = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
+    mount_points_exclude = "^/(sys|proc|dev|host|etc)($|/)"
+    mount_timeout = "5s"
+  }
+
+  netclass { ignored_devices = "^(veth.*)$" }
+  netdev { device_exclude = "^(veth.*)$" }
 }
 
+prometheus.exporter.self "alloy" {}
+
 prometheus.scrape "default" {
+  scrape_interval = "120s"
+
   targets = array.concat(
-    [{
-      job = "alloy",
-      __address__ = "127.0.0.1:12345",
-    }],
+    prometheus.exporter.self.alloy.targets,
     prometheus.exporter.unix.default.targets,
     prometheus.exporter.cadvisor.docker.targets,
+    discovery.docker.containers.targets,
   )
 
   forward_to = [prometheus.remote_write.prometheus.receiver]
@@ -39,24 +60,176 @@ prometheus.remote_write "prometheus" {
     }
   }
 }
+
+discovery.docker "containers" {
+  host = "unix:///var/run/docker.sock"
+  match_first_network = false
+}
+
+// Scrape Prometheus metrics from Swarm services attached to the "proxy" network.
+// Services opt in via Swarm service labels:
+//   prometheus.io/scrape=true        required: enable scraping
+//   prometheus.io/port=9090          optional: port exposing /metrics (if unset, the discovered address is used unchanged)
+//   prometheus.io/path=/metrics      optional: path to metrics endpoint (default: /metrics)
+//   prometheus.io/auth=basic|bearer  optional: authenticate using the shared basic_auth secret (any other non-empty value drops the target)
+discovery.dockerswarm "swarm" {
+  host = "unix:///var/run/docker.sock"
+  role = "services"
+}
+
+discovery.relabel "metrics" {
+  targets = discovery.dockerswarm.swarm.targets
+
+  rule {
+    source_labels = ["__meta_dockerswarm_network_name"]
+    regex = "proxy"
+    action = "keep"
+  }
+
+  rule {
+    source_labels = ["__meta_dockerswarm_service_label_prometheus_io_scrape"]
+    regex = "true"
+    action = "keep"
+  }
+
+  rule {
+    source_labels = ["__address__", "__meta_dockerswarm_service_label_prometheus_io_port"]
+    regex = `(.+):\d+;(\d+)`
+    target_label = "__address__"
+    replacement = "$1:$2"
+  }
+
+  rule {
+    source_labels = ["__meta_dockerswarm_service_label_prometheus_io_path"]
+    regex = `(.+)`
+    target_label = "__metrics_path__"
+  }
+
+  rule {
+    source_labels = ["__meta_dockerswarm_service_name"]
+    target_label = "job"
+  }
+}
+
+discovery.relabel "metrics_noauth" {
+  targets = discovery.relabel.metrics.output
+  rule {
+    source_labels = ["__meta_dockerswarm_service_label_prometheus_io_auth"]
+    regex = "^$"
+    action = "keep"
+  }
+}
+
+discovery.relabel "metrics_basicauth" {
+  targets = discovery.relabel.metrics.output
+  rule {
+    source_labels = ["__meta_dockerswarm_service_label_prometheus_io_auth"]
+    regex = "basic"
+    action = "keep"
+  }
+}
+
+discovery.relabel "metrics_bearerauth" {
+  targets = discovery.relabel.metrics.output
+  rule {
+    source_labels = ["__meta_dockerswarm_service_label_prometheus_io_auth"]
+    regex = "bearer"
+    action = "keep"
+  }
+}
+
+prometheus.scrape "containers" {
+  scrape_interval = "120s"
+  targets = discovery.relabel.metrics_noauth.output
+  forward_to = [prometheus.remote_write.prometheus.receiver]
+}
+
+prometheus.scrape "containers_basicauth" {
+  scrape_interval = "120s"
+  targets = discovery.relabel.metrics_basicauth.output
+  forward_to = [prometheus.remote_write.prometheus.receiver]
+  basic_auth {
+    username = "admin"
+    password = "{{ secret "basic_auth" }}"
+  }
+}
+
+prometheus.scrape "containers_bearerauth" {
+  scrape_interval = "120s"
+  targets = discovery.relabel.metrics_bearerauth.output
+  forward_to = [prometheus.remote_write.prometheus.receiver]
+  bearer_token = "{{ secret "basic_auth" }}"
+}
 {{ end }}
 
 {{ if ne (env "LOKI_PUSH_URL") "" }}
+discovery.relabel "docker" {
+  targets = discovery.docker.linux.targets
+
+  rule {
+    source_labels = ["__meta_docker_container_name"]
+    target_label = "container_name"
+  }
+  rule {
+    source_labels = ["__meta_docker_container_id"]
+    target_label = "container_id"
+  }
+  rule {
+    source_labels = ["__meta_docker_container_label_com_docker_stack_namespace"]
+    target_label = "stack_namespace"
+  }
+  rule {
+    source_labels = ["__meta_docker_container_label_com_docker_swarm_service_name"]
+    target_label = "service_name"
+  }
+  rule {
+    source_labels = ["__meta_docker_container_log_stream"]
+    target_label = "stream"
+  }
+}
+
 loki.source.docker "docker" {
   host = "unix:///var/run/docker.sock"
-  targets = discovery.docker.linux.targets
+  targets = discovery.relabel.docker.output
   labels = {"app" = "docker"}
   forward_to = [loki.write.loki.receiver]
 }
 
+// JOURNALD: reads the systemd journal binary log directly.
+// Use on systemd hosts (most modern Linux distros). Requires no syslogd.
 {{ if eq (env "JOURNALD") "1" }}
 loki.source.journal "journal" {
-  path = "/var/log/journal"
+  path = "/rootfs/var/log/journal"
   labels = { job = "{{ env "DOMAIN" }}" }
   forward_to = [loki.write.loki.receiver]
 }
 {{ end }}
 
+// SYSLOG_FILES: tails all /var/log/*log files (syslog, auth.log, kern.log, etc.).
+// Use on non-systemd hosts where a syslogd writes to /var/log.
+{{ if eq (env "SYSLOG_FILES") "1" }}
+local.file_match "syslog_files" {
+  path_targets = [{ __path__ = "/rootfs/var/log/*log" }]
+}
+
+loki.source.file "syslog_files" {
+  targets = local.file_match.syslog_files.targets
+  forward_to = [loki.process.syslog_files.receiver]
+}
+
+loki.process "syslog_files" {
+  stage.static_labels {
+    values = { job = "syslog" }
+  }
+  forward_to = [loki.write.loki.receiver]
+}
+{{ end }}
+
+// SYSLOG: opens a network syslog listener on port 514.
+// Use when a remote device or a local syslogd configured to
+// forward over the network sends logs to this host.
+// Requires compose.syslog.yml to publish port 514 to the host.
+// This is NOT needed for reading local log files — use SYSLOG_FILES instead.
 {{ if eq (env "SYSLOG") "1" }}
 loki.relabel "syslog" {
   rule {
@@ -69,7 +242,7 @@ loki.relabel "syslog" {
 
 loki.source.syslog "syslog" {
   listener {
-    address = "[::1]:514"
+    address = "[::]:514"
     label_structured_data = true
     labels = { component = "loki.source.syslog" }
   }
@@ -88,5 +261,6 @@ loki.write "loki" {
       password = "{{ secret "basic_auth" }}"
     }
   }
+  external_labels = { hostname = "{{ env "DOMAIN" }}" }
 }
 {{ end }}