Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
2972272303
|
|||
|
4c69cf97ab
|
|||
|
1110786179
|
@ -30,6 +30,9 @@ LIVE_DEBUGGING=false
|
||||
# server is remote
|
||||
# PROMETHEUS_REMOTE_WRITE_URL=https://prometheus.$DOMAIN/api/v1/write
|
||||
|
||||
# Monitor physical disks health
|
||||
# COMPOSE_FILE="$COMPOSE_FILE:compose.smartctl.yml"
|
||||
|
||||
# Monitoring Server
|
||||
#
|
||||
## Prometheus
|
||||
|
||||
@ -158,4 +158,10 @@ It is possible to enable the following alerts, by setting the corresponding env
|
||||
- node disk space: `ALERT_NODE_DISK_SPACE_ENABLED`
|
||||
- node memory usage: `ALERT_NODE_MEMORY_USAGE_ENABLED`
|
||||
|
||||
## smart monitoring
|
||||
|
||||
To be able to monitor hard drive health data, you need to configure
|
||||
`smartd` to run on the host system, and also the
|
||||
`collect-smartctl-json.sh` script provided here (via cronjob or as
|
||||
a `smartd` hook). This is a limitation on Docker Swarm, which prevents
|
||||
the `smartctl_exporter` from running in privileged mode.
|
||||
|
||||
6
collect-smartctl-json.service
Normal file
6
collect-smartctl-json.service
Normal file
@ -0,0 +1,6 @@
|
||||
[Unit]
|
||||
Description=Collect SMART data
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/collect-smartctl-json.sh
|
||||
69
collect-smartctl-json.sh
Executable file
69
collect-smartctl-json.sh
Executable file
@ -0,0 +1,69 @@
|
||||
#!/bin/bash
# Adapted from https://github.com/prometheus-community/smartctl_exporter/blob/master/collect-smartctl-json.sh
#
# Dump the JSON output of `smartctl` for each disk into
# ${data_dir}/<device basename>.json.
#
# Usage: collect-smartctl-json.sh [DEVICE...]
#   With no arguments, the devices are discovered via `smartctl --scan`.
#
# Exit status: 1 when a prerequisite is missing (jq/yq, root or sudo), the
# data directory cannot be created, or no device could be determined.
#
# NOTE: `set -e` is deliberately not enabled. smartctl reports disk problems
# through a non-zero exit status even when it printed usable JSON, and that
# output must still be saved.

# Data directory to dump smartctl output
# This directory will be created if it doesn't exist
data_dir="/var/lib/smartmontools/json"

# The original script used --xall but that doesn't work
# This matches the command in readSMARTctl()
smartctl_args=(
  --json --info --health --attributes --tolerance=verypermissive
  --nocheck=standby --format=brief --log=error
)

# Ignore these devices
smartctl_ignore_dev_regex="^(/dev/bus)"

# Determine the json query tool to use
if command -v jq >/dev/null; then
  json_tool="jq"
  json_args="--raw-output"
elif command -v yq >/dev/null; then
  json_tool="yq"
  json_args="--unwrapScalar"
else
  echo "One of 'yq' or 'jq' is required. Please try again after installing one of them" >&2
  exit 1
fi

# smartctl is run through sudo unless we are already root
smartctl_cmd=(smartctl)
if (( UID != 0 )); then
  if ! command -v sudo >/dev/null; then
    # Not root and sudo doesn't exist
    echo "sudo does not exist. Please run this as root" >&2
    exit 1
  fi
  smartctl_cmd=(sudo smartctl)
fi

mkdir --parents -- "${data_dir}" || exit 1

if (( $# > 0 )); then
  # Every argument is a device to query, not only the first one
  devices=("$@")
else
  scan_output="$(smartctl --scan --json | "${json_tool}" "${json_args}" \
    ".devices[].name | select(test(\"${smartctl_ignore_dev_regex}\") | not)")"
  # An empty here-string would make mapfile produce one empty element,
  # so bail out before that happens.
  if [[ -z "${scan_output}" ]]; then
    echo "No devices found by 'smartctl --scan'" >&2
    exit 1
  fi
  mapfile -t devices <<< "${scan_output}"
fi

for device in "${devices[@]}"; do
  # Skip blank entries instead of calling smartctl without a device
  [[ -n "${device}" ]] || continue

  printf "Collecting data for '%s'..." "${device}"
  data="$("${smartctl_cmd[@]}" "${smartctl_args[@]}" "${device}")"
  # Accommodate a smartmontools pre-7.3 bug
  data=${data#" Pending defect count:"}

  device_name="$(basename -- "${device}")"
  printf '\tSaving to %s.json\n' "${device_name}"
  printf '%s\n' "${data}" > "${data_dir}/${device_name}.json"
done
|
||||
9
collect-smartctl-json.timer
Normal file
9
collect-smartctl-json.timer
Normal file
@ -0,0 +1,9 @@
|
||||
[Unit]
|
||||
Description=Collect SMART data
|
||||
|
||||
[Timer]
|
||||
OnCalendar=hourly
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
18
compose.smartctl.yml
Normal file
18
compose.smartctl.yml
Normal file
@ -0,0 +1,18 @@
|
||||
---
|
||||
version: "3.8"
|
||||
services:
|
||||
smartctl:
|
||||
image: "prometheuscommunity/smartctl-exporter:v0.14.0"
|
||||
volumes:
|
||||
- "/dev:/dev"
|
||||
- "/var/lib/smartmontools/json:/debug"
|
||||
command:
|
||||
- "--smartctl.fake-data"
|
||||
- "--smartctl.interval=1h"
|
||||
networks:
|
||||
- "proxy"
|
||||
deploy:
|
||||
labels:
|
||||
- "prometheus.io/scrape=true"
|
||||
- "prometheus.io/port=9633"
|
||||
- "prometheus.io/path=/metrics"
|
||||
Reference in New Issue
Block a user