Skip to content
Snippets Groups Projects
Commit bc080a3f authored by Ben Kochie
Browse files

Update githost alerting rules

* Use Prometheus standard CamelCase for alert names.
* Use consistent annotations (`summary`, `description`, `runbook`)
* Use consistent formatting, trailing commas, etc.
* Add `NodeFilesystemFullSoon` alert using `predict_linear()`
  - Two separate alerts for slow and fast disk filling issues.
  - Uses 1:4 ratio for history to prediction to avoid noise.
* Add `NodeFilesystemReadOnly` to detect broken systems.
* Drop warning alerts.
parent 7adb7b68
No related branches found
No related tags found
No related merge requests found
# Legacy warning-level alert: fires when the available space on an ext4/xfs
# filesystem has stayed below 20% but above 10% for one minute.
# NOTE(review): the commit message says warning alerts are dropped, so this is
# presumably the removed side of the diff -- confirm before keeping it.
ALERT instance_20_disk
IF ((node_filesystem_avail{fstype=~"ext4|xfs"} / node_filesystem_size{fstype=~"ext4|xfs"}) * 100) < 20 AND ((node_filesystem_avail{fstype=~"ext4|xfs"} / node_filesystem_size{fstype=~"ext4|xfs"}) * 100) > 10
FOR 1m
LABELS { severity = "warn" }
# Slow-fill detection: fit a linear trend to the last 6 hours of
# node_filesystem_avail samples and extrapolate it 24 hours ahead
# (24*60*60 seconds). The alert fires once the extrapolated value has been
# below zero for a full hour.
ALERT NodeFilesystemFullSoon
  IF predict_linear(node_filesystem_avail{fstype=~"ext.|xfs"}[6h], 24*60*60) < 0
  FOR 1h
  LABELS {
    severity = "crit",
  }
  ANNOTATIONS {
    summary = "Filesystem is filling up",
    description = "Filesystem '{{ $labels.mountpoint }}' on {{ $labels.instance }} is filling up and will be full in the next 24 hours",
    runbook = "troubleshooting/filesystem_alerts.md",
  }
# Fast-fill detection: fit a linear trend to the last 1 hour of
# node_filesystem_avail samples and extrapolate it 4 hours ahead
# (4*60*60 seconds). The alert fires once the extrapolated value has been
# below zero for 10 minutes.
#
# Only the `summary`, `description` and `runbook` annotations are kept. The
# leftover `title` / empty `description` / duplicate `runbook` lines from the
# previous rule version were removed: they produced duplicate annotation keys,
# and the old `title` rendered $value as a percentage even though this
# expression yields a predicted byte count.
ALERT NodeFilesystemFullSoon
  IF predict_linear(node_filesystem_avail{fstype=~"ext.|xfs"}[1h], 4*60*60) < 0
  FOR 10m
  LABELS {
    severity = "crit",
  }
  ANNOTATIONS {
    summary = "Filesystem is filling up",
    description = "Filesystem '{{ $labels.mountpoint }}' on {{ $labels.instance }} is filling up and will be full in the next 4 hours",
    runbook = "troubleshooting/filesystem_alerts.md",
  }
 
# Fires when the available space on an ext4/xfs filesystem has been at or
# below 10% of its size for one minute. $value is that available percentage.
#
# The stale `ALERT instance_10_disk` header, the duplicate one-line LABELS
# clause and the old `title`-based ANNOTATIONS body were removed: together they
# gave one rule two names, two LABELS clauses and an annotation block that was
# closed before its real contents.
ALERT NodeFilesystemFull
  IF ((node_filesystem_avail{fstype=~"ext4|xfs"} / node_filesystem_size{fstype=~"ext4|xfs"}) * 100) <= 10
  FOR 1m
  LABELS {
    severity = "crit",
  }
  ANNOTATIONS {
    summary = "Filesystem is full",
    description = "Filesystem '{{ $labels.mountpoint }}' on {{ $labels.instance }} is full: {{ $value | humanize }}%",
    runbook = "troubleshooting/filesystem_alerts.md",
  }
 
# Fires when node_filesystem_readonly has reported 1 for an ext4/xfs
# filesystem for five minutes.
#
# The orphaned `ALERT down` header that preceded this rule was removed; it had
# no IF clause of its own and made the rule unparseable.
ALERT NodeFilesystemReadOnly
  IF node_filesystem_readonly{fstype=~"ext4|xfs"} == 1
  FOR 5m
  LABELS {
    severity = "crit",
  }
  ANNOTATIONS {
    summary = "Filesystem is read-only",
    description = "Filesystem '{{ $labels.mountpoint }}' on {{ $labels.instance }} is read-only",
    runbook = "troubleshooting/filesystem_alerts.md",
  }
# Fires when the `up` series for a scrape target has been 0 for five minutes.
#
# The duplicate one-line LABELS clause, the stale `runbook` line with its
# premature closing brace, and the literal "\ No newline at end of file" diff
# marker were removed so the rule has exactly one LABELS clause and one
# well-formed ANNOTATIONS block.
ALERT InstanceDown
  IF up == 0
  FOR 5m
  LABELS {
    severity = "crit",
  }
  ANNOTATIONS {
    summary = "Instance {{ $labels.instance }} is down.",
    description = "{{ $labels.instance }} is reporting as down, prometheus cannot reach the exporter.",
    runbook = "troubleshooting/instance_down.md",
  }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment