Skip to content
Snippets Groups Projects
Commit 8506f7ae authored by Pablo Carranza's avatar Pablo Carranza
Browse files

Add alerts for fe workers and the rest of the NFS servers

parent 99da3ad7
No related branches found
No related tags found
1 merge request: !241 Add alerts for fe workers and the rest of the NFS servers
## gitlab.com
# NOTE(review): this rule uses the same fqdn selector as GitNFSServerDown below
# but has no FOR, LABELS or ANNOTATIONS clause, so it would fire on the first
# failed scrape and carry no severity/pager labels. It looks like the
# pre-rename version of GitNFSServerDown left over from a diff view — confirm
# and remove if so.
ALERT NFSServerDown
IF up{fqdn=~"nfs-file[0-9]+.stor.gitlab.com"} == 0
# Fires when `up` is 0 for a host whose fqdn matches nfs-file<N>.stor.gitlab.com.
# min(...) without (job) collapses the per-job `up` series for a host into one,
# so the alert triggers if any scrape job for that host reports it down.
# The condition must hold for 2 minutes before the alert fires.
# Dots in the fqdn pattern are written as [.] so they match a literal dot only
# (a bare `.` matches any character).
ALERT GitNFSServerDown
  IF min(up{fqdn=~"nfs-file[0-9]+[.]stor[.]gitlab[.]com"}) without (job) == 0
  FOR 2m
  LABELS {severity="critical", pager="pagerduty"}
  ANNOTATIONS {
    title="Git NFS Server {{$labels.instance}} is down",
    description="One of git NFS servers is down, this locks the whole application and causes downtime",
    runbook="troubleshooting/nfs-server.md"
  }
# Fires when `up` is 0 for a host whose fqdn matches
# nfs-(uploads|share|lfs|artifacts)<N>.stor.gitlab.com.
# min(...) without (job) collapses the per-job `up` series for a host into one,
# so the alert triggers if any scrape job for that host reports it down.
# The condition must hold for 2 minutes before the alert fires.
# Dots in the fqdn pattern are written as [.] so they match a literal dot only
# (a bare `.` matches any character).
ALERT FileNFSServerDown
  IF min(up{fqdn=~"nfs-(uploads|share|lfs|artifacts)[0-9]+[.]stor[.]gitlab[.]com"}) without (job) == 0
  FOR 2m
  LABELS {severity="critical", pager="pagerduty"}
  ANNOTATIONS {
    title="File NFS Server {{$labels.instance}} is down",
    description="File NFS server is down, this locks the whole application and causes downtime",
    runbook="troubleshooting/nfs-server.md"
  }
# Fires when `up` is 0 for a host whose fqdn matches the worker pattern below,
# sustained for 5 minutes. min(...) without (job) collapses the per-job `up`
# series for a host into one.
# NOTE(review): in "(web|ssh|api|kiq)*.*" the `*` after the group makes the
# role list optional, so the pattern is effectively "worker-.*" and matches
# every host whose fqdn starts with "worker-". If only the listed roles are
# meant, the `*` after the group should go — confirm against real hostnames
# before narrowing, since that would change which hosts page.
ALERT FrontEndWorkerDown
  IF min(up{fqdn=~"worker-(web|ssh|api|kiq)*.*"}) without (job) == 0
  FOR 5m
  LABELS {
    severity="critical",
    pager="pagerduty"
  }
  ANNOTATIONS {
    title="Front end worker {{$labels.instance}} host is down",
    description="One of front end web workers is down for more than 5 minutes, this results in a degraded performance of GitLab.com",
    runbook="troubleshooting/worker-down.md"
  }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment