prometheus: add some alerts

This commit is contained in:
Astro 2022-12-12 02:16:31 +01:00
parent 8f192b81ff
commit fe4deea101
1 changed file with 51 additions and 0 deletions

View File

@ -61,6 +61,57 @@
severity: error
annotations:
summary: service gets oom killed
- alert: disk_free
expr: collectd_df_df_complex{type="free"} < 1024*1024*1024
for: 10m
labels:
severity: warn
annotations:
summary: filesystem has less than 1GB of free space
- alert: load1
expr: node_load1 > 100
for: 1m
labels:
severity: warn
annotations:
summary: high loadavg
- alert: systemd_unit_failed
expr: node_systemd_unit_state{state="failed"} > 0
for: 10m
labels:
severity: error
annotations:
summary: failed systemd units
- name: network
rules:
- alert: load1
expr: collectd_load_0 > 4
for: 1m
labels:
severity: warn
annotations:
summary: high loadavg
- alert: memory_free
expr: collectd_memory{memory="free"} < 4*1024*1024
for: 10m
labels:
severity: warn
annotations:
summary: memory full
- alert: throughput0
expr: increase(collectd_interface_if_octets_0_total[10m]) > 600 * 60 * 1024 * 1024
for: 2h
labels:
severity: warn
annotations:
summary: sustained throughput
- alert: throughput1
expr: increase(collectd_interface_if_octets_1_total[10m]) > 600 * 60 * 1024 * 1024
for: 2h
labels:
severity: warn
annotations:
summary: sustained throughput
''
];