---
# Prometheus alerting rules: host-level resource alerts (system.rules) and
# a container restart alert (docker.rules).
groups:
  # Host resource alerts; the group is evaluated every 30s.
  - name: system.rules
    interval: 30s
    rules:
      # CPU busy percentage per instance: 100 minus the average idle-mode
      # rate (as a percentage) over a 5m window. Must stay above 90 for 2m.
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance)(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage on {{ $labels.instance }}"
          description: "CPU usage > 90% for 2 minutes."

      # Used memory percentage: (total - available) / total * 100.
      # Must stay above 85 for 2m.
      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 85
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage on {{ $labels.instance }}"
          description: "Memory usage > 85% for 2 minutes."

      # Available space percentage per filesystem, excluding tmpfs:
      # avail / size * 100. Must stay below 15 for 5m.
      - alert: LowDiskSpace
        expr: (node_filesystem_avail_bytes{fstype!="tmpfs"} / node_filesystem_size_bytes{fstype!="tmpfs"}) * 100 < 15
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space on {{ $labels.instance }} {{ $labels.mountpoint }}"
          description: "Disk space < 15% available for 5 minutes."

  # Container alerts; the group is evaluated every 30s.
  - name: docker.rules
    interval: 30s
    rules:
      # Restart count growth over a 10m window. increase() yields the number
      # of restarts in the window, matching the description below; rate()
      # would yield restarts per second, so "> 3" would almost never fire.
      # NOTE(review): assumes container_restart_count is a monotonically
      # increasing counter - confirm against the exporter that provides it.
      - alert: ContainerRestartingFrequently
        expr: increase(container_restart_count[10m]) > 3
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container restarting frequently: {{ $labels.container_label_com_docker_swarm_service_name }}"
          description: "Container restarted more than 3 times in 10 minutes."