Update compose files for Grafana, Alertmanager, and Prometheus
Some checks are pending
PVE2 Infrastructure Deploy / terraform (push) Waiting to run

This commit is contained in:
admin 2026-02-06 10:20:03 -07:00
parent 232830b06c
commit ba2d7e3fe3
3 changed files with 72 additions and 46 deletions

View file

@ -1,11 +1,30 @@
services: services:
alertmanager: alertmanager:
image: prom/alertmanager:latest image: prom/alertmanager:latest
container_name: alertmanager container_name: alertmanager_node${AM_NODE_ID}
restart: unless-stopped
user: "1000:1000"
ports: ports:
- "9093:9093" - "9093:9093"
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--storage.path=/alertmanager'
volumes: volumes:
- /docker/monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro - /mnt/shared/alertmanager/config:/etc/alertmanager
- /docker/monitoring/alertmanager-db:/alertmanager - /mnt/shared/alertmanager/data:/alertmanager
user: "1000:1000" networks:
restart: unless-stopped - am_external
- am_internal
labels:
- "traefik.enable=true"
- "traefik.docker.network=am_external"
- "traefik.http.routers.alertmanager.rule=Host(`${AM_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}`)"
- "traefik.http.routers.alertmanager.tls=true"
- "traefik.http.routers.alertmanager.tls.certresolver=dns_resolver"
- "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
networks:
am_internal:
driver: bridge
am_external:
external: true

View file

@ -1,28 +1,32 @@
services: services:
grafana: grafana:
image: grafana/grafana-oss:latest image: grafana/grafana-oss:latest
container_name: grafana container_name: grafana_node${GRAFANA_NODE_ID}
user: "1000:1000" user: "1000:1000"
restart: always restart: always
environment: environment:
- GF_SECURITY_ADMIN_USER=${GF_USER_NAME} - GF_SERVER_ROOT_URL=https://${GRAFANA_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}https://grafana.mapletree.email
- GF_SECURITY_ADMIN_PASSWORD=${GF_USER_PASS} - GF_SECURITY_ADMIN_USER=${GRAFANA_USER}
- GF_DATABASE_TYPE=${GF_DB_TYPE} - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASS}
- GF_DATABASE_HOST=${GF_DB_HOST} - GF_USERS_ALLOW_SIGN_UP=false
- GF_DATABASE_PORT=${GF_DB_PORT}
- GF_DATABASE_NAME=${GF_DB_NAME}
- GF_DATABASE_USER=${GF_DB_USER}
- GF_DATABASE_PASSWORD=${GF_DB_PASS}
- GF_DATABASE_MAX_IDLE_CONN=${GF_DB_IDLE}
- GF_DATABASE_CONN_MAX_LIFETIME=${GF_DB_CONN}
- GF_EXTERNAL_IMAGE_STORAGE_PROVIDER=${GF_STOR_PROVIDER}
- GF_EXTERNAL_IMAGE_STORAGE_S3_BUCKET=${GF_S3_BUCKET}
- GF_EXTERNAL_IMAGE_STORAGE_S3_REGION=${GF_S3_REGION}
- GF_EXTERNAL_IMAGE_STORAGE_S3_ENDPOINT=${GF_S3_ENDPOINT}
- GF_EXTERNAL_IMAGE_STORAGE_S3_ACCESS_KEY=${S3_KEY}
- GF_EXTERNAL_IMAGE_STORAGE_S3_SECRET_KEY=${S3_SECRET}
volumes: volumes:
- ./grafana:/etc/grafana - /shared/grafana/data:/var/lib/grafana
- ./grafana-db:/var/lib/grafana
ports: ports:
- "2999:3000" - "2999:3000"
networks:
- grafana_external
- grafana_internal
labels:
- "traefik.enable=true"
- "traefik.docker.network=grafana_external"
- "traefik.http.routers.grafana.rule=Host(`${GRAFANA_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}`)"
- "traefik.http.routers.grafana.entrypoints=websecure"
- "traefik.http.routers.grafana.tls=true"
- "traefik.http.routers.grafana.tls.certresolver=dns_resolver"
- "traefik.http.services.grafana.loadbalancer.server.port=3000"
networks:
grafana_internal:
driver: bridge
grafana_external:
external: true

View file

@ -1,33 +1,36 @@
services: services:
prometheus: prometheus:
image: prom/prometheus:latest image: prom/prometheus:latest
container_name: prometheus container_name: prometheus_node${PROM_HOST_ID}
user: "1000:1000" restart: unless-stopped
user: "1000:1000" # Matches your Gluster permissions
command: command:
- '--config.file=/etc/prometheus/prometheus.yml' - '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus' - '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=2h' # CHANGED: Increased from 2h to 15d since Thanos is gone
- '--storage.tsdb.min-block-duration=2h' - '--storage.tsdb.retention.time=15d'
- '--storage.tsdb.max-block-duration=2h' - '--web.enable-lifecycle' # Keeps API reload capability
- '--web.enable-lifecycle'
volumes: volumes:
- /docker/monitoring/prometheus/config:/etc/prometheus - /docker/monitoring/prometheus/config:/etc/prometheus
- /docker/monitoring/prometheus/data:/prometheus - /docker/monitoring/prometheus/data:/prometheus
ports: ports:
- "9090:9090" - "9090:9090"
networks:
- prometheus_external
- prometheus_internal
extra_hosts:
# Allows Prometheus to scrape the "Agent" on this same node
- "host.docker.internal:host-gateway"
labels:
- "traefik.enable=true"
- "traefik.docker.network=prometheus_external"
- "traefik.http.routers.prom.rule=Host(`${PROM_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}`)"
- "traefik.http.routers.prom.tls=true"
- "traefik.http.routers.prom.tls.certresolver=dns_resolver"
- "traefik.http.services.prom.loadbalancer.server.port=9090"
thanos-sidecar: networks:
image: thanosio/thanos:v0.34.0 prometheus_internal:
container_name: thanos-sidecar driver: bridge
user: "1000:1000" prometheus_external:
environment: external: true
- OBJSTORE_CONFIG={"type":"${THANOS_STOR_TYPE}","config":{"bucket":"THANOS_S3_BUCKET","endpoint":"$THANOS_S3_ENDPOINT","access_key":"${S3_KEY}","secret_key":"${S3_SECRET}","insecure":${THANOS_S3_INSECURE}}}
command:
- 'sidecar'
- '--tsdb.path=/prometheus'
- '--prometheus.url=$PROMETHEUS_URL'
- '--objstore.config=$$(OBJSTORE_CONFIG)'
volumes:
- /docker/monitoring/prometheus/data:/prometheus
ports:
- "10901:10901"