From ba2d7e3fe30bf048a640b5c544c70ffa548e0719 Mon Sep 17 00:00:00 2001
From: admin
Date: Fri, 6 Feb 2026 10:20:03 -0700
Subject: [PATCH] update compose files for grafana alertmanager and prometheus

---
Notes (ignored by git am):

* alertmanager.yml: container renamed to alertmanager_node${AM_NODE_ID};
  explicit --config.file / --storage.path flags; config and data now
  mounted from /mnt/shared/alertmanager; service attached to the
  am_external / am_internal networks and given Traefik router labels.
* grafana.yml: container renamed to grafana_node${GRAFANA_NODE_ID};
  database and S3 image-storage environment settings removed; root URL,
  admin credentials and GF_USERS_ALLOW_SIGN_UP=false set instead; data
  mounted from /shared/grafana/data; networks and Traefik labels added.
  GF_SERVER_ROOT_URL is a single URL built from the same host variables
  as the router Host() rule.
* prometheus.yml: container renamed to prometheus_node${PROM_HOST_ID};
  TSDB retention raised from 2h to 15d and the min/max block-duration
  flags dropped; thanos-sidecar service removed; networks, extra_hosts
  (host.docker.internal -> host-gateway) and Traefik labels added.
* Each new compose file declares one bridge network and one external
  network; the external networks must already exist on the host.

 lxc1/p4-monitor/alertmanager.yml | 29 ++++++++++++++++----
 lxc1/p4-monitor/grafana.yml      | 44 +++++++++++++++++--------------
 lxc1/p4-monitor/prometheus.yml   | 45 +++++++++++++++++---------------
 3 files changed, 72 insertions(+), 46 deletions(-)

diff --git a/lxc1/p4-monitor/alertmanager.yml b/lxc1/p4-monitor/alertmanager.yml
index cf3b1d9..7ae835c 100644
--- a/lxc1/p4-monitor/alertmanager.yml
+++ b/lxc1/p4-monitor/alertmanager.yml
@@ -1,11 +1,30 @@
 services:
   alertmanager:
     image: prom/alertmanager:latest
-    container_name: alertmanager
+    container_name: alertmanager_node${AM_NODE_ID}
+    restart: unless-stopped
+    user: "1000:1000"
     ports:
       - "9093:9093"
+    command:
+      - '--config.file=/etc/alertmanager/config.yml'
+      - '--storage.path=/alertmanager'
     volumes:
-      - /docker/monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
-      - /docker/monitoring/alertmanager-db:/alertmanager
-    user: "1000:1000"
-    restart: unless-stopped
+      - /mnt/shared/alertmanager/config:/etc/alertmanager
+      - /mnt/shared/alertmanager/data:/alertmanager
+    networks:
+      - am_external
+      - am_internal
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=am_external"
+      - "traefik.http.routers.alertmanager.rule=Host(`${AM_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}`)"
+      - "traefik.http.routers.alertmanager.tls=true"
+      - "traefik.http.routers.alertmanager.tls.certresolver=dns_resolver"
+      - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
+
+networks:
+  am_internal:
+    driver: bridge
+  am_external:
+    external: true
\ No newline at end of file
diff --git a/lxc1/p4-monitor/grafana.yml b/lxc1/p4-monitor/grafana.yml
index 05c6ba1..ccbc1c8 100644
--- a/lxc1/p4-monitor/grafana.yml
+++ b/lxc1/p4-monitor/grafana.yml
@@ -1,28 +1,32 @@
 services:
   grafana:
     image: grafana/grafana-oss:latest
-    container_name: grafana
+    container_name: grafana_node${GRAFANA_NODE_ID}
     user: "1000:1000"
     restart: always
     environment:
-      - GF_SECURITY_ADMIN_USER=${GF_USER_NAME}
-      - GF_SECURITY_ADMIN_PASSWORD=${GF_USER_PASS}
-      - GF_DATABASE_TYPE=${GF_DB_TYPE}
-      - GF_DATABASE_HOST=${GF_DB_HOST}
-      - GF_DATABASE_PORT=${GF_DB_PORT}
-      - GF_DATABASE_NAME=${GF_DB_NAME}
-      - GF_DATABASE_USER=${GF_DB_USER}
-      - GF_DATABASE_PASSWORD=${GF_DB_PASS}
-      - GF_DATABASE_MAX_IDLE_CONN=${GF_DB_IDLE}
-      - GF_DATABASE_CONN_MAX_LIFETIME=${GF_DB_CONN}
-      - GF_EXTERNAL_IMAGE_STORAGE_PROVIDER=${GF_STOR_PROVIDER}
-      - GF_EXTERNAL_IMAGE_STORAGE_S3_BUCKET=${GF_S3_BUCKET}
-      - GF_EXTERNAL_IMAGE_STORAGE_S3_REGION=${GF_S3_REGION}
-      - GF_EXTERNAL_IMAGE_STORAGE_S3_ENDPOINT=${GF_S3_ENDPOINT}
-      - GF_EXTERNAL_IMAGE_STORAGE_S3_ACCESS_KEY=${S3_KEY}
-      - GF_EXTERNAL_IMAGE_STORAGE_S3_SECRET_KEY=${S3_SECRET}
+      - GF_SERVER_ROOT_URL=https://${GRAFANA_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}
+      - GF_SECURITY_ADMIN_USER=${GRAFANA_USER}
+      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASS}
+      - GF_USERS_ALLOW_SIGN_UP=false
     volumes:
-      - ./grafana:/etc/grafana
-      - ./grafana-db:/var/lib/grafana
+      - /shared/grafana/data:/var/lib/grafana
     ports:
-      - "2999:3000"
\ No newline at end of file
+      - "2999:3000"
+    networks:
+      - grafana_external
+      - grafana_internal
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=grafana_external"
+      - "traefik.http.routers.grafana.rule=Host(`${GRAFANA_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}`)"
+      - "traefik.http.routers.grafana.entrypoints=websecure"
+      - "traefik.http.routers.grafana.tls=true"
+      - "traefik.http.routers.grafana.tls.certresolver=dns_resolver"
+      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
+
+networks:
+  grafana_internal:
+    driver: bridge
+  grafana_external:
+    external: true
\ No newline at end of file
diff --git a/lxc1/p4-monitor/prometheus.yml b/lxc1/p4-monitor/prometheus.yml
index dfae8fd..81ec556 100644
--- a/lxc1/p4-monitor/prometheus.yml
+++ b/lxc1/p4-monitor/prometheus.yml
@@ -1,33 +1,36 @@
 services:
   prometheus:
     image: prom/prometheus:latest
-    container_name: prometheus
-    user: "1000:1000"
+    container_name: prometheus_node${PROM_HOST_ID}
+    restart: unless-stopped
+    user: "1000:1000"  # Matches your Gluster permissions
     command:
       - '--config.file=/etc/prometheus/prometheus.yml'
       - '--storage.tsdb.path=/prometheus'
-      - '--storage.tsdb.retention.time=2h'
-      - '--storage.tsdb.min-block-duration=2h'
-      - '--storage.tsdb.max-block-duration=2h'
-      - '--web.enable-lifecycle'
+      # CHANGED: Increased from 2h to 15d since Thanos is gone
+      - '--storage.tsdb.retention.time=15d'
+      - '--web.enable-lifecycle'  # Keeps API reload capability
     volumes:
       - /docker/monitoring/prometheus/config:/etc/prometheus
       - /docker/monitoring/prometheus/data:/prometheus
     ports:
       - "9090:9090"
+    networks:
+      - prometheus_external
+      - prometheus_internal
+    extra_hosts:
+      # Allows Prometheus to scrape the "Agent" on this same node
+      - "host.docker.internal:host-gateway"
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=prometheus_external"
+      - "traefik.http.routers.prom.rule=Host(`${PROM_HOST_NAME}.${TRAEFIK_DNS_SUFFIX}`)"
+      - "traefik.http.routers.prom.tls=true"
+      - "traefik.http.routers.prom.tls.certresolver=dns_resolver"
+      - "traefik.http.services.prom.loadbalancer.server.port=9090"
 
-  thanos-sidecar:
-    image: thanosio/thanos:v0.34.0
-    container_name: thanos-sidecar
-    user: "1000:1000"
-    environment:
-      - OBJSTORE_CONFIG={"type":"${THANOS_STOR_TYPE}","config":{"bucket":"THANOS_S3_BUCKET","endpoint":"$THANOS_S3_ENDPOINT","access_key":"${S3_KEY}","secret_key":"${S3_SECRET}","insecure":${THANOS_S3_INSECURE}}}
-    command:
-      - 'sidecar'
-      - '--tsdb.path=/prometheus'
-      - '--prometheus.url=$PROMETHEUS_URL'
-      - '--objstore.config=$$(OBJSTORE_CONFIG)'
-    volumes:
-      - /docker/monitoring/prometheus/data:/prometheus
-    ports:
-      - "10901:10901"
\ No newline at end of file
+networks:
+  prometheus_internal:
+    driver: bridge
+  prometheus_external:
+    external: true
\ No newline at end of file