# File: //usr/lib/netdata/conf.d/health.d/azure_monitor_container_apps.conf
# you can disable an alarm notification by setting the 'to' line to: silent
# CPU utilization
# Evaluated every minute against the 5-minute average of the `average`
# dimension of the Container Apps CPU percentage chart.
template: am_container_apps_cpu_utilization
on: azure_monitor.container_apps.cpu_percentage
class: Utilization
type: Containers
component: Container Apps
lookup: average -5m unaligned of average
units: percentage
every: 1m
# Hysteresis: WARNING is raised above 85; once the alert is at WARNING or
# higher, it is held until the value is no longer above 75.
warn: $this > (($status >= $WARNING) ? (75) : (85))
# Hysteresis: CRITICAL is raised above 95; once CRITICAL, it is held until
# the value is no longer above 85.
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps CPU on ${label:resource_name}
info: Average CPU utilization of Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Memory utilization
# Evaluated every minute against the 5-minute average of the `average`
# dimension of the Container Apps memory percentage chart.
template: am_container_apps_memory_utilization
on: azure_monitor.container_apps.memory_percentage
class: Utilization
type: Containers
component: Container Apps
lookup: average -5m unaligned of average
units: percentage
every: 1m
# Hysteresis: WARNING is raised above 85; once the alert is at WARNING or
# higher, it is held until the value is no longer above 75.
warn: $this > (($status >= $WARNING) ? (75) : (85))
# Hysteresis: CRITICAL is raised above 95; once CRITICAL, it is held until
# the value is no longer above 85.
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps memory on ${label:resource_name}
info: Average memory utilization of Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Replica restarts
# Evaluated every minute against the sum of the `restarts` dimension over
# the last 5 minutes.
template: am_container_apps_restarts
on: azure_monitor.container_apps.restart_count
class: Errors
type: Containers
component: Container Apps
lookup: sum -5m unaligned of restarts
units: restarts
every: 1m
# Hysteresis: WARNING is raised above 5 restarts in the window; once at
# WARNING or higher, it is held until the sum is no longer above 3.
warn: $this > (($status >= $WARNING) ? (3) : (5))
# Hysteresis: CRITICAL is raised above 15 restarts in the window; once
# CRITICAL, it is held until the sum is no longer above 10.
crit: $this > (($status == $CRITICAL) ? (10) : (15))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps restarts on ${label:resource_name}
info: Number of replica restarts in the last 5 minutes for Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Response time (latency)
# Evaluated every minute against the 5-minute average of the `average`
# dimension of the response time chart (units declared as milliseconds).
template: am_container_apps_response_time
on: azure_monitor.container_apps.response_time
class: Latency
type: Containers
component: Container Apps
lookup: average -5m unaligned of average
units: milliseconds
every: 1m
# The `$this != nan` guard means the threshold is only applied when the
# lookup produced a value.
# Hysteresis: WARNING is raised above 5000; once at WARNING or higher, it
# is held until the value is no longer above 3000.
warn: $this != nan AND $this > (($status >= $WARNING) ? (3000) : (5000))
# Hysteresis: CRITICAL is raised above 10000; once CRITICAL, it is held
# until the value is no longer above 5000.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (5000) : (10000))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps response time on ${label:resource_name}
info: Average response time of Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Resiliency - timeouts
# Evaluated every minute against a 5-minute sum of the resiliency timeouts
# chart.
template: am_container_apps_resiliency_timeouts
on: azure_monitor.container_apps.resiliency_timeouts
class: Errors
type: Containers
component: Container Apps
# Unlike the other templates in this file, this lookup has no `of` clause,
# so it is not restricted to one named dimension.
# NOTE(review): presumably intentional, so that connection and request
# timeouts are summed together - confirm against the chart's dimensions.
lookup: sum -5m unaligned
units: timeouts
every: 1m
# The `$this != nan` guard means the threshold is only applied when the
# lookup produced a value.
# Hysteresis: WARNING is raised above 5; once at WARNING or higher, it is
# held until the sum is no longer above 3.
warn: $this != nan AND $this > (($status >= $WARNING) ? (3) : (5))
# Hysteresis: CRITICAL is raised above 20; once CRITICAL, it is held until
# the sum is no longer above 10.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (10) : (20))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps resiliency timeouts on ${label:resource_name}
info: Connection and request timeouts in the last 5 minutes for Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Resiliency - retries
# Warning-only alert (no `crit` line), evaluated every minute against the
# sum of the `retries` dimension over the last 5 minutes.
template: am_container_apps_resiliency_retries
on: azure_monitor.container_apps.resiliency_retries
class: Errors
type: Containers
component: Container Apps
lookup: sum -5m unaligned of retries
units: retries
every: 1m
# The `$this != nan` guard means the threshold is only applied when the
# lookup produced a value.
# Hysteresis: WARNING is raised above 20; once raised, it is held until
# the sum is no longer above 10.
warn: $this != nan AND $this > (($status >= $WARNING) ? (10) : (20))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps resiliency retries on ${label:resource_name}
info: Request retries in the last 5 minutes for Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Resiliency - pending connection pool
# Warning-only alert (no `crit` line), evaluated every minute against the
# sum of the `pending` dimension over the last 5 minutes.
template: am_container_apps_pending_connections
on: azure_monitor.container_apps.resiliency_pending_connections
class: Workload
type: Containers
component: Container Apps
lookup: sum -5m unaligned of pending
units: requests
every: 1m
# The `$this != nan` guard means the threshold is only applied when the
# lookup produced a value.
# Hysteresis: WARNING is raised above 100; once raised, it is held until
# the sum is no longer above 50.
warn: $this != nan AND $this > (($status >= $WARNING) ? (50) : (100))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps pending connections on ${label:resource_name}
info: Requests pending in the connection pool in the last 5 minutes for Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Resiliency - host ejections
# Warning-only alert (no `crit` line), evaluated every minute against the
# sum of the `ejected` dimension over the last 5 minutes.
template: am_container_apps_host_ejections
on: azure_monitor.container_apps.resiliency_ejections
class: Errors
type: Containers
component: Container Apps
lookup: sum -5m unaligned of ejected
units: ejections
every: 1m
# The `$this != nan` guard means the threshold is only applied when the
# lookup produced a value.
# Hysteresis: WARNING is raised when the sum is above 1; once raised, it
# is held for as long as the sum stays above 0.
# NOTE(review): a sum of exactly 1 does not raise the alert (1 > 1 is
# false) - confirm that ignoring a single ejection is intended.
warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (1))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps host ejections on ${label:resource_name}
info: Upstream hosts ejected from the load balancing pool in the last 5 minutes for Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Replica count — zero replicas means the app is scaled down or crashed
# Critical-only alert (no `warn` line), evaluated every minute against the
# 5-minute average of the `average` dimension of the replicas chart.
template: am_container_apps_replica_count
on: azure_monitor.container_apps.replicas
class: Availability
type: Containers
component: Container Apps
lookup: average -5m unaligned of average
units: replicas
every: 1m
# No hysteresis here: CRITICAL whenever the lookup produced a value and the
# 5-minute average is below 1. Because the test is on the windowed average,
# it can trigger before the count has been zero for the whole window.
crit: $this != nan AND $this < 1
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps replicas on ${label:resource_name}
info: Average replica count for Container App ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Zero replicas means the app is either scaled to zero or all replicas have crashed
to: sysadmin
# GPU utilization (optional - only present when GPU workload profiles are used)
# Warning-only alert (no `crit` line), evaluated every minute against the
# 5-minute average of the `average` dimension of the GPU utilization chart.
template: am_container_apps_gpu_utilization
on: azure_monitor.container_apps.gpu_utilization
class: Utilization
type: Containers
component: Container Apps
lookup: average -5m unaligned of average
units: percentage
every: 1m
# The `$this != nan` guard means the threshold is only applied when the
# lookup produced a value.
# Hysteresis: WARNING is raised above 95; once raised, it is held until
# the value is no longer above 85.
warn: $this != nan AND $this > (($status >= $WARNING) ? (85) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps GPU on ${label:resource_name}
info: Average GPU utilization of Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# JVM GC duration (optional - only present for Java workloads)
# Evaluated every minute against the sum of the `duration` dimension over
# the last 1 minute (a shorter window than the 5-minute lookups used by the
# other templates in this file).
template: am_container_apps_jvm_gc_duration
on: azure_monitor.container_apps.jvm_gc_duration
class: Latency
type: Containers
component: Container Apps
lookup: sum -1m unaligned of duration
units: milliseconds
every: 1m
# The `$this != nan` guard means the threshold is only applied when the
# lookup produced a value.
# Hysteresis: WARNING is raised above 5000; once at WARNING or higher, it
# is held until the sum is no longer above 3000.
warn: $this != nan AND $this > (($status >= $WARNING) ? (3000) : (5000))
# Hysteresis: CRITICAL is raised above 10000; once CRITICAL, it is held
# until the sum is no longer above 5000.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (5000) : (10000))
delay: down 5m multiplier 1.5 max 1h
summary: Container Apps JVM GC duration on ${label:resource_name}
info: Time spent in JVM garbage collection in the last minute for Container App ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin