# File: /usr/lib/netdata/conf.d/health.d/azure_monitor_event_grid.conf
# you can disable an alarm notification by setting the 'to' line to: silent
# --- Publish Failures ---
# Warning-only alert on the rate of failed publish operations to an Event Grid topic.
template: am_event_grid_publish_failures
on: azure_monitor.event_grid.publish_rate
# $this is the average of the `failed` dimension over the last 5 minutes
# (window not aligned to clock boundaries), re-evaluated every minute.
lookup: average -5m unaligned of failed
every: 1m
units: events/s
# Hysteresis: WARNING is raised above 5 events/s and, once raised, is held
# until the 5-minute average is no longer above 0.
warn: $this > (($status >= $WARNING) ? (0) : (5))
# Delay notifications on status decrease by 5m; the delay is multiplied by 1.5
# while the alert keeps changing state, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
# Classification metadata.
class: Errors
type: Messaging
component: Event Grid
# Notification role and texts; ${label:...} placeholders are taken from the chart labels.
to: sysadmin
summary: Event Grid publish failures on ${label:resource_name}
info: Rate of failed event publish operations on Event Grid topic ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Failed publishes indicate authentication, authorization, or schema validation errors
# --- Delivery (failed, dropped, and dead-lettered events) ---
# Warning/critical alert on the rate of failed delivery attempts to subscribers.
template: am_event_grid_delivery_failures
on: azure_monitor.event_grid.delivery
# $this is the average of the `failed` dimension over the last 5 minutes
# (window not aligned to clock boundaries), re-evaluated every minute.
lookup: average -5m unaligned of failed
every: 1m
units: events/s
# Hysteresis on both levels:
#   WARNING  is raised above 5 events/s and held until the average is no longer above 0;
#   CRITICAL is raised above 50 events/s and held until the average is no longer above 5.
warn: $this > (($status >= $WARNING) ? (0) : (5))
crit: $this > (($status == $CRITICAL) ? (5) : (50))
# Delay notifications on status decrease by 5m; the delay is multiplied by 1.5
# while the alert keeps changing state, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
# Classification metadata.
class: Errors
type: Messaging
component: Event Grid
# Notification role and texts; ${label:...} placeholders are taken from the chart labels.
to: sysadmin
summary: Event Grid delivery failures on ${label:resource_name}
info: Rate of failed event delivery attempts on Event Grid topic ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Persistent delivery failures indicate subscriber endpoint issues
# Warning/critical alert on the rate of events dropped by Event Grid.
template: am_event_grid_dropped_events
on: azure_monitor.event_grid.delivery
# $this is the average of the `dropped` dimension over the last 5 minutes
# (window not aligned to clock boundaries), re-evaluated every minute.
lookup: average -5m unaligned of dropped
every: 1m
units: events/s
# Hysteresis on both levels:
#   WARNING  is raised above 1 event/s and held until the average is no longer above 0;
#   CRITICAL is raised above 10 events/s and held until the average is no longer above 1.
warn: $this > (($status >= $WARNING) ? (0) : (1))
crit: $this > (($status == $CRITICAL) ? (1) : (10))
# Delay notifications on status decrease by 5m; the delay is multiplied by 1.5
# while the alert keeps changing state, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
# Classification metadata.
class: Errors
type: Messaging
component: Event Grid
# Notification role and texts; ${label:...} placeholders are taken from the chart labels.
to: sysadmin
summary: Event Grid dropped events on ${label:resource_name}
info: Rate of dropped events on Event Grid topic ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Dropped events are permanently lost and indicate exhausted retry attempts \
without a dead-letter destination configured
# Warning/critical alert on the rate of events sent to the dead-letter destination.
template: am_event_grid_dead_lettered_events
on: azure_monitor.event_grid.delivery
# $this is the average of the `dead_lettered` dimension over the last 5 minutes
# (window not aligned to clock boundaries), re-evaluated every minute.
lookup: average -5m unaligned of dead_lettered
every: 1m
units: events/s
# Hysteresis on both levels:
#   WARNING  is raised above 1 event/s and held until the average is no longer above 0;
#   CRITICAL is raised above 10 events/s and held until the average is no longer above 1.
warn: $this > (($status >= $WARNING) ? (0) : (1))
crit: $this > (($status == $CRITICAL) ? (1) : (10))
# Delay notifications on status decrease by 5m; the delay is multiplied by 1.5
# while the alert keeps changing state, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
# Classification metadata.
class: Errors
type: Messaging
component: Event Grid
# Notification role and texts; ${label:...} placeholders are taken from the chart labels.
to: sysadmin
summary: Event Grid dead-lettered events on ${label:resource_name}
info: Rate of events sent to the dead-letter destination on Event Grid topic ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Dead-lettered events failed all delivery retries and require manual investigation
# --- Routing ---
# Warning-only alert on the rate of events that matched no subscription.
template: am_event_grid_unmatched_events
on: azure_monitor.event_grid.routing
# $this is the average of the `unmatched` dimension over the last 5 minutes
# (window not aligned to clock boundaries), re-evaluated every minute.
lookup: average -5m unaligned of unmatched
every: 1m
units: events/s
# Hysteresis: WARNING is raised above 10 events/s and, once raised, is held
# until the 5-minute average is no longer above 0.
warn: $this > (($status >= $WARNING) ? (0) : (10))
# Delay notifications on status decrease by 5m; the delay is multiplied by 1.5
# while the alert keeps changing state, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
# Classification metadata.
class: Errors
type: Messaging
component: Event Grid
# Notification role and texts; ${label:...} placeholders are taken from the chart labels.
to: sysadmin
summary: Event Grid unmatched events on ${label:resource_name}
info: Rate of events that did not match any subscription on Event Grid topic ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Unmatched events indicate missing or misconfigured event subscriptions
# --- Latency ---
# Warning/critical alert on how long subscriber endpoints take to process events.
template: am_event_grid_destination_processing_duration
on: azure_monitor.event_grid.destination_processing_duration
# $this is the average of the `average` dimension over the last 5 minutes
# (window not aligned to clock boundaries), re-evaluated every minute.
lookup: average -5m unaligned of average
every: 1m
units: milliseconds
# Both expressions first require $this to be a number (not nan), then apply hysteresis:
#   WARNING  is raised above 5000 ms and held until the average is no longer above 3000 ms;
#   CRITICAL is raised above 10000 ms and held until the average is no longer above 5000 ms.
warn: $this != nan AND $this > (($status >= $WARNING) ? (3000) : (5000))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (5000) : (10000))
# Delay notifications on status decrease by 5m; the delay is multiplied by 1.5
# while the alert keeps changing state, capped at 1h.
delay: down 5m multiplier 1.5 max 1h
# Classification metadata.
class: Latency
type: Messaging
component: Event Grid
# Notification role and texts; ${label:...} placeholders are taken from the chart labels.
to: sysadmin
summary: Event Grid destination processing duration on ${label:resource_name}
info: Average time taken by the subscriber endpoint to process events from Event Grid topic ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High processing duration indicates slow or overloaded event subscribers