# File: /usr/lib/netdata/conf.d/health.d/azure_monitor_data_explorer.conf
# you can disable an alarm notification by setting the 'to' line to: silent
# --- Cluster Health ---
# Availability alert on the cluster keep-alive signal.
# Evaluated every 1m against the 5-minute average of the `average` dimension.
template: am_data_explorer_keep_alive
on: azure_monitor.data_explorer.keep_alive
class: Availability
type: Database
component: Data Explorer
lookup: average -5m unaligned of average
units: count
every: 1m
# Hysteresis: CRITICAL is raised when the value falls below 0.5 and, once
# raised, is held until the value is back to at least 1.
crit: $this < (($status == $CRITICAL) ? (1) : (0.5))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer keep alive on ${label:resource_name}
info: Cluster keep-alive health signal for Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
A value below 1 indicates the cluster is not responding properly.
to: sysadmin
# Utilization alert on cluster CPU.
# Evaluated every 1m against the 5-minute average of the `average` dimension.
template: am_data_explorer_cpu
on: azure_monitor.data_explorer.cpu_utilization
class: Utilization
type: Database
component: Data Explorer
lookup: average -5m unaligned of average
units: percentage
every: 1m
# Hysteresis: WARNING is raised above 80 and held while the value stays above 70.
warn: $this > (($status >= $WARNING) ? (70) : (80))
# Hysteresis: CRITICAL is raised above 95 and held while the value stays above 80.
crit: $this > (($status == $CRITICAL) ? (80) : (95))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer CPU on ${label:resource_name}
info: Average CPU utilization of Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Utilization alert on ingestion capacity.
# Evaluated every 1m against the 5-minute average of the `ingestion` dimension
# of the shared `utilization` chart.
template: am_data_explorer_ingestion_utilization
on: azure_monitor.data_explorer.utilization
class: Utilization
type: Database
component: Data Explorer
lookup: average -5m unaligned of ingestion
units: percentage
every: 1m
# Hysteresis: WARNING is raised above 80 and held while the value stays above 70.
warn: $this > (($status >= $WARNING) ? (70) : (80))
# Hysteresis: CRITICAL is raised above 95 and held while the value stays above 80.
crit: $this > (($status == $CRITICAL) ? (80) : (95))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer ingestion utilization on ${label:resource_name}
info: Average ingestion utilization of Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High utilization indicates the cluster is approaching ingestion capacity.
to: sysadmin
# Utilization alert on the cluster cache.
# Evaluated every 1m against the 5-minute average of the `cache` dimension
# of the shared `utilization` chart.
template: am_data_explorer_cache_utilization
on: azure_monitor.data_explorer.utilization
class: Utilization
type: Database
component: Data Explorer
lookup: average -5m unaligned of cache
units: percentage
every: 1m
# Hysteresis: WARNING is raised above 80 and held while the value stays above 75.
warn: $this > (($status >= $WARNING) ? (75) : (80))
# Hysteresis: CRITICAL is raised above 95 and held while the value stays above 85.
crit: $this > (($status == $CRITICAL) ? (85) : (95))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer cache utilization on ${label:resource_name}
info: Average cache utilization factor of Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High cache utilization may cause queries to read from cold storage.
to: sysadmin
# Error alert on throttled commands.
# Evaluated every 1m against the 5-minute average of the `total` dimension.
template: am_data_explorer_throttled_commands
on: azure_monitor.data_explorer.throttled_commands
class: Errors
type: Database
component: Data Explorer
lookup: average -5m unaligned of total
units: commands/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 20 and held while the value stays above 5.
crit: $this > (($status == $CRITICAL) ? (5) : (20))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer throttled commands on ${label:resource_name}
info: Rate of throttled commands on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Throttling indicates the cluster is overloaded.
to: sysadmin
# --- Query Performance ---
# Latency alert on query duration (thresholds in milliseconds).
# Evaluated every 1m against the 5-minute average of the `average` dimension.
template: am_data_explorer_query_duration
on: azure_monitor.data_explorer.query_duration
class: Latency
type: Database
component: Data Explorer
lookup: average -5m unaligned of average
units: milliseconds
every: 1m
# Hysteresis: WARNING is raised above 30000 and held while the value stays above 15000.
warn: $this > (($status >= $WARNING) ? (15000) : (30000))
# Hysteresis: CRITICAL is raised above 60000 and held while the value stays above 30000.
crit: $this > (($status == $CRITICAL) ? (30000) : (60000))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer query duration on ${label:resource_name}
info: Average query duration on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Error alert on throttled queries.
# Evaluated every 1m against the 5-minute average of the `total` dimension.
template: am_data_explorer_throttled_queries
on: azure_monitor.data_explorer.throttled_queries
class: Errors
type: Database
component: Data Explorer
lookup: average -5m unaligned of total
units: queries/s
every: 1m
# Hysteresis: WARNING is raised above 5 and held while the value stays above 1.
warn: $this > (($status >= $WARNING) ? (1) : (5))
# Hysteresis: CRITICAL is raised above 20 and held while the value stays above 5.
crit: $this > (($status == $CRITICAL) ? (5) : (20))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer throttled queries on ${label:resource_name}
info: Rate of throttled queries on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Throttling indicates insufficient query capacity.
to: sysadmin
# --- Ingestion Health ---
# Latency alert on ingestion (thresholds in seconds).
# Evaluated every 1m against the 5-minute average of the `average` dimension.
template: am_data_explorer_ingestion_latency
on: azure_monitor.data_explorer.ingestion_latency
class: Latency
type: Database
component: Data Explorer
lookup: average -5m unaligned of average
units: seconds
every: 1m
# Hysteresis: WARNING is raised above 600 and held while the value stays above 300.
warn: $this > (($status >= $WARNING) ? (300) : (600))
# Hysteresis: CRITICAL is raised above 1800 and held while the value stays above 600.
crit: $this > (($status == $CRITICAL) ? (600) : (1800))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer ingestion latency on ${label:resource_name}
info: Average ingestion latency on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High latency means data takes longer to become queryable.
to: sysadmin
# Error alert on dropped ingestion events.
# Evaluated every 1m against the 5-minute average of the `dropped` dimension
# of the `events` chart.
template: am_data_explorer_events_dropped
on: azure_monitor.data_explorer.events
class: Errors
type: Database
component: Data Explorer
lookup: average -5m unaligned of dropped
units: events/s
every: 1m
# Hysteresis: WARNING is raised above 1 and held while the value stays above 0,
# i.e. it only clears once no drops remain in the lookup window.
warn: $this > (($status >= $WARNING) ? (0) : (1))
# Hysteresis: CRITICAL is raised above 10 and held while the value stays above 1.
crit: $this > (($status == $CRITICAL) ? (1) : (10))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer events dropped on ${label:resource_name}
info: Rate of dropped ingestion events on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Dropped events indicate data loss during ingestion.
to: sysadmin
# Error alert on blobs dropped during ingestion.
# Evaluated every 1m against the 5-minute average of the `dropped` dimension
# of the `blobs` chart.
template: am_data_explorer_blobs_dropped
on: azure_monitor.data_explorer.blobs
class: Errors
type: Database
component: Data Explorer
lookup: average -5m unaligned of dropped
units: blobs/s
every: 1m
# Hysteresis: WARNING is raised above 1 and held while the value stays above 0,
# i.e. it only clears once no drops remain in the lookup window.
warn: $this > (($status >= $WARNING) ? (0) : (1))
# Hysteresis: CRITICAL is raised above 10 and held while the value stays above 1.
crit: $this > (($status == $CRITICAL) ? (1) : (10))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer blobs dropped on ${label:resource_name}
info: Rate of dropped blobs during ingestion on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Dropped blobs indicate data loss during ingestion.
to: sysadmin
# Workload alert on the ingestion queue length.
# Evaluated every 1m against the 5-minute average of the `length` dimension.
template: am_data_explorer_ingestion_queue_length
on: azure_monitor.data_explorer.ingestion_queue
class: Workload
type: Database
component: Data Explorer
lookup: average -5m unaligned of length
units: messages
every: 1m
# Hysteresis: WARNING is raised above 1000 and held while the value stays above 500.
warn: $this > (($status >= $WARNING) ? (500) : (1000))
# Hysteresis: CRITICAL is raised above 5000 and held while the value stays above 1000.
crit: $this > (($status == $CRITICAL) ? (1000) : (5000))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer ingestion queue on ${label:resource_name}
info: Average ingestion queue length on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
A growing queue indicates ingestion is not keeping up with incoming data.
to: sysadmin
# Latency alert on the age of the oldest queued ingestion message (seconds).
# Evaluated every 1m against the 5-minute average of the `age` dimension.
template: am_data_explorer_queue_oldest_message
on: azure_monitor.data_explorer.queue_oldest_message
class: Latency
type: Database
component: Data Explorer
lookup: average -5m unaligned of age
units: seconds
every: 1m
# Hysteresis: WARNING is raised above 600 and held while the value stays above 300.
warn: $this > (($status >= $WARNING) ? (300) : (600))
# Hysteresis: CRITICAL is raised above 1800 and held while the value stays above 600.
crit: $this > (($status == $CRITICAL) ? (600) : (1800))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer queue oldest message age on ${label:resource_name}
info: Age of the oldest message in the ingestion queue of Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Old messages indicate a significant ingestion backlog.
to: sysadmin
# --- Export Health ---
# Utilization alert on export capacity.
# Evaluated every 1m against the 5-minute average of the `maximum` dimension.
template: am_data_explorer_export_utilization
on: azure_monitor.data_explorer.export_utilization
class: Utilization
type: Database
component: Data Explorer
lookup: average -5m unaligned of maximum
units: percentage
every: 1m
# Hysteresis: WARNING is raised above 80 and held while the value stays above 70.
warn: $this > (($status >= $WARNING) ? (70) : (80))
# Hysteresis: CRITICAL is raised above 95 and held while the value stays above 80.
crit: $this > (($status == $CRITICAL) ? (80) : (95))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer export utilization on ${label:resource_name}
info: Export utilization of Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High export utilization may cause export jobs to be delayed or fail.
to: sysadmin
# Workload alert on pending continuous export jobs.
# Evaluated every 1m against the 5-minute average of the `maximum` dimension.
template: am_data_explorer_continuous_export_pending
on: azure_monitor.data_explorer.continuous_export_pending
class: Workload
type: Database
component: Data Explorer
lookup: average -5m unaligned of maximum
units: jobs
every: 1m
# Both conditions first require a non-NaN lookup value.
# Hysteresis: WARNING is raised above 10 and held while the value stays above 5.
warn: $this != nan AND $this > (($status >= $WARNING) ? (5) : (10))
# Hysteresis: CRITICAL is raised above 50 and held while the value stays above 10.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (10) : (50))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer continuous export pending on ${label:resource_name}
info: Number of pending continuous export jobs on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# Latency alert on continuous export lateness (thresholds in minutes).
# Evaluated every 1m against the 5-minute average of the `maximum` dimension.
template: am_data_explorer_continuous_export_lateness
on: azure_monitor.data_explorer.continuous_export_lateness
class: Latency
type: Database
component: Data Explorer
lookup: average -5m unaligned of maximum
units: minutes
every: 1m
# Both conditions first require a non-NaN lookup value.
# Hysteresis: WARNING is raised above 30 and held while the value stays above 10.
warn: $this != nan AND $this > (($status >= $WARNING) ? (10) : (30))
# Hysteresis: CRITICAL is raised above 60 and held while the value stays above 30.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (30) : (60))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer continuous export lateness on ${label:resource_name}
info: Maximum continuous export lateness on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High lateness means exported data is significantly behind real-time.
to: sysadmin
# --- Streaming Ingest ---
# Utilization alert on streaming ingest capacity.
# Evaluated every 1m against the 5-minute average of the `average` dimension.
template: am_data_explorer_streaming_ingest_utilization
on: azure_monitor.data_explorer.streaming_ingest_utilization
class: Utilization
type: Database
component: Data Explorer
lookup: average -5m unaligned of average
units: percentage
every: 1m
# Both conditions first require a non-NaN lookup value.
# Hysteresis: WARNING is raised above 80 and held while the value stays above 70.
warn: $this != nan AND $this > (($status >= $WARNING) ? (70) : (80))
# Hysteresis: CRITICAL is raised above 95 and held while the value stays above 80.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (80) : (95))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer streaming ingest utilization on ${label:resource_name}
info: Average streaming ingest utilization on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High utilization indicates the cluster is approaching streaming ingest capacity.
to: sysadmin
# Latency alert on streaming ingest duration (thresholds in milliseconds).
# Evaluated every 1m against the 5-minute average of the `average` dimension.
template: am_data_explorer_streaming_ingest_duration
on: azure_monitor.data_explorer.streaming_ingest_duration
class: Latency
type: Database
component: Data Explorer
lookup: average -5m unaligned of average
units: milliseconds
every: 1m
# Both conditions first require a non-NaN lookup value.
# Hysteresis: WARNING is raised above 1000 and held while the value stays above 500.
warn: $this != nan AND $this > (($status >= $WARNING) ? (500) : (1000))
# Hysteresis: CRITICAL is raised above 5000 and held while the value stays above 1000.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (1000) : (5000))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer streaming ingest duration on ${label:resource_name}
info: Average streaming ingest duration on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: sysadmin
# --- Materialized Views ---
# Availability alert on materialized view health.
# Evaluated every 1m against the 5-minute average of the `health` dimension.
template: am_data_explorer_materialized_view_health
on: azure_monitor.data_explorer.materialized_view_health
class: Availability
type: Database
component: Data Explorer
lookup: average -5m unaligned of health
units: status
every: 1m
# CRITICAL whenever the lookup value is a number below 1; there is no
# hysteresis and no WARNING level for this alert.
crit: $this != nan AND $this < 1
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer materialized view health on ${label:resource_name}
info: Materialized view health status on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
A value below 1 indicates the materialized view is unhealthy.
to: sysadmin
# Latency alert on materialized view age (thresholds in minutes).
# Evaluated every 1m against the 5-minute average of the `minutes` dimension.
template: am_data_explorer_materialized_view_age
on: azure_monitor.data_explorer.materialized_view_age
class: Latency
type: Database
component: Data Explorer
lookup: average -5m unaligned of minutes
units: minutes
every: 1m
# Both conditions first require a non-NaN lookup value.
# Hysteresis: WARNING is raised above 60 and held while the value stays above 30.
warn: $this != nan AND $this > (($status >= $WARNING) ? (30) : (60))
# Hysteresis: CRITICAL is raised above 120 and held while the value stays above 60.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (60) : (120))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer materialized view age on ${label:resource_name}
info: Age of the materialized view on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High age means the view is significantly behind the source data.
to: sysadmin
# Error alert on materialized view data loss.
# Evaluated every 1m against the 5-minute maximum (not the average) of the
# `maximum` dimension, so a single non-zero sample in the window is enough.
template: am_data_explorer_materialized_view_data_loss
on: azure_monitor.data_explorer.materialized_view_data_loss
class: Errors
type: Database
component: Data Explorer
lookup: max -5m unaligned of maximum
units: status
every: 1m
# CRITICAL whenever the lookup value is a number above 0; there is no
# hysteresis and no WARNING level for this alert.
crit: $this != nan AND $this > 0
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer materialized view data loss on ${label:resource_name}
info: Materialized view is reporting data loss on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
This indicates potential data inconsistency in the view.
to: sysadmin
# --- Follower Latency ---
# Latency alert on follower replication (thresholds in milliseconds).
# Evaluated every 1m against the 5-minute average of the `average` dimension.
template: am_data_explorer_follower_latency
on: azure_monitor.data_explorer.follower_latency
class: Latency
type: Database
component: Data Explorer
lookup: average -5m unaligned of average
units: milliseconds
every: 1m
# Both conditions first require a non-NaN lookup value.
# Hysteresis: WARNING is raised above 60000 and held while the value stays above 30000.
warn: $this != nan AND $this > (($status >= $WARNING) ? (30000) : (60000))
# Hysteresis: CRITICAL is raised above 300000 and held while the value stays above 60000.
crit: $this != nan AND $this > (($status == $CRITICAL) ? (60000) : (300000))
# Notifications for a falling status are delayed 5m (1.5x multiplier, 1h cap).
delay: down 5m multiplier 1.5 max 1h
summary: Data Explorer follower latency on ${label:resource_name}
info: Average follower replication latency on Azure Data Explorer cluster ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High latency means follower databases are behind the leader.
to: sysadmin