# File: /usr/lib/netdata/conf.d/health.d/azure_monitor_postgres_flexible.conf
# you can disable an alarm notification by setting the 'to' line to: silent
# --- Availability ---
# Availability alert: averages the `maximum` dimension of the availability
# chart over the last 5 minutes and is re-evaluated every minute.
# Notifications are routed to the `dba` recipient.
template: am_postgres_flexible_availability
on: azure_monitor.postgres_flexible.availability
class: Availability
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of maximum
units: state
every: 1m
# No warning level and no hysteresis: any 5-minute average below 1 is critical.
crit: $this < 1
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible Server down on ${label:resource_name}
info: Database is not alive on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: dba
# --- Utilization ---
# CPU utilization alert on the 5-minute average of the `average` dimension,
# evaluated every minute and sent to `dba`.
template: am_postgres_flexible_cpu
on: azure_monitor.postgres_flexible.cpu
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of average
units: percentage
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 85 and holds until the value is back at or below 75
#   critical raises above 95 and holds until the value is back at or below 85
warn: $this > (($status >= $WARNING) ? (75) : (85))
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible CPU on ${label:resource_name}
info: Average CPU utilization on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: dba
# Memory utilization alert on the 5-minute average of the `average` dimension,
# evaluated every minute and sent to `dba`.
template: am_postgres_flexible_memory
on: azure_monitor.postgres_flexible.memory
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of average
units: percentage
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 90 and holds until the value is back at or below 80
#   critical raises above 95 and holds until the value is back at or below 90
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible memory on ${label:resource_name}
info: Average memory utilization on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region})
to: dba
# --- Storage ---
# Storage utilization alert on the 5-minute average of the `average`
# dimension, evaluated every minute and sent to `dba`.
template: am_postgres_flexible_storage_utilization
on: azure_monitor.postgres_flexible.storage_utilization
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of average
units: percentage
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 85 and holds until the value is back at or below 75
#   critical raises above 95 and holds until the value is back at or below 85
warn: $this > (($status >= $WARNING) ? (75) : (85))
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible storage on ${label:resource_name}
info: Storage utilization on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Running out of storage causes the server to become read-only
to: dba
# --- I/O ---
# Disk bandwidth saturation alert. Reads only the `bandwidth` dimension of
# the shared disk_saturation chart (the IOPS alert reads the `iops` dimension
# of the same chart), averaged over 5 minutes and evaluated every minute.
template: am_postgres_flexible_disk_bandwidth_saturation
on: azure_monitor.postgres_flexible.disk_saturation
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of bandwidth
units: percentage
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 85 and holds until the value is back at or below 75
#   critical raises above 95 and holds until the value is back at or below 85
warn: $this > (($status >= $WARNING) ? (75) : (85))
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible disk bandwidth saturation on ${label:resource_name}
info: Disk bandwidth consumed percentage on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Sustained high values indicate I/O throttling
to: dba
# Disk IOPS saturation alert. Reads only the `iops` dimension of the shared
# disk_saturation chart (the bandwidth alert reads the `bandwidth` dimension
# of the same chart), averaged over 5 minutes and evaluated every minute.
template: am_postgres_flexible_disk_iops_saturation
on: azure_monitor.postgres_flexible.disk_saturation
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of iops
units: percentage
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 85 and holds until the value is back at or below 75
#   critical raises above 95 and holds until the value is back at or below 85
warn: $this > (($status >= $WARNING) ? (75) : (85))
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible disk IOPS saturation on ${label:resource_name}
info: Disk IOPS consumed percentage on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Sustained high values indicate I/O throttling
to: dba
# Disk queue depth alert on the 5-minute average of the `average` dimension,
# evaluated every minute and sent to `dba`.
template: am_postgres_flexible_disk_queue_depth
on: azure_monitor.postgres_flexible.disk_queue_depth
class: Workload
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of average
units: operations
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 64  and holds until the value is back at or below 32
#   critical raises above 128 and holds until the value is back at or below 64
warn: $this > (($status >= $WARNING) ? (32) : (64))
crit: $this > (($status == $CRITICAL) ? (64) : (128))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible disk queue depth on ${label:resource_name}
info: Disk queue depth on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High queue depth indicates I/O subsystem is saturated
to: dba
# --- Connections ---
# Failed connection alert. Reads only the `failed` dimension of the
# connection_rate chart, averaged over 5 minutes and evaluated every minute.
template: am_postgres_flexible_failed_connections
on: azure_monitor.postgres_flexible.connection_rate
class: Errors
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of failed
units: connections/s
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 5  and holds until the value is back at or below 1
#   critical raises above 20 and holds until the value is back at or below 5
warn: $this > (($status >= $WARNING) ? (1) : (5))
crit: $this > (($status == $CRITICAL) ? (5) : (20))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible failed connections on ${label:resource_name}
info: Rate of failed connection attempts on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
May indicate authentication failures, connection limit exhaustion, or network issues
to: dba
# TCP connection backlog alert on the 5-minute average of the `maximum`
# dimension, evaluated every minute and sent to `dba`.
template: am_postgres_flexible_tcp_connection_backlog
on: azure_monitor.postgres_flexible.tcp_connection_backlog
class: Workload
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of maximum
units: connections
every: 1m
# The `$this != nan` guard is meant to keep both conditions false when the
# lookup has no value. Hysteresis keyed on the current $status:
#   warning  raises above 100 and holds until the value is back at or below 50
#   critical raises above 200 and holds until the value is back at or below 100
warn: $this != nan AND $this > (($status >= $WARNING) ? (50) : (100))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (100) : (200))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible TCP connection backlog on ${label:resource_name}
info: TCP connection backlog on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High backlog indicates the server cannot accept connections fast enough
to: dba
# --- Transactions ---
# Deadlock alert on the 5-minute average of the `total` dimension, evaluated
# every minute and sent to `dba`. Warning-only: this stanza defines no `crit`.
template: am_postgres_flexible_deadlocks
on: azure_monitor.postgres_flexible.deadlocks
class: Errors
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of total
units: deadlocks/s
every: 1m
# The `$this != nan` guard is meant to keep the condition false when the
# lookup has no value. Hysteresis: the warning raises once the average exceeds
# 1 and then holds for as long as the average stays above 0.
warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (1))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible deadlocks on ${label:resource_name}
info: Deadlock rate on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Deadlocks indicate conflicting lock acquisition patterns in concurrent transactions
to: dba
# Helper for am_postgres_flexible_rollback_ratio: the `committed` and
# `rolled_back` dimensions summed over the last 5 minutes. It defines no
# warn/crit conditions, so it never raises an alert itself; it only supplies
# the denominator and the minimum-volume gate for the ratio below.
template: am_postgres_flexible_transactions_5m
on: azure_monitor.postgres_flexible.transactions
class: Workload
type: Database
component: Azure PostgreSQL Flexible
lookup: sum -5m unaligned of committed,rolled_back
units: transactions
every: 1m
info: Committed plus rolled back transactions over the last 5 minutes on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region})
# Rollback ratio alert: share of rolled back transactions over the last
# 5 minutes, evaluated every minute and sent to `dba`.
template: am_postgres_flexible_rollback_ratio
on: azure_monitor.postgres_flexible.transactions
class: Errors
type: Database
component: Azure PostgreSQL Flexible
# Numerator: only `rolled_back`, summed over the same 5-minute window as the
# helper total. A dimension variable such as $rolled_back would be the latest
# sample only, so dividing it by a 5-minute sum under-reports the ratio.
lookup: sum -5m unaligned of rolled_back
# Report 0 while the window holds 100 transactions or fewer, so a handful of
# rollbacks on an idle server cannot produce a large percentage.
calc: ($am_postgres_flexible_transactions_5m > 100) ? ($this * 100 / $am_postgres_flexible_transactions_5m) : (0)
# Same unit label as the other percentage alerts in this file.
units: percentage
every: 1m
# Hysteresis keyed on the current $status:
#   warning  raises above 5  and holds until the value is back at or below 3
#   critical raises above 20 and holds until the value is back at or below 10
warn: $this != nan AND $this > (($status >= $WARNING) ? (3) : (5))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (10) : (20))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible rollback ratio on ${label:resource_name}
info: Percentage of rolled back transactions on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High rollback rates indicate application errors or excessive contention
to: dba
# --- Latency / Long Running ---
# Longest running query alert. Takes the maximum (not the average) of the
# `query` dimension of the long_running chart over the last 5 minutes,
# evaluated every minute and sent to `dba`.
template: am_postgres_flexible_longest_query
on: azure_monitor.postgres_flexible.long_running
class: Latency
type: Database
component: Azure PostgreSQL Flexible
lookup: max -5m unaligned of query
units: seconds
every: 1m
# The `$this != nan` guard is meant to keep both conditions false when the
# lookup has no value. Hysteresis keyed on the current $status:
#   warning  raises above 600  and holds until the value is back at or below 300
#   critical raises above 1800 and holds until the value is back at or below 600
warn: $this != nan AND $this > (($status >= $WARNING) ? (300) : (600))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (600) : (1800))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible long running query on ${label:resource_name}
info: Longest running query duration on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Long running queries can hold locks and bloat WAL
to: dba
# Longest running transaction alert. Takes the maximum (not the average) of
# the `transaction` dimension of the long_running chart over the last
# 5 minutes, evaluated every minute and sent to `dba`.
template: am_postgres_flexible_longest_transaction
on: azure_monitor.postgres_flexible.long_running
class: Latency
type: Database
component: Azure PostgreSQL Flexible
lookup: max -5m unaligned of transaction
units: seconds
every: 1m
# The `$this != nan` guard is meant to keep both conditions false when the
# lookup has no value. Hysteresis keyed on the current $status:
#   warning  raises above 600  and holds until the value is back at or below 300
#   critical raises above 1800 and holds until the value is back at or below 600
warn: $this != nan AND $this > (($status >= $WARNING) ? (300) : (600))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (600) : (1800))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible long running transaction on ${label:resource_name}
info: Longest running transaction duration on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Long running transactions prevent autovacuum from reclaiming dead tuples
to: dba
# --- Safety (Transaction ID wraparound) ---
# Transaction ID usage alert on the 5-minute average of the `max_used`
# dimension, evaluated every minute and sent to `dba`.
template: am_postgres_flexible_xid_usage
on: azure_monitor.postgres_flexible.xid_usage
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of max_used
units: transactions
every: 1m
# Hysteresis keyed on the current $status (thresholds are XID counts):
#   warning  raises above 1.0 billion and holds until back at or below 0.5 billion
#   critical raises above 1.5 billion and holds until back at or below 1.0 billion
warn: $this > (($status >= $WARNING) ? (500000000) : (1000000000))
crit: $this > (($status == $CRITICAL) ? (1000000000) : (1500000000))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible transaction ID usage on ${label:resource_name}
info: Maximum used transaction IDs on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
PostgreSQL wraps around at ~2.1 billion XIDs. High values require urgent VACUUM FREEZE
to: dba
# Backend xmin age alert on the 5-minute average of the `maximum` dimension,
# evaluated every minute and sent to `dba`.
template: am_postgres_flexible_xmin_age
on: azure_monitor.postgres_flexible.xmin_age
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of maximum
units: transactions
every: 1m
# The `$this != nan` guard is meant to keep both conditions false when the
# lookup has no value. Hysteresis keyed on the current $status:
#   warning  raises above 500 million and holds until back at or below 200 million
#   critical raises above 1 billion   and holds until back at or below 500 million
warn: $this != nan AND $this > (($status >= $WARNING) ? (200000000) : (500000000))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (500000000) : (1000000000))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible backend xmin age on ${label:resource_name}
info: Oldest backend xmin age on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
A large xmin age prevents autovacuum from cleaning dead tuples and increases XID wraparound risk
to: dba
# --- Bloat ---
# Table bloat alert on the 5-minute average of the `maximum` dimension,
# evaluated every minute and sent to `dba`.
template: am_postgres_flexible_bloat
on: azure_monitor.postgres_flexible.bloat
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of maximum
units: percentage
every: 1m
# The `$this != nan` guard is meant to keep both conditions false when the
# lookup has no value. Hysteresis keyed on the current $status; unlike most
# alerts in this file, the warning and critical bands do not share a boundary:
#   warning  raises above 50 and holds until the value is back at or below 40
#   critical raises above 70 and holds until the value is back at or below 60
warn: $this != nan AND $this > (($status >= $WARNING) ? (40) : (50))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (60) : (70))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible table bloat on ${label:resource_name}
info: Table bloat percentage on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High bloat wastes storage and degrades query performance. Consider running VACUUM FULL
to: dba
# --- Replication ---
# Replication lag alert on the 5-minute average of the `average` dimension of
# the replication_lag_time chart, evaluated every minute and sent to `dba`.
template: am_postgres_flexible_replication_lag
on: azure_monitor.postgres_flexible.replication_lag_time
class: Latency
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of average
units: seconds
every: 1m
# The `$this != nan` guard is meant to keep both conditions false when the
# lookup has no value. Hysteresis keyed on the current $status:
#   warning  raises above 30 and holds until the value is back at or below 10
#   critical raises above 60 and holds until the value is back at or below 30
warn: $this != nan AND $this > (($status >= $WARNING) ? (10) : (30))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (30) : (60))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible replication lag on ${label:resource_name}
info: Physical replication lag on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
High lag means replicas serve stale data
to: dba
# --- CPU Credits (burstable tiers) ---
# Remaining CPU credits alert on the 5-minute average of the `remaining`
# dimension, evaluated every minute and sent to `dba`.
template: am_postgres_flexible_cpu_credits_remaining
on: azure_monitor.postgres_flexible.cpu_credits
class: Utilization
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of remaining
units: credits
every: 1m
# This is a low-water alert, so the comparisons are inverted (`<`) relative to
# the other stanzas. The `$this != nan` guard is meant to keep both conditions
# false when the lookup has no value. Hysteresis keyed on the current $status:
#   warning  raises below 20 and holds until the value is back at or above 30
#   critical raises below 10 and holds until the value is back at or above 20
warn: $this != nan AND $this < (($status >= $WARNING) ? (30) : (20))
crit: $this != nan AND $this < (($status == $CRITICAL) ? (20) : (10))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible CPU credits low on ${label:resource_name}
info: Remaining CPU credits on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Only applicable to burstable tiers. When credits are exhausted, CPU is capped at baseline
to: dba
# --- Temp Files ---
# Temporary file I/O alert on the 5-minute average of the `total` dimension,
# evaluated every minute and sent to `dba`.
template: am_postgres_flexible_temp_bytes
on: azure_monitor.postgres_flexible.temp_bytes
class: Workload
type: Database
component: Azure PostgreSQL Flexible
lookup: average -5m unaligned of total
units: bytes/s
every: 1m
# Thresholds are byte values: 52428800 = 50 MiB, 104857600 = 100 MiB,
# 209715200 = 200 MiB. The `$this != nan` guard is meant to keep both
# conditions false when the lookup has no value. Hysteresis keyed on $status:
#   warning  raises above 100 MiB and holds until back at or below 50 MiB
#   critical raises above 200 MiB and holds until back at or below 100 MiB
warn: $this != nan AND $this > (($status >= $WARNING) ? (52428800) : (104857600))
crit: $this != nan AND $this > (($status == $CRITICAL) ? (104857600) : (209715200))
delay: down 5m multiplier 1.5 max 1h
summary: PostgreSQL Flexible temp file I/O on ${label:resource_name}
info: Rate of temporary file bytes written on Azure PostgreSQL Flexible Server ${label:resource_name} \
in ${label:resource_group} (${label:region}). \
Excessive temp file usage indicates queries spilling to disk due to insufficient work_mem
to: dba