# Netdata health alert templates for Azure Data Factory
# (charts in the azure_monitor.data_factory.* contexts).
# File: /usr/lib/netdata/conf.d/health.d/azure_monitor_data_factory.conf
# you can disable an alarm notification by setting the 'to' line to: silent

# --- Pipeline Runs ---

 # Failed pipeline runs: every minute, sums the 'failed' dimension of the
 # pipeline_runs chart over the last 5 minutes.
 # WARNING is raised when the sum exceeds 5; once the alert is in WARNING
 # (or a higher status) the threshold drops to 0, so it only clears after
 # a 5-minute window with no failures (hysteresis against flapping).
 # 'delay: down' postpones notifications for status decreases.
 template: am_data_factory_pipeline_failed_runs
       on: azure_monitor.data_factory.pipeline_runs
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of failed
    units: runs
    every: 1m
     warn: $this > (($status >= $WARNING) ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory pipeline failures on ${label:resource_name}
     info: Failed pipeline runs on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}) over the last 5 minutes
       to: sysadmin

 # Cancelled pipeline runs: every minute, sums the 'cancelled' dimension of
 # the pipeline_runs chart over the last 10 minutes.
 # WARNING is raised when the sum exceeds 10 and is held until the sum is
 # no longer above 5 (hysteresis against flapping).
 # 'delay: down' postpones notifications for status decreases.
 template: am_data_factory_pipeline_cancelled_runs
       on: azure_monitor.data_factory.pipeline_runs
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -10m unaligned of cancelled
    units: runs
    every: 1m
     warn: $this > (($status >= $WARNING) ? (5) : (10))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory pipeline cancellations on ${label:resource_name}
     info: Cancelled pipeline runs on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}) over the last 10 minutes. \
           Frequent cancellations may indicate configuration or dependency issues
       to: sysadmin

# --- Activity Runs ---

 # Failed activity runs: every minute, sums the 'failed' dimension of the
 # activity_runs chart over the last 5 minutes.
 # WARNING is raised when the sum exceeds 5 and is held until the sum
 # returns to 0 (hysteresis against flapping).
 # 'delay: down' postpones notifications for status decreases.
 template: am_data_factory_activity_failed_runs
       on: azure_monitor.data_factory.activity_runs
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of failed
    units: runs
    every: 1m
     warn: $this > (($status >= $WARNING) ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory activity failures on ${label:resource_name}
     info: Failed activity runs on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}) over the last 5 minutes
       to: sysadmin

# --- Trigger Runs ---

 # Failed trigger runs: every minute, sums the 'failed' dimension of the
 # trigger_runs chart over the last 5 minutes.
 # WARNING is raised when the sum exceeds 5 and is held until the sum
 # returns to 0 (hysteresis against flapping).
 # 'delay: down' postpones notifications for status decreases.
 template: am_data_factory_trigger_failed_runs
       on: azure_monitor.data_factory.trigger_runs
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of failed
    units: runs
    every: 1m
     warn: $this > (($status >= $WARNING) ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory trigger failures on ${label:resource_name}
     info: Failed trigger runs on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}) over the last 5 minutes
       to: sysadmin

# --- SSIS Integration Runtime ---

 # Failed SSIS Integration Runtime starts: every minute, sums the 'failed'
 # dimension of the ssis_ir_starts chart over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 1 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 # Units are 'operations' because the value counts IR start operations,
 # not pipeline/activity runs.
 template: am_data_factory_ssis_ir_start_failures
       on: azure_monitor.data_factory.ssis_ir_starts
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of failed
    units: operations
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (1))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory SSIS IR start failures on ${label:resource_name}
     info: Failed SSIS Integration Runtime start operations on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

 # Stuck SSIS Integration Runtime stops: every minute, sums the 'stuck'
 # dimension of the ssis_ir_stops chart over the last 5 minutes.
 # WARNING is raised for any non-nan sum above 0; there is no separate
 # clear threshold, so it clears as soon as the 5-minute sum is 0 again.
 # Units are 'operations' because the value counts IR stop operations,
 # not pipeline/activity runs.
 template: am_data_factory_ssis_ir_stop_stuck
       on: azure_monitor.data_factory.ssis_ir_stops
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of stuck
    units: operations
    every: 1m
     warn: $this != nan AND $this > 0
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory SSIS IR stuck stops on ${label:resource_name}
     info: SSIS Integration Runtime stop operations stuck on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

 # Failed SSIS package executions: every minute, sums the 'failed'
 # dimension of the ssis_package_executions chart over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 1 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_ssis_package_failures
       on: azure_monitor.data_factory.ssis_package_executions
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of failed
    units: executions
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (1))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory SSIS package failures on ${label:resource_name}
     info: Failed SSIS package executions on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region})
       to: sysadmin

# --- Integration Runtime Resources ---

 # Integration Runtime CPU: every minute, averages the 'average' dimension
 # of the ir_cpu chart over the last 5 minutes.
 # WARNING: raised above 85, held until the value is no longer above 75.
 # CRITICAL: raised above 95, held until the value is no longer above 85.
 # 'delay: down' postpones notifications for status decreases.
 template: am_data_factory_ir_cpu
       on: azure_monitor.data_factory.ir_cpu
    class: Utilization
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of average
    units: percentage
    every: 1m
     warn: $this > (($status >= $WARNING)  ? (75) : (85))
     crit: $this > (($status == $CRITICAL) ? (85) : (95))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory IR CPU on ${label:resource_name}
     info: Integration Runtime CPU utilization on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region})
       to: sysadmin

 # Integration Runtime queue length: every minute, averages the
 # 'queue_length' dimension of the ir_queue chart over the last 5 minutes.
 # WARNING: raised above 20, held until the value is no longer above 10.
 # CRITICAL: raised above 50, held until the value is no longer above 20.
 # 'delay: down' postpones notifications for status decreases.
 template: am_data_factory_ir_queue_length
       on: azure_monitor.data_factory.ir_queue
    class: Workload
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of queue_length
    units: tasks
    every: 1m
     warn: $this > (($status >= $WARNING)  ? (10) : (20))
     crit: $this > (($status == $CRITICAL) ? (20) : (50))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory IR queue depth on ${label:resource_name}
     info: Integration Runtime queue length on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}). \
           Growing queues indicate the runtime cannot keep up with submitted work
       to: sysadmin

 # Integration Runtime task pickup delay: every minute, averages the
 # 'average' dimension of the ir_task_pickup_delay chart over the last
 # 5 minutes (reported in seconds).
 # WARNING: raised above 60, held until the value is no longer above 30.
 # CRITICAL: raised above 120, held until the value is no longer above 60.
 # 'delay: down' postpones notifications for status decreases.
 template: am_data_factory_ir_task_pickup_delay
       on: azure_monitor.data_factory.ir_task_pickup_delay
    class: Latency
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of average
    units: seconds
    every: 1m
     warn: $this > (($status >= $WARNING)  ? (30) : (60))
     crit: $this > (($status == $CRITICAL) ? (60) : (120))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory IR task pickup delay on ${label:resource_name}
     info: Average task pickup delay for Integration Runtime on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region}). \
           High delay indicates insufficient runtime capacity
       to: sysadmin

# --- Factory Capacity ---

 # Factory size utilization: every 5 minutes, computes $current as a
 # percentage of $max_allowed from the factory_size chart. There is no
 # database lookup; 'calc' uses the chart's dimension variables directly.
 # The '$max_allowed > 0' check avoids a division by zero and yields 0
 # when no positive limit is available.
 # WARNING: raised above 80, held until the value is no longer above 70.
 # CRITICAL: raised above 90, held until the value is no longer above 80.
 template: am_data_factory_size_utilization
       on: azure_monitor.data_factory.factory_size
    class: Utilization
     type: Other
component: Azure Data Factory
     calc: ($max_allowed > 0) ? ($current * 100 / $max_allowed) : (0)
    units: %
    every: 5m
     warn: $this > (($status >= $WARNING)  ? (70) : (80))
     crit: $this > (($status == $CRITICAL) ? (80) : (90))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory size utilization on ${label:resource_name}
     info: Factory size as percentage of maximum allowed on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

 # Entity count utilization: every 5 minutes, computes $current as a
 # percentage of $max_allowed from the entity_count chart. There is no
 # database lookup; 'calc' uses the chart's dimension variables directly.
 # The '$max_allowed > 0' check avoids a division by zero and yields 0
 # when no positive limit is available.
 # WARNING: raised above 80, held until the value is no longer above 70.
 # CRITICAL: raised above 90, held until the value is no longer above 80.
 # The summary says "utilization" because the alert value is a percentage
 # of the limit, not a raw entity count.
 template: am_data_factory_entity_utilization
       on: azure_monitor.data_factory.entity_count
    class: Utilization
     type: Other
component: Azure Data Factory
     calc: ($max_allowed > 0) ? ($current * 100 / $max_allowed) : (0)
    units: %
    every: 5m
     warn: $this > (($status >= $WARNING)  ? (70) : (80))
     crit: $this > (($status == $CRITICAL) ? (80) : (90))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory entity utilization on ${label:resource_name}
     info: Entity count (pipelines, datasets, etc.) as percentage of maximum allowed \
           on Data Factory ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

# --- MVNet IR Capacity ---

 # Managed VNet IR copy capacity: every minute, averages the 'utilization'
 # dimension of the mvnet_ir_copy_capacity chart over the last 5 minutes.
 # The '$this != nan' guard keeps both conditions false when the lookup
 # yields no value.
 # WARNING: raised above 80, held until the value is no longer above 70.
 # CRITICAL: raised above 90, held until the value is no longer above 80.
 template: am_data_factory_mvnet_ir_copy_utilization
       on: azure_monitor.data_factory.mvnet_ir_copy_capacity
    class: Utilization
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of utilization
    units: percentage
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING)  ? (70) : (80))
     crit: $this != nan AND $this > (($status == $CRITICAL) ? (80) : (90))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory MVNet IR copy utilization on ${label:resource_name}
     info: Managed VNet Integration Runtime copy capacity utilization on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

 # Managed VNet IR external activity capacity: every minute, averages the
 # 'utilization' dimension of the mvnet_ir_external_capacity chart over
 # the last 5 minutes.
 # The '$this != nan' guard keeps both conditions false when the lookup
 # yields no value.
 # WARNING: raised above 80, held until the value is no longer above 70.
 # CRITICAL: raised above 90, held until the value is no longer above 80.
 template: am_data_factory_mvnet_ir_external_utilization
       on: azure_monitor.data_factory.mvnet_ir_external_capacity
    class: Utilization
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of utilization
    units: percentage
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING)  ? (70) : (80))
     crit: $this != nan AND $this > (($status == $CRITICAL) ? (80) : (90))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory MVNet IR external utilization on ${label:resource_name}
     info: Managed VNet Integration Runtime external activity capacity utilization \
           on Data Factory ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

 # Managed VNet IR pipeline capacity: every minute, averages the
 # 'utilization' dimension of the mvnet_ir_pipeline_capacity chart over
 # the last 5 minutes.
 # The '$this != nan' guard keeps both conditions false when the lookup
 # yields no value.
 # WARNING: raised above 80, held until the value is no longer above 70.
 # CRITICAL: raised above 90, held until the value is no longer above 80.
 template: am_data_factory_mvnet_ir_pipeline_utilization
       on: azure_monitor.data_factory.mvnet_ir_pipeline_capacity
    class: Utilization
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of utilization
    units: percentage
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING)  ? (70) : (80))
     crit: $this != nan AND $this > (($status == $CRITICAL) ? (80) : (90))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory MVNet IR pipeline utilization on ${label:resource_name}
     info: Managed VNet Integration Runtime pipeline capacity utilization \
           on Data Factory ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

# --- Airflow IR Resources ---

 # Airflow IR CPU: every minute, averages the 'percentage' dimension of
 # the airflow_ir_cpu chart over the last 5 minutes.
 # The '$this != nan' guard keeps both conditions false when the lookup
 # yields no value.
 # WARNING: raised above 85, held until the value is no longer above 75.
 # CRITICAL: raised above 95, held until the value is no longer above 85.
 template: am_data_factory_airflow_ir_cpu
       on: azure_monitor.data_factory.airflow_ir_cpu
    class: Utilization
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of percentage
    units: percentage
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING)  ? (75) : (85))
     crit: $this != nan AND $this > (($status == $CRITICAL) ? (85) : (95))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow IR CPU on ${label:resource_name}
     info: Airflow Integration Runtime CPU utilization on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

 # Airflow IR memory: every minute, averages the 'percentage' dimension of
 # the airflow_ir_memory chart over the last 5 minutes.
 # The '$this != nan' guard keeps both conditions false when the lookup
 # yields no value.
 # WARNING: raised above 85, held until the value is no longer above 75.
 # CRITICAL: raised above 95, held until the value is no longer above 85.
 template: am_data_factory_airflow_ir_memory
       on: azure_monitor.data_factory.airflow_ir_memory
    class: Utilization
     type: Other
component: Azure Data Factory
   lookup: average -5m unaligned of percentage
    units: percentage
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING)  ? (75) : (85))
     crit: $this != nan AND $this > (($status == $CRITICAL) ? (85) : (95))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow IR memory on ${label:resource_name}
     info: Airflow Integration Runtime memory utilization on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

# --- Airflow IR DAG Errors ---

 # Airflow DAG processing errors: every minute, sums the
 # 'callback_exceptions', 'file_refresh' and 'import' dimensions of the
 # airflow_ir_dag_errors chart over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 5 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_dag_errors
       on: azure_monitor.data_factory.airflow_ir_dag_errors
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of callback_exceptions,file_refresh,import
    units: errors
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow DAG errors on ${label:resource_name}
     info: DAG processing errors (callback exceptions, file refresh errors, import errors) \
           on Airflow IR of Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region})
       to: sysadmin

# --- Airflow IR Operators ---

 # Airflow operator failures: every minute, sums the 'failures' dimension
 # of the airflow_ir_operators chart over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 5 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_operator_failures
       on: azure_monitor.data_factory.airflow_ir_operators
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of failures
    units: operations
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow operator failures on ${label:resource_name}
     info: Failed Airflow operator executions on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region})
       to: sysadmin

# --- Airflow IR Jobs ---

 # Airflow job heartbeat failures: every minute, sums the
 # 'heartbeat_failures' dimension of the airflow_ir_jobs chart over the
 # last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 1 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_job_heartbeat_failures
       on: azure_monitor.data_factory.airflow_ir_jobs
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of heartbeat_failures
    units: failures
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (1))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow job heartbeat failures on ${label:resource_name}
     info: Airflow job heartbeat failures on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}). \
           Heartbeat failures indicate scheduler or worker health issues
       to: sysadmin

# --- Airflow IR Pool Starvation ---

 # Airflow pool starvation: every minute, sums the 'starving' dimension of
 # the airflow_ir_pool_starving chart over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 5 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_pool_starving
       on: azure_monitor.data_factory.airflow_ir_pool_starving
    class: Workload
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of starving
    units: tasks
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow pool starvation on ${label:resource_name}
     info: Starving tasks in Airflow pool on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}). \
           Tasks are waiting for pool slots, consider increasing pool size
       to: sysadmin

# --- Airflow IR Scheduler Tasks ---

 # Airflow tasks killed externally: every minute, sums the
 # 'killed_externally' dimension of the airflow_ir_scheduler_tasks chart
 # over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 3 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 # The info text states that the scheduler reports these kills; the kills
 # themselves originate outside Airflow (e.g. OOM or infrastructure).
 template: am_data_factory_airflow_ir_tasks_killed_externally
       on: azure_monitor.data_factory.airflow_ir_scheduler_tasks
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of killed_externally
    units: tasks
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (3))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow tasks killed externally on ${label:resource_name}
     info: Tasks killed externally, as reported by the Airflow scheduler, on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region}). \
           May indicate OOM kills or infrastructure issues
       to: sysadmin

 # Airflow scheduler starving tasks: every minute, sums the 'starving'
 # dimension of the airflow_ir_scheduler_tasks chart over the last
 # 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 10 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_tasks_starving
       on: azure_monitor.data_factory.airflow_ir_scheduler_tasks
    class: Workload
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of starving
    units: tasks
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (10))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow scheduler starving tasks on ${label:resource_name}
     info: Starving tasks reported by Airflow scheduler on Data Factory \
           ${label:resource_name} in ${label:resource_group} (${label:region}). \
           Tasks cannot be scheduled due to resource constraints
       to: sysadmin

# --- Airflow IR Task Instances ---

 # Airflow task instance failures: every minute, sums the 'failed'
 # dimension of the airflow_ir_task_instances chart over the last
 # 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 5 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_task_instance_failures
       on: azure_monitor.data_factory.airflow_ir_task_instances
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of failed
    units: instances
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (5))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow task failures on ${label:resource_name}
     info: Failed Airflow task instances on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region})
       to: sysadmin

# --- Airflow IR Trigger Issues ---

 # Airflow trigger issues: every minute, sums the 'blocked_main_thread'
 # and 'celery_timeout_errors' dimensions of the airflow_ir_trigger_issues
 # chart over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 3 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_trigger_issues
       on: azure_monitor.data_factory.airflow_ir_trigger_issues
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of blocked_main_thread,celery_timeout_errors
    units: events
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (3))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow trigger issues on ${label:resource_name}
     info: Trigger issues (blocked main thread, Celery timeouts) on Airflow IR \
           of Data Factory ${label:resource_name} in ${label:resource_group} (${label:region})
       to: sysadmin

# --- Airflow IR Zombies ---

 # Airflow zombie tasks: every minute, sums the 'killed' dimension of the
 # airflow_ir_zombies chart over the last 5 minutes.
 # The '$this != nan' guard keeps the condition false when the lookup
 # yields no value. WARNING is raised when the sum exceeds 3 and is held
 # until the sum returns to 0 (hysteresis against flapping).
 template: am_data_factory_airflow_ir_zombies
       on: azure_monitor.data_factory.airflow_ir_zombies
    class: Errors
     type: Other
component: Azure Data Factory
   lookup: sum -5m unaligned of killed
    units: tasks
    every: 1m
     warn: $this != nan AND $this > (($status >= $WARNING) ? (0) : (3))
    delay: down 5m multiplier 1.5 max 1h
  summary: Data Factory Airflow zombie tasks on ${label:resource_name}
     info: Zombie tasks killed by Airflow on Data Factory ${label:resource_name} \
           in ${label:resource_group} (${label:region}). \
           Zombies occur when tasks are marked running but no process is executing them
       to: sysadmin