alerts.json 4.5 KB

  1. {
  2. "DOLPHIN": {
  3. "service": [],
  4. "DOLPHIN_API": [
  5. {
  6. "name": "dolphin_api_port_check",
  7. "label": "dolphin_api_port_check",
  8. "description": "Checks that the Dolphin Scheduler API server port accepts TCP connections.",
  9. "interval": 10,
  10. "scope": "ANY",
  11. "source": {
  12. "type": "PORT",
  13. "uri": "{{dolphin-application-api/server.port}}",
  14. "default_port": 12345,
  15. "reporting": {
  16. "ok": {
  17. "text": "TCP OK - {0:.3f}s response on port {1}"
  18. },
  19. "warning": {
  20. "text": "TCP OK - {0:.3f}s response on port {1}",
  21. "value": 1.5
  22. },
  23. "critical": {
  24. "text": "Connection failed: {0} to {1}:{2}",
  25. "value": 5.0
  26. }
  27. }
  28. }
  29. }
  30. ],
  31. "DOLPHIN_LOGGER": [
  32. {
  33. "name": "dolphin_logger_port_check",
  34. "label": "dolphin_logger_port_check",
  35. "description": "Checks that the Dolphin Scheduler logger server RPC port accepts TCP connections.",
  36. "interval": 10,
  37. "scope": "ANY",
  38. "source": {
  39. "type": "PORT",
  40. "uri": "{{dolphin-common/loggerserver.rpc.port}}",
  41. "default_port": 50051,
  42. "reporting": {
  43. "ok": {
  44. "text": "TCP OK - {0:.3f}s response on port {1}"
  45. },
  46. "warning": {
  47. "text": "TCP OK - {0:.3f}s response on port {1}",
  48. "value": 1.5
  49. },
  50. "critical": {
  51. "text": "Connection failed: {0} to {1}:{2}",
  52. "value": 5.0
  53. }
  54. }
  55. }
  56. }
  57. ],
  58. "DOLPHIN_MASTER": [
  59. {
  60. "name": "DOLPHIN_MASTER_CHECK",
  61. "label": "check dolphin scheduler master status",
  62. "description": "Checks the Dolphin Scheduler master status on each host.",
  63. "interval":10,
  64. "scope": "HOST",
  65. "enabled": true,
  66. "source": {
  67. "type": "SCRIPT",
  68. "path": "DOLPHIN/1.3.3/package/alerts/alert_dolphin_scheduler_status.py",
  69. "parameters": [
  70. {
  71. "name": "connection.timeout",
  72. "display_name": "Connection Timeout",
  73. "value": 5.0,
  74. "type": "NUMERIC",
  75. "description": "The maximum time before this alert is considered to be CRITICAL",
  76. "units": "seconds",
  77. "threshold": "CRITICAL"
  78. },
  79. {
  80. "name": "alertName",
  81. "display_name": "alertName",
  82. "value": "DOLPHIN_MASTER",
  83. "type": "STRING",
  84. "description": "alert name"
  85. }
  86. ]
  87. }
  88. }
  89. ],
  90. "DOLPHIN_WORKER": [
  91. {
  92. "name": "DOLPHIN_WORKER_CHECK",
  93. "label": "check dolphin scheduler worker status",
  94. "description": "Checks the Dolphin Scheduler worker status on each host.",
  95. "interval":10,
  96. "scope": "HOST",
  97. "enabled": true,
  98. "source": {
  99. "type": "SCRIPT",
  100. "path": "DOLPHIN/1.3.3/package/alerts/alert_dolphin_scheduler_status.py",
  101. "parameters": [
  102. {
  103. "name": "connection.timeout",
  104. "display_name": "Connection Timeout",
  105. "value": 5.0,
  106. "type": "NUMERIC",
  107. "description": "The maximum time before this alert is considered to be CRITICAL",
  108. "units": "seconds",
  109. "threshold": "CRITICAL"
  110. },
  111. {
  112. "name": "alertName",
  113. "display_name": "alertName",
  114. "value": "DOLPHIN_WORKER",
  115. "type": "STRING",
  116. "description": "alert name"
  117. }
  118. ]
  119. }
  120. }
  121. ],
  122. "DOLPHIN_ALERT": [
  123. {
  124. "name": "DOLPHIN_DOLPHIN_ALERT_CHECK",
  125. "label": "check dolphin scheduler alert status",
  126. "description": "Checks the Dolphin Scheduler alert server status on each host.",
  127. "interval":10,
  128. "scope": "HOST",
  129. "enabled": true,
  130. "source": {
  131. "type": "SCRIPT",
  132. "path": "DOLPHIN/1.3.3/package/alerts/alert_dolphin_scheduler_status.py",
  133. "parameters": [
  134. {
  135. "name": "connection.timeout",
  136. "display_name": "Connection Timeout",
  137. "value": 5.0,
  138. "type": "NUMERIC",
  139. "description": "The maximum time before this alert is considered to be CRITICAL",
  140. "units": "seconds",
  141. "threshold": "CRITICAL"
  142. },
  143. {
  144. "name": "alertName",
  145. "display_name": "alertName",
  146. "value": "DOLPHIN_ALERT",
  147. "type": "STRING",
  148. "description": "alert name"
  149. }
  150. ]
  151. }
  152. }
  153. ]
  154. }
  155. }