#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
'''
1. install pip with yum
yum -y install python-pip
2. install kazoo with pip
pip install kazoo
or
3. install kazoo with conda
conda install -c conda-forge kazoo

run script and parameter description:
nohup python -u monitor-server.py /data1_1T/dolphinscheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /dolphinscheduler/masters /dolphinscheduler/workers > monitor_server.log 2>&1 &

the parameters are as follows:
/data1_1T/dolphinscheduler : the value comes from installPath in install.sh
192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 : the value comes from zkQuorum in install.sh
/dolphinscheduler/masters : the value comes from zkMasters in install.sh
/dolphinscheduler/workers : the value comes from zkWorkers in install.sh
'''
import os
import sched
import socket
import subprocess
import sys
import time
from datetime import datetime

from kazoo.client import KazooClient
  43. schedule = sched.scheduler(time.time, time.sleep)
  44. class ZkClient:
  45. def __init__(self):
  46. # hosts configuration zk address cluster
  47. self.zk = KazooClient(hosts=zookeepers)
  48. self.zk.start()
  49. # read configuration files and assemble them into a dictionary
  50. def read_file(self,path):
  51. with open(path, 'r') as f:
  52. dict = {}
  53. for line in f.readlines():
  54. arr = line.strip().split('=')
  55. if (len(arr) == 2):
  56. dict[arr[0]] = arr[1]
  57. return dict
  58. # get the ip address according to hostname
  59. def get_ip_by_hostname(self,hostname):
  60. return socket.gethostbyname(hostname)
  61. # restart server
  62. def restart_server(self,inc):
  63. config_dict = self.read_file(install_path + '/conf/config/run_config.conf')
  64. master_list = config_dict.get('masters').split(',')
  65. print master_list
  66. master_list = list(map(lambda item : self.get_ip_by_hostname(item),master_list))
  67. worker_list = config_dict.get('workers').split(',')
  68. print worker_list
  69. worker_list = list(map(lambda item: self.get_ip_by_hostname(item), worker_list))
  70. ssh_port = config_dict.get("sshPort")
  71. print ssh_port
  72. if (self.zk.exists(masters_zk_path)):
  73. zk_master_list = []
  74. zk_master_nodes = self.zk.get_children(masters_zk_path)
  75. for zk_master_node in zk_master_nodes:
  76. zk_master_list.append(zk_master_node.split('_')[0])
  77. restart_master_list = list(set(master_list) - set(zk_master_list))
  78. if (len(restart_master_list) != 0):
  79. for master in restart_master_list:
  80. print("master " + self.get_ip_by_hostname(master) + " server has down")
  81. os.system('ssh -p ' + ssh_port + ' ' + self.get_ip_by_hostname(master) + ' sh ' + install_path + '/bin/dolphinscheduler-daemon.sh start master-server')
  82. if (self.zk.exists(workers_zk_path)):
  83. zk_worker_list = []
  84. zk_worker_nodes = self.zk.get_children(workers_zk_path)
  85. for zk_worker_node in zk_worker_nodes:
  86. zk_worker_list.append(zk_worker_node.split('_')[0])
  87. restart_worker_list = list(set(worker_list) - set(zk_worker_list))
  88. if (len(restart_worker_list) != 0):
  89. for worker in restart_worker_list:
  90. print("worker " + self.get_ip_by_hostname(worker) + " server has down")
  91. os.system('ssh -p ' + ssh_port + ' ' + self.get_ip_by_hostname(worker) + ' sh ' + install_path + '/bin/dolphinscheduler-daemon.sh start worker-server')
  92. print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
  93. schedule.enter(inc, 0, self.restart_server, (inc,))
  94. # default parameter 60s
  95. def main(self,inc=60):
  96. # the enter four parameters are: interval event, priority (sequence for simultaneous execution of two events arriving at the same time), function triggered by the call,
  97. # the argument to the trigger function (tuple form)
  98. schedule.enter(0, 0, self.restart_server, (inc,))
  99. schedule.run()
  100. if __name__ == '__main__':
  101. if (len(sys.argv) < 4):
  102. print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
  103. install_path = sys.argv[1]
  104. zookeepers = sys.argv[2]
  105. masters_zk_path = sys.argv[3]
  106. workers_zk_path = sys.argv[4]
  107. zkClient = ZkClient()
  108. zkClient.main(300)