monitor_server.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. #!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
'''
Install the dependencies:

1. Install pip:
       yum -y install python-pip
2. Install kazoo with pip:
       pip install kazoo
   or
3. Install kazoo with conda:
       conda install -c conda-forge kazoo

Run the script:

    nohup python -u monitor_server.py /data1_1T/escheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /escheduler/masters /escheduler/workers > monitor_server.log 2>&1 &

The parameters are as follows:

    /data1_1T/escheduler : the value comes from installPath in install.sh
    192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 : the value comes from zkQuorum in install.sh
    /escheduler/masters : the value comes from zkMasters in install.sh
    /escheduler/workers : the value comes from zkWorkers in install.sh
'''
  20. import sys
  21. import socket
  22. import os
  23. import sched
  24. import time
  25. from datetime import datetime
  26. from kazoo.client import KazooClient
  27. schedule = sched.scheduler(time.time, time.sleep)
  28. class ZkClient:
  29. def __init__(self):
  30. # hosts configuration zk address cluster
  31. self.zk = KazooClient(hosts=zookeepers)
  32. self.zk.start()
  33. # read configuration files and assemble them into a dictionary
  34. def read_file(self,path):
  35. with open(path, 'r') as f:
  36. dict = {}
  37. for line in f.readlines():
  38. arr = line.strip().split('=')
  39. if (len(arr) == 2):
  40. dict[arr[0]] = arr[1]
  41. return dict
  42. # get the ip address according to hostname
  43. def get_ip_by_hostname(self,hostname):
  44. return socket.gethostbyname(hostname)
  45. # restart server
  46. def restart_server(self,inc):
  47. config_dict = self.read_file(install_path + '/conf/config/run_config.conf')
  48. master_list = config_dict.get('masters').split(',')
  49. print master_list
  50. master_list = list(map(lambda item : self.get_ip_by_hostname(item),master_list))
  51. worker_list = config_dict.get('workers').split(',')
  52. print worker_list
  53. worker_list = list(map(lambda item: self.get_ip_by_hostname(item), worker_list))
  54. if (self.zk.exists(masters_zk_path)):
  55. zk_master_list = []
  56. zk_master_nodes = self.zk.get_children(masters_zk_path)
  57. for zk_master_node in zk_master_nodes:
  58. zk_master_list.append(zk_master_node.split('_')[0])
  59. restart_master_list = list(set(master_list) - set(zk_master_list))
  60. if (len(restart_master_list) != 0):
  61. for master in restart_master_list:
  62. print("master " + self.get_ip_by_hostname(master) + " server has down")
  63. os.system('ssh ' + self.get_ip_by_hostname(master) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start master-server')
  64. if (self.zk.exists(workers_zk_path)):
  65. zk_worker_list = []
  66. zk_worker_nodes = self.zk.get_children(workers_zk_path)
  67. for zk_worker_node in zk_worker_nodes:
  68. zk_worker_list.append(zk_worker_node.split('_')[0])
  69. restart_worker_list = list(set(worker_list) - set(zk_worker_list))
  70. if (len(restart_worker_list) != 0):
  71. for worker in restart_worker_list:
  72. print("worker " + self.get_ip_by_hostname(worker) + " server has down")
  73. os.system('ssh ' + self.get_ip_by_hostname(worker) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start worker-server')
  74. print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
  75. schedule.enter(inc, 0, self.restart_server, (inc,))
  76. # default parameter 60s
  77. def main(self,inc=60):
  78. # the enter four parameters are: interval event, priority (sequence for simultaneous execution of two events arriving at the same time), function triggered by the call,
  79. # the argument to the trigger function (tuple form)
  80. schedule.enter(0, 0, self.restart_server, (inc,))
  81. schedule.run()
  82. if __name__ == '__main__':
  83. if (len(sys.argv) < 4):
  84. print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
  85. install_path = sys.argv[1]
  86. zookeepers = sys.argv[2]
  87. masters_zk_path = sys.argv[3]
  88. workers_zk_path = sys.argv[4]
  89. zkClient = ZkClient()
  90. zkClient.main(300)