#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:qiaozhanwei
'''
Install pip with yum:
    yum -y install python-pip
Install kazoo with pip:
    pip install kazoo
or install kazoo with conda:
    conda install -c conda-forge kazoo

Running the script:
    /data1_1T/escheduler is the value of installPath in install.sh
    192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 is the value of zkQuorum in install.sh
    /escheduler/masters is the value of zkMasters in install.sh
    /escheduler/workers is the value of zkWorkers in install.sh

    nohup python -u monitor_server.py /data1_1T/escheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /escheduler/masters /escheduler/workers > nohup.out 2>&1 &
'''
import os
import sched
import socket
import sys
import time
import traceback
from datetime import datetime

from kazoo.client import KazooClient
  23. schedule = sched.scheduler(time.time, time.sleep)
  24. class ZkClient:
  25. def __init__(self):
  26. # hosts配置zk地址集群
  27. #self.zk = KazooClient(hosts='192.168.220.188:2181,192.168.220.189:2181,192.168.220.190:2181')
  28. print zookeepers
  29. #zookeepers1 = zookeepers
  30. self.zk = KazooClient(hosts=zookeepers)
  31. print "ready start"
  32. self.zk.start()
  33. # 读取配置文件,组装成字典
  34. def read_file(self,path):
  35. with open(path, 'r') as f:
  36. dict = {}
  37. for line in f.readlines():
  38. arr = line.strip().split('=')
  39. if (len(arr) == 2):
  40. dict[arr[0]] = arr[1]
  41. return dict
  42. # 根据hostname获取ip地址
  43. def get_ip_by_hostname(self,hostname):
  44. return socket.gethostbyname(hostname)
  45. # 重启服务
  46. def restart_server(self,inc):
  47. config_dict = self.read_file(install_path + '/conf/config/run_config.conf')
  48. master_list = config_dict.get('masters').split(',')
  49. print master_list
  50. master_list = list(map(lambda item : self.get_ip_by_hostname(item),master_list))
  51. worker_list = config_dict.get('workers').split(',')
  52. print worker_list
  53. worker_list = list(map(lambda item: self.get_ip_by_hostname(item), worker_list))
  54. if (self.zk.exists(masters_zk_path)):
  55. zk_master_list = []
  56. zk_master_nodes = self.zk.get_children(masters_zk_path)
  57. for zk_master_node in zk_master_nodes:
  58. zk_master_list.append(zk_master_node.split('_')[0])
  59. restart_master_list = list(set(master_list) - set(zk_master_list))
  60. if (len(restart_master_list) != 0):
  61. for master in restart_master_list:
  62. print("master " + self.get_ip_by_hostname(master) + " 服务已经掉了")
  63. os.system('ssh ' + self.get_ip_by_hostname(master) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start master-server')
  64. if (self.zk.exists(workers_zk_path)):
  65. zk_worker_list = []
  66. zk_worker_nodes = self.zk.get_children(workers_zk_path)
  67. for zk_worker_node in zk_worker_nodes:
  68. zk_worker_list.append(zk_worker_node.split('_')[0])
  69. restart_worker_list = list(set(worker_list) - set(zk_worker_list))
  70. if (len(restart_worker_list) != 0):
  71. for worker in restart_worker_list:
  72. print("worker " + self.get_ip_by_hostname(worker) + " 服务已经掉了")
  73. os.system('ssh ' + self.get_ip_by_hostname(worker) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start worker-server')
  74. print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
  75. schedule.enter(inc, 0, self.restart_server, (inc,))
  76. # 默认参数60s
  77. def main(self,inc=60):
  78. # enter四个参数分别为:间隔事件、优先级(用于同时间到达的两个事件同时执行时定序)、被调用触发的函数,
  79. # 给该触发函数的参数(tuple形式)
  80. schedule.enter(0, 0, self.restart_server, (inc,))
  81. schedule.run()
  82. if __name__ == '__main__':
  83. if (len(sys.argv) < 4):
  84. print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
  85. install_path = sys.argv[1]
  86. #zookeepers = "'" + sys.argv[2] + "'"
  87. zookeepers = sys.argv[2]
  88. masters_zk_path = sys.argv[3]
  89. workers_zk_path = sys.argv[4]
  90. zkClient = ZkClient()
  91. zkClient.main(300)