Przeglądaj źródła

add monitorServerState

ligang 6 lat temu
rodzic
commit
5739eb0de2
2 zmienionych plików z 38 dodań i 13 usunięć
  1. 9 0
      install.sh
  2. 29 13
      script/monitor_server.py

+ 9 - 0
install.sh

@@ -98,6 +98,8 @@ xlsFilePath="/tmp/xls"
 # 不启动设置为false,如果为false,以下配置不需要修改
 hdfsStartupSate="false"
 
+#是否启动自启动脚本
+monitorServerState="true"
 # namenode地址,支持HA,需要将core-site.xml和hdfs-site.xml放到conf目录下
 namenodeFs="hdfs://mycluster:8020"
 
@@ -364,3 +366,10 @@ fi
 # 6,启动
 echo "6,启动"
 sh ${workDir}/script/start_all.sh
+
+# 7. Start the monitor (auto-restart) script in the background when monitorServerState is "true"
+if [ "true" = "$monitorServerState" ];then
+        echo 'start monitor server'
+        nohup python -u "${workDir}/script/monitor_server.py" "$installPath" "$zkQuorum" "$zkMasters" "$zkWorkers" > "${workDir}/monitor_server.log" 2>&1 &
+fi
+

+ 29 - 13
script/monitor_server.py

@@ -10,9 +10,13 @@ pip install kazoo 安装
 conda install -c conda-forge kazoo 安装
 
 运行脚本:
-nohup python -u monitor_server.py > nohup.out 2>&1 &
+/data1_1T/escheduler的值来自install.sh中的installPath
+192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181的值来自install.sh中的zkQuorum
+/escheduler/masters的值来自install.sh中的zkMasters
+/escheduler/workers的值来自install.sh中的zkWorkers
+nohup python -u monitor_server.py /data1_1T/escheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /escheduler/masters /escheduler/workers > nohup.out 2>&1 &
 '''
-
+import sys
 import socket
 import os
 import sched
@@ -20,14 +24,17 @@ import time
 from datetime import datetime
 from kazoo.client import KazooClient
 
-
 schedule = sched.scheduler(time.time, time.sleep)
 
 class ZkClient:
     def __init__(self):
         # hosts: comma-separated ZooKeeper ensemble addresses
-        self.zk = KazooClient(hosts='ark0:2181,ark1:2181,ark2:2181')
-        self.zk.start()
+        # `zookeepers` is a module-level name assigned from sys.argv[2] in the __main__ block
+        print zookeepers
+        # Build the client from that connection string rather than a hard-coded host list
+        self.zk = KazooClient(hosts=zookeepers)
+        print "ready start"
+        self.zk.start()
 
     # 读取配置文件,组装成字典
     def read_file(self,path):
@@ -45,35 +52,37 @@ class ZkClient:
 
     # 重启服务
     def restart_server(self,inc):
-        config_dict = self.read_file('/data1_1T/escheduler/conf/config/run_config.conf')
+        config_dict = self.read_file(install_path + '/conf/config/run_config.conf')
 
         master_list = config_dict.get('masters').split(',')
+        print master_list
         master_list = list(map(lambda item : self.get_ip_by_hostname(item),master_list))
 
         worker_list = config_dict.get('workers').split(',')
+	print worker_list
         worker_list = list(map(lambda item: self.get_ip_by_hostname(item), worker_list))
 
-        if (self.zk.exists('/escheduler/masters')):
+        if (self.zk.exists(masters_zk_path)):
             zk_master_list = []
-            zk_master_nodes = self.zk.get_children('/escheduler/masters')
+            zk_master_nodes = self.zk.get_children(masters_zk_path)
             for zk_master_node in zk_master_nodes:
                 zk_master_list.append(zk_master_node.split('_')[0])
             restart_master_list = list(set(master_list) - set(zk_master_list))
             if (len(restart_master_list) != 0):
                 for master in restart_master_list:
                     print("master " + self.get_ip_by_hostname(master) + " 服务已经掉了")
-                    os.system('ssh ' + self.get_ip_by_hostname(master) + ' sh /data1_1T/escheduler/bin/escheduler-daemon.sh start master-server')
+                    os.system('ssh ' + self.get_ip_by_hostname(master) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start master-server')
 
-        if (self.zk.exists('/escheduler/workers')):
+        if (self.zk.exists(workers_zk_path)):
             zk_worker_list = []
-            zk_worker_nodes = self.zk.get_children('/escheduler/workers')
+            zk_worker_nodes = self.zk.get_children(workers_zk_path)
             for zk_worker_node in zk_worker_nodes:
                 zk_worker_list.append(zk_worker_node.split('_')[0])
             restart_worker_list = list(set(worker_list) - set(zk_worker_list))
             if (len(restart_worker_list) != 0):
                 for worker in restart_worker_list:
                     print("worker " + self.get_ip_by_hostname(worker) + " 服务已经掉了")
-                    os.system('ssh  ' + self.get_ip_by_hostname(worker) + ' sh /data1_1T/escheduler/bin/escheduler-daemon.sh start worker-server')
+                    os.system('ssh  ' + self.get_ip_by_hostname(worker) + ' sh ' + install_path + '/bin/escheduler-daemon.sh start worker-server')
 
         print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
         schedule.enter(inc, 0, self.restart_server, (inc,))
@@ -84,5 +93,12 @@ class ZkClient:
         schedule.enter(0, 0, self.restart_server, (inc,))
         schedule.run()
 if __name__ == '__main__':
+    # Four positional arguments are read below (argv[1]..argv[4]), so argv needs at least 5 entries.
+    if len(sys.argv) < 5:
+        sys.exit('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
+    install_path = sys.argv[1]
+    zookeepers = sys.argv[2]
+    masters_zk_path = sys.argv[3]
+    workers_zk_path = sys.argv[4]
     zkClient = ZkClient()
-    zkClient.main(300)
+    zkClient.main(300)