|
@@ -1,124 +0,0 @@
|
|
|
-#!/usr/bin/env python
|
|
|
-# -*- coding:utf-8 -*-
|
|
|
-#
|
|
|
-# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
-# contributor license agreements. See the NOTICE file distributed with
|
|
|
-# this work for additional information regarding copyright ownership.
|
|
|
-# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
-# (the "License"); you may not use this file except in compliance with
|
|
|
-# the License. You may obtain a copy of the License at
|
|
|
-#
|
|
|
-# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
-#
|
|
|
-# Unless required by applicable law or agreed to in writing, software
|
|
|
-# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
-# See the License for the specific language governing permissions and
|
|
|
-# limitations under the License.
|
|
|
-#
|
|
|
-
|
|
|
-'''
|
|
|
-1, yum install pip
|
|
|
-yum -y install python-pip
|
|
|
-
|
|
|
-2, pip install kazoo
|
|
|
-pip install kazoo
|
|
|
-
|
|
|
-or
|
|
|
-
|
|
|
-3, conda install kazoo
|
|
|
-conda install -c conda-forge kazoo
|
|
|
-
|
|
|
-run script and parameter description:
|
|
|
-nohup python -u monitor_server.py /data1_1T/dolphinscheduler 192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 /dolphinscheduler/masters /dolphinscheduler/workers> monitor_server.log 2>&1 &
|
|
|
-the parameters are as follows:
|
|
|
-/data1_1T/dolphinscheduler : the value comes from the installPath in install.sh
|
|
|
-192.168.xx.xx:2181,192.168.xx.xx:2181,192.168.xx.xx:2181 : the value comes from zkQuorum in install.sh
|
|
|
-the value comes from zkWorkers in install.sh
|
|
|
-/dolphinscheduler/masters : the value comes from zkMasters in install.sh
|
|
|
-/dolphinscheduler/workers : the value comes from zkWorkers in install.sh
|
|
|
-'''
|
|
|
-import sys
|
|
|
-import socket
|
|
|
-import os
|
|
|
-import sched
|
|
|
-import time
|
|
|
-from datetime import datetime
|
|
|
-from kazoo.client import KazooClient
|
|
|
-
|
|
|
-schedule = sched.scheduler(time.time, time.sleep)  # module-wide scheduler; ZkClient.main() and ZkClient.restart_server() queue the periodic checks on it
|
|
|
-
|
|
|
class ZkClient:
    """Watchdog that restarts DolphinScheduler servers missing from ZooKeeper.

    The class reads these module-level names, which the script entry point
    assigns from the command line before creating an instance:
    ``zookeepers``, ``install_path``, ``masters_zk_path`` and
    ``workers_zk_path``. Periodic checks are queued on the module-level
    ``schedule`` scheduler.
    """

    def __init__(self):
        """Open a ZooKeeper session to the ensemble named by ``zookeepers``."""
        # hosts configuration zk address cluster
        self.zk = KazooClient(hosts=zookeepers)
        self.zk.start()

    def read_file(self, path):
        """Read a ``key=value`` configuration file into a dictionary.

        Each line is split on its FIRST ``=`` so that values which themselves
        contain ``=`` are kept intact. Lines without ``=`` are ignored.
        Surrounding whitespace is stripped from keys and values.

        :param path: path of the configuration file
        :return: dict mapping setting names to their string values
        """
        config = {}
        with open(path, 'r') as f:
            for line in f:
                key, sep, value = line.strip().partition('=')
                if sep:
                    config[key.strip()] = value.strip()
        return config

    def get_ip_by_hostname(self, hostname):
        """Return the IPv4 address that ``hostname`` resolves to."""
        return socket.gethostbyname(hostname)

    def _configured_ips(self, config_dict, key):
        """Return the resolved IPs of the comma-separated hosts stored under ``key``.

        A missing or empty setting yields an empty list instead of raising.
        """
        hosts = [h.strip() for h in config_dict.get(key, '').split(',') if h.strip()]
        print(hosts)
        return [self.get_ip_by_hostname(h) for h in hosts]

    def _restart_missing(self, role, configured_ips, zk_path, ssh_port):
        """Start ``<role>-server`` over ssh on every configured IP not registered under ``zk_path``.

        Nothing is done when ``zk_path`` does not exist in ZooKeeper.
        """
        # Imported here so the block does not depend on a new file-level import.
        import subprocess

        if not self.zk.exists(zk_path):
            return

        # Only the part of the child node name before the first '_' is compared
        # with the configured IPs (presumably the name is "<ip>_<suffix>" --
        # confirm against the servers' registration code).
        registered = set(node.split('_')[0] for node in self.zk.get_children(zk_path))

        for ip in set(configured_ips) - registered:
            print(role + " " + ip + " server has down")
            command = ['ssh']
            if ssh_port:
                # Without a configured port, let ssh use its own default.
                command += ['-p', ssh_port]
            command += [ip, 'sh', install_path + '/bin/dolphinscheduler-daemon.sh',
                        'start', role + '-server']
            # Argument list instead of a concatenated string: no local shell
            # gets to re-interpret the install path or host values.
            subprocess.call(command)

    def restart_server(self, inc):
        """Run one health check, then queue the next one ``inc`` seconds later.

        The check reads ``<install_path>/conf/config/run_config.conf``,
        resolves the hosts listed under ``masters`` and ``workers``, and
        restarts every server whose IP is not registered in ZooKeeper.

        :param inc: delay in seconds before the next check
        """
        try:
            config_dict = self.read_file(install_path + '/conf/config/run_config.conf')
            master_ips = self._configured_ips(config_dict, 'masters')
            worker_ips = self._configured_ips(config_dict, 'workers')

            ssh_port = config_dict.get('sshPort')
            print(ssh_port)

            self._restart_missing('master', master_ips, masters_zk_path, ssh_port)
            self._restart_missing('worker', worker_ips, workers_zk_path, ssh_port)
        except Exception as err:
            # A failed check (unresolvable host, unreadable config, ZooKeeper
            # error) must not stop the watchdog: report it and retry at the
            # next interval instead of letting the scheduler loop die.
            print('monitor check failed: ' + repr(err))

        print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        schedule.enter(inc, 0, self.restart_server, (inc,))

    # default parameter 60s
    def main(self, inc=60):
        """Run the first check immediately and keep checking every ``inc`` seconds.

        :param inc: seconds between two checks (default 60)
        """
        # the enter four parameters are: delay, priority (order for events due
        # at the same time), function triggered by the call,
        # the argument to the trigger function (tuple form)
        schedule.enter(0, 0, self.restart_server, (inc,))
        schedule.run()
|
|
|
def _parse_args(argv):
    """Return (install_path, zookeepers, masters_zk_path, workers_zk_path) or None.

    ``argv`` is the full argument vector including the script name, so four
    parameters require at least five entries. ``None`` signals that too few
    arguments were given.
    """
    if len(argv) < 5:
        return None
    return tuple(argv[1:5])


if __name__ == '__main__':
    parsed = _parse_args(sys.argv)
    if parsed is None:
        print('please input install_path,zookeepers,masters_zk_path and worker_zk_path')
        # Stop here: continuing would only fail later with an IndexError.
        sys.exit(1)

    # These must remain module-level names: ZkClient reads them as globals.
    install_path, zookeepers, masters_zk_path, workers_zk_path = parsed

    zkClient = ZkClient()
    # check every 300 seconds
    zkClient.main(300)
|