Kaynağa Gözat

[dev] add cluster info cleaner

tjq 4 yıl önce
ebeveyn
işleme
24d098d863

+ 2 - 2
others/script/jenkins_auto_build.sh

@@ -26,7 +26,7 @@ docker run -d \
        --name powerjob-server \
        -p 7700:7700 -p 10086:10086 -p 5001:5005 -p 10001:10000 \
        -e JVMOPTIONS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=10000 -Dcom.sun.management.jmxremote.rmi.port=10000 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" \
-       -e PARAMS="--spring.profiles.active=product --spring.datasource.core.jdbc-url=jdbc:mysql://124.70.67.79:3306/powerjob-product?useUnicode=true&characterEncoding=UTF-8 --spring.data.mongodb.uri=mongodb://124.70.67.79:27017/powerjob-product" \
+       -e PARAMS="--spring.profiles.active=product --spring.datasource.core.jdbc-url=jdbc:mysql://139.224.83.134:3306/powerjob-product?useUnicode=true&characterEncoding=UTF-8 --spring.data.mongodb.uri=mongodb://139.224.83.134:27017/powerjob-product" \
        -v ~/docker/powerjob-server:/root/powerjob-server -v ~/.m2:/root/.m2 \
        tjqq/powerjob-server:latest
 sleep 60
@@ -45,8 +45,8 @@ docker run -d \
        tjqq/powerjob-agent:latest
 
 docker run -d \
-       --name powerjob-agent2 \
        --restart=always \
+       --name powerjob-agent2 \
        -p 27778:27777 -p 5003:5005 -p 10003:10000 \
        -e JVMOPTIONS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=10000 -Dcom.sun.management.jmxremote.rmi.port=10000 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" \
        -e PARAMS="--app powerjob-agent-test --server $serverAddress" \

+ 1 - 1
powerjob-common/src/main/java/com/github/kfcfans/powerjob/common/model/InstanceDetail.java

@@ -43,7 +43,7 @@ public class InstanceDetail implements OmsSerializable {
         private String startTime;
         private String finishedTime;
         private String result;
-        private String status;
+        private int status;
     }
 
     // MapReduce 和 Broadcast 任务的 extra ->

+ 17 - 2
powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/service/ha/ClusterStatusHolder.java

@@ -30,7 +30,7 @@ public class ClusterStatusHolder {
     // 集群中所有机器的最后心跳时间
     private Map<String, Long> address2ActiveTime;
 
-    private static final long WORKER_TIMEOUT_MS = 30000;
+    private static final long WORKER_TIMEOUT_MS = 60000;
 
     public ClusterStatusHolder(String appName) {
         this.appName = appName;
@@ -131,10 +131,25 @@ public class ClusterStatusHolder {
     /**
      * Releases all locally stored container infos (this makes the listDeployedContainer service briefly unavailable); the lines added in this hunk additionally drop the cached entries of workers for which timeout(addr) returns true.
      */
-    public void releaseContainerInfos() {
+    public void release() {
         log.info("[ClusterStatusHolder-{}] clean the containerInfos, listDeployedContainer service may down about 1min~", appName);
         // Discard all existing container data by installing a fresh map, ready to be rebuilt
         containerId2Infos = Maps.newConcurrentMap();
+
+        // Drop the info of timed-out workers: collect their addresses first, then remove them from both maps. NOTE(review): the lambda's second parameter is named lastActiveTime but is the value of address2Metrics, and it is unused — confirm and rename.
+        List<String> timeoutAddress = Lists.newLinkedList();
+        address2Metrics.forEach((addr, lastActiveTime) -> {
+            if (timeout(addr)) {
+                timeoutAddress.add(addr);
+            }
+        });
+        if (!timeoutAddress.isEmpty()) {
+            log.info("[ClusterStatusHolder-{}] detective timeout workers({}), try to release their infos.", appName, timeoutAddress);
+            timeoutAddress.forEach(addr -> {
+                address2ActiveTime.remove(addr);
+                address2Metrics.remove(addr);
+            });
+        }
     }
 
     private boolean timeout(String address) {

+ 3 - 3
powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/service/ha/WorkerManagerService.java

@@ -94,9 +94,9 @@ public class WorkerManagerService {
     }
 
     /**
-     * Releases all locally stored container infos (this makes the listDeployedContainer service briefly unavailable)
+     * Cleans up cached info to prevent OOM; calls ClusterStatusHolder#release on every holder in appId2ClusterStatus
      */
-    public static void releaseContainerInfos() {
-        appId2ClusterStatus.values().forEach(ClusterStatusHolder::releaseContainerInfos);
+    public static void cleanUp() {
+        appId2ClusterStatus.values().forEach(ClusterStatusHolder::release);
     }
 }

+ 1 - 2
powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/service/timing/CleanService.java

@@ -5,7 +5,6 @@ import com.github.kfcfans.powerjob.server.persistence.core.repository.InstanceIn
 import com.github.kfcfans.powerjob.server.persistence.core.repository.WorkflowInstanceInfoRepository;
 import com.github.kfcfans.powerjob.server.persistence.mongodb.GridFsManager;
 import com.github.kfcfans.powerjob.server.service.ha.WorkerManagerService;
-import com.github.kfcfans.powerjob.server.service.instance.InstanceManager;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Stopwatch;
 import lombok.extern.slf4j.Slf4j;
@@ -59,7 +58,7 @@ public class CleanService {
     public void timingClean() {
 
         // 释放本地缓存
-        WorkerManagerService.releaseContainerInfos();
+        WorkerManagerService.cleanUp();
 
         // 删除数据库运行记录
         cleanInstanceLog();

Dosya farkı çok büyük olduğundan ihmal edildi
+ 2 - 2
powerjob-server/src/main/resources/static/js/1.js


Dosya farkı çok büyük olduğundan ihmal edildi
+ 1 - 1
powerjob-server/src/main/resources/static/js/6.js


Dosya farkı çok büyük olduğundan ihmal edildi
+ 1 - 1
powerjob-server/src/main/resources/static/js/app.js


+ 21 - 10
powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/utils/LRUCache.java

@@ -1,26 +1,37 @@
 package com.github.kfcfans.powerjob.worker.common.utils;
 
-import java.util.LinkedHashMap;
-import java.util.Map;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+import java.util.function.BiConsumer;
 
 /**
  * LRU (Least Recently Used) cache
+ * Before v3.1.1 this was built on LinkedHashMap, which raised errors when accessed while being modified; it now wraps a Guava cache
  *
  * @author tjq
  * @since 2020/4/8
  */
-public class LRUCache<K, V> extends LinkedHashMap<K, V> {
+public class LRUCache<K, V> {
 
-    private final int cacheSize;
+    private final Cache<K, V> innerCache;
 
     public LRUCache(int cacheSize) {
-        super((int) Math.ceil(cacheSize / 0.75) + 1, 0.75f, false);
-        this.cacheSize = cacheSize;
+        innerCache = CacheBuilder.newBuilder()
+                .concurrencyLevel(2)
+                .maximumSize(cacheSize) // bounds the cache and enables eviction; initialCapacity only pre-sizes the table and never evicts
+                .build();
+    }
+
+    public void forEach(BiConsumer<? super K, ? super V> action) {
+        innerCache.asMap().forEach(action);
+    }
+
+    public V get(K key) {
+        return innerCache.getIfPresent(key);
     }
 
-    @Override
-    protected boolean removeEldestEntry(Map.Entry<K,V> eldest) {
-        // Return true once the threshold is exceeded so that LRU eviction takes place
-        return size() > cacheSize;
+    public void put(K key, V value) {
+        innerCache.put(key, value);
     }
 }

+ 3 - 3
powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/core/tracker/task/FrequentTaskTracker.java

@@ -114,7 +114,7 @@ public class FrequentTaskTracker extends TaskTracker {
             InstanceDetail.SubInstanceDetail subDetail = new InstanceDetail.SubInstanceDetail();
             BeanUtils.copyProperties(subInstanceInfo, subDetail);
             InstanceStatus status = InstanceStatus.of(subInstanceInfo.status);
-            subDetail.setStatus(status.getDes());
+            subDetail.setStatus(status.getV());
             subDetail.setSubInstanceId(subId);
 
             // 设置时间
@@ -347,8 +347,8 @@ public class FrequentTaskTracker extends TaskTracker {
         subInstanceId2TimeHolder.remove(subInstanceId);
 
         // 更新缓存数据
-        if (recentSubInstanceInfo.containsKey(subInstanceId)) {
-            SubInstanceInfo subInstanceInfo = recentSubInstanceInfo.get(subInstanceId);
+        SubInstanceInfo subInstanceInfo = recentSubInstanceInfo.get(subInstanceId);
+        if (subInstanceInfo != null) {
             subInstanceInfo.status = success ? InstanceStatus.SUCCEED.getV() : InstanceStatus.FAILED.getV();
             subInstanceInfo.result = result;
             subInstanceInfo.finishedTime = System.currentTimeMillis();