Procházet zdrojové kódy

[Feature-13429][Remote Logging] Add support for writing task logs to Google Cloud Storage (#13777)

Rick Cheng před 2 roky
rodič
revize
7ee66f2d02

+ 11 - 0
docs/docs/en/guide/remote-logging.md

@@ -50,3 +50,14 @@ remote.logging.s3.endpoint=<endpoint>
 remote.logging.s3.region=<region>
 ```
 
+## Writing task logs to [Google Cloud Storage (GCS)](https://cloud.google.com/storage)
+
+Configure `common.properties` as follows:
+
+```properties
+# the location of the google cloud credential, required if you set remote.logging.target=GCS
+remote.logging.google.cloud.storage.credential=/path/to/credential
+# gcs bucket name, required if you set remote.logging.target=GCS
+remote.logging.google.cloud.storage.bucket.name=<your-bucket>
+```
+

+ 12 - 1
docs/docs/zh/guide/remote-logging.md

@@ -10,7 +10,7 @@ Apache DolphinScheduler支持将任务日志传输到远端存储上。当配置
 ```properties
 # 是否开启远程日志存储
 remote.logging.enable=true
-# 任务日志写入的远端存储,目前支持OSS, S3
+# 任务日志写入的远端存储,目前支持OSS, S3, GCS
 remote.logging.target=OSS
 # 任务日志在远端存储上的目录
 remote.logging.base.dir=logs
@@ -50,3 +50,14 @@ remote.logging.s3.endpoint=<endpoint>
 remote.logging.s3.region=<region>
 ```
 
+## 将任务日志写入[Google Cloud Storage (GCS)](https://cloud.google.com/storage)
+
+配置`common.propertis`如下:
+
+```properties
+# the location of the google cloud credential, required if you set remote.logging.target=GCS
+remote.logging.google.cloud.storage.credential=/path/to/credential
+# gcs bucket name, required if you set remote.logging.target=GCS
+remote.logging.google.cloud.storage.bucket.name=<your-bucket>
+```
+

+ 1 - 0
dolphinscheduler-api/src/main/resources/logback-spring.xml

@@ -18,6 +18,7 @@
 
 <configuration scan="true" scanPeriod="120 seconds">
     <property name="log.base" value="logs"/>
+    <property scope="context" name="log.base.ctx" value="${log.base}" />
 
     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
         <encoder>

+ 28 - 0
dolphinscheduler-common/pom.xml

@@ -129,5 +129,33 @@
             <version>6.1.26</version>
             <scope>test</scope>
         </dependency>
+
+        <dependency>
+            <groupId>com.google.cloud</groupId>
+            <artifactId>google-cloud-storage</artifactId>
+            <optional>true</optional>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.codehaus.mojo</groupId>
+                    <artifactId>animal-sniffer-annotations</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>io.perfmark</groupId>
+                    <artifactId>perfmark-api</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.google.api.grpc</groupId>
+                    <artifactId>proto-google-common-protos</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.google.protobuf</groupId>
+                    <artifactId>protobuf-java</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.google.protobuf</groupId>
+                    <artifactId>protobuf-java-util</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
     </dependencies>
 </project>

+ 7 - 0
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/constants/Constants.java

@@ -839,4 +839,11 @@ public final class Constants {
     public static final String REMOTE_LOGGING_S3_ENDPOINT = "remote.logging.s3.endpoint";
 
     public static final String REMOTE_LOGGING_S3_REGION = "remote.logging.s3.region";
+
+    /**
+     * remote logging for GCS
+     */
+    public static final String REMOTE_LOGGING_GCS_CREDENTIAL = "remote.logging.google.cloud.storage.credential";
+
+    public static final String REMOTE_LOGGING_GCS_BUCKET_NAME = "remote.logging.google.cloud.storage.bucket.name";
 }

+ 153 - 0
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/GcsRemoteLogHandler.java

@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.dolphinscheduler.common.log.remote;
+
+import org.apache.dolphinscheduler.common.constants.Constants;
+import org.apache.dolphinscheduler.common.utils.PropertyUtils;
+
+import org.apache.commons.lang3.StringUtils;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import lombok.extern.slf4j.Slf4j;
+
+import com.google.auth.oauth2.ServiceAccountCredentials;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.BlobId;
+import com.google.cloud.storage.BlobInfo;
+import com.google.cloud.storage.Bucket;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.StorageOptions;
+
+@Slf4j
+public class GcsRemoteLogHandler implements RemoteLogHandler, Closeable {
+
+    private Storage gcsStorage;
+
+    private String bucketName;
+
+    private String credential;
+
+    private static GcsRemoteLogHandler instance;
+
+    private GcsRemoteLogHandler() {
+
+    }
+
+    public static synchronized GcsRemoteLogHandler getInstance() {
+        if (instance == null) {
+            instance = new GcsRemoteLogHandler();
+            instance.init();
+        }
+
+        return instance;
+    }
+
+    public void init() {
+        try {
+            credential = readCredentials();
+            bucketName = readBucketName();
+            gcsStorage = buildGcsStorage(credential);
+
+            checkBucketNameExists(bucketName);
+        } catch (IOException e) {
+            log.error("GCS Remote Log Handler init failed", e);
+        }
+    }
+
+    @Override
+    public void close() throws IOException {
+        try {
+            if (gcsStorage != null) {
+                gcsStorage.close();
+            }
+        } catch (Exception e) {
+            throw new IOException(e);
+        }
+    }
+
+    @Override
+    public void sendRemoteLog(String logPath) {
+        String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath);
+
+        try {
+            log.info("send remote log {} to GCS {}", logPath, objectName);
+
+            BlobInfo blobInfo = BlobInfo.newBuilder(
+                    BlobId.of(bucketName, objectName)).build();
+
+            gcsStorage.create(blobInfo, Files.readAllBytes(Paths.get(logPath)));
+        } catch (Exception e) {
+            log.error("error while sending remote log {} to GCS {}", logPath, objectName, e);
+        }
+    }
+
+    @Override
+    public void getRemoteLog(String logPath) {
+        String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath);
+
+        try {
+            log.info("get remote log on GCS {} to {}", objectName, logPath);
+
+            Blob blob = gcsStorage.get(BlobId.of(bucketName, objectName));
+            blob.downloadTo(Paths.get(logPath));
+        } catch (Exception e) {
+            log.error("error while getting remote log on GCS {} to {}", objectName, logPath, e);
+        }
+    }
+
+    protected Storage buildGcsStorage(String credential) throws IOException {
+        return StorageOptions.newBuilder()
+                .setCredentials(ServiceAccountCredentials.fromStream(
+                        Files.newInputStream(Paths.get(credential))))
+                .build()
+                .getService();
+    }
+
+    protected String readCredentials() {
+        return PropertyUtils.getString(Constants.REMOTE_LOGGING_GCS_CREDENTIAL);
+    }
+
+    protected String readBucketName() {
+        return PropertyUtils.getString(Constants.REMOTE_LOGGING_GCS_BUCKET_NAME);
+    }
+
+    public void checkBucketNameExists(String bucketName) {
+        if (StringUtils.isBlank(bucketName)) {
+            throw new IllegalArgumentException(Constants.REMOTE_LOGGING_GCS_BUCKET_NAME + " is blank");
+        }
+
+        boolean exist = false;
+        for (Bucket bucket : gcsStorage.list().iterateAll()) {
+            if (bucketName.equals(bucket.getName())) {
+                exist = true;
+                break;
+            }
+        }
+
+        if (!exist) {
+            log.error(
+                    "bucketName: {} does not exist, you need to create them by yourself", bucketName);
+        } else {
+            log.info("bucketName: {} has been found", bucketName);
+        }
+    }
+}

+ 2 - 0
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerFactory.java

@@ -37,6 +37,8 @@ public class RemoteLogHandlerFactory {
             return OssRemoteLogHandler.getInstance();
         } else if ("S3".equals(target)) {
             return S3RemoteLogHandler.getInstance();
+        } else if ("GCS".equals(target)) {
+            return GcsRemoteLogHandler.getInstance();
         }
 
         log.error("No suitable remote logging target for {}", target);

+ 7 - 10
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogUtils.java

@@ -17,6 +17,8 @@
 
 package org.apache.dolphinscheduler.common.log.remote;
 
+import static org.apache.dolphinscheduler.common.utils.LogUtils.getLocalLogBaseDir;
+
 import org.apache.dolphinscheduler.common.constants.Constants;
 import org.apache.dolphinscheduler.common.utils.PropertyUtils;
 
@@ -36,8 +38,6 @@ public class RemoteLogUtils {
 
     private static RemoteLogService remoteLogService;
 
-    private static final int OBJECT_NAME_COUNT = 2;
-
     @Autowired
     private RemoteLogService autowiredRemoteLogService;
 
@@ -79,16 +79,13 @@ public class RemoteLogUtils {
     }
 
     public static String getObjectNameFromLogPath(String logPath) {
+        Path localLogBaseDirPath = Paths.get(getLocalLogBaseDir()).toAbsolutePath();
+
         Path path = Paths.get(logPath);
         int nameCount = path.getNameCount();
 
-        String logBaseDir = PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR);
-
-        if (nameCount < OBJECT_NAME_COUNT) {
-            return Paths.get(logBaseDir, logPath).toString();
-        } else {
-            return Paths.get(logBaseDir, path.subpath(nameCount - OBJECT_NAME_COUNT, nameCount).toString())
-                    .toString();
-        }
+        String remoteLogBaseDir = PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR);
+        return Paths.get(remoteLogBaseDir, path.subpath(localLogBaseDirPath.getNameCount(), nameCount).toString())
+                .toString();
     }
 }

+ 9 - 0
dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/utils/LogUtils.java

@@ -35,6 +35,10 @@ import java.util.stream.Stream;
 
 import lombok.extern.slf4j.Slf4j;
 
+import org.slf4j.LoggerFactory;
+
+import ch.qos.logback.classic.LoggerContext;
+
 @Slf4j
 public class LogUtils {
 
@@ -160,4 +164,9 @@ public class LogUtils {
 
         return builder.toString();
     }
+    public static String getLocalLogBaseDir() {
+        LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();
+        return loggerContext.getProperty("log.base.ctx");
+    }
+
 }

+ 4 - 0
dolphinscheduler-common/src/main/resources/common.properties

@@ -173,4 +173,8 @@ remote.logging.s3.bucket.name=<bucket.name>
 remote.logging.s3.endpoint=<endpoint>
 # s3 region, required if you set remote.logging.target=S3
 remote.logging.s3.region=<region>
+# the location of the google cloud credential, required if you set remote.logging.target=GCS
+remote.logging.google.cloud.storage.credential=/path/to/credential
+# gcs bucket name, required if you set remote.logging.target=GCS
+remote.logging.google.cloud.storage.bucket.name=<your-bucket>
 

+ 17 - 4
dolphinscheduler-common/src/test/java/org/apache/dolphinscheduler/common/log/remote/RemoteLogHandlerTest.java

@@ -18,8 +18,14 @@
 package org.apache.dolphinscheduler.common.log.remote;
 
 import org.apache.dolphinscheduler.common.constants.Constants;
+import org.apache.dolphinscheduler.common.utils.LogUtils;
 import org.apache.dolphinscheduler.common.utils.PropertyUtils;
 
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import lombok.extern.slf4j.Slf4j;
+
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
@@ -27,20 +33,27 @@ import org.mockito.MockedStatic;
 import org.mockito.Mockito;
 import org.mockito.junit.jupiter.MockitoExtension;
 
+@Slf4j
 @ExtendWith(MockitoExtension.class)
 public class RemoteLogHandlerTest {
 
     @Test
     public void testGetObjectNameFromLogPath() {
-        final String logPath = "/path/to/dolphinscheduler/logs/20230116/8245922982496_1-1-3.log";
-        final String expectedObjectName = "logs/20230116/8245922982496_1-1-3.log";
+        Path currentRelativePath = Paths.get("");
+        String currentDir = currentRelativePath.toAbsolutePath().toString();
+        final String logPath = currentDir + "/logs/20230116/8245922982496/1/1/1.log";
+        log.info("logPath is: {}", logPath);
 
-        try (MockedStatic<PropertyUtils> propertyUtilsMockedStatic = Mockito.mockStatic(PropertyUtils.class)) {
+        final String expectedObjectName = "logs/20230116/8245922982496/1/1/1.log";
+
+        try (
+                MockedStatic<PropertyUtils> propertyUtilsMockedStatic = Mockito.mockStatic(PropertyUtils.class);
+                MockedStatic<LogUtils> remoteLogUtilsMockedStatic = Mockito.mockStatic(LogUtils.class)) {
             propertyUtilsMockedStatic.when(() -> PropertyUtils.getString(Constants.REMOTE_LOGGING_BASE_DIR))
                     .thenReturn("logs");
+            remoteLogUtilsMockedStatic.when(LogUtils::getLocalLogBaseDir).thenReturn("logs");
 
             String objectName = RemoteLogUtils.getObjectNameFromLogPath(logPath);
-
             Assertions.assertEquals(expectedObjectName, objectName);
         }
     }

+ 2 - 0
dolphinscheduler-master/src/main/resources/logback-spring.xml

@@ -18,6 +18,8 @@
 
 <configuration scan="true" scanPeriod="120 seconds">
     <property name="log.base" value="logs"/>
+    <property scope="context" name="log.base.ctx" value="${log.base}" />
+
     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
         <encoder>
             <pattern>

+ 1 - 0
dolphinscheduler-standalone-server/src/main/resources/logback-spring.xml

@@ -18,6 +18,7 @@
 
 <configuration scan="true" scanPeriod="120 seconds">
     <property name="log.base" value="logs"/>
+    <property scope="context" name="log.base.ctx" value="${log.base}" />
 
     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
         <encoder>

+ 1 - 0
dolphinscheduler-worker/src/main/resources/logback-spring.xml

@@ -18,6 +18,7 @@
 
 <configuration scan="true" scanPeriod="120 seconds">
     <property name="log.base" value="logs"/>
+    <property scope="context" name="log.base.ctx" value="${log.base}" />
 
     <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
         <encoder>