task_datax_example.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. # Licensed to the Apache Software Foundation (ASF) under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing,
  12. # software distributed under the License is distributed on an
  13. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. # KIND, either express or implied. See the License for the
  15. # specific language governing permissions and limitations
  16. # under the License.
  17. """
  18. An example workflow for the DataX task.
  19. This example will create a workflow named `task_datax_example`.
  20. `task_datax_example` is a real workflow that defines and runs the task `task_datax`.
  21. You can create the data sources `first_mysql` and `second_mysql` through the UI.
  22. It creates a task that uses DataX to synchronize data from the source database to the target database.
  23. """
  24. from pydolphinscheduler.core.process_definition import ProcessDefinition
  25. from pydolphinscheduler.tasks.datax import CustomDataX, DataX
  # DataX job definition handed to the custom task below.
  # It is left empty in this example.
  JSON_TEMPLATE = ""

  with ProcessDefinition(name="task_datax_example", tenant="tenant_exists") as workflow:
      # Built-in DataX task, configured with `first_mysql` as the source
      # and `second_mysql` as the target: the rows selected by `sql` are
      # meant to land in `target_table`.
      sync_task = DataX(
          name="task_datax",
          datasource_name="first_mysql",
          datatarget_name="second_mysql",
          sql="select id, name, code, description from source_table",
          target_table="target_table",
      )

      # Alternative: pass your own DataX JSON instead of letting the
      # task be described by datasource/SQL parameters.
      custom_task = CustomDataX(name="task_custom_datax", json=JSON_TEMPLATE)

      # Run the workflow defined by this context.
      workflow.run()