task_datax_example.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
  17. """
  18. A example workflow for task datax.
  19. This example will create a workflow named `task_datax`.
  20. `task_datax` is true workflow define and run task task_datax.
  21. You can create data sources `first_mysql` and `first_mysql` through UI.
  22. It creates a task to synchronize datax from the source database to the target database.
  23. """

import json

from pydolphinscheduler.core.process_definition import ProcessDefinition
from pydolphinscheduler.tasks.datax import CustomDataX, DataX

# DataX job JSON template used by `task2` below: a mysqlreader reads from
# `source_table` in `source_db`, and a mysqlwriter inserts into `target_table`
# in `target_db`. The credentials (`usr`/`pwd`) and the JDBC URLs are
# placeholders; point them at your own databases.
JSON_TEMPLATE = {
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "usr",
                        "password": "pwd",
                        "column": ["id", "name", "code", "description"],
                        "splitPk": "id",
                        "connection": [
                            {
                                "table": ["source_table"],
                                "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/source_db"],
                            }
                        ],
                    },
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "insert",
                        "username": "usr",
                        "password": "pwd",
                        "column": ["id", "name"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://127.0.0.1:3306/target_db",
                                "table": ["target_table"],
                            }
                        ],
                    },
                },
            }
        ],
        "setting": {
            "errorLimit": {"percentage": 0, "record": 0},
            "speed": {"channel": 1, "record": 1000},
        },
    }
}
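
# Note: JSON_TEMPLATE is a plain Python dict; it has to be serialized to a
# JSON string before it is handed to CustomDataX (see `json.dumps` below).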

with ProcessDefinition(
    name="task_datax_example",
    tenant="tenant_exists",
) as pd:
    # This task synchronizes the data in `source_table` of the `first_mysql`
    # database to `target_table` of the `second_mysql` database. You have to
    # make sure data sources named `first_mysql` and `second_mysql` exist in
    # your environment.
    task1 = DataX(
        name="task_datax",
        datasource_name="first_mysql",
        datatarget_name="second_mysql",
        sql="select id, name, code, description from source_table",
        target_table="target_table",
    )
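
    # `DataX` lets DolphinScheduler generate the DataX job from data sources
    # registered in the UI, while `CustomDataX` takes a complete DataX job
    # definition as a JSON string, so any DataX reader/writer can be used.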
    # This task creates a DataX job that does the same transfer as task1,
    # driven by the custom JSON_TEMPLATE above. `json.dumps` is used instead
    # of `str()` because `str()` on a dict emits single quotes, which is not
    # standard JSON.
    task2 = CustomDataX(name="task_custom_datax", json=json.dumps(JSON_TEMPLATE))

    pd.run()
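
# `pd.run()` submits the workflow definition to DolphinScheduler and then
# starts a run of it; use `pd.submit()` if you only want to register the
# definition. To try this example, execute the file directly
# (`python task_datax_example.py`) against a running DolphinScheduler whose
# Python gateway is enabled (it listens on port 25333 by default).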