job.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. import os
  2. import stat
  3. import time
  4. from app.core.airflow.task import *
  5. from app.core.airflow.af_util import get_job_path
  6. from app.schemas import AirflowJob
  class AirflowJobSubmitter:
      """Render Airflow DAG source files from Jinja2 templates and write them to disk.

      All entry points are static methods; the class only serves as a namespace.

      NOTE(review): ``config``, ``Environment``, ``PackageLoader``,
      ``select_autoescape`` and the ``*TaskCompiler`` classes are not imported
      by name in this module; presumably they are provided by
      ``from app.core.airflow.task import *`` -- confirm.
      """

      @staticmethod
      def submit_dag(item: AirflowJob):
          """Translate every task of ``item`` and write the resulting DAG file.

          Tasks whose ``task_type`` is ``'sparks'`` are collected separately
          (``spark_nodes``) from all other tasks (``nodes``); each entry of
          ``item.dependence`` becomes an edge between ``op_<source>`` and
          ``op_<target>``.

          Args:
              item: Job description. The attributes read here are ``id``,
                  ``user_id``, ``job_mode``, ``tasks``, ``dependence``,
                  ``cron`` and ``trigger_status``.

          Raises:
              KeyError: If a task has a ``task_type`` with no registered compiler.
          """
          compilers = {
              'python': PythonTaskCompiler,
              'datax': DataXTaskCompiler,
              'sparks': SparksTaskCompiler,
              'java': JavaTaskCompiler,
          }
          # A falsy job_mode (None / 0) falls back to mode 1.
          task_mode = item.job_mode or 1

          def translate(task):
              # Still a KeyError (callers may catch it), but one that says
              # which job/task type is at fault instead of a bare key.
              try:
                  compiler_cls = compilers[task.task_type]
              except KeyError:
                  raise KeyError(
                      f'unsupported task_type {task.task_type!r} in job {item.id}; '
                      f'expected one of {sorted(compilers)}'
                  ) from None
              return compiler_cls(item=task).translate(job_id=item.id, task_mode=task_mode)

          # Two passes keep the original translation order: every non-spark
          # task first, then every spark task.
          nodes = [translate(task) for task in item.tasks if task.task_type != 'sparks']
          spark_nodes = [translate(task) for task in item.tasks if task.task_type == 'sparks']

          # A missing dependence list (None) is treated as "no edges".
          edges = [
              {"source_operator_name": f'op_{edge[0]}',
               "target_operator_name": f'op_{edge[1]}'}
              for edge in (item.dependence or [])
          ]

          parameters = AirflowJobSubmitter._build_parameters(
              job_id=item.id,
              user_name=item.user_id,
              nodes=nodes,
              spark_nodes=spark_nodes,
              edges=edges,
              trigger_status=bool(item.trigger_status),
              # The literal string 'None' is how "no schedule" is stored in cron.
              interval=item.cron if item.cron != 'None' else None,
              enable_notify=True,
          )
          AirflowJobSubmitter.generate_dag_on_airflow(parameters=parameters,
                                                      save_path=get_job_path(job_id=item.id))

      @staticmethod
      def _build_parameters(*, job_id, user_name, nodes, spark_nodes, edges,
                            trigger_status, interval, enable_notify):
          """Assemble the context dict passed to ``dag_template.py.jinja2``.

          Shared by every entry point so the set of template variables cannot
          drift between them. ``af_backend_uri`` and ``image_pull_key`` are read
          from ``config``; the image pull key falls back to ``None`` when unset.
          """
          return {
              'nodes': nodes,
              'spark_nodes': spark_nodes,
              'edges': edges,
              'dag_id': f'dag_{job_id}',
              'user_name': user_name,
              'job_id': job_id,
              'trigger_status': trigger_status,
              'interval': interval,
              'af_backend_uri': config.get('AF_BACKEND', 'uri'),
              'image_pull_key': config.get('K8S', 'image_pull_key', fallback=None),
              'enable_notify': enable_notify,
          }

      @staticmethod
      def generate_dag_on_airflow(parameters, save_path):
          """Render ``dag_template.py.jinja2`` with ``parameters`` and write it to ``save_path``.

          Args:
              parameters: Mapping of template variables.
              save_path: Destination path of the generated DAG file; an existing
                  file is overwritten.
          """
          env = Environment(
              loader=PackageLoader('app.core.airflow'),
              autoescape=select_autoescape()
          )
          template = env.get_template("dag_template.py.jinja2")
          dag_content = template.render(parameters)
          print(f'finish build:{dag_content}')

          # Python source files are decoded as UTF-8 by default, so write the
          # generated module as UTF-8 explicitly instead of relying on the
          # platform locale (which breaks on non-ASCII task names/descriptions).
          with open(save_path, "w", encoding="utf-8") as fh:
              fh.write(dag_content)

          # NOTE(review): this is mode 0o777 -- the generated DAG is executable
          # code that any local user can rewrite. Kept as-is because the
          # deployment may rely on it; consider tightening to 0o644/0o664.
          os.chmod(save_path, stat.S_IRWXO | stat.S_IRWXG | stat.S_IRWXU)
          print(f'write dag to {save_path}')

      @staticmethod
      def auto_submit_data_transfer():
          """Generate the built-in data-transfer DAG (job id 0, user id 0).

          The DAG contains a single spark node with one sub-node,
          ``read_and_save``, whose ``SCRIPT`` environment variable is rendered
          from ``data_transfer_dag_template.py.jinja2``. It is written with
          ``trigger_status`` False, no schedule interval and notifications off.
          """
          job_id = 0
          user_id = 0
          # Built with item=None: only default_image, cmd_str() and
          # render_spark_script() are used from this compiler instance.
          spark_task_demo = SparksTaskCompiler(item=None)

          read_and_save = {
              "name": 'read_and_save',
              "id": 0,
              "image": spark_task_demo.default_image,
              "cmds": ['/bin/bash', '-c', spark_task_demo.cmd_str(name='spark_data_transfer')],
              "env": {
                  "SCRIPT": spark_task_demo.render_spark_script(
                      parameters={"hive_metastore_uris": config.get('HIVE_METASTORE', 'uris')},
                      template_file="data_transfer_dag_template.py.jinja2")
              },
          }
          spark_nodes = [
              {
                  "sub_nodes": [read_and_save],
                  "edges": [],
                  "name": 'data_save',
                  "desc": 'task for data saving',
                  "id": 0,
              }
          ]
          print(spark_nodes[0]['sub_nodes'][0]['env']['SCRIPT'])

          parameters = AirflowJobSubmitter._build_parameters(
              job_id=job_id,
              user_name=user_id,
              nodes=[],
              spark_nodes=spark_nodes,
              edges=[],
              trigger_status=False,
              interval=None,
              enable_notify=False,
          )
          AirflowJobSubmitter.generate_dag_on_airflow(parameters=parameters,
                                                      save_path=get_job_path(job_id=job_id))
          print('create data transfer job success!')
  103. # @staticmethod
  104. # def auto_submit_data_transfer2():
  105. # # name: str
  106. # # file_urls: Optional[List[str]] = []
  107. # # script: str
  108. # # cmd: Optional[str] = ""
  109. # # cmd_parameters: str
  110. # # envs: Optional[Dict[str, str]] = {}
  111. # # run_image: str
  112. # # task_type: str
  113. #
  114. # df_task = AirflowTask(name='data_save', task_type='sparks', file_urls=[], script='', cmd='', env={})
  115. # # id: int
  116. # # job_type: int
  117. # # create_time: int
  118. # # update_time: int
  119. # # user_id: int
  120. # # job_mode: int
  121. # # tasks: List[AirflowTask]
  122. # # name: str
  123. # # dependence: List = []
  124. # # cron: str
  125. # # desc: str
  126. # # route_strategy: str
  127. # # block_strategy: str
  128. # # executor_timeout: int
  129. # # executor_fail_retry_count: int
  130. # # trigger_status: int
  131. #
  132. # job_item = AirflowJob(id=0, job_type=1, tasks=[df_task], create_time=int(time.time()), user_id=0, job_mode=1,
  133. # name='data_transfer', dependence=[], cron="None", trigger_status=0)