@@ -1,5 +1,4 @@
 import json
-
 from app.core.airflow.uri import spark_result_tb_name
 from app.schemas import AirflowTask
 from jinja2 import Environment, PackageLoader, select_autoescape
@@ -12,7 +11,16 @@ class TaskCompiler:
         self.default_image = None
         self.default_cmd = None
 
-    def translate(self):
+    @staticmethod
+    def render_spark_script(parameters, template_file):
+        env = Environment(
+            loader=PackageLoader('app.core.airflow'),
+            autoescape=select_autoescape()
+        )
+        template = env.get_template(template_file)
+        return template.render(parameters)
+
+    def translate(self, task_mode=1):
         return {'image': self.task.run_image or self.default_image,
                 'cmds': ["/bin/bash", "-c", f"{self.task.cmd or self.default_cmd} "],
                 'script': self.task.script,
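
Note (not part of the diff): render_spark_script is a plain staticmethod, so it can be exercised on the class directly. PackageLoader('app.core.airflow') resolves templates from that package's default templates/ directory; the parameter payload below is illustrative, only the template name is reused from a later hunk.

    # Hedged usage sketch of the shared helper above; 'script' is a placeholder payload.
    script_str = TaskCompiler.render_spark_script(
        parameters={'script': 'SELECT 1'},
        template_file='sql_script_template.py.jinja2')
    print(script_str)
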
@@ -31,6 +39,16 @@ class TaskCompiler:
         return minio_handler.put_byte_file(file_name=oss_path, file_content=context)
 
 
+class JavaTaskCompiler(TaskCompiler):
+    def __init__(self, item: AirflowTask):
+        super(JavaTaskCompiler, self).__init__(item)
+        self.default_image = 'SXKJ:32775/java:1.0'
+        self.default_cmd = "echo \"$SCRIPT\" > run.py && python run.py"
+        self.task.cmd = self.task.cmd or self.default_cmd
+        tar_name = self.task.file_urls[0].split('/')[-1].split('_')[-1]
+        self.task.cmd = f'curl {"http://minio.default:9000"}/{self.task.file_urls[0]} --output {tar_name} && {self.task.cmd}'
+
+
 class PythonTaskCompiler(TaskCompiler):
     def __init__(self, item: AirflowTask):
         super(PythonTaskCompiler, self).__init__(item)
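
Note (not part of the diff): the tar_name handling in JavaTaskCompiler is plain string slicing; a quick illustration with a made-up file_urls entry (bucket and object name are hypothetical):

    file_url = 'mybucket/upload/20230301120000_app.jar'       # hypothetical MinIO object key
    tar_name = file_url.split('/')[-1].split('_')[-1]         # 'app.jar': drop the path, keep the text after the last '_'
    cmd = f'curl http://minio.default:9000/{file_url} --output {tar_name} && echo "$SCRIPT" > run.py && python run.py'
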
@@ -42,9 +60,37 @@ class DataXTaskCompiler(TaskCompiler):
     def __init__(self, item: AirflowTask):
         super(DataXTaskCompiler, self).__init__(item)
         self.default_image = 'SXKJ:32775/pod_datax:0.9'
-        self.default_cmd = f"cd datax/bin && echo $SCRIPT > config.json && echo " \
-                           f"\"\'\"$HOME/conda/envs/py27/bin/python datax.py {self.task.cmd_parameters} config.json" \
-                           f"\"\'\" |xargs bash -c "
+        # self.default_cmd = f"cd datax/bin && echo $SCRIPT > config.json && echo " \
+        #                    f"\"\'\"$HOME/conda/envs/py27/bin/python datax.py {self.task.cmd_parameters} config.json" \
+        #                    f"\"\'\" |xargs bash -c "
+        self.default_cmd = f"cd datax/bin && echo \"$SCRIPT\" > transform_datax.py &&cat transform_datax.py && python3 transform_datax.py && cat config.json && $HOME/conda/envs/py27/bin/python datax.py {self.task.cmd_parameters} config.json"
+
+    def translate(self, task_mode=1):
+        print(f'{self.task.envs}')
+        script_str = self.render_spark_script(
+            parameters={'script': self.task.script,
+                        'first_begin_time': self.task.envs.get('first_begin_time', None),
+                        'last_key': self.task.envs.get('last_key', None),
+                        'current_key': self.task.envs.get('current_key', None),
+                        'partition_key': self.task.envs.get('partition_key', None),
+                        'partition_word': self.task.envs.get('partition_word', None),
+                        'partition_format': self.task.envs.get('partition_format', None),
+                        'partition_diff': self.task.envs.get('partition_diff', None),
+                        },
+            template_file="transform_datax.py.jinja2")
+        # with open('./auto_generate_demo.py','w') as f:
+        #     f.write(script_str)
+        res = {'image': self.task.run_image or self.default_image,
+               'cmds': ["/bin/bash", "-c", f"{self.task.cmd or self.default_cmd} "],
+               'script': script_str,
+               'id': f'{self.task.id}',
+               'env': {**{"SCRIPT": script_str}, **self.task.envs},
+               'operator_name': f'op_{self.task.id}',
+               'name': self.task.name,
+               'desc': ""
+               }
+
+        return res
 
 
 class SparksTaskCompiler(TaskCompiler):
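
Note (not part of the diff): a hedged sketch of driving the new DataXTaskCompiler.translate. It assumes AirflowTask is a keyword-constructible schema exposing exactly the attributes the compiler reads (id, name, script, cmd, cmd_parameters, run_image, file_urls, envs); every value is a placeholder.

    task = AirflowTask(id=7, name='datax_sync', script='{"job": {}}',   # DataX job JSON fed to the jinja template
                       cmd=None, cmd_parameters='', run_image=None, file_urls=[],
                       envs={'first_begin_time': '2023-01-01', 'last_key': 'update_time',
                             'current_key': 'update_time', 'partition_key': None,
                             'partition_word': None, 'partition_format': None,
                             'partition_diff': None})
    pod_args = DataXTaskCompiler(task).translate(task_mode=1)
    # pod_args['env']['SCRIPT'] carries the rendered transform_datax.py; the default cmd writes it
    # out in the container, runs it to produce config.json, then hands config.json to datax.py.
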
@@ -74,7 +120,7 @@ class SparksTaskCompiler(TaskCompiler):
         basic_cmds = "cd /home/sxkj/bigdata && echo \"$SCRIPT\" > run.py && ${SPARK_HOME}/bin/spark-submit"
         self.cmd_str = lambda name: f"{basic_cmds} --name {name} {param_str} run.py"
 
-    def translate(self):
+    def translate(self, task_mode=1):
         # dag_script = {
         #     "sub_nodes": [
         #         {
@@ -111,11 +157,11 @@ class SparksTaskCompiler(TaskCompiler):
                 inputs = {}
                 template_file = 'sql_script_template.py.jinja2'
             elif info['op'] == 'pyspark':
-                inputs = {k: spark_result_tb_name(self.task.id, *v) for k, v in info['inputs'].items()}
+                inputs = {k: spark_result_tb_name(self.task.id, *v, task_mode) for k, v in info['inputs'].items()}
                 template_file = 'pyspark_script_template.py.jinja2'
             else:
                 continue
-            outputs = [spark_result_tb_name(self.task.id, info['id'], 0)]
+            outputs = [spark_result_tb_name(self.task.id, info['id'], 0, task_mode)]
             sub_node = {
                 'id': f'{self.task.id}_{info["id"]}',
                 'name': info['name'],
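
Note (not part of the diff): the only functional change in this hunk is the extra trailing argument, so the result-table name can now vary by task_mode. The argument values below are placeholders; the arity and types are inferred from the call sites above, not from the uri module itself.

    from app.core.airflow.uri import spark_result_tb_name

    job_id, node_id, output_pin, task_mode = 42, 'n1', 0, 1
    # before this change: spark_result_tb_name(job_id, node_id, output_pin)
    table = spark_result_tb_name(job_id, node_id, output_pin, task_mode)
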
@@ -137,12 +183,3 @@ class SparksTaskCompiler(TaskCompiler):
             'name': self.task.name,
             'desc': "first spark dag task"
         }
-
-    @staticmethod
-    def render_spark_script(parameters, template_file):
-        env = Environment(
-            loader=PackageLoader('app.core.airflow'),
-            autoescape=select_autoescape()
-        )
-        template = env.get_template(template_file)
-        return template.render(parameters)
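
Note (not part of the diff): with render_spark_script hoisted to TaskCompiler and every subclass exposing translate(task_mode=...), callers get one uniform entry point. A hedged dispatch sketch follows; the mapping keys and the task_type discriminator are assumptions, not part of this diff.

    from app.schemas import AirflowTask

    COMPILERS = {'java': JavaTaskCompiler, 'python': PythonTaskCompiler,
                 'datax': DataXTaskCompiler, 'sparks': SparksTaskCompiler}

    def compile_task(task: AirflowTask, task_mode: int = 1) -> dict:
        # task.task_type is a hypothetical discriminator; fall back to Python tasks.
        compiler_cls = COMPILERS.get(getattr(task, 'task_type', 'python'), PythonTaskCompiler)
        return compiler_cls(task).translate(task_mode=task_mode)
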