
Merge branch 'master' of http://gogsb.soaringnova.com/sxwl_DL/datax-admin into debug

liweiquan, 2 years ago
commit 5021d57553
5 changed files with 144 additions and 12 deletions
  1. app/core/datax/hdfs.py (+32 -9)
  2. configs/settings.py (+8 -0)
  3. development.ini (+17 -0)
  4. idctest.ini (+72 -0)
  5. production.ini (+15 -3)

app/core/datax/hdfs.py (+32 -9)

@@ -4,7 +4,8 @@ from app.models import JobJdbcDatasource
 from app.schemas.datax_json import DataXJsonParam
 from app.utils import *
 
-
+from configs.settings import config
+import json
 
 {
     "writer": {
@@ -61,6 +62,12 @@ from app.utils import *
     }
 }
 
+
+def _build_hadoop_configs():
+    hadoop = config.get('HADOOP_INNER', 'hadoop_config')
+    kerberos = config.get('HADOOP_INNER', 'kerberos_config')
+    return json.loads(hadoop), json.loads(kerberos)
+
 class HdfsReader(WriterBase):
     def __init__(self, ds: JobJdbcDatasource):
         WriterBase.__init__(self, ds)
@@ -82,17 +89,25 @@ class HdfsReader(WriterBase):
         return res
 
     def _convert_type(self, type):
-        if type.lower() == 'int':
+        if 'int' in type.lower().strip():
             return 'long'
-        elif type.lower() == 'varchar':
+        elif 'varchar' in type.lower().strip():
             return 'string'
+        return 'string'
 
     def build_parameter(self, param: DataXJsonParam, is_show=True):
         parameter = dict()
-        parameter['path'] = param.hive_reader.reader_path
-        parameter['defaultFS'] = param.hive_reader.reader_default_fs
+        # hive datasources that require Kerberos authentication
+        if str(self.ds.id) in config.get('HADOOP_INNER', 'datasource').split(','):
+            parameter['defaultFS'] = config.get('HADOOP_INNER', 'default_fs')
+            hadoop, kerberos = _build_hadoop_configs()
+            parameter['hadoopConfig'] = hadoop
+            parameter.update(kerberos)
+        else:
+            parameter['defaultFS'] = param.hive_reader.reader_default_fs
+        parameter['path'] = param.hive_reader.reader_path.strip()
         parameter['fileType'] = param.hive_reader.reader_file_type
-        parameter['fieldDelimiter'] = param.hive_reader.reader_field_delimiter
+        parameter['fieldDelimiter'] = param.hive_reader.reader_field_delimiter.strip()
         parameter['column'] = self._build_column(param.reader_columns)
         return parameter
 
@@ -128,14 +143,22 @@ class HdfsWriter(WriterBase):
         return res
 
 
+
     def build_parameter(self, param: DataXJsonParam, is_show=True):
         parameter = dict()
-        parameter['defaultFS'] = param.hive_writer.writer_default_fs
+        # hive datasources that require Kerberos authentication
+        if str(self.ds.id) in config.get('HADOOP_INNER', 'datasource').split(','):
+            parameter['defaultFS'] = config.get('HADOOP_INNER', 'default_fs')
+            hadoop, kerberos = _build_hadoop_configs()
+            parameter['hadoopConfig'] = hadoop
+            parameter.update(kerberos)
+        else:
+            parameter['defaultFS'] = param.hive_writer.writer_default_fs
         parameter['fileType'] = param.hive_writer.writer_file_type
-        parameter['path'] = param.hive_writer.writer_path
+        parameter['path'] = param.hive_writer.writer_path.strip()
         parameter['fileName'] = param.hive_writer.writer_filename
         parameter['writeMode'] = param.hive_writer.writer_mode
-        parameter['fieldDelimiter'] = param.hive_writer.writer_field_delimiter
+        parameter['fieldDelimiter'] = param.hive_writer.writer_field_delimiter.strip()
         parameter['column'] = self._build_column(param.writer_columns)
         return parameter
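
A minimal, self-contained sketch (not part of the commit) of the parameter dict the new Kerberos branch of build_parameter assembles. The defaultFS, hadoopConfig and kerberos values are copied from the HADOOP_INNER sections added in the ini files below; path, fileType, fieldDelimiter and column are made-up placeholders.

import json

# Values copied from the HADOOP_INNER sections below (abbreviated hadoopConfig).
hadoop_config = {
    "dfs.nameservices": "HDFS8000912",
    "dfs.ha.namenodes.HDFS8000912": "nn1,nn2",
}
kerberos_config = {
    "haveKerberos": "true",
    "kerberosKeytabFilePath": "/workspace/confs/test/user.keytab",
    "kerberosPrincipal": "ailab@EMR-5XJSY31F",
}

# Mirrors the Kerberos branch of HdfsReader.build_parameter.
parameter = {}
parameter['defaultFS'] = 'hdfs://HDFS8000912'
parameter['hadoopConfig'] = hadoop_config       # kept as a nested object
parameter.update(kerberos_config)               # kerberos keys are flattened to the top level
parameter['path'] = '/user/hive/warehouse/demo' # placeholder reader path
parameter['fileType'] = 'orc'                   # placeholder file type
parameter['fieldDelimiter'] = ','
parameter['column'] = [{'index': 0, 'type': 'string'}]  # placeholder column list
print(json.dumps(parameter, indent=2))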
 

configs/settings.py (+8 -0)

@@ -1,5 +1,6 @@
 
 import configparser
+import json
 import os
 
 class DefaultOption(dict):
@@ -22,6 +23,13 @@ if os.environ.get('APP_ENV', 'development') == 'development':
     config.readfp(open('development.ini'))
 elif os.environ.get('APP_ENV') == 'production':
     config.readfp(open('production.ini'))
+elif os.environ.get('APP_ENV') == 'idctest':
+    config.readfp(open('idctest.ini'))
 
 print(f"get config of {os.environ.get('APP_ENV')}")
 print(config.get('DATABASE', 'host'))
+hadoop_config = config.get('HADOOP_INNER', 'hadoop_config')
+print(json.loads(hadoop_config))
+
+
+print((config.get('HADOOP_INNER', 'datasource')).split(','))
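
A minimal sketch (not part of the commit) of how configparser folds the indented continuation lines of hadoop_config into a single value that json.loads can then parse. The ini text is an abbreviated copy of the HADOOP_INNER section added below; read_string is used only to keep the example self-contained.

import configparser
import json

ini_text = """
[HADOOP_INNER]
datasource = 8,10,11
default_fs = hdfs://HDFS8000912
hadoop_config = {
            "dfs.nameservices": "HDFS8000912",
            "dfs.ha.namenodes.HDFS8000912": "nn1,nn2"
             }
"""

config = configparser.ConfigParser()
config.read_string(ini_text)
# Continuation lines are joined with newlines, so the value is still valid JSON.
print(json.loads(config.get('HADOOP_INNER', 'hadoop_config')))
print(config.get('HADOOP_INNER', 'datasource').split(','))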

development.ini (+17 -0)

@@ -48,3 +48,20 @@ datax=SXKJ:32775/pod_datax:0.9
 python=SXKJ:32775/pod_python:1.1
 java=SXKJ:32775/java:1.0
 sparks=SXKJ:32775/jupyter:0.981
+
+
+[HADOOP_INNER]
+datasource = -1
+default_fs = hdfs://HDFS8000912
+hadoop_config={
+            "dfs.nameservices": "HDFS8000912",
+            "dfs.ha.namenodes.HDFS8000912": "nn1,nn2",
+            "dfs.namenode.rpc-address.HDFS8000912.nn1": "10.254.20.18:4007",
+            "dfs.namenode.rpc-address.HDFS8000912.nn2": "10.254.20.22:4007",
+            "dfs.client.failover.proxy.provider.HDFS8000912": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+             }
+kerberos_config = {
+                    "haveKerberos": "true",
+                    "kerberosKeytabFilePath": "/workspace/confs/test/user.keytab",
+                    "kerberosPrincipal": "ailab@EMR-5XJSY31F"
+                  }
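
A minimal sketch (not part of the commit) of the membership check hdfs.py performs against this section: with datasource = -1 no real datasource id can match, so the Kerberos branch stays disabled in development. The ids 8, 10 and 11 below are just sample values.

datasource_ids = '-1'.split(',')    # HADOOP_INNER.datasource in this file
for ds_id in (8, 10, 11):           # sample datasource primary keys
    print(ds_id, str(ds_id) in datasource_ids)   # always False -> plain defaultFS branch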

idctest.ini (+72 -0)

@@ -0,0 +1,72 @@
+[DATABASE]
+user = test_dev
+pwd = Yldve35@!
+db_name = aihubtest_dag_admin_db
+host = 10.138.143.44
+port = 3306
+ssl_disabled = true
+
+[MINIO]
+k8s_url = aihub-minio-yili-test:9000
+url = aihub-minio-yili-test:9000
+access_key = minioadmin
+secret_key = minioadmin
+
+
+[AF_BACKEND]
+uri=aihub-backend-af-yili-test:8080
+host=aihub-backend-af-yili-test
+port=8080
+dag_files_dir=/dags/
+
+[K8S]
+image_pull_key=codingregistrykey
+enable_kerberos=true
+
+[BACKEND]
+url=aihub-backend-yili-test:8080
+
+[AIRFLOW]
+uri=airflow-webserver:8080
+api_token=YWRtaW46YWRtaW4=
+
+
+[HIVE]
+host = 10.254.20.22
+port = 7001
+username = hive
+password = hive
+database_name = ailab
+kerberos = 1
+keytab = assets/test/user.keytab
+krb5config = assets/test/krb5.conf
+kerberos_service_name = hadoop
+principal = ailab@EMR-5XJSY31F
+
+
+[HIVE_METASTORE]
+uris=thrift://10.254.20.18:7004,thrift://10.254.20.22:7004
+
+[TASK_IMAGES]
+datax=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-datax-yili:latest
+python=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:python
+java=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:java
+sparks=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:spark
+
+
+
+[HADOOP_INNER]
+datasource = 8,10,11
+default_fs = hdfs://HDFS8000912
+hadoop_config={
+            "dfs.nameservices": "HDFS8000912",
+            "dfs.ha.namenodes.HDFS8000912": "nn1,nn2",
+            "dfs.namenode.rpc-address.HDFS8000912.nn1": "10.254.20.18:4007",
+            "dfs.namenode.rpc-address.HDFS8000912.nn2": "10.254.20.22:4007",
+            "dfs.client.failover.proxy.provider.HDFS8000912": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+             }
+kerberos_config = {
+                    "haveKerberos": "true",
+                    "kerberosKeytabFilePath": "/workspace/confs/test/user.keytab",
+                    "kerberosPrincipal": "ailab@EMR-5XJSY31F"
+                  }
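
A condensed restatement (not part of the commit) of how this file gets selected: configs/settings.py branches on the APP_ENV environment variable, and the new elif makes APP_ENV=idctest load idctest.ini.

import os

# Condensed restatement of the APP_ENV branch in configs/settings.py.
env = os.environ.get('APP_ENV', 'development')
ini_file = {
    'development': 'development.ini',
    'production': 'production.ini',
    'idctest': 'idctest.ini',       # new environment added by this commit
}.get(env, 'development.ini')
print(f"get config of {env} from {ini_file}")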

production.ini (+15 -3)

@@ -46,7 +46,6 @@ principal = ailab@EMR-5XJSY31F
 
 [HIVE_METASTORE]
 uris=thrift://10.254.20.18:7004,thrift://10.254.20.22:7004
-;uris=thrift://10.254.20.22:7004
 
 [TASK_IMAGES]
 datax=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-datax-yili:latest
@@ -56,5 +55,18 @@ sparks=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:s
 
 
 
-
-
+[HADOOP_INNER]
+datasource = 8,10,11
+default_fs = hdfs://HDFS8000912
+hadoop_config={
+            "dfs.nameservices": "HDFS8000912",
+            "dfs.ha.namenodes.HDFS8000912": "nn1,nn2",
+            "dfs.namenode.rpc-address.HDFS8000912.nn1": "10.254.20.18:4007",
+            "dfs.namenode.rpc-address.HDFS8000912.nn2": "10.254.20.22:4007",
+            "dfs.client.failover.proxy.provider.HDFS8000912": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+             }
+kerberos_config = {
+                    "haveKerberos": "true",
+                    "kerberosKeytabFilePath": "/workspace/confs/test/user.keytab",
+                    "kerberosPrincipal": "ailab@EMR-5XJSY31F"
+                  }