
update datax kerberos

Zhang Li · 2 years ago · parent · commit 519ca2e2c8
4 changed files with 56 additions and 5 deletions
  1. app/core/datax/hdfs.py    +25 -3
  2. configs/settings.py       +6 -0
  3. development.ini           +17 -0
  4. production.ini            +8 -2

app/core/datax/hdfs.py    +25 -3

@@ -4,7 +4,8 @@ from app.models import JobJdbcDatasource
 from app.schemas.datax_json import DataXJsonParam
 from app.utils import *
 
-
+from configs.settings import config
+import json
 
 {
     "writer": {
@@ -61,6 +62,12 @@ from app.utils import *
     }
 }
 
+
+def _build_hadoop_configs():
+    hadoop = config.get('HADOOP_INNER', 'hadoop_config')
+    kerberos = config.get('HADOOP_INNER', 'kerberos_config')
+    return json.loads(hadoop), json.loads(kerberos)
+
 class HdfsReader(WriterBase):
     def __init__(self, ds: JobJdbcDatasource):
         WriterBase.__init__(self, ds)
@@ -89,8 +96,15 @@ class HdfsReader(WriterBase):
 
     def build_parameter(self, param: DataXJsonParam, is_show=True):
         parameter = dict()
+        # Hive datasources that require Kerberos authentication
+        if str(self.ds.id) in config.get('HADOOP_INNER', 'datasource').split(','):
+            parameter['defaultFS'] = config.get('HADOOP_INNER', 'default_fs')
+            hadoop, kerberos = _build_hadoop_configs()
+            parameter['hadoopConfig'] = hadoop
+            parameter.update(kerberos)
+        else:
+            parameter['defaultFS'] = param.hive_reader.reader_default_fs
         parameter['path'] = param.hive_reader.reader_path
-        parameter['defaultFS'] = param.hive_reader.reader_default_fs
         parameter['fileType'] = param.hive_reader.reader_file_type
         parameter['fieldDelimiter'] = param.hive_reader.reader_field_delimiter
         parameter['column'] = self._build_column(param.reader_columns)
@@ -128,9 +142,17 @@ class HdfsWriter(WriterBase):
         return res
 
 
+
     def build_parameter(self, param: DataXJsonParam, is_show=True):
         parameter = dict()
-        parameter['defaultFS'] = param.hive_writer.writer_default_fs
+        # Hive datasources that require Kerberos authentication
+        if str(self.ds.id) in config.get('HADOOP_INNER', 'datasource').split(','):
+            parameter['defaultFS'] = config.get('HADOOP_INNER', 'default_fs')
+            hadoop, kerberos = _build_hadoop_configs()
+            parameter['hadoopConfig'] = hadoop
+            parameter.update(kerberos)
+        else:
+            parameter['defaultFS'] = param.hive_writer.writer_default_fs
         parameter['fileType'] = param.hive_writer.writer_file_type
         parameter['path'] = param.hive_writer.writer_path
         parameter['fileName'] = param.hive_writer.writer_filename
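
For a datasource whose id is listed in HADOOP_INNER.datasource, the reader and writer parameters now take defaultFS, hadoopConfig and the Kerberos keys from the ini file rather than from the submitted job. A minimal sketch of the resulting reader parameter, using the development.ini values added below; path, fileType, fieldDelimiter and column are hypothetical request values:

    # Sketch only: ini-driven values come from [HADOOP_INNER]; the rest is illustrative.
    parameter = {
        'defaultFS': 'hdfs://HDFS8000912',
        'hadoopConfig': {
            'dfs.nameservices': 'HDFS8000912',
            'dfs.ha.namenodes.HDFS8000912': 'nn1,nn2',
            'dfs.namenode.rpc-address.HDFS8000912.nn1': '10.254.20.18:4007',
            'dfs.namenode.rpc-address.HDFS8000912.nn2': '10.254.20.22:4007',
            'dfs.client.failover.proxy.provider.HDFS8000912':
                'org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider',
        },
        # merged in at the top level by parameter.update(kerberos)
        'haveKerberos': 'true',
        'kerberosKeytabFilePath': '/workspace/confs/test/user.keytab',
        'kerberosPrincipal': 'ailab@EMR-5XJSY31F',
        # hypothetical values taken from the incoming DataXJsonParam
        'path': '/user/hive/warehouse/demo.db/demo_table',
        'fileType': 'orc',
        'fieldDelimiter': '\t',
        'column': [{'index': 0, 'type': 'string'}],
    }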

configs/settings.py    +6 -0

@@ -1,5 +1,6 @@
 
 import configparser
+import json
 import os
 
 class DefaultOption(dict):
@@ -25,3 +26,8 @@ elif os.environ.get('APP_ENV') == 'production':
 
 print(f"get config of {os.environ.get('APP_ENV')}")
 print(config.get('DATABASE', 'host'))
+hadoop_config = config.get('HADOOP_INNER', 'hadoop_config')
+
+# Debug output: check that the HADOOP_INNER values parse as expected.
+print(json.loads(hadoop_config))
+print(config.get('HADOOP_INNER', 'datasource').split(','))
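
The HADOOP_INNER options are JSON documents stored as multi-line ini values: configparser hands them back as plain strings, and json.loads turns them into dicts. A self-contained sketch of that pattern (ini path illustrative):

    import configparser
    import json

    config = configparser.ConfigParser()
    config.read('development.ini')  # illustrative path

    # Continuation lines of a multi-line ini value are joined into one string,
    # which is valid JSON here.
    hadoop = json.loads(config.get('HADOOP_INNER', 'hadoop_config'))
    kerberos = json.loads(config.get('HADOOP_INNER', 'kerberos_config'))
    kerberos_ids = config.get('HADOOP_INNER', 'datasource').split(',')

    print(hadoop['dfs.nameservices'])     # HDFS8000912
    print(kerberos['kerberosPrincipal'])  # ailab@EMR-5XJSY31F
    print(kerberos_ids)                   # ['2']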

development.ini    +17 -0

@@ -48,3 +48,20 @@ datax=SXKJ:32775/pod_datax:0.9
 python=SXKJ:32775/pod_python:1.1
 java=SXKJ:32775/java:1.0
 sparks=SXKJ:32775/jupyter:0.981
+
+
+[HADOOP_INNER]
+datasource = 2
+default_fs = hdfs://HDFS8000912
+hadoop_config = {
+            "dfs.nameservices": "HDFS8000912",
+            "dfs.ha.namenodes.HDFS8000912": "nn1,nn2",
+            "dfs.namenode.rpc-address.HDFS8000912.nn1": "10.254.20.18:4007",
+            "dfs.namenode.rpc-address.HDFS8000912.nn2": "10.254.20.22:4007",
+            "dfs.client.failover.proxy.provider.HDFS8000912": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+             }
+kerberos_config = {
+                    "haveKerberos": "true",
+                    "kerberosKeytabFilePath": "/workspace/confs/test/user.keytab",
+                    "kerberosPrincipal": "ailab@EMR-5XJSY31F"
+                  }
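
The kerberosKeytabFilePath above has to exist on the machine (or in the container) that runs the DataX job. An optional pre-flight check, not part of this commit, could look like this:

    import json
    import os

    from configs.settings import config

    kerberos = json.loads(config.get('HADOOP_INNER', 'kerberos_config'))
    if kerberos.get('haveKerberos') == 'true':
        keytab = kerberos.get('kerberosKeytabFilePath')
        if not keytab or not os.path.exists(keytab):
            raise FileNotFoundError(f'Kerberos keytab not found: {keytab}')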

production.ini    +8 -2

@@ -46,7 +46,6 @@ principal = ailab@EMR-5XJSY31F
 
 [HIVE_METASTORE]
 uris=thrift://10.254.20.18:7004,thrift://10.254.20.22:7004
-;uris=thrift://10.254.20.22:7004
 
 [TASK_IMAGES]
 datax=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-datax-yili:latest
@@ -55,6 +54,13 @@ java=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:jav
 sparks=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:spark
 
 
-
+[HADOOP_INNER]
+hadoop_config = {
+            "dfs.nameservices": "HDFS8000912",
+            "dfs.ha.namenodes.HDFS8000912": "nn1,nn2",
+            "dfs.namenode.rpc-address.HDFS8000912.nn1": "10.254.20.18:4007",
+            "dfs.namenode.rpc-address.HDFS8000912.nn2": "10.254.20.22:4007",
+            "dfs.client.failover.proxy.provider.HDFS8000912": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+             }
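
hdfs.py and settings.py read four options from [HADOOP_INNER]: datasource, default_fs, hadoop_config and kerberos_config. A quick, illustrative check (not part of this commit) that an ini file defines all of them:

    import configparser

    REQUIRED = ('datasource', 'default_fs', 'hadoop_config', 'kerberos_config')

    cfg = configparser.ConfigParser()
    cfg.read('production.ini')  # or development.ini
    missing = [opt for opt in REQUIRED if not cfg.has_option('HADOOP_INNER', opt)]
    if missing:
        print('missing [HADOOP_INNER] options:', ', '.join(missing))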