
update datax kerberos in production

Zhang Li 2 years ago
parent
commit
d302eea487
3 changed files with 18 additions and 11 deletions
  1. app/core/datax/hdfs.py (+7 -6)
  2. development.ini (+1 -1)
  3. production.ini (+10 -4)

+ 7 - 6
app/core/datax/hdfs.py

@@ -89,10 +89,11 @@ class HdfsReader(WriterBase):
         return res
 
     def _convert_type(self, type):
-        if type.lower() == 'int':
+        if 'int' in type.lower().strip():
             return 'long'
-        elif type.lower() == 'varchar':
+        elif 'varchar' in type.lower().strip():
             return 'string'
+        return 'string'
 
     def build_parameter(self, param: DataXJsonParam, is_show=True):
         parameter = dict()
@@ -104,9 +105,9 @@ class HdfsReader(WriterBase):
             parameter.update(kerberos)
         else:
             parameter['defaultFS'] = param.hive_reader.reader_default_fs
-        parameter['path'] = param.hive_reader.reader_path
+        parameter['path'] = param.hive_reader.reader_path.strip()
         parameter['fileType'] = param.hive_reader.reader_file_type
-        parameter['fieldDelimiter'] = param.hive_reader.reader_field_delimiter
+        parameter['fieldDelimiter'] = param.hive_reader.reader_field_delimiter.strip()
         parameter['column'] = self._build_column(param.reader_columns)
         return parameter
 
@@ -154,10 +155,10 @@ class HdfsWriter(WriterBase):
         else:
             parameter['defaultFS'] = param.hive_writer.writer_default_fs
         parameter['fileType'] = param.hive_writer.writer_file_type
-        parameter['path'] = param.hive_writer.writer_path
+        parameter['path'] = param.hive_writer.writer_path.strip()
         parameter['fileName'] = param.hive_writer.writer_filename
         parameter['writeMode'] = param.hive_writer.writer_mode
-        parameter['fieldDelimiter'] = param.hive_writer.writer_field_delimiter
+        parameter['fieldDelimiter'] = param.hive_writer.writer_field_delimiter.strip()
         parameter['column'] = self._build_column(param.writer_columns)
         return parameter
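
Taken together, the hdfs.py changes make the Hive-to-DataX type mapping tolerant of sized or parameterized types, add a catch-all fallback, and strip stray whitespace from the path and field delimiter before they reach the job JSON. A minimal standalone sketch of the new mapping behaviour (convert_type is a hypothetical free function here; the real logic is the _convert_type method in the diff above):

    def convert_type(type_name):
        # Substring match now covers variants such as 'bigint' or 'varchar(255)'.
        if 'int' in type_name.lower().strip():
            return 'long'
        elif 'varchar' in type_name.lower().strip():
            return 'string'
        # New fallback: any unmatched Hive type becomes a DataX string column.
        return 'string'

    convert_type('BIGINT')         # -> 'long'
    convert_type('varchar(64) ')   # -> 'string'
    convert_type('decimal(10,2)')  # -> 'string' via the fallback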
 

+ 1 - 1
development.ini

@@ -51,7 +51,7 @@ sparks=SXKJ:32775/jupyter:0.981
 
 
 [HADOOP_INNER]
-datasource = 2
+datasource = -1
 default_fs = hdfs://HDFS8000912
 hadoop_config={
             "dfs.nameservices": "HDFS8000912",

+ 10 - 4
production.ini

@@ -54,13 +54,19 @@ java=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:jav
 sparks=yldc-docker.pkg.coding.yili.com/aiplatform/docker/aihub-minio-yili-test:spark
 
 
-[HADOOP]
-hadoopConfig={
+
+[HADOOP_INNER]
+datasource = 8,10,11
+default_fs = hdfs://HDFS8000912
+hadoop_config={
             "dfs.nameservices": "HDFS8000912",
             "dfs.ha.namenodes.HDFS8000912": "nn1,nn2",
             "dfs.namenode.rpc-address.HDFS8000912.nn1": "10.254.20.18:4007",
             "dfs.namenode.rpc-address.HDFS8000912.nn2": "10.254.20.22:4007",
             "dfs.client.failover.proxy.provider.HDFS8000912": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
              }
-
-
+kerberos_config = {
+                    "haveKerberos": "true",
+                    "kerberosKeytabFilePath": "/workspace/confs/test/user.keytab",
+                    "kerberosPrincipal": "ailab@EMR-5XJSY31F"
+                  }
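
The new [HADOOP_INNER] section parallels the one already present in development.ini: datasource lists the datasource ids routed through the inner, Kerberos-protected HDFS cluster, while hadoop_config and kerberos_config hold the JSON fragments that hdfs.py merges into the DataX reader/writer parameter. A hedged sketch of how such a section could be loaded and merged (read_hadoop_inner and build_kerberos_parameter are illustrative names, not functions from this repository):

    import configparser
    import json

    def read_hadoop_inner(path):
        cp = configparser.ConfigParser()
        cp.read(path)
        section = cp['HADOOP_INNER']
        return {
            # e.g. "8,10,11" in production.ini, "-1" in development.ini
            'datasource': [int(x) for x in section['datasource'].split(',')],
            'default_fs': section['default_fs'],
            'hadoop_config': json.loads(section['hadoop_config']),
            'kerberos_config': json.loads(section['kerberos_config']),
        }

    def build_kerberos_parameter(conf):
        # Mirrors parameter.update(kerberos) in hdfs.py: defaultFS, the HA
        # hadoopConfig and the Kerberos keys all land in one parameter dict.
        parameter = {'defaultFS': conf['default_fs'],
                     'hadoopConfig': conf['hadoop_config']}
        parameter.update(conf['kerberos_config'])
        return parameter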