浏览代码

add prod confs

Zhang Li 2 年之前
父节点
当前提交
456473463c

+ 0 - 33
conf/livy-env.sh.template

@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# LIVY ENVIRONMENT VARIABLES
-#
-# - JAVA_HOME       Java runtime to use. By default use "java" from PATH.
-# - HADOOP_CONF_DIR Directory containing the Hadoop / YARN configuration to use.
-# - SPARK_HOME      Spark which you would like to use in Livy.
-# - SPARK_CONF_DIR  Optional directory where the Spark configuration lives.
-#                   (Default: $SPARK_HOME/conf)
-# - LIVY_LOG_DIR    Where log files are stored. (Default: ${LIVY_HOME}/logs)
-# - LIVY_PID_DIR    Where the pid file is stored. (Default: /tmp)
-# - LIVY_SERVER_JAVA_OPTS  Java Opts for running livy server (You can set jvm related setting here,
-#                          like jvm memory/gc algorithm and etc.)
-# - LIVY_IDENT_STRING A name that identifies the Livy server instance, used to generate log file
-#                     names. (Default: name of the user starting Livy).
-# - LIVY_MAX_LOG_FILES Max number of log file to keep in the log directory. (Default: 5.)
-# - LIVY_NICENESS   Niceness of the Livy server process when running in the background. (Default: 0.)
-# - LIVY_CLASSPATH  Override if the additional classpath is required.

+ 0 - 24
conf/log4j.properties.template

@@ -1,24 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# The default Livy logging configuration.
-log4j.rootCategory=INFO, console
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
-
-log4j.logger.org.eclipse.jetty=WARN

+ 0 - 35
conf/spark-blacklist.conf.template

@@ -1,35 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-#
-# Configuration override / blacklist. Defines a list of properties that users are not allowed
-# to override when starting Spark sessions.
-#
-# This file takes a list of property names (one per line). Empty lines and lines starting with "#"
-# are ignored.
-#
-
-# Disallow overriding the master and the deploy mode.
-spark.master
-spark.submit.deployMode
-
-# Disallow overriding the location of Spark cached jars.
-spark.yarn.jar
-spark.yarn.jars
-spark.yarn.archive
-
-# Don't allow users to override the RSC timeout.
-livy.rsc.server.idle-timeout

+ 30 - 23
docker/Dockerfile

@@ -18,48 +18,45 @@ RUN mvn clean package \
 
 
 
-FROM openjdk:8-jre-slim
+FROM openjdk:8-jre-slim as builder1
 RUN sed -i "s@http://\(deb\|security\).debian.org@https://mirrors.aliyun.com@g" /etc/apt/sources.list
 # COPY docker/sources.list /etc/apt
 RUN apt update && apt install -y --no-install-recommends \
     krb5-user zip unzip procps tar curl supervisor
+# 目录准备
 WORKDIR /opt/cluster
-COPY docker/cluster .
-ENV HADOOP_HOME=/opt/cluster/hadoop
-ENV HADOOP_CONF_DIR=/opt/cluster/conf
-ENV HIVE_HOME=/opt/cluster/hive
-ENV HIVE_CONF_DIR=/opt/cluster/conf
-ENV SPARK_HOME=/opt/cluster/spark
-ENV SPARK_CONF_DIR=/opt/cluster/conf
-ENV LIVY_HOME=/opt/cluster/livy
-ENV LIVY_CONF_DIR=/opt/cluster/conf
-RUN rm -rf /opt/cluster/livy/*
-# COPY docker/livy /opt/cluster/livy/
-
 RUN mkdir -p hadoop spark livy/logs
 
-RUN curl -o ./hadoop/hadoop.tgz -O  http://mirror.nju.edu.cn/apache/hadoop/common/hadoop-3.2.3/hadoop-3.2.3.tar.gz
-#&& \
-#    curl -o ./spark/spark.tgz -O  http://mirror.nju.edu.cn/apache/spark/spark-3.1.3/spark-3.1.3-bin-hadoop3.2.tgz
-
+# spark
 COPY  --from=builder /workspace/spark-3.0.3-bin-hadoop3.2.tgz  spark
-# RUN mv spark-3.0.3-bin-hadoop3.2.tgz
-
 RUN cd spark && tar zxfv spark-3.0.3-bin-hadoop3.2.tgz && mv spark*/* . && rm -rf spark-3.0.3-bin-hadoop3.2.tgz
+
+# hadoop
+RUN curl -o ./hadoop/hadoop.tgz -O  http://mirror.nju.edu.cn/apache/hadoop/common/hadoop-3.2.3/hadoop-3.2.3.tar.gz
 RUN cd hadoop && tar zxfv hadoop.tgz && mv hadoop*/* . && rm -rf hadoop.tgz
 
-ADD docker/krb5.conf /etc
+
+# livy
 COPY --from=builder /workspace/assembly/target/apache-livy*.zip livy
-# ADD assembly/target/apache-livy*.zip /opt/cluster/livy/
 RUN cd /opt/cluster/livy/ && unzip apache-livy*.zip && mv apache-livy*bin/* .
+
 # jar包冲突
 RUN cp hadoop/share/hadoop/common/lib/jersey-core-1.19.jar livy/jars/
 RUN cp hadoop/share/hadoop/common/lib/jersey-core-1.19.jar spark/jars/
 RUN cp hadoop/share/hadoop/yarn/lib/jersey-client-1.19.jar spark/jars/
 RUN rm spark/jars/jersey-client-2.30.jar
-# CMD ["livy/bin/livy-server"]
 
+# 环境变量
+ENV HADOOP_HOME=/opt/cluster/hadoop
+ENV HADOOP_CONF_DIR=/opt/cluster/conf
+ENV HIVE_HOME=/opt/cluster/hive
+ENV HIVE_CONF_DIR=/opt/cluster/conf
+ENV SPARK_HOME=/opt/cluster/spark
+ENV SPARK_CONF_DIR=/opt/cluster/conf
+ENV LIVY_HOME=/opt/cluster/livy
+ENV LIVY_CONF_DIR=/opt/cluster/conf
 
+# 启动命令
 RUN echo "\
 [program:livy]\n\
 directory=/opt/cluster\n\
@@ -71,4 +68,14 @@ stdout_logfile=/var/log/livy.log\n\
 stdout_logfile_maxbytes=50MB\n\
 " > /etc/supervisor/conf.d/livy.conf
 
-CMD ["supervisord", "-n"]
+CMD ["supervisord", "-n"]
+
+
+FROM builder1 as image-test
+COPY docker/dev .
+ADD docker/dev/conf/krb5.conf /etc
+
+
+FROM builder1 as image-prod
+COPY docker/prod .
+ADD docker/prod/conf/krb5.conf /etc

+ 0 - 0
docker/cluster/conf/core-site.xml → docker/dev/conf/core-site.xml


+ 0 - 0
docker/cluster/conf/hdfs-site.xml → docker/dev/conf/hdfs-site.xml


+ 0 - 0
docker/cluster/conf/hive-site.xml → docker/dev/conf/hive-site.xml


+ 0 - 0
docker/krb5.conf → docker/dev/conf/krb5.conf


+ 0 - 0
docker/cluster/conf/livy-client.conf → docker/dev/conf/livy-client.conf


+ 0 - 0
docker/cluster/conf/livy.conf → docker/dev/conf/livy.conf


+ 0 - 0
docker/cluster/conf/log4j.properties → docker/dev/conf/log4j.properties


+ 0 - 0
docker/cluster/conf/mapred-site.xml → docker/dev/conf/mapred-site.xml


+ 0 - 0
docker/cluster/conf/spark-defaults.conf → docker/dev/conf/spark-defaults.conf


+ 0 - 0
docker/cluster/conf/user.keytab → docker/dev/conf/user.keytab


+ 0 - 0
docker/cluster/conf/yarn-site.xml → docker/dev/conf/yarn-site.xml


+ 196 - 0
docker/prod/conf/core-site.xml

@@ -0,0 +1,196 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+    <property>
+        <name>emr.cfs.group.id.map</name>
+        <value>root:0;hadoop:500</value>
+    </property>
+
+    <property>
+        <name>emr.cfs.io.blocksize</name>
+        <value>1048576</value>
+    </property>
+
+    <property>
+        <name>emr.cfs.user.id.map</name>
+        <value>root:0;hadoop:500</value>
+    </property>
+
+    <property>
+        <name>emr.cfs.write.level</name>
+        <value>2</value>
+    </property>
+
+    <property>
+        <name>fs.AbstractFileSystem.ofs.impl</name>
+        <value>com.qcloud.chdfs.fs.CHDFSDelegateFSAdapter</value>
+    </property>
+
+    <property>
+        <name>fs.cfs.impl</name>
+        <value>com.tencent.cloud.emr.CFSFileSystem</value>
+    </property>
+
+    <property>
+        <name>fs.cos.buffer.dir</name>
+        <value>/data/emr/hdfs/tmp</value>
+    </property>
+
+    <property>
+        <name>fs.cos.local_block_size</name>
+        <value>2097152</value>
+    </property>
+
+    <property>
+        <name>fs.cos.userinfo.appid</name>
+        <value>1302259445</value>
+    </property>
+
+    <property>
+        <name>fs.cos.userinfo.region</name>
+        <value>bj</value>
+    </property>
+
+    <property>
+        <name>fs.cos.userinfo.useCDN</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.block.size</name>
+        <value>67108864</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.credentials.provider</name>
+        <value>org.apache.hadoop.fs.auth.EMRInstanceCredentialsProvider</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.impl</name>
+        <value>org.apache.hadoop.fs.cosnative.NativeCosFileSystem</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.local_block_size</name>
+        <value>2097152</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.tmp.dir</name>
+        <value>/data/emr/hdfs/tmp/hadoop_cos</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.upload.buffer</name>
+        <value>mapped_disk</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.upload.buffer.size</name>
+        <value>-1</value>
+    </property>
+
+    <property>
+        <name>fs.cosn.userinfo.region</name>
+        <value>ap-beijing</value>
+    </property>
+
+    <property>
+        <name>fs.defaultFS</name>
+        <value>hdfs://HDFS84854</value>
+    </property>
+
+    <property>
+        <name>fs.emr.version</name>
+        <value>9c06b7b</value>
+    </property>
+
+    <property>
+        <name>fs.ofs.impl</name>
+        <value>com.qcloud.chdfs.fs.CHDFSHadoopFileSystemAdapter</value>
+    </property>
+
+    <property>
+        <name>fs.ofs.tmp.cache.dir</name>
+        <value>/data/emr/hdfs/tmp/chdfs</value>
+    </property>
+
+    <property>
+        <name>fs.ofs.user.appid</name>
+        <value>1302259445</value>
+    </property>
+
+    <property>
+        <name>fs.trash.interval</name>
+        <value>1440</value>
+    </property>
+
+    <property>
+        <name>ha.health-monitor.rpc-timeout.ms</name>
+        <value>180000</value>
+    </property>
+
+    <property>
+        <name>ha.zookeeper.session-timeout.ms</name>
+        <value>180000</value>
+    </property>
+
+    <property>
+        <name>hadoop.http.staticuser.user</name>
+        <value>hadoop</value>
+    </property>
+
+    <property>
+        <name>hadoop.logfile.count</name>
+        <value>20</value>
+    </property>
+
+    <property>
+        <name>hadoop.logfile.size</name>
+        <value>1000000000</value>
+    </property>
+
+    <property>
+        <name>hadoop.proxyuser.hadoop.groups</name>
+        <value>*</value>
+    </property>
+
+    <property>
+        <name>hadoop.proxyuser.hadoop.hosts</name>
+        <value>*</value>
+    </property>
+
+    <property>
+        <name>hadoop.security.authentication</name>
+        <value>kerberos</value>
+    </property>
+
+    <property>
+        <name>hadoop.security.authorization</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hadoop.tmp.dir</name>
+        <value>/data/emr/hdfs/tmp</value>
+    </property>
+
+    <property>
+        <name>hadoop.zk.timeout-ms</name>
+        <value>60000</value>
+    </property>
+
+    <property>
+        <name>io.compression.codec.lzo.class</name>
+        <value>com.hadoop.compression.lzo.LzoCodec</value>
+    </property>
+
+    <property>
+        <name>io.compression.codecs</name>
+        <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
+    </property>
+
+</configuration>

+ 310 - 0
docker/prod/conf/hdfs-site.xml

@@ -0,0 +1,310 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+	
+	<property>
+		<name>dfs.balance.bandwidthPerSec</name>
+		<value>10485760</value>
+	</property>
+	
+	<property>
+		<name>dfs.block.access.token.enable</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.blockdfs.webhdfs.enabled.access.token.enable</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.blocksize</name>
+		<value>134217728</value>
+	</property>
+	
+	<property>
+		<name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.client.failover.proxy.provider.HDFS84854</name>
+		<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
+	</property>
+	
+	<property>
+		<name>dfs.client.file-block-storage-locations.timeout.millis</name>
+		<value>10000</value>
+	</property>
+	
+	<property>
+		<name>dfs.client.read.shortcircuit</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.client.slow.io.warning.threshold.ms</name>
+		<value>900000</value>
+	</property>
+	
+	<property>
+		<name>dfs.client.socket-timeout</name>
+		<value>60000</value>
+	</property>
+	
+	<property>
+		<name>dfs.client.use.datanode.hostname</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.handler.count</name>
+		<value>128</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.kerberos.principal</name>
+		<value>hadoop/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.keytab.file</name>
+		<value>/var/krb5kdc/emr.keytab</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.max.transfer.threads</name>
+		<value>16480</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.max.xcievers</name>
+		<value>8000</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.slow.io.warning.threshold.ms</name>
+		<value>90000</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.socket.write.timeout</name>
+		<value>480000</value>
+	</property>
+	
+	<property>
+		<name>dfs.datanode.use.datanode.hostname</name>
+		<value>false</value>
+	</property>
+	
+	<property>
+		<name>dfs.domain.socket.path</name>
+		<value>/var/lib/hadoop/dn</value>
+	</property>
+	
+	<property>
+		<name>dfs.encryption.key.provider.uri</name>
+		<value></value>
+	</property>
+	
+	<property>
+		<name>dfs.ha.automatic-failover.enabled</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.ha.fencing.methods</name>
+		<value>shell(/bin/true)</value>
+	</property>
+	
+	<property>
+		<name>dfs.ha.namenodes.HDFS84854</name>
+		<value>nn1,nn2</value>
+	</property>
+	
+	<property>
+		<name>dfs.heartbeat.interval</name>
+		<value>2</value>
+	</property>
+	
+	<property>
+		<name>dfs.hosts</name>
+		<value>/usr/local/service/hadoop/etc/hadoop/hdfshosts</value>
+	</property>
+	
+	<property>
+		<name>dfs.hosts.exclude</name>
+		<value>/usr/local/service/hadoop/etc/hadoop/hdfsexcludedhosts</value>
+	</property>
+	
+	<property>
+		<name>dfs.journalnode.edits.dir</name>
+		<value>/data/emr/hdfs/journalnode</value>
+	</property>
+	
+	<property>
+		<name>dfs.journalnode.kerberos.internal.spnego.principal</name>
+		<value>HTTP/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.journalnode.kerberos.principal</name>
+		<value>hadoop/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.journalnode.keytab.file</name>
+		<value>/var/krb5kdc/emr.keytab</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+		<value>false</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.handler.count</name>
+		<value>64</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.heartbeat.recheck-interval</name>
+		<value>90000</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.http-address.HDFS84854.nn1</name>
+		<value>172.23.21.7:4008</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.http-address.HDFS84854.nn2</name>
+		<value>172.23.21.8:4008</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.https-address.HDFS84854.nn1</name>
+		<value>172.23.21.7:4009</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.https-address.HDFS84854.nn2</name>
+		<value>172.23.21.8:4009</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.inode.attributes.provider.class</name>
+		<value>org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.kerberos.internal.spnego.principal</name>
+		<value>HTTP/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.kerberos.principal</name>
+		<value>hadoop/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.keytab.file</name>
+		<value>/var/krb5kdc/emr.keytab</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.name.dir</name>
+		<value>/data/emr/hdfs/namenode</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.rpc-address.HDFS84854.nn1</name>
+		<value>172.23.21.7:4007</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.rpc-address.HDFS84854.nn2</name>
+		<value>172.23.21.8:4007</value>
+	</property>
+	
+	<property>
+		<name>dfs.namenode.shared.edits.dir</name>
+		<value>qjournal://172.23.21.17:4005;172.23.21.15:4005;172.23.21.10:4005/hadoop</value>
+	</property>
+	
+	<property>
+		<name>dfs.nameservices</name>
+		<value>HDFS84854</value>
+	</property>
+	
+	<property>
+		<name>dfs.permissions</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>dfs.permissions.umask-mode</name>
+		<value>077</value>
+	</property>
+	
+	<property>
+		<name>dfs.replication</name>
+		<value>3</value>
+	</property>
+	
+	<property>
+		<name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
+		<value>HTTP/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.secondary.namenode.kerberos.principal</name>
+		<value>hadoop/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.secondary.namenode.keytab.file</name>
+		<value>/var/krb5kdc/emr.keytab</value>
+	</property>
+	
+	<property>
+		<name>dfs.socket.timeout</name>
+		<value>60000</value>
+	</property>
+	
+	<property>
+		<name>dfs.web.authentication.kerberos.keytab</name>
+		<value>/var/krb5kdc/emr.keytab</value>
+	</property>
+	
+	<property>
+		<name>dfs.web.authentication.kerberos.principal</name>
+		<value>HTTP/_HOST@EMR-56L6ZNTS</value>
+	</property>
+	
+	<property>
+		<name>dfs.webhdfs.enabled</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>ha.zookeeper.quorum</name>
+		<value>172.23.21.17:2181,172.23.21.15:2181,172.23.21.10:2181</value>
+	</property>
+	
+	<property>
+		<name>ignore.secure.ports.for.testing</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>output.replace-datanode-on-failure</name>
+		<value>false</value>
+	</property>
+	
+</configuration>

+ 365 - 0
docker/prod/conf/hive-site.xml

@@ -0,0 +1,365 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration>
+
+    <property>
+        <name>datanucleus.schema.autoCreateTables</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hbase.zookeeper.quorum</name>
+        <value>172.23.21.17:2181,172.23.21.15:2181,172.23.21.10:2181</value>
+    </property>
+
+    <property>
+        <name>hive.cluster.delegation.token.store.class</name>
+        <value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
+    </property>
+
+    <property>
+        <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
+        <value>172.23.21.17:2181,172.23.21.10:2181,172.23.21.15:2181</value>
+    </property>
+
+    <property>
+        <name>hive.downloaded.resources.dir</name>
+        <value>/data/emr/hive/tmp/${hive.session.id}_resources</value>
+    </property>
+
+    <property>
+        <name>hive.exec.dynamic.partition.mode</name>
+        <value>nonstrict</value>
+    </property>
+
+    <property>
+        <name>hive.exec.local.scratchdir</name>
+        <value>/data/emr/hive/tmp</value>
+    </property>
+
+    <property>
+        <name>hive.exec.parallel</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.exec.parallel.thread.number</name>
+        <value>16</value>
+    </property>
+
+    <property>
+        <name>hive.execution.engine</name>
+        <value>tez</value>
+    </property>
+
+    <property>
+        <name>hive.fetch.task.aggr</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.hwi.listen.host</name>
+        <value>0.0.0.0</value>
+    </property>
+
+    <property>
+        <name>hive.hwi.listen.port</name>
+        <value>7002</value>
+    </property>
+
+    <property>
+        <name>hive.llap.daemon.output.service.port</name>
+        <value>7009</value>
+    </property>
+
+    <property>
+        <name>hive.llap.daemon.rpc.port</name>
+        <value>7007</value>
+    </property>
+
+    <property>
+        <name>hive.llap.daemon.web.port</name>
+        <value>7008</value>
+    </property>
+
+    <property>
+        <name>hive.llap.daemon.yarn.shuffle.port</name>
+        <value>7006</value>
+    </property>
+
+    <property>
+        <name>hive.llap.management.rpc.port</name>
+        <value>7005</value>
+    </property>
+
+    <property>
+        <name>hive.merge.mapfiles</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.merge.mapredfiles</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.merge.size.per.task</name>
+        <value>134217728</value>
+    </property>
+
+    <property>
+        <name>hive.merge.smallfiles.avgsize</name>
+        <value>134217728</value>
+    </property>
+
+    <property>
+        <name>hive.merge.sparkfiles</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.merge.tezfiles</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.alter.notifications.basic</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.db.encoding</name>
+        <value>UTF-8</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.dml.events</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.event.db.listener.timetolive</name>
+        <value>172800s</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.event.db.notification.api.auth</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.kerberos.keytab.file</name>
+        <value>/var/krb5kdc/emr.keytab</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.kerberos.principal</name>
+        <value>hadoop/_HOST@EMR-56L6ZNTS</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.metrics.enabled</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.notifications.add.thrift.objects</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.port</name>
+        <value>7004</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.sasl.enabled</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.schema.verification</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.schema.verification.record.version</name>
+        <value>false</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.server.max.message.size</name>
+        <value>858993459</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.transactional.event.listeners</name>
+        <value>org.apache.hive.hcatalog.listener.DbNotificationListener</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.uris</name>
+        <value>thrift://172.23.21.7:7004,thrift://172.23.21.8:7004</value>
+    </property>
+
+    <property>
+        <name>hive.metastore.warehouse.dir</name>
+        <value>/user/hive/warehouse</value>
+    </property>
+
+    <property>
+        <name>hive.querylog.location</name>
+        <value>/data/emr/hive/tmp</value>
+    </property>
+
+    <property>
+        <name>hive.server2.authentication</name>
+        <value>kerberos</value>
+    </property>
+
+    <property>
+        <name>hive.server2.authentication.kerberos.keytab</name>
+        <value>/var/krb5kdc/emr.keytab</value>
+    </property>
+
+    <property>
+        <name>hive.server2.authentication.kerberos.principal</name>
+        <value>hadoop/_HOST@EMR-56L6ZNTS</value>
+    </property>
+
+    <property>
+        <name>hive.server2.logging.operation.log.location</name>
+        <value>/data/emr/hive/tmp/operation_logs</value>
+    </property>
+
+    <property>
+        <name>hive.server2.metrics.enabled</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.server2.support.dynamic.service.discovery</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.server2.thrift.bind.host</name>
+        <value>172.23.21.11</value>
+    </property>
+
+    <property>
+        <name>hive.server2.thrift.http.port</name>
+        <value>7000</value>
+    </property>
+
+    <property>
+        <name>hive.server2.thrift.max.worker.threads</name>
+        <value>1000</value>
+    </property>
+
+    <property>
+        <name>hive.server2.thrift.port</name>
+        <value>7001</value>
+    </property>
+
+    <property>
+        <name>hive.server2.webui.host</name>
+        <value>0.0.0.0</value>
+    </property>
+
+    <property>
+        <name>hive.server2.webui.port</name>
+        <value>7003</value>
+    </property>
+
+    <property>
+        <name>hive.server2.zookeeper.namespace</name>
+        <value>hiveserver2</value>
+    </property>
+
+    <property>
+        <name>hive.stats.autogather</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.tez.auto.reducer.parallelism</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>hive.tez.container.size</name>
+        <value>1024</value>
+    </property>
+
+    <property>
+        <name>hive.zookeeper.client.port</name>
+        <value>2181</value>
+    </property>
+
+    <property>
+        <name>hive.zookeeper.quorum</name>
+        <value>172.23.21.17:2181,172.23.21.15:2181,172.23.21.10:2181</value>
+    </property>
+
+    <property>
+        <name>io.compression.codec.lzo.class</name>
+        <value>com.hadoop.compression.lzo.LzoCodec</value>
+    </property>
+
+    <property>
+        <name>io.compression.codecs</name>
+        <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
+    </property>
+
+    <property>
+        <name>javax.jdo.option.ConnectionDriverName</name>
+        <value>com.mysql.jdbc.Driver</value>
+    </property>
+
+    <property>
+        <name>javax.jdo.option.ConnectionPassword</name>
+        <value>enochLk9j7Hv35</value>
+    </property>
+
+    <property>
+        <name>javax.jdo.option.ConnectionURL</name>
+        <value>jdbc:mysql://172.23.21.6:3306/hivemetastore?useSSL=false&amp;createDatabaseIfNotExist=true&amp;characterEncoding=UTF-8</value>
+    </property>
+
+    <property>
+        <name>javax.jdo.option.ConnectionUserName</name>
+        <value>root</value>
+    </property>
+
+    <property>
+        <name>mapreduce.input.fileinputformat.list-status.num-threads</name>
+        <value>5</value>
+    </property>
+
+    <property>
+        <name>spark.yarn.jars</name>
+        <value>hdfs:///spark/jars/*</value>
+    </property>
+
+</configuration>

+ 25 - 0
docker/prod/conf/krb5.conf

@@ -0,0 +1,25 @@
+[libdefaults]
+    dns_lookup_realm = false
+    dns_lookup_kdc = false
+    ticket_lifetime = 24h
+    renew_lifetime = 7d
+    forwardable = true
+    rdns = false
+    default_realm = EMR-56L6ZNTS
+    default_tgs_enctypes = des3-cbc-sha1
+    default_tkt_enctypes = des3-cbc-sha1
+    permitted_enctypes = des3-cbc-sha1
+    kdc_timeout = 3000
+    max_retries = 3
+[realms]
+    EMR-56L6ZNTS = {
+
+        kdc = 172.23.21.7:88
+        admin_server = 172.23.21.7
+        kdc = 172.23.21.8:88
+        admin_server = 172.23.21.8
+
+    }
+
+[domain_realm]
+# .example.com = EXAMPLE.COM

+ 7 - 7
conf/livy-client.conf.template → docker/prod/conf/livy-client.conf

@@ -64,27 +64,27 @@
 # Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
 # directory every time a session is started. By caching these files in HDFS, for example, startup
 # time of sessions on YARN can be reduced.
-# livy.rsc.jars =
+#livy.rsc.jars = hdfs:/user/aidevuser/rsc-jars/*
 # Location of the SparkR package for running sparkr
 # livy.rsc.sparkr.package =
 # Location of the PySpark package for running pyspark
 # livy.rsc.pyspark.archives =
 
 # Address for the RSC driver to connect back with its connection info.
-# livy.rsc.launcher.address =
livy.rsc.launcher.address = please_set_correct_address
 # Port range on which the RPC will launch. The port range is inclusive of both the start and end ports.
-# livy.rsc.launcher.port.range = 10000~10010
+livy.rsc.launcher.port.range = 31100~31103
 
 # How long will the RSC wait for a connection for a Livy server before shutting itself down.
-# livy.rsc.server.idle-timeout = 10m
+livy.rsc.server.idle-timeout = 30m
 
 # The user that should be impersonated when requesting a Livy session
-# livy.rsc.proxy-user =
+#livy.rsc.proxy-user = aidevuser
 
 # Host or IP address of the RPC server
-# livy.rsc.rpc.server.address =
+livy.rsc.rpc.server.address = 0.0.0.0
 # How long the rsc client will wait when attempting to connect to the Livy server
-# livy.rsc.server.connect.timeout = 90s
+livy.rsc.server.connect.timeout = 5m
 
 # The logging level for the rpc channel. Possible values: TRACE, DEBUG, INFO, WARN, or ERROR
 # livy.rsc.channel.log.level =

+ 9 - 7
conf/livy.conf.template → docker/prod/conf/livy.conf

@@ -32,17 +32,17 @@
 # livy.server.host = 0.0.0.0
 
 # What port to start the server on.
-# livy.server.port = 8998
+livy.server.port = 8998
 
 # What base path ui should work on. By default UI is mounted on "/".
 # E.g.: livy.ui.basePath = /my_livy - result in mounting UI on /my_livy/
 # livy.ui.basePath = ""
 
 # What spark master Livy sessions should use.
-# livy.spark.master = local
+livy.spark.master = yarn
 
 # What spark deploy mode Livy sessions should use.
-# livy.spark.deploy-mode =
+livy.spark.deploy-mode = cluster
 
 # Configure Livy server http request and response header size.
 # livy.server.request-header.size = 131072
@@ -70,14 +70,14 @@
 # Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
 # directory every time a session is started. By caching these files in HDFS, for example, startup
 # time of sessions on YARN can be reduced.
-# livy.rsc.jars =
+#livy.rsc.jars = hdfs:/user/aidevuser/rsc-jars/*
 
 # Comma-separated list of Livy REPL jars. By default Livy will upload jars from its installation
 # directory every time a session is started. By caching these files in HDFS, for example, startup
 # time of sessions on YARN can be reduced. Please list all the repl dependencies including
 # Scala version-specific livy-repl jars, Livy will automatically pick the right dependencies
 # during session creation.
-# livy.repl.jars =
+#livy.repl.jars = hdfs:/user/aidevuser/repl_2.12-jars/*
 
 # Location of PySpark archives. By default Livy will upload the file from SPARK_HOME, but
 # by caching the file in HDFS, startup time of PySpark sessions on YARN can be reduced.
@@ -98,7 +98,7 @@
 
 # Whether to enable HiveContext in livy interpreter, if it is true hive-site.xml will be detected
 # on user request and then livy server classpath automatically.
-# livy.repl.enable-hive-context =
+livy.repl.enable-hive-context = true
 
 # Recovery mode of Livy. Possible values:
 # off: Default. Turn off recovery. Every time Livy shuts down, it stops and forgets all sessions.
@@ -140,7 +140,7 @@
 # livy.server.recovery.zk-state-store.key-prefix = livy
 
 # If Livy can't find the yarn app within this time, consider it lost.
-# livy.server.yarn.app-lookup-timeout = 120s
+livy.server.yarn.app-lookup-timeout = 5m
 # When the cluster is busy, we may fail to launch the yarn app within app-lookup-timeout, which
 # would cause session leakage, so we need to check for leaked sessions.
 # How often/long to check for livy session leakage
@@ -179,6 +179,8 @@
 # livy.server.auth.kerberos.principal = <spnego principal>
 # livy.server.auth.kerberos.keytab = <spnego keytab>
 # livy.server.auth.kerberos.name-rules = DEFAULT
+livy.server.launch.kerberos.principal = ailab
+livy.server.launch.kerberos.keytab = /opt/cluster/conf/user.keytab
 #
 # If user wants to use custom authentication filter, configurations are:
 # livy.server.auth.type = <custom>

二进制
docker/prod/conf/user.keytab


+ 8 - 0
docker/prod/conf/信息.txt

@@ -0,0 +1,8 @@
+ip+port:172.23.21.7:7001
+username:ylaiuser
+password:EMR-56L6ZNTS
+schema:default
+hdfs_path:hdfs://HDFS84854
+
+
+

+ 10 - 0
docker/prod/conf/平台登录地址.txt

@@ -0,0 +1,10 @@
+1、AI平台
+http://ailab.digitalyili.com/
+tenant/tenant_pwd 租户管理员
+pmguser/pmguser_pwd 项目组管理员
+pmuser/pmuser_pwd 项目负责人
+biaozhu/biaozhu_pwd 标注人员
+
+2、foot
+http://aicenter.digitalyili.com/
+admin/Foot@2022

+ 42 - 0
docker/prod/conf/第三方服务.txt

@@ -0,0 +1,42 @@
+1、rabbitmq
+172.23.22.106:5671
+root/1qaz@WSX
+
+2、gitlab
+aigit.digitalyili.com
+root/Admin@123
+
+3、artifactory
+http://aicenter.digitalyili.com/ui
+admin/Admin@123
+
+4、kafka
+172.23.22.106:9092
+
+5、redis(prod):
+ai平台:
+IP: 172.23.12.226:6379
+用户信息:RttN4RbjQBf
+
+6、mysql(prod):
+ip:172.23.12.194:3306
+用户信息:aihub_prod : RttN4RbjQBfv
+库名:
+aihub_base_prod
+aihub_data_prod
+aihub_foot_prod
+aihub_base_test_prod
+aihub_data_test_prod
+
+7、pgsql
+ip:172.23.12.207:5432
+用户信息:aihub_data : t@evPYeB5W5c$K0p
+库名:prod_aihub_data
+
+8、cos桶(prod):
+桶名:aihub-prod-1302259445
+访问路径:https://aihub-prod-1302259445.cos.ap-beijing.myqcloud.com
+CDN访问:https://aihubcos.digitalyili.com(暂未配置)
+API秘钥:
+SecretId AKIDb2D7mBJAk47tOdaGfogHacOt74lBJCqD
+SecretKey 8yXpqZ1D8m9drpY0B5i4B2btxlJacOt7