Explorar o código

update Dockerfile

Zhang Li hai 2 anos
pai
achega
53d991e27f

+ 2 - 1
.dockerignore

@@ -35,4 +35,5 @@ python-api/dist
 .git
 .gitignore
 docker/Dockerfile
-.vscode/
+.vscode/
+Makefile

+ 10 - 0
Makefile

@@ -0,0 +1,10 @@
+
+.PHONY: all
+
+prod:
+	@DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile  --build-arg BUILDKIT_INLINE_CACHE=1  --target image-prod -t livy:prod .
+
+
+test:
+	@DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile  --build-arg BUILDKIT_INLINE_CACHE=1  --target image-test -t livy:test .
+

+ 4 - 2
docker/Dockerfile

@@ -3,7 +3,8 @@ FROM maven:3.8.6-openjdk-8 as builder
 WORKDIR /workspace
 RUN sed -i "s@http://\(deb\|security\).debian.org@https://mirrors.aliyun.com@g" /etc/apt/sources.list
 RUN apt update && apt install -y python3 python3-pip python-is-python3 \
-    && pip config set global.index-url https://mirror.nju.edu.cn/pypi/web/simple
+    && pip config set global.index-url https://mirror.baidu.com/pypi/simple \
+    && pip install -U setuptools
 ADD . .
 RUN mkdir -p ~/.m2 && cp -r docker/settings.xml ~/.m2
 # RUN wget http://mirror.nju.edu.cn/apache/spark/spark-3.1.3/spark-3.1.3-bin-hadoop3.2.tgz
@@ -32,7 +33,8 @@ COPY  --from=builder /workspace/spark-3.0.3-bin-hadoop3.2.tgz  spark
 RUN cd spark && tar zxfv spark-3.0.3-bin-hadoop3.2.tgz && mv spark*/* . && rm -rf spark-3.0.3-bin-hadoop3.2.tgz
 
 # hadoop
-RUN curl -o ./hadoop/hadoop.tgz -O  http://mirror.nju.edu.cn/apache/hadoop/common/hadoop-3.2.3/hadoop-3.2.3.tar.gz
+RUN curl -o ./hadoop/hadoop.tgz -O  https://archive.apache.org/dist/hadoop/common/hadoop-3.2.3/hadoop-3.2.3.tar.gz
+# RUN curl -o ./hadoop/hadoop.tgz -O  http://mirror.nju.edu.cn/apache/hadoop/common/hadoop-3.2.3/hadoop-3.2.3.tar.gz
 RUN cd hadoop && tar zxfv hadoop.tgz && mv hadoop*/* . && rm -rf hadoop.tgz
 
 

+ 28 - 0
docker/prod/conf/mapred-site.xml

@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+     <name>mapreduce.framework.name</name>
+     <value>yarn</value>
+  </property>
+  <property>
+    <name>mapreduce.application.classpath</name>
+    <value>$HADOOP_HOME/share/hadoop/mapreduce/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*</value>
+  </property>
+</configuration>

+ 33 - 0
docker/prod/conf/spark-defaults.conf

@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environmental settings.
+
+# Example:
+# spark.master                     spark://master:7077
+# spark.eventLog.enabled           true
+# spark.eventLog.dir               hdfs://namenode:8021/directory
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory              5g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
+spark.master yarn
+spark.submit.deployMode cluster
+spark.sql.hive.metastore.version 2.3.7
+spark.yarn.dist.archives hdfs:/user/ylaiuser/python_env.tar.gz#python_env
+spark.pyspark.python ./python_env/bin/python
+spark.yarn.archive hdfs:/user/ylaiuser/spark_libs.zip

+ 371 - 0
docker/prod/conf/yarn-site.xml

@@ -0,0 +1,371 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+	<property>
+		<name>yarn.acl.enable</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
+		<value>5000</value>
+	</property>
+
+	<property>
+		<name>yarn.app.mapreduce.am.staging-dir</name>
+		<value>/emr/hadoop-yarn/staging</value>
+	</property>
+
+	<property>
+		<name>yarn.authorization-provider</name>
+		<value>org.apache.ranger.authorization.yarn.authorizer.RangerYarnAuthorizer</value>
+	</property>
+
+	<property>
+		<name>yarn.log-aggregation-enable</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.log-aggregation.retain-check-interval-seconds</name>
+		<value>604800</value>
+	</property>
+
+	<property>
+		<name>yarn.log-aggregation.retain-seconds</name>
+		<value>604800</value>
+	</property>
+
+	<property>
+		<name>yarn.log.server.url</name>
+		<value>http://172.23.21.7:5024/jobhistory/logs</value>
+	</property>
+
+	<property>
+		<name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
+		<value>100000</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.address</name>
+		<value>172.23.21.2:5006</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.aux-services</name>
+		<value>mapreduce_shuffle</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
+		<value>org.apache.hadoop.mapred.ShuffleHandler</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.container-executor.class</name>
+		<value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.hostname</name>
+		<value>172.23.21.2</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.keytab</name>
+		<value>/var/krb5kdc/emr.keytab</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
+		<value>/hadoop-yarn</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
+		<value>/sys/fs/cgroup</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.linux-container-executor.group</name>
+		<value>hadoop</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user</name>
+		<value>hadoop</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
+		<value>org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.local-dirs</name>
+		<value>/data/emr/yarn/local</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.localizer.address</name>
+		<value>172.23.21.2:5007</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.log-dirs</name>
+		<value>/data/emr/yarn/logs</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.log.retain-seconds</name>
+		<value>604800</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.pmem-check-enabled</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.principal</name>
+		<value>hadoop/_HOST@EMR-56L6ZNTS</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.remote-app-log-dir</name>
+		<value>/emr/logs</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.resource.cpu-vcores</name>
+		<value>-1</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.resource.detect-hardware-capabilities</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.resource.memory-mb</name>
+		<value>-1</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.resource.pcores-vcores-multiplier</name>
+		<value>3</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name>
+		<value>85</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.resource.system-reserved-memory-mb</name>
+		<value>15360</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.vmem-check-enabled</name>
+		<value>false</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.vmem-pmem-ratio</name>
+		<value>8</value>
+	</property>
+
+	<property>
+		<name>yarn.nodemanager.webapp.address</name>
+		<value>172.23.21.2:5008</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.address.rm1</name>
+		<value>172.23.21.7:5000</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.address.rm2</name>
+		<value>172.23.21.8:5000</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.admin.address.rm1</name>
+		<value>172.23.21.7:5003</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.admin.address.rm2</name>
+		<value>172.23.21.8:5003</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.cluster-id</name>
+		<value>emr-56l6znts</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.connect.retry-interval.ms</name>
+		<value>2000</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.ha.enabled</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.ha.rm-ids</name>
+		<value>rm1,rm2</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.keytab</name>
+		<value>/var/krb5kdc/emr.keytab</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.max-completed-applications</name>
+		<value>150</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
+		<value>1000</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.nodes.exclude-path</name>
+		<value>/usr/local/service/hadoop/etc/hadoop/yarnexcludedhosts</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.nodes.include-path</name>
+		<value>/usr/local/service/hadoop/etc/hadoop/yarnhosts</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.principal</name>
+		<value>hadoop/_HOST@EMR-56L6ZNTS</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.recovery.enabled</name>
+		<value>true</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
+		<value>172.23.21.7:5002</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
+		<value>172.23.21.8:5002</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.scheduler.address.rm1</name>
+		<value>172.23.21.7:5001</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.scheduler.address.rm2</name>
+		<value>172.23.21.8:5001</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.scheduler.class</name>
+		<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.store.class</name>
+		<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.webapp.address.rm1</name>
+		<value>172.23.21.7:5004</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.webapp.address.rm2</name>
+		<value>172.23.21.8:5004</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.webapp.https.address.rm1</name>
+		<value>172.23.21.7:5005</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.webapp.https.address.rm2</name>
+		<value>172.23.21.8:5005</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.zk-address</name>
+		<value>172.23.21.17:2181,172.23.21.15:2181,172.23.21.10:2181</value>
+	</property>
+
+	<property>
+		<name>yarn.resourcemanager.zk.state-store.address</name>
+		<value>172.23.21.17:2181,172.23.21.15:2181,172.23.21.10:2181</value>
+	</property>
+
+	<property>
+		<name>yarn.scheduler.fair.allow-undeclared-pools</name>
+		<value>false</value>
+	</property>
+
+	<property>
+		<name>yarn.scheduler.fair.user-as-default-queue</name>
+		<value>false</value>
+	</property>
+
+	<property>
+		<name>yarn.scheduler.maximum-allocation-mb</name>
+		<value>262144</value>
+	</property>
+
+	<property>
+		<name>yarn.scheduler.maximum-allocation-vcores</name>
+		<value>128</value>
+	</property>
+
+	<property>
+		<name>yarn.scheduler.minimum-allocation-mb</name>
+		<value>16</value>
+	</property>
+
+	<property>
+		<name>yarn.scheduler.minimum-allocation-vcores</name>
+		<value>1</value>
+	</property>
+
+</configuration>