
Merge branch 'yili' of http://gogsb.soaringnova.com/ylproj/jupyterlab into yili

kilnonedre 1 year ago
parent commit 69b68b5265

+ 202 - 0
Dockerfile.idc

@@ -0,0 +1,202 @@
+FROM node:16.15.1-bullseye-slim AS builder-fe
+COPY requirements.txt .
+RUN sed -i "s@http://\(deb\|security\).debian.org@http://mirrors.aliyun.com@g" /etc/apt/sources.list
+RUN apt update
+RUN apt update && apt install -y libpython3-dev  python3 python3-pip python-is-python3 make \
+    && pip config set global.index-url https://mirror.baidu.com/pypi/simple \
+    && pip install -U pip setuptools && pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
+    #  \
+    # && yarn config set registry https://registry.npm.taobao.org \
+    # && npm config set registry https://registry.npm.taobao.org \
+    # && npm config set disturl https://npm.taobao.org/dist
+#https://registry.npmmirror.com
+COPY . /src/
+ENV PYTHONPATH=/src
+RUN cd /src && chmod a+x  ./bdist_wheel.sh && ./bdist_wheel.sh
+
+
+FROM  gettyimages/spark:2.4.0-hadoop-3.0  as builder
+RUN sed -i "s@http://\(deb\|security\).debian.org@http://mirrors.163.com@g" /etc/apt/sources.list
+RUN apt update && apt install -y --no-install-recommends \
+    krb5-user zip unzip procps tar curl supervisor net-tools telnet vim iputils-ping netcat jq wget zsh
+# Prepare directories
+ENV WORKSPACE /hadoop
+WORKDIR ${WORKSPACE}
+
+
+# Environment variables
+ENV HADOOP_HOME=/usr/hadoop-3.0.0/
+ENV HADOOP_CONF_DIR=${WORKSPACE}/conf
+# ENV HIVE_HOME=${WORKSPACE}/hive
+# ENV HIVE_CONF_DIR=${WORKSPACE}/conf
+ENV PATH=$PATH:$HADOOP_HOME/bin:$SPARK_HOME/bin
+ENV LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
+
+ENV LANG=zh_CN.UTF-8 LANGUAGE=zh_CN:zh LC_ALL=zh_CN.UTF-8 DEBIAN_FRONTEND=noninteractive
+
+RUN rm -rf  /etc/apt/sources.list.d/  && apt update
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    supervisor \
+    iputils-ping \
+    wget \
+    zsh \
+    build-essential \
+    cmake \
+    git \
+    curl \
+    vim \
+    ca-certificates \
+    libjpeg-dev \
+    zip \
+    unzip \
+    libpng-dev \
+    openssh-server \
+    autojump \
+    ttf-wqy-zenhei \
+    libgl1-mesa-glx  \
+    libglib2.0-0 \
+    locales \
+    krb5-user net-tools telnet && \
+    rm -rf /var/lib/apt/lists/*
+
+
+RUN locale-gen zh_CN.UTF-8
+RUN dpkg-reconfigure locales
+
+
+CMD ["supervisord", "-n"]
+
+
+FROM builder as builder1
+
+ENV PYTHON_VERSION 3
+RUN chsh -s `which zsh`
+RUN curl -o ~/miniconda.sh -O  https://repo.anaconda.com/miniconda/Miniconda3-py37_4.12.0-Linux-x86_64.sh  && \
+    chmod +x ~/miniconda.sh && \
+    ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh
+
+RUN ln /opt/conda/bin/conda /usr/local/bin/conda
+RUN conda init zsh
+RUN conda install mamba -n base -c conda-forge
+RUN ln /opt/conda/bin/mamba /usr/local/bin/mamba && mamba init zsh
+
+
+
+FROM builder1 as builder2
+WORKDIR /workspace
+RUN /opt/conda/bin/pip install jupyterlab jupyterlab-language-pack-zh-CN kazoo==2.9.0 -i https://pypi.douban.com/simple
+RUN mamba install nodejs sparkmagic pyhive pandas 'nbconvert>=7' 'mistune>=2' -y
+# pyhive thrift sasl thrift_sasl scikit-learn
+# password 'sx'
+RUN /opt/conda/bin/jupyter lab --generate-config && \
+    echo "c.NotebookApp.password='argon2:\$argon2id\$v=19\$m=10240,t=10,p=8\$+zIUCF9Uk2FiCHlV8njX5A\$I5Mm/64DORArcXYTXWRVng'">>/root/.jupyter/jupyter_lab_config.py
+ENV PATH /opt/conda/bin/:$PATH
+RUN jupyter nbextension enable --py --sys-prefix widgetsnbextension
+
+
+FROM builder2 as builder3
+
+RUN cd /opt/conda/lib/python3.7/site-packages/ \
+    && jupyter-kernelspec install sparkmagic/kernels/sparkkernel \
+    && jupyter-kernelspec install sparkmagic/kernels/pysparkkernel \
+    && /opt/conda/bin/jupyter serverextension enable --py sparkmagic
+
+EXPOSE 8888
+
+# Telecom cloud test
+FROM builder3 as image-idctest
+COPY --from=builder-fe /src/dist  dist
+RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
+ADD confs/idctest/config.json .
+RUN mkdir -p $HOME/.sparkmagic && cp config.json $HOME/.sparkmagic
+COPY confs/idctest/ydtk /ydtk
+COPY confs/idctest/conf ${WORKSPACE}/conf
+RUN echo "\
+[program:jupyter]\n\
+directory=/workspace\n\
+command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
+autorestart=true\n\
+startretries=0\n\
+redirect_stderr=true\n\
+stdout_logfile=/var/log/jupyter.log\n\
+stdout_logfile_maxbytes=50MB\n\
+environment=PYTHONPATH=/\n\
+" > /etc/supervisor/conf.d/jupyter.conf
+
+
+FROM builder2 as jupyter0-idctest
+COPY --from=builder-fe /src/dist  dist
+RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
+COPY confs/idctest/ydtk /ydtk
+COPY confs/idctest/conf ${WORKSPACE}/conf
+RUN echo "\
+[program:jupyter]\n\
+directory=/workspace\n\
+command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
+autorestart=true\n\
+startretries=0\n\
+redirect_stderr=true\n\
+stdout_logfile=/var/log/jupyter.log\n\
+stdout_logfile_maxbytes=50MB\n\
+environment=PYTHONPATH=/\n\
+" > /etc/supervisor/conf.d/jupyter.conf
+
+
+
+# Telecom cloud production
+FROM builder3 as image-idcprod
+COPY --from=builder-fe /src/dist  dist
+RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
+ADD confs/idcprod/config.json .
+RUN mkdir -p $HOME/.sparkmagic && cp config.json $HOME/.sparkmagic
+COPY confs/idcprod/ydtk /ydtk
+COPY confs/idcprod/conf ${WORKSPACE}/conf
+RUN echo "\
+[program:jupyter]\n\
+directory=/workspace\n\
+command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
+autorestart=true\n\
+startretries=0\n\
+redirect_stderr=true\n\
+stdout_logfile=/var/log/jupyter.log\n\
+stdout_logfile_maxbytes=50MB\n\
+environment=PYTHONPATH=/\n\
+" > /etc/supervisor/conf.d/jupyter.conf
+
+
+
+FROM builder2 as jupyter0-idcprod
+COPY --from=builder-fe /src/dist  dist
+RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
+COPY confs/idcprod/ydtk /ydtk
+COPY confs/idcprod/conf ${WORKSPACE}/conf
+RUN echo "\
+[program:jupyter]\n\
+directory=/workspace\n\
+command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
+autorestart=true\n\
+startretries=0\n\
+redirect_stderr=true\n\
+stdout_logfile=/var/log/jupyter.log\n\
+stdout_logfile_maxbytes=50MB\n\
+environment=PYTHONPATH=/\n\
+" > /etc/supervisor/conf.d/jupyter.conf
+
+# Standard version
+
+FROM builder2 as jupyter0
+
+COPY --from=builder-fe /src/dist  dist
+RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
+RUN echo "\
+[program:jupyter]\n\
+directory=/workspace\n\
+command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
+autorestart=true\n\
+startretries=0\n\
+redirect_stderr=true\n\
+stdout_logfile=/var/log/jupyter.log\n\
+stdout_logfile_maxbytes=50MB\n\
+" > /etc/supervisor/conf.d/jupyter.conf

+ 0 - 74
Dockerfile.dev → Dockerfile.tx

@@ -189,80 +189,6 @@ stdout_logfile_maxbytes=50MB\n\
 environment=PYTHONPATH=/\n\
 " > /etc/supervisor/conf.d/jupyter.conf
 
-# Telecom cloud test
-FROM builder3 as image-idctest
-COPY --from=builder-fe /src/dist  dist
-RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
-ADD confs/idctest/config.json .
-RUN mkdir -p $HOME/.sparkmagic && cp config.json $HOME/.sparkmagic
-COPY confs/idctest/ydtk /ydtk
-RUN echo "\
-[program:jupyter]\n\
-directory=/workspace\n\
-command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
-autorestart=true\n\
-startretries=0\n\
-redirect_stderr=true\n\
-stdout_logfile=/var/log/jupyter.log\n\
-stdout_logfile_maxbytes=50MB\n\
-environment=PYTHONPATH=/\n\
-" > /etc/supervisor/conf.d/jupyter.conf
-
-
-FROM builder2 as jupyter0-idctest
-COPY --from=builder-fe /src/dist  dist
-RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
-COPY confs/idctest/ydtk /ydtk
-RUN echo "\
-[program:jupyter]\n\
-directory=/workspace\n\
-command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
-autorestart=true\n\
-startretries=0\n\
-redirect_stderr=true\n\
-stdout_logfile=/var/log/jupyter.log\n\
-stdout_logfile_maxbytes=50MB\n\
-environment=PYTHONPATH=/\n\
-" > /etc/supervisor/conf.d/jupyter.conf
-
-
-
-# Telecom cloud production
-FROM builder3 as image-idcprod
-COPY --from=builder-fe /src/dist  dist
-RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
-ADD confs/idcprod/config.json .
-RUN mkdir -p $HOME/.sparkmagic && cp config.json $HOME/.sparkmagic
-COPY confs/idcprod/ydtk /ydtk
-RUN echo "\
-[program:jupyter]\n\
-directory=/workspace\n\
-command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
-autorestart=true\n\
-startretries=0\n\
-redirect_stderr=true\n\
-stdout_logfile=/var/log/jupyter.log\n\
-stdout_logfile_maxbytes=50MB\n\
-environment=PYTHONPATH=/\n\
-" > /etc/supervisor/conf.d/jupyter.conf
-
-
-
-FROM builder2 as jupyter0-idcprod
-COPY --from=builder-fe /src/dist  dist
-RUN /opt/conda/bin/pip install dist/*.whl  -i https://mirror.baidu.com/pypi/simple
-COPY confs/idcprod/ydtk /ydtk
-RUN echo "\
-[program:jupyter]\n\
-directory=/workspace\n\
-command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser --NotebookApp.allow_origin=* --NotebookApp.allow_remote_access=1' \n\
-autorestart=true\n\
-startretries=0\n\
-redirect_stderr=true\n\
-stdout_logfile=/var/log/jupyter.log\n\
-stdout_logfile_maxbytes=50MB\n\
-environment=PYTHONPATH=/\n\
-" > /etc/supervisor/conf.d/jupyter.conf
 
 # Standard version
 

+ 10 - 19
Makefile

@@ -3,20 +3,13 @@
 
 all: sxkj idctest txtest txprod
 
-prod:
-	@DOCKER_BUILDKIT=1 docker build -f Dockerfile.dev  --build-arg BUILDKIT_INLINE_CACHE=1  --target image-prod -t jupyterlab:prod .
-
-
-test:
-	@DOCKER_BUILDKIT=1 docker build -f Dockerfile.dev  --build-arg BUILDKIT_INLINE_CACHE=1  --target image-test -t jupyterlab:test .
-
 
 sxkj:
 	@sed 's#http.*\.com#http://aihub-dag\.sxkj\.com#' -i packages/yili-dag/src/request.ts
 	@sed 's#http.*\.com#http://aihub-dag\.sxkj\.com#' -i packages/jldbq-extenison/src/api/config.ts
 	@sed 's#http.*\.com#http://aihub-dag\.sxkj\.com#' -i packages/filebrowser/src/api/config.ts
-	@docker build -f Dockerfile.dev  --target image-sxkj -t SXKJ:32775/jupyterlab:sxkj .
-	@docker build -f Dockerfile.dev  --target jupyter0 -t SXKJ:32775/jupyterlab0:sxkj .
+	@docker build -f Dockerfile.tx  --target image-sxkj -t SXKJ:32775/jupyterlab:sxkj .
+	@docker build -f Dockerfile.tx  --target jupyter0 -t SXKJ:32775/jupyterlab0:sxkj .
 	@docker push SXKJ:32775/jupyterlab:sxkj
 	@docker push SXKJ:32775/jupyterlab0:sxkj
 
@@ -24,8 +17,8 @@ idctest:
 	@sed 's#http.*\.com#http://aihub-dag-idctest\.digitalyili\.com#' -i  packages/yili-dag/src/request.ts
 	@sed 's#http.*\.com#http://aihub-dag-idctest\.digitalyili\.com#' -i  packages/jldbq-extenison/src/api/config.ts
 	@sed 's#http.*\.com#http://aihub-dag-idctest\.digitalyili\.com#' -i  packages/filebrowser/src/api/config.ts
-	@docker build -f Dockerfile.dev  --target image-idctest -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:idctest .
-	@docker build -f Dockerfile.dev  --target jupyter0 -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:idctest .
+	@docker build -f Dockerfile.idc  --target image-idctest -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:idctest .
+	@docker build -f Dockerfile.idc  --target jupyter0 -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:idctest .
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:idctest
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:idctest
 
@@ -33,8 +26,8 @@ idcprod:
 	@sed 's#http.*\.com#http://aihub-dag-idc\.digitalyili\.com#' -i  packages/yili-dag/src/request.ts
 	@sed 's#http.*\.com#http://aihub-dag-idc\.digitalyili\.com#' -i  packages/jldbq-extenison/src/api/config.ts
 	@sed 's#http.*\.com#http://aihub-dag-idc\.digitalyili\.com#' -i  packages/filebrowser/src/api/config.ts
-	@docker build -f Dockerfile.dev  --target image-idcprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:idcprod .
-	@docker build -f Dockerfile.dev  --target jupyter0-idcprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:idcprod .
+	@docker build -f Dockerfile.idc  --target image-idcprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:idcprod .
+	@docker build -f Dockerfile.idc  --target jupyter0-idcprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:idcprod .
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:idcprod
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:idcprod
 
@@ -43,8 +36,8 @@ txtest:
 	@sed 's#http.*\.com#https://aihub-dag-test\.digitalyili\.com#' -i  packages/yili-dag/src/request.ts
 	@sed 's#http.*\.com#https://aihub-dag-test\.digitalyili\.com#' -i  packages/jldbq-extenison/src/api/config.ts
 	@sed 's#http.*\.com#https://aihub-dag-test\.digitalyili\.com#' -i  packages/filebrowser/src/api/config.ts
-	@docker build -f Dockerfile.dev  --target image-txtest -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:txtest .
-	@docker build -f Dockerfile.dev  --target jupyter0-txtest -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:txtest .
+	@docker build -f Dockerfile.tx  --target image-txtest -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:txtest .
+	@docker build -f Dockerfile.tx  --target jupyter0-txtest -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:txtest .
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:txtest
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:txtest
 
@@ -52,8 +45,8 @@ txprod:
 	@sed 's#http.*\.com#https://aihub-dag\.digitalyili\.com#' -i  packages/yili-dag/src/request.ts
 	@sed 's#http.*\.com#https://aihub-dag\.digitalyili\.com#' -i  packages/jldbq-extenison/src/api/config.ts
 	@sed 's#http.*\.com#https://aihub-dag\.digitalyili\.com#' -i  packages/filebrowser/src/api/config.ts
-	@docker build -f Dockerfile.dev  --target image-txprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:txprod .
-	@docker build -f Dockerfile.dev  --target jupyter0-txprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:txprod .
+	@docker build -f Dockerfile.tx  --target image-txprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:txprod .
+	@docker build -f Dockerfile.tx  --target jupyter0-txprod -t registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:txprod .
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab:txprod
 	@docker push registry.cn-hangzhou.aliyuncs.com/sxtest/jupyterlab0:txprod
 
@@ -61,5 +54,3 @@ txprod:
 rsync:
 	@rsync -azP --exclude ".*/"  --exclude "tmp/" `pwd` sxkj@192.168.199.109:/home/sxkj/zhangli/
 
-27:
-	@rsync -azP --exclude ".*/"  --exclude "tmp/" `pwd` sxkj@192.168.199.27:/home/sxkj/zhangli/
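Note: with the Makefile now split across the two Dockerfiles (Dockerfile.idc for the idc* targets, Dockerfile.tx for the tx* and sxkj targets), the entry points stay the same. A short usage sketch, assuming Docker is installed and you have push access to the registries referenced above:

    make idctest    # patch API endpoints, build image-idctest and jupyter0 from Dockerfile.idc, push the :idctest tags
    make idcprod    # same flow for the production targets in Dockerfile.idc
    make txprod     # build and push the txprod images from Dockerfile.tx
    make rsync      # rsync the working tree to sxkj@192.168.199.109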

+ 133 - 0
confs/idcprod/conf/core-site.xml

@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://nameservice1</value>
+  </property>
+  <property>
+    <name>io.compression.codecs</name>
+    <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
+  </property>
+  <property>
+    <name>hadoop.security.authentication</name>
+    <value>simple</value>
+  </property>
+  <property>
+    <name>hadoop.security.authorization</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hadoop.rpc.protection</name>
+    <value>authentication</value>
+  </property>
+  <property>
+    <name>hadoop.security.auth_to_local</name>
+    <value>DEFAULT</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.oozie.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.oozie.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.flume.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.flume.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.HTTP.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.HTTP.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hive.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hive.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hue.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hue.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.httpfs.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.httpfs.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hdfs.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hdfs.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.yarn.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.yarn.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.security.group.mapping</name>
+    <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
+  </property>
+  <property>
+    <name>hadoop.security.instrumentation.requires.admin</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>net.topology.script.file.name</name>
+    <value>/etc/hadoop/conf.cloudera.yarn/topology.py</value>
+  </property>
+  <property>
+    <name>io.file.buffer.size</name>
+    <value>65536</value>
+  </property>
+  <property>
+    <name>hadoop.ssl.enabled</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hadoop.ssl.require.client.cert</name>
+    <value>false</value>
+    <final>true</final>
+  </property>
+  <property>
+    <name>hadoop.ssl.keystores.factory.class</name>
+    <value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
+    <final>true</final>
+  </property>
+  <property>
+    <name>hadoop.ssl.server.conf</name>
+    <value>ssl-server.xml</value>
+    <final>true</final>
+  </property>
+  <property>
+    <name>hadoop.ssl.client.conf</name>
+    <value>ssl-client.xml</value>
+    <final>true</final>
+  </property>
+</configuration>

+ 101 - 0
confs/idcprod/conf/hdfs-site.xml

@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>dfs.nameservices</name>
+    <value>nameservice1</value>
+  </property>
+  <property>
+    <name>dfs.client.failover.proxy.provider.nameservice1</name>
+    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
+  </property>
+  <property>
+    <name>dfs.ha.automatic-failover.enabled.nameservice1</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>ha.zookeeper.quorum</name>
+    <value>cdhproddn03.yili.com:2181,cdhproddn04.yili.com:2181,cdhproddn05.yili.com:2181,cdhproddn06.yili.com:2181,cdhproddn07.yili.com:2181</value>
+  </property>
+  <property>
+    <name>dfs.ha.namenodes.nameservice1</name>
+    <value>namenode149,namenode208</value>
+  </property>
+  <property>
+    <name>dfs.namenode.rpc-address.nameservice1.namenode149</name>
+    <value>cdhprodnm01.yili.com:8020</value>
+  </property>
+  <property>
+    <name>dfs.namenode.servicerpc-address.nameservice1.namenode149</name>
+    <value>cdhprodnm01.yili.com:8022</value>
+  </property>
+  <property>
+    <name>dfs.namenode.http-address.nameservice1.namenode149</name>
+    <value>cdhprodnm01.yili.com:9870</value>
+  </property>
+  <property>
+    <name>dfs.namenode.https-address.nameservice1.namenode149</name>
+    <value>cdhprodnm01.yili.com:9871</value>
+  </property>
+  <property>
+    <name>dfs.namenode.rpc-address.nameservice1.namenode208</name>
+    <value>cdhprodnm02.yili.com:8020</value>
+  </property>
+  <property>
+    <name>dfs.namenode.servicerpc-address.nameservice1.namenode208</name>
+    <value>cdhprodnm02.yili.com:8022</value>
+  </property>
+  <property>
+    <name>dfs.namenode.http-address.nameservice1.namenode208</name>
+    <value>cdhprodnm02.yili.com:9870</value>
+  </property>
+  <property>
+    <name>dfs.namenode.https-address.nameservice1.namenode208</name>
+    <value>cdhprodnm02.yili.com:9871</value>
+  </property>
+  <property>
+    <name>dfs.replication</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>dfs.blocksize</name>
+    <value>134217728</value>
+  </property>
+  <property>
+    <name>dfs.client.use.datanode.hostname</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>fs.permissions.umask-mode</name>
+    <value>022</value>
+  </property>
+  <property>
+    <name>dfs.client.block.write.locateFollowingBlock.retries</name>
+    <value>7</value>
+  </property>
+  <property>
+    <name>dfs.namenode.acls.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>dfs.client.read.shortcircuit</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.domain.socket.path</name>
+    <value>/var/run/hdfs-sockets/dn</value>
+  </property>
+  <property>
+    <name>dfs.client.read.shortcircuit.skip.checksum</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.client.domain.socket.data.traffic</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
+    <value>true</value>
+  </property>
+</configuration>

+ 295 - 0
confs/idcprod/conf/hive-site.xml

@@ -0,0 +1,295 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>hive.metastore.uris</name>
+    <value>thrift://cdhproddn02.yili.com:9083</value>
+  </property>
+  <property>
+    <name>hive.metastore.client.socket.timeout</name>
+    <value>1800</value>
+  </property>
+  <property>
+    <name>hive.metastore.warehouse.dir</name>
+    <value>/user/hive/warehouse</value>
+  </property>
+  <property>
+    <name>hive.warehouse.subdir.inherit.perms</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.auto.convert.join</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.auto.convert.join.noconditionaltask.size</name>
+    <value>1073741824</value>
+  </property>
+  <property>
+    <name>hive.optimize.bucketmapjoin.sortedmerge</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.smbjoin.cache.rows</name>
+    <value>10000</value>
+  </property>
+  <property>
+    <name>hive.server2.logging.operation.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.server2.logging.operation.log.location</name>
+    <value>/var/log/hive/operation_logs</value>
+  </property>
+  <property>
+    <name>mapred.reduce.tasks</name>
+    <value>-1</value>
+  </property>
+  <property>
+    <name>hive.exec.reducers.bytes.per.reducer</name>
+    <value>67108864</value>
+  </property>
+  <property>
+    <name>hive.exec.copyfile.maxsize</name>
+    <value>33554432</value>
+  </property>
+  <property>
+    <name>hive.exec.reducers.max</name>
+    <value>1099</value>
+  </property>
+  <property>
+    <name>hive.vectorized.groupby.checkinterval</name>
+    <value>4096</value>
+  </property>
+  <property>
+    <name>hive.vectorized.groupby.flush.percent</name>
+    <value>0.1</value>
+  </property>
+  <property>
+    <name>hive.compute.query.using.stats</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.execution.enabled</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.execution.reduce.enabled</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.use.vectorized.input.format</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.vectorized.use.checked.expressions</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.vectorized.use.vector.serde.deserialize</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.adaptor.usage.mode</name>
+    <value>chosen</value>
+  </property>
+  <property>
+    <name>hive.vectorized.input.format.excludes</name>
+    <value>org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat</value>
+  </property>
+  <property>
+    <name>hive.merge.mapfiles</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.merge.mapredfiles</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.cbo.enable</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.fetch.task.conversion</name>
+    <value>minimal</value>
+  </property>
+  <property>
+    <name>hive.fetch.task.conversion.threshold</name>
+    <value>268435456</value>
+  </property>
+  <property>
+    <name>hive.limit.pushdown.memory.usage</name>
+    <value>0.1</value>
+  </property>
+  <property>
+    <name>hive.merge.sparkfiles</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.merge.smallfiles.avgsize</name>
+    <value>16777216</value>
+  </property>
+  <property>
+    <name>hive.merge.size.per.task</name>
+    <value>268435456</value>
+  </property>
+  <property>
+    <name>hive.optimize.reducededuplication</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.optimize.reducededuplication.min.reducer</name>
+    <value>4</value>
+  </property>
+  <property>
+    <name>hive.map.aggr</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.map.aggr.hash.percentmemory</name>
+    <value>0.5</value>
+  </property>
+  <property>
+    <name>hive.optimize.sort.dynamic.partition</name>
+    <value>false</value>
+  </property>
+  <!--'hive.execution.engine', originally set to 'mr' (non-final), is overridden below by a safety valve-->
+  <property>
+    <name>spark.executor.memory</name>
+    <value>9488947609b</value>
+  </property>
+  <property>
+    <name>spark.driver.memory</name>
+    <value>11596411699b</value>
+  </property>
+  <property>
+    <name>spark.executor.cores</name>
+    <value>5</value>
+  </property>
+  <property>
+    <name>spark.yarn.driver.memoryOverhead</name>
+    <value>1228m</value>
+  </property>
+  <property>
+    <name>spark.yarn.executor.memoryOverhead</name>
+    <value>1596m</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.initialExecutors</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.minExecutors</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.maxExecutors</name>
+    <value>2147483647</value>
+  </property>
+  <property>
+    <name>hive.metastore.execute.setugi</name>
+    <value>true</value>
+  </property>
+  <!--'hive.support.concurrency', originally set to 'true' (non-final), is overridden below by a safety valve-->
+  <property>
+    <name>hive.zookeeper.quorum</name>
+    <value>cdhproddn04.yili.com,cdhproddn03.yili.com,cdhproddn07.yili.com,cdhproddn06.yili.com,cdhproddn05.yili.com</value>
+  </property>
+  <property>
+    <name>hive.zookeeper.client.port</name>
+    <value>2181</value>
+  </property>
+  <property>
+    <name>hive.zookeeper.namespace</name>
+    <value>hive_zookeeper_namespace_hive</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.quorum</name>
+    <value>cdhproddn04.yili.com,cdhproddn03.yili.com,cdhproddn07.yili.com,cdhproddn06.yili.com,cdhproddn05.yili.com</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.property.clientPort</name>
+    <value>2181</value>
+  </property>
+  <property>
+    <name>hive.cluster.delegation.token.store.class</name>
+    <value>org.apache.hadoop.hive.thrift.DBTokenStore</value>
+  </property>
+  <property>
+    <name>hive.server2.enable.doAs</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.server2.use.SSL</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>spark.shuffle.service.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.orderby.no.limit</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.no.partition.filter</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.type.safety</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.cartesian.product</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.bucketing</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.execution.engine</name>
+    <value>spark</value>
+  </property>
+  <property>
+    <name>hive.tez.container.size</name>
+    <value>5120</value>
+  </property>
+  <property>
+    <name>hive.exec.dynamic.partition</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.exec.dynamic.partition.mode</name>
+    <value>nonstrict</value>
+  </property>
+  <property>
+    <name>hive.exec.max.dynamic.partitions.pernode</name>
+    <value>2000</value>
+  </property>
+  <property>
+    <name>hive.tez.cpu.vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>hive.sentry.conf.url</name>
+    <value>file:///etc/sentry/conf/sentry-site.xml</value>
+  </property>
+  <property>
+    <name>hive.metastore.client.impl</name>
+    <value>org.apache.sentry.binding.metastore.SentryHiveMetaStoreClient</value>
+  </property>
+  <property>
+    <name>hive.support.concurrency</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>tez.am.resource.memory.mb</name>
+    <value>8192</value>
+  </property>
+</configuration>

+ 104 - 0
confs/idcprod/conf/livy-client.conf

@@ -0,0 +1,104 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Configurations for a Livy Client, any configurations set here will override any
+# livy or spark-default configurations.
+#
+# Before a Livy Client is able to load these configurations the folder containing
+# this file must be added to the application classpath
+#
+
+#
+# Configurations for Livy HTTPClient
+#
+
+# HTTP Request configurations
+# How long before a request times out
+# livy.client.http.connection.timeout = 10s
+# How long between data packets before a request times out
+# livy.client.http.connection.socket.timeout = 5m
+# Whether content is compressed
+# livy.client.http.content.compress.enable = true
+
+# How long before idle connections are closed
+# livy.client.http.connection.idle.timeout = 10m
+
+# Initial interval before polling for Job results
+# livy.client.http.job.initial-poll-interval = 100ms
+# Maximum interval between successive polls
+# livy.client.http.job.max-poll-interval = 5s
+
+#
+# Configurations for Livy RSCClient
+#
+
+# Configurations for registering a client with the rpc server
+# Unique client id for connections to the rpc server
+# livy.rsc.client.auth.id =
+# Secret value for authenticating client connections with server
+# livy.rsc.client.auth.secret =
+
+# Timeout when stopping a rsc client
+# livy.rsc.client.shutdown-timeout = 10s
+
+# Class of the rsc driver to use
+# livy.rsc.driver-class =
+# The kind of rsc session. Examples: pyspark or sparkr
+# livy.rsc.session.kind =
+
+# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced.
+#livy.rsc.jars = hdfs:/user/aidevuser/rsc-jars/*
+# Location of the SparkR package for running sparkr
+# livy.rsc.sparkr.package =
+# Location of the PySpark package for running pyspark
+# livy.rsc.pyspark.archives =
+
+# Address for the RSC driver to connect back with its connection info.
+livy.rsc.launcher.address = please_set_correct_addresss
+# Port range on which the RPC server will launch. The range is inclusive of the start and end ports.
+livy.rsc.launcher.port.range = 31100~31103
+
+# How long will the RSC wait for a connection for a Livy server before shutting itself down.
+livy.rsc.server.idle-timeout = 30m
+
+# The user that should be impersonated when requesting a Livy session
+#livy.rsc.proxy-user = aidevuser
+
+# Host or IP address of the rpc server
+livy.rsc.rpc.server.address = 0.0.0.0
+# How long the rsc client will wait when attempting to connect to the Livy server
+livy.rsc.server.connect.timeout = 5m
+
+# The logging level for the rpc channel. Possible values: TRACE, DEBUG, INFO, WARN, or ERROR
+# livy.rsc.channel.log.level =
+
+# SASL configurations for authentication
+# SASL mechanism used for authentication
+# livy.rsc.rpc.sasl.mechanisms = DIGEST-MD5
+# SASL qop used for authentication
+# livy.rsc.rpc.sasl.qop =
+
+# Time between status checks for cancelled a Job
+# livy.rsc.job-cancel.trigger-interval = 100ms
+# Time before a cancelled a Job is forced into a Cancelled state
+# livy.rsc.job-cancel.timeout = 30s
+
+# Number of statements kept in driver's memory
+# livy.rsc.retained-statements = 100

+ 190 - 0
confs/idcprod/conf/livy.conf

@@ -0,0 +1,190 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Use this keystore for the SSL certificate and key.
+# livy.keystore =
+
+# Specify the keystore password.
+# livy.keystore.password =
+#
+# Specify the key password.
+# livy.key-password =
+
+# Hadoop Credential Provider Path to get "livy.keystore.password" and "livy.key-password".
+# Credential Provider can be created using command as follow:
+# hadoop credential create "livy.keystore.password" -value "secret" -provider jceks://hdfs/path/to/livy.jceks
+# livy.hadoop.security.credential.provider.path =
+
+# What host address to start the server on. By default, Livy will bind to all network interfaces.
+# livy.server.host = 0.0.0.0
+
+# What port to start the server on.
+livy.server.port = 8998
+
+# What base path ui should work on. By default UI is mounted on "/".
+# E.g.: livy.ui.basePath = /my_livy - result in mounting UI on /my_livy/
+# livy.ui.basePath = ""
+
+# What spark master Livy sessions should use.
+livy.spark.master = yarn
+
+# What spark deploy mode Livy sessions should use.
+livy.spark.deploy-mode = cluster
+
+# Configure Livy server http request and response header size.
+# livy.server.request-header.size = 131072
+# livy.server.response-header.size = 131072
+
+# Enabled to check whether timeout Livy sessions should be stopped.
+# livy.server.session.timeout-check = true
+#
+# Whether or not to skip timeout check for a busy session
+# livy.server.session.timeout-check.skip-busy = false
+
+# Time in milliseconds on how long Livy will wait before timing out an inactive session.
+# Note that the inactive session could be busy running jobs.
+# livy.server.session.timeout = 1h
+#
+# How long a finished session state should be kept in LivyServer for query.
+# livy.server.session.state-retain.sec = 600s
+
+# If livy should impersonate the requesting users when creating a new session.
+# livy.impersonation.enabled = false
+
+# Logs size livy can cache for each session/batch. 0 means don't cache the logs.
+# livy.cache-log.size = 200
+
+# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced.
+livy.rsc.jars = hdfs:/user/AIuser/livy/rsc-jars/*
+
+# Comma-separated list of Livy REPL jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced. Please list all the repl dependencies including
+# Scala version-specific livy-repl jars, Livy will automatically pick the right dependencies
+# during session creation.
+#livy.repl.jars = hdfs:/user/aidevuser/repl_2.12-jars/*
+
+# Location of PySpark archives. By default Livy will upload the file from SPARK_HOME, but
+# by caching the file in HDFS, startup time of PySpark sessions on YARN can be reduced.
+# livy.pyspark.archives =
+
+# Location of the SparkR package. By default Livy will upload the file from SPARK_HOME, but
+# by caching the file in HDFS, startup time of R sessions on YARN can be reduced.
+# livy.sparkr.package =
+
+# List of local directories from where files are allowed to be added to user sessions. By
+# default it's empty, meaning users can only reference remote URIs when starting their
+# sessions.
+# livy.file.local-dir-whitelist =
+
+# Whether to enable csrf protection, by default it is false. If it is enabled, client should add
+# http-header "X-Requested-By" in request if the http method is POST/DELETE/PUT/PATCH.
+# livy.server.csrf-protection.enabled =
+
+# Whether to enable HiveContext in livy interpreter, if it is true hive-site.xml will be detected
+# on user request and then livy server classpath automatically.
+livy.repl.enable-hive-context = true
+
+# Recovery mode of Livy. Possible values:
+# off: Default. Turn off recovery. Every time Livy shuts down, it stops and forgets all sessions.
+# recovery: Livy persists session info to the state store. When Livy restarts, it recovers
+#           previous sessions from the state store.
+# Must set livy.server.recovery.state-store and livy.server.recovery.state-store.url to
+# configure the state store.
+# livy.server.recovery.mode = off
+# Zookeeper address used for HA and state store. e.g. host1:port1, host2:port2
+# livy.server.zookeeper.url =
+
+# Where Livy should store state to for recovery. Possible values:
+# <empty>: Default. State store disabled.
+# filesystem: Store state on a file system.
+# zookeeper: Store state in a Zookeeper instance.
+# livy.server.recovery.state-store =
+
+# For filesystem state store, the path of the state store directory. Please don't use a filesystem
+# that doesn't support atomic rename (e.g. S3). e.g. file:///tmp/livy or hdfs:///.
+# For zookeeper, the address to the Zookeeper servers. e.g. host1:port1,host2:port2
+# If livy.server.recovery.state-store is zookeeper, this config is for back-compatibility,
+# so if both this config and livy.server.zookeeper.url exist,
+# livy uses livy.server.zookeeper.url first.
+# livy.server.recovery.state-store.url =
+
+# The policy of curator connecting to zookeeper.
+# For example, m, n means retry m times and the interval of retry is n milliseconds.
+# Please use the new config: livy.server.zk.retry-policy.
+# Keep this config for back-compatibility.
+# If both this config and livy.server.zk.retry-policy exist,
+# livy uses livy.server.zk.retry-policy first.
+# livy.server.recovery.zk-state-store.retry-policy = 5,100
+
+# The policy of curator connecting to zookeeper.
+# For example, m, n means retry m times and the interval of retry is n milliseconds
+# livy.server.zk.retry-policy =
+
+# The dir in zk to store the data about session.
+# livy.server.recovery.zk-state-store.key-prefix = livy
+
+# If Livy can't find the yarn app within this time, consider it lost.
+livy.server.yarn.app-lookup-timeout = 5m
+# When the cluster is busy, we may fail to launch yarn app in app-lookup-timeout, then it would
+# cause session leakage, so we need to check session leakage.
+# How long to check livy session leakage
+# livy.server.yarn.app-leakage.check-timeout = 600s
+# how often to check livy session leakage
+# livy.server.yarn.app-leakage.check-interval = 60s
+
+# How often Livy polls YARN to refresh YARN app state.
+# livy.server.yarn.poll-interval = 5s
+#
+# Days to keep Livy server request logs.
+# livy.server.request-log-retain.days = 5
+
+# If the Livy Web UI should be included in the Livy Server. Enabled by default.
+# livy.ui.enabled = true
+
+# Whether to enable Livy server access control, if it is true then all the income requests will
+# be checked if the requested user has permission.
+# livy.server.access-control.enabled = false
+
+# Allowed users to access Livy, by default any user is allowed to access Livy. If user want to
+# limit who could access Livy, user should list all the permitted users with comma separated.
+# livy.server.access-control.allowed-users = *
+
+# A list of users with comma separated has the permission to change other user's submitted
+# session, like submitting statements, deleting session.
+# livy.server.access-control.modify-users =
+
+# A list of users with comma separated has the permission to view other user's information, like
+# submitted session state, statement results.
+# livy.server.access-control.view-users =
+#
+# Authentication support for Livy server
+# Livy has a built-in SPnego authentication support for HTTP requests  with below configurations.
+# livy.server.auth.type = kerberos
+# livy.server.auth.kerberos.principal = <spnego principal>
+# livy.server.auth.kerberos.keytab = <spnego keytab>
+# livy.server.auth.kerberos.name-rules = DEFAULT
+
+# livy.server.launch.kerberos.principal = ailab
+# livy.server.launch.kerberos.keytab = /opt/cluster/conf/user.keytab
+#
+# If user wants to use custom authentication filter, configurations are:
+# livy.server.auth.type = <custom>
+# livy.server.auth.<custom>.class = <class of custom auth filter>
+# livy.server.auth.<custom>.param.<foo1> = <bar1>
+# livy.server.auth.<custom>.param.<foo2> = <bar2>

+ 268 - 0
confs/idcprod/conf/log4j.properties

@@ -0,0 +1,268 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+# Null Appender
+log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
+
+#
+# Rolling File Appender - cap space usage at 5gb.
+#
+hadoop.log.maxfilesize=256MB
+hadoop.log.maxbackupindex=20
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
+log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
+
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# HDFS block state change log from block manager
+#
+# Uncomment the following to suppress normal block state change
+# messages from BlockManager in NameNode.
+#log4j.logger.BlockStateChange=WARN
+
+#
+#Security appender
+#
+hadoop.security.logger=INFO,NullAppender
+hadoop.security.log.maxfilesize=256MB
+hadoop.security.log.maxbackupindex=20
+log4j.category.SecurityLogger=${hadoop.security.logger}
+hadoop.security.log.file=SecurityAuth-${user.name}.audit
+log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
+log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
+
+#
+# Daily Rolling Security appender
+#
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
+
+#
+# hadoop configuration logging
+#
+
+# Uncomment the following line to turn off configuration deprecation warnings.
+# log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
+
+#
+# hdfs audit logging
+#
+hdfs.audit.logger=INFO,NullAppender
+hdfs.audit.log.maxfilesize=256MB
+hdfs.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
+log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
+log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
+log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
+log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}
+
+#
+# mapred audit logging
+#
+mapred.audit.logger=INFO,NullAppender
+mapred.audit.log.maxfilesize=256MB
+mapred.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
+log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
+log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
+log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+# AWS SDK & S3A FileSystem
+log4j.logger.com.amazonaws=ERROR
+log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR
+log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender
+#
+# Use following logger to send summary to separate file defined by
+# hadoop.mapreduce.jobsummary.log.file :
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+#
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
+hadoop.mapreduce.jobsummary.log.maxbackupindex=20
+log4j.appender.JSA=org.apache.log4j.RollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
+log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# Yarn ResourceManager Application Summary Log
+#
+# Set the ResourceManager summary log filename
+yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
+# Set the ResourceManager summary log level and appender
+yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
+#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
+
+# To enable AppSummaryLogging for the RM,
+# set yarn.server.resourcemanager.appsummary.logger to
+# <LEVEL>,RMSUMMARY in hadoop-env.sh
+
+# Appender for ResourceManager Application Summary Log
+# Requires the following properties to be set
+#    - hadoop.log.dir (Hadoop Log directory)
+#    - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
+#    - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
+
+log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
+log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
+log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
+log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
+log4j.appender.RMSUMMARY.MaxFileSize=256MB
+log4j.appender.RMSUMMARY.MaxBackupIndex=20
+log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
+log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+
+# HS audit log configs
+#mapreduce.hs.audit.logger=INFO,HSAUDIT
+#log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger}
+#log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false
+#log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log
+#log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout
+#log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+#log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd
+
+# Http Server Request Logs
+#log4j.logger.http.requests.namenode=INFO,namenoderequestlog
+#log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log
+#log4j.appender.namenoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.datanode=INFO,datanoderequestlog
+#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log
+#log4j.appender.datanoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog
+#log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log
+#log4j.appender.resourcemanagerrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog
+#log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log
+#log4j.appender.jobhistoryrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog
+#log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log
+#log4j.appender.nodemanagerrequestlog.RetainDays=3

+ 189 - 0
confs/idcprod/conf/mapred-site.xml

@@ -0,0 +1,189 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>mapreduce.job.split.metainfo.maxsize</name>
+    <value>10000000</value>
+  </property>
+  <property>
+    <name>mapreduce.job.counters.max</name>
+    <value>120</value>
+  </property>
+  <property>
+    <name>mapreduce.job.counters.groups.max</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>mapreduce.output.fileoutputformat.compress</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>mapreduce.output.fileoutputformat.compress.type</name>
+    <value>BLOCK</value>
+  </property>
+  <property>
+    <name>mapreduce.output.fileoutputformat.compress.codec</name>
+    <value>org.apache.hadoop.io.compress.DefaultCodec</value>
+  </property>
+  <property>
+    <name>mapreduce.map.output.compress.codec</name>
+    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
+  </property>
+  <property>
+    <name>mapreduce.map.output.compress</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>zlib.compress.level</name>
+    <value>DEFAULT_COMPRESSION</value>
+  </property>
+  <property>
+    <name>mapreduce.task.io.sort.factor</name>
+    <value>100</value>
+  </property>
+  <property>
+    <name>mapreduce.map.sort.spill.percent</name>
+    <value>0.8</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.shuffle.parallelcopies</name>
+    <value>30</value>
+  </property>
+  <property>
+    <name>mapreduce.task.timeout</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>mapreduce.client.submit.file.replication</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>mapreduce.job.reduces</name>
+    <value>400</value>
+  </property>
+  <property>
+    <name>mapreduce.task.io.sort.mb</name>
+    <value>1024</value>
+  </property>
+  <property>
+    <name>mapreduce.map.speculative</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.speculative</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>mapreduce.job.reduce.slowstart.completedmaps</name>
+    <value>0.8</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.address</name>
+    <value>cdhproddn02.yili.com:10020</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.webapp.address</name>
+    <value>cdhproddn02.yili.com:19888</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.webapp.https.address</name>
+    <value>cdhproddn02.yili.com:19890</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.admin.address</name>
+    <value>cdhproddn02.yili.com:10033</value>
+  </property>
+  <property>
+    <name>mapreduce.framework.name</name>
+    <value>yarn</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.staging-dir</name>
+    <value>/user</value>
+  </property>
+  <property>
+    <name>mapreduce.am.max-attempts</name>
+    <value>2</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.resource.mb</name>
+    <value>1024</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.resource.cpu-vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>mapreduce.job.ubertask.enable</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.command-opts</name>
+    <value>-Djava.net.preferIPv4Stack=true -Xmx825955249</value>
+  </property>
+  <property>
+    <name>mapreduce.map.java.opts</name>
+    <value>-Djava.net.preferIPv4Stack=true -Xmx4096m</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.java.opts</name>
+    <value>-Djava.net.preferIPv4Stack=true  -Xmx8192m</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.admin.user.env</name>
+    <value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH</value>
+  </property>
+  <property>
+    <name>mapreduce.map.memory.mb</name>
+    <value>3072</value>
+  </property>
+  <property>
+    <name>mapreduce.map.cpu.vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.memory.mb</name>
+    <value>6144</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.cpu.vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>mapreduce.job.heap.memory-mb.ratio</name>
+    <value>0.8</value>
+  </property>
+  <property>
+    <name>mapreduce.application.classpath</name>
+    <value>$HADOOP_CLIENT_CONF_DIR,$PWD/mr-framework/*,$MR2_CLASSPATH</value>
+  </property>
+  <property>
+    <name>mapreduce.application.framework.path</name>
+    <value>hdfs://nameservice1//user/yarn/mapreduce/mr-framework/3.0.0-cdh6.3.2-mr-framework.tar.gz#mr-framework</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.jhist.format</name>
+    <value>binary</value>
+  </property>
+  <property>
+    <name>mapreduce.admin.user.env</name>
+    <value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH</value>
+  </property>
+  <property>
+    <name>mapreduce.job.redacted-properties</name>
+    <value>fs.s3a.access.key,fs.s3a.secret.key,fs.adl.oauth2.credential,dfs.adls.oauth2.credential,fs.azure.account.oauth2.client.secret</value>
+  </property>
+  <property>
+    <name>mapreduce.job.acl-view-job</name>
+    <value> </value>
+  </property>
+  <property>
+    <name>mapreduce.job.acl-modify-job</name>
+    <value> </value>
+  </property>
+  <property>
+    <name>mapreduce.cluster.acls.enabled</name>
+    <value>false</value>
+  </property>
+</configuration>
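
A quick way to sanity-check the heap settings above is to compare each -Xmx value in mapreduce.*.java.opts against its container size in mapreduce.*.memory.mb; a heap limit above the container size usually ends with YARN killing the task once the JVM actually grows. The sketch below is illustrative only and not part of this commit; it assumes it is run from the repo root so the relative path resolves.

import re
import xml.etree.ElementTree as ET

# Parse the Cloudera-generated mapred-site.xml above into a flat dict.
root = ET.parse("confs/idcprod/conf/mapred-site.xml").getroot()
conf = {p.findtext("name"): p.findtext("value") for p in root}

for kind in ("map", "reduce"):
    container_mb = int(conf[f"mapreduce.{kind}.memory.mb"])
    opts = conf[f"mapreduce.{kind}.java.opts"]
    value, unit = re.search(r"-Xmx(\d+)([kKmMgG]?)", opts).groups()
    factor = {"": 1 / 2**20, "k": 1 / 2**10, "m": 1, "g": 2**10}[unit.lower()]
    heap_mb = int(value) * factor
    flag = "  <-- heap larger than container" if heap_mb > container_mb else ""
    print(f"{kind}: container={container_mb} MB, -Xmx~{heap_mb:.0f} MB{flag}")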

+ 12 - 0
confs/idcprod/conf/spark-defaults.conf

@@ -0,0 +1,12 @@
+spark.master yarn
+spark.submit.deployMode cluster
+spark.sql.hive.metastore.version 2.1
+spark.sql.hive.convertMetastoreOrc false
+spark.sql.hive.convertMetastoreParquet false
+spark.yarn.archive hdfs:/user/AIuser/sparkcdh_jars.zip
+spark.yarn.dist.archives hdfs:/user/AIuser/py37.zip#python3env
+spark.pyspark.python ./python3env/py37/bin/python
+spark.yarn.queue root.aiuser
+# spark.yarn.queue root.default
+
+
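
With these defaults, a bare spark-submit run against this conf directory goes to YARN in cluster mode, ships py37.zip as python3env, and points executors at the interpreter inside it. A minimal smoke-test job for that packed environment might look like the sketch below; the file name job.py is an assumption, and it would be submitted with HADOOP_CONF_DIR and SPARK_CONF_DIR pointing at confs/idcprod/conf.

# job.py - hypothetical smoke test for the python3env archive configured above
import sys

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("python3env-smoke-test").getOrCreate()

# Report which interpreter the driver and the executors actually use.
print("driver python:", sys.executable)
executor_pythons = (
    spark.sparkContext.parallelize(range(8), 4)
    .map(lambda _: __import__("sys").executable)
    .distinct()
    .collect()
)
print("executor pythons:", executor_pythons)
spark.stop()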

+ 2 - 0
confs/idcprod/conf/spark-env.sh

@@ -0,0 +1,2 @@
+export SPARK_DIST_CLASSPATH=$SPARK_DIST_CLASSPATH:/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/*
+export HADOOP_USER_NAME=aiuser

+ 13 - 0
confs/idcprod/conf/ssl-client.xml

@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>ssl.client.truststore.type</name>
+    <value>jks</value>
+  </property>
+  <property>
+    <name>ssl.client.truststore.reload.interval</name>
+    <value>10000</value>
+  </property>
+</configuration>

+ 5 - 0
confs/idcprod/conf/submit.sh

@@ -0,0 +1,5 @@
+"$SPARK_HOME"/bin/spark-submit --class org.apache.spark.examples.SparkPi \
+--master yarn \
+--deploy-mode cluster \
+"$SPARK_HOME"/examples/jars/spark-examples_2.11-2.4.0-cdh6.3.2.jar \
+10

+ 125 - 0
confs/idcprod/conf/yarn-site.xml

@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>yarn.acl.enable</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>yarn.admin.acl</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.address</name>
+    <value>cdhproddn01.yili.com:8032</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.admin.address</name>
+    <value>cdhproddn01.yili.com:8033</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.scheduler.address</name>
+    <value>cdhproddn01.yili.com:8030</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.resource-tracker.address</name>
+    <value>cdhproddn01.yili.com:8031</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.webapp.address</name>
+    <value>cdhproddn01.yili.com:8088</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.webapp.https.address</name>
+    <value>cdhproddn01.yili.com:8090</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.client.thread-count</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.scheduler.client.thread-count</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.admin.client.thread-count</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.minimum-allocation-mb</name>
+    <value>5120</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.increment-allocation-mb</name>
+    <value>512</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.maximum-allocation-mb</name>
+    <value>49152</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.minimum-allocation-vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.increment-allocation-vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.maximum-allocation-vcores</name>
+    <value>51</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.amliveliness-monitor.interval-ms</name>
+    <value>1000</value>
+  </property>
+  <property>
+    <name>yarn.am.liveness-monitor.expiry-interval-ms</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.am.max-attempts</name>
+    <value>2</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name>
+    <value>1000</value>
+  </property>
+  <property>
+    <name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>yarn.application.classpath</name>
+    <value>$HADOOP_CLIENT_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.scheduler.class</name>
+    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.capacity.resource-calculator</name>
+    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.max-completed-applications</name>
+    <value>10000</value>
+  </property>
+  <property>
+    <name>yarn.nodemanager.remote-app-log-dir</name>
+    <value>/tmp/logs</value>
+  </property>
+  <property>
+    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
+    <value>logs</value>
+  </property>
+</configuration>
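
Under the scheduler settings above, every container request is normalized: memory is raised to at least 5120 MB, rounded up to the next 512 MB increment, and capped at 49152 MB. A rough sketch of that normalization (illustrative, not part of the commit):

import math

MIN_MB, INC_MB, MAX_MB = 5120, 512, 49152  # values from the yarn-site.xml above

def granted_mb(requested_mb: int) -> int:
    """Approximate the scheduler's memory normalization for a container request."""
    size = max(requested_mb, MIN_MB)
    size = math.ceil(size / INC_MB) * INC_MB
    return min(size, MAX_MB)

for requested in (1024, 5121, 8192, 100000):
    print(f"request {requested} MB -> container {granted_mb(requested)} MB")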

+ 133 - 0
confs/idctest/conf/core-site.xml

@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://nameservice1</value>
+  </property>
+  <property>
+    <name>io.compression.codecs</name>
+    <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
+  </property>
+  <property>
+    <name>hadoop.security.authentication</name>
+    <value>simple</value>
+  </property>
+  <property>
+    <name>hadoop.security.authorization</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hadoop.rpc.protection</name>
+    <value>authentication</value>
+  </property>
+  <property>
+    <name>hadoop.security.auth_to_local</name>
+    <value>DEFAULT</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.oozie.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.oozie.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.flume.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.flume.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.HTTP.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.HTTP.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hive.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hive.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hue.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hue.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.httpfs.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.httpfs.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hdfs.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.hdfs.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.yarn.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.yarn.groups</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.security.group.mapping</name>
+    <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
+  </property>
+  <property>
+    <name>hadoop.security.instrumentation.requires.admin</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>net.topology.script.file.name</name>
+    <value>/etc/hadoop/conf.cloudera.yarn/topology.py</value>
+  </property>
+  <property>
+    <name>io.file.buffer.size</name>
+    <value>65536</value>
+  </property>
+  <property>
+    <name>hadoop.ssl.enabled</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hadoop.ssl.require.client.cert</name>
+    <value>false</value>
+    <final>true</final>
+  </property>
+  <property>
+    <name>hadoop.ssl.keystores.factory.class</name>
+    <value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
+    <final>true</final>
+  </property>
+  <property>
+    <name>hadoop.ssl.server.conf</name>
+    <value>ssl-server.xml</value>
+    <final>true</final>
+  </property>
+  <property>
+    <name>hadoop.ssl.client.conf</name>
+    <value>ssl-client.xml</value>
+    <final>true</final>
+  </property>
+</configuration>

+ 111 - 0
confs/idctest/conf/hdfs-site.xml

@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>dfs.nameservices</name>
+    <value>nameservice1</value>
+  </property>
+  <property>
+    <name>dfs.client.failover.proxy.provider.nameservice1</name>
+    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
+  </property>
+  <property>
+    <name>dfs.ha.automatic-failover.enabled.nameservice1</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>ha.zookeeper.quorum</name>
+    <value>10.116.1.76,10.116.1.77,10.116.1.78</value>
+    <!-- <value>cdhtestdn06.yili.com:2181,cdhtestdn07.yili.com:2181,cdhtestdn08.yili.com:2181</value> -->
+  </property>
+  <property>
+    <name>dfs.ha.namenodes.nameservice1</name>
+    <value>namenode96,namenode163</value>
+  </property>
+  <property>
+    <name>dfs.namenode.rpc-address.nameservice1.namenode96</name>
+    <value>10.119.14.67:8020</value>
+    <!-- <value>cdhtestnm01.yili.com:8020</value> -->
+  </property>
+  <property>
+    <name>dfs.namenode.servicerpc-address.nameservice1.namenode96</name>
+    <value>10.119.14.67:8022</value>
+    <!-- <value>cdhtestnm01.yili.com:8022</value> -->
+  </property>
+  <property>
+    <name>dfs.namenode.http-address.nameservice1.namenode96</name>
+    <value>10.119.14.67:9870</value>
+    <!-- <value>cdhtestnm01.yili.com:9870</value> -->
+  </property>
+  <property>
+    <name>dfs.namenode.https-address.nameservice1.namenode96</name>
+    <value>10.119.14.67:9871</value>
+    <!-- <value>cdhtestnm01.yili.com:9871</value> -->
+  </property>
+  <property>
+    <name>dfs.namenode.rpc-address.nameservice1.namenode163</name>
+    <value>10.119.14.68:8020</value>
+    <!-- <value>cdhtestnm02.yili.com:8020</value> -->
+  </property>
+  <property>
+    <name>dfs.namenode.servicerpc-address.nameservice1.namenode163</name>
+    <value>10.119.14.68:8022</value>
+    <!-- <value>cdhtestnm02.yili.com:8022</value> -->
+  </property>
+  <property>
+    <name>dfs.namenode.http-address.nameservice1.namenode163</name>
+    <value>10.119.14.68:9870</value>
+    <!-- <value>cdhtestnm02.yili.com:9870</value> -->
+  </property>
+  <property>
+    <name>dfs.namenode.https-address.nameservice1.namenode163</name>
+    <value>10.119.14.68:9871</value>
+    <!-- <value>cdhtestnm02.yili.com:9871</value> -->
+  </property>
+  <property>
+    <name>dfs.replication</name>
+    <value>2</value>
+  </property>
+  <property>
+    <name>dfs.blocksize</name>
+    <value>134217728</value>
+  </property>
+  <property>
+    <name>dfs.client.use.datanode.hostname</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>fs.permissions.umask-mode</name>
+    <value>022</value>
+  </property>
+  <!--'dfs.client.block.write.locateFollowingBlock.retries', originally set to '7' (non-final), is overridden below by a safety valve-->
+  <property>
+    <name>dfs.namenode.acls.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>dfs.client.read.shortcircuit</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.domain.socket.path</name>
+    <value>/var/run/hdfs-sockets/dn</value>
+  </property>
+  <property>
+    <name>dfs.client.read.shortcircuit.skip.checksum</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.client.domain.socket.data.traffic</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>dfs.client.block.write.locateFollowingBlock.retries</name>
+    <value>15</value>
+  </property>
+</configuration>
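
With HA enabled for nameservice1, clients fail over between namenode96 and namenode163 through the ConfiguredFailoverProxyProvider; to see which NameNode is currently active you can hit each one's JMX servlet on the 9870 HTTP port listed above. A hedged sketch using requests (the NameNodeStatus bean is the stock Hadoop 3 one; adjust addresses if the cluster changes):

import requests

# HTTP addresses taken from the hdfs-site.xml entries above.
NAMENODES = {"namenode96": "10.119.14.67:9870", "namenode163": "10.119.14.68:9870"}

for name, addr in NAMENODES.items():
    url = f"http://{addr}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
    try:
        bean = requests.get(url, timeout=5).json()["beans"][0]
        print(f"{name}: {bean['State']}")  # 'active' or 'standby'
    except requests.RequestException as exc:
        print(f"{name}: unreachable ({exc})")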

+ 285 - 0
confs/idctest/conf/hive-site.xml

@@ -0,0 +1,285 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>hive.metastore.uris</name>
+    <value>thrift://10.116.1.72:9083</value>
+    <!-- <value>thrift://cdhtestdn02.yili.com:9083</value> -->
+  </property>
+  <property>
+    <name>hive.metastore.client.socket.timeout</name>
+    <value>3600</value>
+  </property>
+  <property>
+    <name>hive.metastore.warehouse.dir</name>
+    <value>/user/hive/warehouse</value>
+  </property>
+  <property>
+    <name>hive.warehouse.subdir.inherit.perms</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.auto.convert.join</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.auto.convert.join.noconditionaltask.size</name>
+    <value>1073741824</value>
+  </property>
+  <property>
+    <name>hive.optimize.bucketmapjoin.sortedmerge</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.smbjoin.cache.rows</name>
+    <value>10000</value>
+  </property>
+  <property>
+    <name>hive.server2.logging.operation.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.server2.logging.operation.log.location</name>
+    <value>/var/log/hive/operation_logs</value>
+  </property>
+  <property>
+    <name>mapred.reduce.tasks</name>
+    <value>-1</value>
+  </property>
+  <property>
+    <name>hive.exec.reducers.bytes.per.reducer</name>
+    <value>67108864</value>
+  </property>
+  <property>
+    <name>hive.exec.copyfile.maxsize</name>
+    <value>33554432</value>
+  </property>
+  <property>
+    <name>hive.exec.reducers.max</name>
+    <value>1099</value>
+  </property>
+  <property>
+    <name>hive.vectorized.groupby.checkinterval</name>
+    <value>4096</value>
+  </property>
+  <property>
+    <name>hive.vectorized.groupby.flush.percent</name>
+    <value>0.1</value>
+  </property>
+  <property>
+    <name>hive.compute.query.using.stats</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.vectorized.execution.enabled</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.execution.reduce.enabled</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.use.vectorized.input.format</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.use.checked.expressions</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.use.vector.serde.deserialize</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.vectorized.adaptor.usage.mode</name>
+    <value>chosen</value>
+  </property>
+  <property>
+    <name>hive.vectorized.input.format.excludes</name>
+    <value>org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat</value>
+  </property>
+  <property>
+    <name>hive.merge.mapfiles</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.merge.mapredfiles</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.cbo.enable</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.fetch.task.conversion</name>
+    <value>minimal</value>
+  </property>
+  <property>
+    <name>hive.fetch.task.conversion.threshold</name>
+    <value>268435456</value>
+  </property>
+  <property>
+    <name>hive.limit.pushdown.memory.usage</name>
+    <value>0.1</value>
+  </property>
+  <property>
+    <name>hive.merge.sparkfiles</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.merge.smallfiles.avgsize</name>
+    <value>16777216</value>
+  </property>
+  <property>
+    <name>hive.merge.size.per.task</name>
+    <value>268435456</value>
+  </property>
+  <property>
+    <name>hive.optimize.reducededuplication</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.optimize.reducededuplication.min.reducer</name>
+    <value>4</value>
+  </property>
+  <property>
+    <name>hive.map.aggr</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.map.aggr.hash.percentmemory</name>
+    <value>0.5</value>
+  </property>
+  <property>
+    <name>hive.optimize.sort.dynamic.partition</name>
+    <value>false</value>
+  </property>
+  <!--'hive.execution.engine', originally set to 'mr' (non-final), is overridden below by a safety valve-->
+  <property>
+    <name>spark.executor.memory</name>
+    <value>9488947609b</value>
+  </property>
+  <property>
+    <name>spark.driver.memory</name>
+    <value>11596411699b</value>
+  </property>
+  <property>
+    <name>spark.executor.cores</name>
+    <value>4</value>
+  </property>
+  <property>
+    <name>spark.yarn.driver.memoryOverhead</name>
+    <value>1228m</value>
+  </property>
+  <property>
+    <name>spark.yarn.executor.memoryOverhead</name>
+    <value>1596m</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.initialExecutors</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.minExecutors</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>spark.dynamicAllocation.maxExecutors</name>
+    <value>2147483647</value>
+  </property>
+  <property>
+    <name>hive.metastore.execute.setugi</name>
+    <value>true</value>
+  </property>
+  <!--'hive.support.concurrency', originally set to 'true' (non-final), is overridden below by a safety valve-->
+  <property>
+    <name>hive.zookeeper.quorum</name>
+    <!-- <value>10.116.1.76,10.116.1.77,10.116.1.78</value> -->
+    <value>cdhtestdn06.yili.com,cdhtestdn07.yili.com,cdhtestdn08.yili.com</value>
+  </property>
+  <property>
+    <name>hive.zookeeper.client.port</name>
+    <value>2181</value>
+  </property>
+  <property>
+    <name>hive.zookeeper.namespace</name>
+    <value>hive_zookeeper_namespace_hive</value>
+  </property>
+  <property>
+    <name>hive.cluster.delegation.token.store.class</name>
+    <value>org.apache.hadoop.hive.thrift.DBTokenStore</value>
+  </property>
+  <property>
+    <name>hive.server2.enable.doAs</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.server2.use.SSL</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>spark.shuffle.service.enabled</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.orderby.no.limit</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.no.partition.filter</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.type.safety</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.cartesian.product</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>hive.strict.checks.bucketing</name>
+    <value>true</value>
+  </property>
+  <!-- <property>
+    <name>hive.execution.engine</name>
+    <value>tez</value>
+  </property> -->
+  <property>
+    <name>hive.tez.container.size</name>
+    <value>5120</value>
+  </property>
+  <property>
+    <name>hive.exec.dynamic.partition</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hive.exec.dynamic.partition.mode</name>
+    <value>nonstrict</value>
+  </property>
+  <property>
+    <name>hive.exec.max.dynamic.partitions.pernode</name>
+    <value>2000</value>
+  </property>
+  <property>
+    <name>hive.tez.cpu.vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>hive.sentry.conf.url</name>
+    <value>file:///etc/sentry/conf/sentry-site.xml</value>
+  </property>
+  <property>
+    <name>hive.support.concurrency</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>tez.am.resource.memory.mb</name>
+    <value>8192</value>
+  </property>
+</configuration>
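
This file only pins down the metastore URI and the ZooKeeper discovery namespace; it does not name a HiveServer2 endpoint. For a quick connectivity check from this image, a PyHive sketch like the one below can be used, where the host is a placeholder and port 10000 is merely the HiveServer2 default rather than anything this file specifies.

from pyhive import hive

# Hypothetical HiveServer2 endpoint; replace the host with a real HS2 node.
conn = hive.connect(host="hs2.example.yili.com", port=10000, username="aiuser")
try:
    cursor = conn.cursor()
    cursor.execute("SHOW DATABASES")
    print(cursor.fetchall())
finally:
    conn.close()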

+ 104 - 0
confs/idctest/conf/livy-client.conf

@@ -0,0 +1,104 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Configurations for a Livy Client; any configurations set here will override any
+# livy or spark-default configurations.
+#
+# Before a Livy Client is able to load these configurations the folder containing
+# this file must be added to the application classpath
+#
+
+#
+# Configurations for Livy HTTPClient
+#
+
+# HTTP Request configurations
+# How long before a request times out
+# livy.client.http.connection.timeout = 10s
+# How long between data packets before a request times out
+# livy.client.http.connection.socket.timeout = 5m
+# Whether content is compressed
+# livy.client.http.content.compress.enable = true
+
+# How long before idle connections are closed
+# livy.client.http.connection.idle.timeout = 10m
+
+# Initial interval before polling for Job results
+# livy.client.http.job.initial-poll-interval = 100ms
+# Maximum interval between successive polls
+# livy.client.http.job.max-poll-interval = 5s
+
+#
+# Configurations for Livy RSCClient
+#
+
+# Configurations for registering a client with the rpc server
+# Unique client id for connections to the rpc server
+# livy.rsc.client.auth.id =
+# Secret value for authenticating client connections with server
+# livy.rsc.client.auth.secret =
+
+# Timeout when stopping a rsc client
+# livy.rsc.client.shutdown-timeout = 10s
+
+# Class of the rsc driver to use
+# livy.rsc.driver-class =
+# The kind of rsc session. Examples: pyspark or sparkr
+# livy.rsc.session.kind =
+
+# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced.
+#livy.rsc.jars = hdfs:/user/aidevuser/rsc-jars/*
+# Location of the SparkR package for running sparkr
+# livy.rsc.sparkr.package =
+# Location of the PySpark package for running pyspark
+# livy.rsc.pyspark.archives =
+
+# Address for the RSC driver to connect back with its connection info.
+livy.rsc.launcher.address = please_set_correct_addresss
+# Port range on which the RPC server will launch. The range is inclusive of the start and end ports.
+livy.rsc.launcher.port.range = 31100~31110
+
+# How long will the RSC wait for a connection for a Livy server before shutting itself down.
+livy.rsc.server.idle-timeout = 30m
+
+# The user that should be impersonated when requesting a Livy session
+#livy.rsc.proxy-user = aidevuser
+
+# Host or IP address of the RPC server
+livy.rsc.rpc.server.address = 0.0.0.0
+# How long the rsc client will wait when attempting to connect to the Livy server
+livy.rsc.server.connect.timeout = 5m
+
+# The logging level for the rpc channel. Possible values: TRACE, DEBUG, INFO, WARN, or ERROR
+# livy.rsc.channel.log.level =
+
+# SASL configurations for authentication
+# SASL mechanism used for authentication
+# livy.rsc.rpc.sasl.mechanisms = DIGEST-MD5
+# SASL qop used for authentication
+# livy.rsc.rpc.sasl.qop =
+
+# Time between status checks for a cancelled Job
+# livy.rsc.job-cancel.trigger-interval = 100ms
+# Time before a cancelled Job is forced into a Cancelled state
+# livy.rsc.job-cancel.timeout = 30s
+
+# Number of statements kept in driver's memory
+# livy.rsc.retained-statements = 100

+ 190 - 0
confs/idctest/conf/livy.conf

@@ -0,0 +1,190 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Use this keystore for the SSL certificate and key.
+# livy.keystore =
+
+# Specify the keystore password.
+# livy.keystore.password =
+#
+# Specify the key password.
+# livy.key-password =
+
+# Hadoop Credential Provider Path to get "livy.keystore.password" and "livy.key-password".
+# Credential Provider can be created using a command such as the following:
+# hadoop credential create "livy.keystore.password" -value "secret" -provider jceks://hdfs/path/to/livy.jceks
+# livy.hadoop.security.credential.provider.path =
+
+# What host address to start the server on. By default, Livy will bind to all network interfaces.
+# livy.server.host = 0.0.0.0
+
+# What port to start the server on.
+livy.server.port = 8998
+
+# What base path ui should work on. By default UI is mounted on "/".
+# E.g.: livy.ui.basePath = /my_livy - results in mounting the UI on /my_livy/
+# livy.ui.basePath = ""
+
+# What spark master Livy sessions should use.
+livy.spark.master = yarn
+
+# What spark deploy mode Livy sessions should use.
+livy.spark.deploy-mode = cluster
+
+# Configure Livy server http request and response header size.
+# livy.server.request-header.size = 131072
+# livy.server.response-header.size = 131072
+
+# Enable checking whether timed-out Livy sessions should be stopped.
+# livy.server.session.timeout-check = true
+#
+# Whether or not to skip timeout check for a busy session
+# livy.server.session.timeout-check.skip-busy = false
+
+# Time in milliseconds on how long Livy will wait before timing out an inactive session.
+# Note that the inactive session could be busy running jobs.
+# livy.server.session.timeout = 1h
+#
+# How long a finished session state should be kept in LivyServer for query.
+# livy.server.session.state-retain.sec = 600s
+
+# If livy should impersonate the requesting users when creating a new session.
+# livy.impersonation.enabled = false
+
+# Logs size livy can cache for each session/batch. 0 means don't cache the logs.
+# livy.cache-log.size = 200
+
+# Comma-separated list of Livy RSC jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced.
+livy.rsc.jars = hdfs:/user/aiuser/livy/rsc-jars/*
+
+# Comma-separated list of Livy REPL jars. By default Livy will upload jars from its installation
+# directory every time a session is started. By caching these files in HDFS, for example, startup
+# time of sessions on YARN can be reduced. Please list all the repl dependencies including
+# Scala version-specific livy-repl jars, Livy will automatically pick the right dependencies
+# during session creation.
+# livy.repl.jars = hdfs:/user/aiuser/livy/repl_2.11-jars/*
+
+# Location of PySpark archives. By default Livy will upload the file from SPARK_HOME, but
+# by caching the file in HDFS, startup time of PySpark sessions on YARN can be reduced.
+# livy.pyspark.archives =
+
+# Location of the SparkR package. By default Livy will upload the file from SPARK_HOME, but
+# by caching the file in HDFS, startup time of R sessions on YARN can be reduced.
+# livy.sparkr.package =
+
+# List of local directories from where files are allowed to be added to user sessions. By
+# default it's empty, meaning users can only reference remote URIs when starting their
+# sessions.
+# livy.file.local-dir-whitelist =
+
+# Whether to enable csrf protection, by default it is false. If it is enabled, client should add
+# http-header "X-Requested-By" in request if the http method is POST/DELETE/PUT/PATCH.
+# livy.server.csrf-protection.enabled =
+
+# Whether to enable HiveContext in the Livy interpreter; if it is true, hive-site.xml will be detected
+# on the user request classpath and then on the Livy server classpath automatically.
+livy.repl.enable-hive-context = true
+
+# Recovery mode of Livy. Possible values:
+# off: Default. Turn off recovery. Every time Livy shuts down, it stops and forgets all sessions.
+# recovery: Livy persists session info to the state store. When Livy restarts, it recovers
+#           previous sessions from the state store.
+# Must set livy.server.recovery.state-store and livy.server.recovery.state-store.url to
+# configure the state store.
+# livy.server.recovery.mode = off
+# Zookeeper address used for HA and state store. e.g. host1:port1, host2:port2
+# livy.server.zookeeper.url =
+
+# Where Livy should store state to for recovery. Possible values:
+# <empty>: Default. State store disabled.
+# filesystem: Store state on a file system.
+# zookeeper: Store state in a Zookeeper instance.
+# livy.server.recovery.state-store =
+
+# For filesystem state store, the path of the state store directory. Please don't use a filesystem
+# that doesn't support atomic rename (e.g. S3). e.g. file:///tmp/livy or hdfs:///.
+# For zookeeper, the address to the Zookeeper servers. e.g. host1:port1,host2:port2
+# If livy.server.recovery.state-store is zookeeper, this config is for back-compatibility,
+# so if both this config and livy.server.zookeeper.url exist,
+# livy uses livy.server.zookeeper.url first.
+# livy.server.recovery.state-store.url =
+
+# The policy of curator connecting to zookeeper.
+# For example, m, n means retry m times and the interval of retry is n milliseconds.
+# Please use the new config: livy.server.zk.retry-policy.
+# Keep this config for back-compatibility.
+# If both this config and livy.server.zk.retry-policy exist,
+# livy uses livy.server.zk.retry-policy first.
+# livy.server.recovery.zk-state-store.retry-policy = 5,100
+
+# The policy of curator connecting to zookeeper.
+# For example, m, n means retry m times and the interval of retry is n milliseconds
+# livy.server.zk.retry-policy =
+
+# The dir in zk to store the data about session.
+# livy.server.recovery.zk-state-store.key-prefix = livy
+
+# If Livy can't find the yarn app within this time, consider it lost.
+livy.server.yarn.app-lookup-timeout = 5m
+# When the cluster is busy, launching the YARN app may not finish within app-lookup-timeout, which
+# would cause session leakage, so we need to check for leaked sessions.
+# How long to check for Livy session leakage
+# livy.server.yarn.app-leakage.check-timeout = 600s
+# how often to check livy session leakage
+# livy.server.yarn.app-leakage.check-interval = 60s
+
+# How often Livy polls YARN to refresh YARN app state.
+# livy.server.yarn.poll-interval = 5s
+#
+# Days to keep Livy server request logs.
+# livy.server.request-log-retain.days = 5
+
+# If the Livy Web UI should be included in the Livy Server. Enabled by default.
+# livy.ui.enabled = true
+
+# Whether to enable Livy server access control; if it is true then all incoming requests will
+# be checked to verify that the requesting user has permission.
+# livy.server.access-control.enabled = false
+
+# Users allowed to access Livy; by default any user is allowed. To limit who can access Livy,
+# list all the permitted users, comma separated.
+# livy.server.access-control.allowed-users = *
+
+# A comma-separated list of users who have permission to change other users' submitted
+# sessions, e.g. submitting statements or deleting the session.
+# livy.server.access-control.modify-users =
+
+# A comma-separated list of users who have permission to view other users' information, such
+# as submitted session state and statement results.
+# livy.server.access-control.view-users =
+#
+# Authentication support for Livy server
+# Livy has built-in SPNEGO authentication support for HTTP requests with the configurations below.
+# livy.server.auth.type = kerberos
+# livy.server.auth.kerberos.principal = <spnego principal>
+# livy.server.auth.kerberos.keytab = <spnego keytab>
+# livy.server.auth.kerberos.name-rules = DEFAULT
+
+# livy.server.launch.kerberos.principal = ailab
+# livy.server.launch.kerberos.keytab = /opt/cluster/conf/user.keytab
+#
+# If user wants to use custom authentication filter, configurations are:
+# livy.server.auth.type = <custom>
+# livy.server.auth.<custom>.class = <class of custom auth filter>
+# livy.server.auth.<custom>.param.<foo1> = <bar1>
+# livy.server.auth.<custom>.param.<foo2> = <bar2>

+ 268 - 0
confs/idctest/conf/log4j.properties

@@ -0,0 +1,268 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+# Null Appender
+log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
+
+#
+# Rolling File Appender - cap space usage at 5gb.
+#
+hadoop.log.maxfilesize=256MB
+hadoop.log.maxbackupindex=20
+log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
+log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
+
+log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# HDFS block state change log from block manager
+#
+# Uncomment the following to suppress normal block state change
+# messages from BlockManager in NameNode.
+#log4j.logger.BlockStateChange=WARN
+
+#
+#Security appender
+#
+hadoop.security.logger=INFO,NullAppender
+hadoop.security.log.maxfilesize=256MB
+hadoop.security.log.maxbackupindex=20
+log4j.category.SecurityLogger=${hadoop.security.logger}
+hadoop.security.log.file=SecurityAuth-${user.name}.audit
+log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
+log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
+
+#
+# Daily Rolling Security appender
+#
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
+
+#
+# hadoop configuration logging
+#
+
+# Uncomment the following line to turn off configuration deprecation warnings.
+# log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
+
+#
+# hdfs audit logging
+#
+hdfs.audit.logger=INFO,NullAppender
+hdfs.audit.log.maxfilesize=256MB
+hdfs.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
+log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
+log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
+log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
+log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}
+
+#
+# mapred audit logging
+#
+mapred.audit.logger=INFO,NullAppender
+mapred.audit.log.maxfilesize=256MB
+mapred.audit.log.maxbackupindex=20
+log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
+log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
+log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
+log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
+log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+# AWS SDK & S3A FileSystem
+log4j.logger.com.amazonaws=ERROR
+log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR
+log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender
+#
+# Use following logger to send summary to separate file defined by
+# hadoop.mapreduce.jobsummary.log.file :
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+#
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
+hadoop.mapreduce.jobsummary.log.maxbackupindex=20
+log4j.appender.JSA=org.apache.log4j.RollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
+log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# Yarn ResourceManager Application Summary Log
+#
+# Set the ResourceManager summary log filename
+yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
+# Set the ResourceManager summary log level and appender
+yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
+#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
+
+# To enable AppSummaryLogging for the RM,
+# set yarn.server.resourcemanager.appsummary.logger to
+# <LEVEL>,RMSUMMARY in hadoop-env.sh
+
+# Appender for ResourceManager Application Summary Log
+# Requires the following properties to be set
+#    - hadoop.log.dir (Hadoop Log directory)
+#    - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
+#    - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
+
+log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
+log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
+log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
+log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
+log4j.appender.RMSUMMARY.MaxFileSize=256MB
+log4j.appender.RMSUMMARY.MaxBackupIndex=20
+log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
+log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+
+# HS audit log configs
+#mapreduce.hs.audit.logger=INFO,HSAUDIT
+#log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger}
+#log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false
+#log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log
+#log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout
+#log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
+#log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd
+
+# Http Server Request Logs
+#log4j.logger.http.requests.namenode=INFO,namenoderequestlog
+#log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log
+#log4j.appender.namenoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.datanode=INFO,datanoderequestlog
+#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log
+#log4j.appender.datanoderequestlog.RetainDays=3
+
+#log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog
+#log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log
+#log4j.appender.resourcemanagerrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog
+#log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log
+#log4j.appender.jobhistoryrequestlog.RetainDays=3
+
+#log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog
+#log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
+#log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log
+#log4j.appender.nodemanagerrequestlog.RetainDays=3

+ 189 - 0
confs/idctest/conf/mapred-site.xml

@@ -0,0 +1,189 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>mapreduce.job.split.metainfo.maxsize</name>
+    <value>10000000</value>
+  </property>
+  <property>
+    <name>mapreduce.job.counters.max</name>
+    <value>120</value>
+  </property>
+  <property>
+    <name>mapreduce.job.counters.groups.max</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>mapreduce.output.fileoutputformat.compress</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>mapreduce.output.fileoutputformat.compress.type</name>
+    <value>BLOCK</value>
+  </property>
+  <property>
+    <name>mapreduce.output.fileoutputformat.compress.codec</name>
+    <value>org.apache.hadoop.io.compress.DefaultCodec</value>
+  </property>
+  <property>
+    <name>mapreduce.map.output.compress.codec</name>
+    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
+  </property>
+  <property>
+    <name>mapreduce.map.output.compress</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>zlib.compress.level</name>
+    <value>DEFAULT_COMPRESSION</value>
+  </property>
+  <property>
+    <name>mapreduce.task.io.sort.factor</name>
+    <value>64</value>
+  </property>
+  <property>
+    <name>mapreduce.map.sort.spill.percent</name>
+    <value>0.8</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.shuffle.parallelcopies</name>
+    <value>10</value>
+  </property>
+  <property>
+    <name>mapreduce.task.timeout</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>mapreduce.client.submit.file.replication</name>
+    <value>3</value>
+  </property>
+  <property>
+    <name>mapreduce.job.reduces</name>
+    <value>256</value>
+  </property>
+  <property>
+    <name>mapreduce.task.io.sort.mb</name>
+    <value>256</value>
+  </property>
+  <property>
+    <name>mapreduce.map.speculative</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.speculative</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>mapreduce.job.reduce.slowstart.completedmaps</name>
+    <value>0.8</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.address</name>
+    <value>cdhtestnm02.yili.com:10020</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.webapp.address</name>
+    <value>cdhtestnm02.yili.com:19888</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.webapp.https.address</name>
+    <value>cdhtestnm02.yili.com:19890</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.admin.address</name>
+    <value>cdhtestnm02.yili.com:10033</value>
+  </property>
+  <property>
+    <name>mapreduce.framework.name</name>
+    <value>yarn</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.staging-dir</name>
+    <value>/user</value>
+  </property>
+  <property>
+    <name>mapreduce.am.max-attempts</name>
+    <value>2</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.resource.mb</name>
+    <value>1024</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.resource.cpu-vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>mapreduce.job.ubertask.enable</name>
+    <value>false</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.command-opts</name>
+    <value>-Djava.net.preferIPv4Stack=true -Xmx825955249</value>
+  </property>
+  <property>
+    <name>mapreduce.map.java.opts</name>
+    <value>-Djava.net.preferIPv4Stack=true</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.java.opts</name>
+    <value>-Djava.net.preferIPv4Stack=true</value>
+  </property>
+  <property>
+    <name>yarn.app.mapreduce.am.admin.user.env</name>
+    <value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH</value>
+  </property>
+  <property>
+    <name>mapreduce.map.memory.mb</name>
+    <value>4096</value>
+  </property>
+  <property>
+    <name>mapreduce.map.cpu.vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.memory.mb</name>
+    <value>4096</value>
+  </property>
+  <property>
+    <name>mapreduce.reduce.cpu.vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>mapreduce.job.heap.memory-mb.ratio</name>
+    <value>0.8</value>
+  </property>
+  <property>
+    <name>mapreduce.application.classpath</name>
+    <value>$HADOOP_CLIENT_CONF_DIR,$PWD/mr-framework/*,$MR2_CLASSPATH</value>
+  </property>
+  <property>
+    <name>mapreduce.application.framework.path</name>
+    <value>hdfs://nameservice1//user/yarn/mapreduce/mr-framework/3.0.0-cdh6.3.2-mr-framework.tar.gz#mr-framework</value>
+  </property>
+  <property>
+    <name>mapreduce.jobhistory.jhist.format</name>
+    <value>binary</value>
+  </property>
+  <property>
+    <name>mapreduce.admin.user.env</name>
+    <value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH</value>
+  </property>
+  <property>
+    <name>mapreduce.job.redacted-properties</name>
+    <value>fs.s3a.access.key,fs.s3a.secret.key,fs.adl.oauth2.credential,dfs.adls.oauth2.credential,fs.azure.account.oauth2.client.secret</value>
+  </property>
+  <property>
+    <name>mapreduce.job.acl-view-job</name>
+    <value> </value>
+  </property>
+  <property>
+    <name>mapreduce.job.acl-modify-job</name>
+    <value> </value>
+  </property>
+  <property>
+    <name>mapreduce.cluster.acls.enabled</name>
+    <value>false</value>
+  </property>
+</configuration>

+ 10 - 0
confs/idctest/conf/spark-defaults.conf

@@ -0,0 +1,10 @@
+spark.master yarn
+spark.submit.deployMode cluster
+spark.sql.hive.metastore.version 2.1
+spark.sql.hive.convertMetastoreOrc false
+spark.sql.hive.convertMetastoreParquet false
+spark.yarn.jars hdfs:/user/aiuser/sparkcdh_jars/*
+spark.yarn.dist.archives hdfs:/user/aiuser/py37.zip#python3env
+spark.pyspark.python ./python3env/py37/bin/python
+# spark.yarn.queue root.default
+spark.yarn.queue root.aiuser

+ 1 - 0
confs/idctest/conf/spark-env.sh

@@ -0,0 +1 @@
+export SPARK_DIST_CLASSPATH=$SPARK_DIST_CLASSPATH:/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/*:/opt/cloudera/parcels/CDH/lib/spark/jars/*

+ 4 - 0
confs/idctest/conf/submit.sh

@@ -0,0 +1,4 @@
+$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi \
+--master yarn \
+--deploy-mode cluster \
+$SPARK_HOME/examples/jars/spark-examples_2.12-3.0.3.jar 10

+ 125 - 0
confs/idctest/conf/yarn-site.xml

@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--Autogenerated by Cloudera Manager-->
+<configuration>
+  <property>
+    <name>yarn.acl.enable</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>yarn.admin.acl</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.address</name>
+    <value>cdhtestnm02.yili.com:8032</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.admin.address</name>
+    <value>cdhtestnm02.yili.com:8033</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.scheduler.address</name>
+    <value>cdhtestnm02.yili.com:8030</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.resource-tracker.address</name>
+    <value>cdhtestnm02.yili.com:8031</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.webapp.address</name>
+    <value>cdhtestnm02.yili.com:8088</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.webapp.https.address</name>
+    <value>cdhtestnm02.yili.com:8090</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.client.thread-count</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.scheduler.client.thread-count</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.admin.client.thread-count</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.minimum-allocation-mb</name>
+    <value>5120</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.increment-allocation-mb</name>
+    <value>512</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.maximum-allocation-mb</name>
+    <value>49152</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.minimum-allocation-vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.increment-allocation-vcores</name>
+    <value>1</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.maximum-allocation-vcores</name>
+    <value>64</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.amliveliness-monitor.interval-ms</name>
+    <value>1000</value>
+  </property>
+  <property>
+    <name>yarn.am.liveness-monitor.expiry-interval-ms</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.am.max-attempts</name>
+    <value>2</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name>
+    <value>1000</value>
+  </property>
+  <property>
+    <name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
+    <value>600000</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
+    <value>50</value>
+  </property>
+  <property>
+    <name>yarn.application.classpath</name>
+    <value>/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/*,$HADOOP_CLIENT_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.scheduler.class</name>
+    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
+  </property>
+  <property>
+    <name>yarn.scheduler.capacity.resource-calculator</name>
+    <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
+  </property>
+  <property>
+    <name>yarn.resourcemanager.max-completed-applications</name>
+    <value>10000</value>
+  </property>
+  <property>
+    <name>yarn.nodemanager.remote-app-log-dir</name>
+    <value>/tmp/logs</value>
+  </property>
+  <property>
+    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
+    <value>logs</value>
+  </property>
+</configuration>