#!/usr/bin/env bash
#
# Submit the logistic-regression demo (lr_df_demo.py) to YARN via spark-submit.
#
# Usage: ./run.sh [df]
#   Pass "df" as the first argument to select the DataFrame variant
#   (ModelType=lrDemo_df); otherwise ModelType=lrDemo.
#
# Required env: SPARK_HOME must point at a Spark installation.
set -euo pipefail

# Fail fast with a clear message instead of a broken spark-submit path.
: "${SPARK_HOME:?SPARK_HOME must be set}"

ModelType=lrDemo
if [[ $# -gt 0 && "$1" == "df" ]]; then
  ModelType=lrDemo_df
fi
printf '\n--------------------\nModelType: %s\n--------------------\n\n' "${ModelType}"

# Directory containing this script (kept for parity with the original; the
# submit below still references lr_df_demo.py relative to the cwd).
CUR_PATH=$(cd "$(dirname "$0")" && pwd)
printf '%s\n' "${CUR_PATH}"

# Alternative raw-path form, superseded by the JSON specs below:
#input_path_1=hdfs://192.168.199.27:9000/user/sxkj/train.txt
#input_path_2=hdfs://192.168.199.27:9000/user/sxkj/test.txt
#output_path=hdfs://192.168.199.27:9000/tmp/sparkDemo/${ModelType}

# JSON input/output specs passed verbatim to lr_df_demo.py as argv[1]/argv[2].
inputs='{"train_df": "hdfs://192.168.199.27:9000/user/sxkj/train.txt", "test_df": "hdfs://192.168.199.27:9000/user/sxkj/test.txt"}'
outputs='["hdfs://192.168.199.27:9000/tmp/sparkDemo/lrDemo/target/output1"]'
# hdfs dfs -rm -r $output_path

sourceDIR=/home/sxkj/bigdata    # host path holding the py37.zip conda env archive
who=sxkj
# CUR_PATH=$(cd "$(dirname "$0")";pwd)
# sourceDIR=${CUR_PATH}
YARN_QUEUE=                     # TODO(review): unused — wire to --queue or remove
# NOTE(review): the original assigned DEPLOY_MODE=client but hardcoded
# "--deploy-mode cluster" in the submit, so the variable was dead. The
# variable is now actually used, set to the mode the script really ran in.
DEPLOY_MODE=cluster

"${SPARK_HOME}/bin/spark-submit" \
  --master yarn \
  --name "spark_${ModelType}_${who}" \
  --deploy-mode "${DEPLOY_MODE}" \
  --driver-memory 1g \
  --driver-cores 1 \
  --executor-memory 1g \
  --executor-cores 1 \
  --num-executors 1 \
  --archives "${sourceDIR}/py37.zip#python3env" \
  --conf spark.default.parallelism=1 \
  --conf spark.executor.memoryOverhead=1g \
  --conf spark.driver.memoryOverhead=1g \
  --conf spark.yarn.maxAppAttempts=3 \
  --conf spark.yarn.submit.waitAppCompletion=true \
  --conf spark.pyspark.driver.python="${sourceDIR}/py37/bin/python" \
  --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3env/py37/bin/python \
  --conf spark.pyspark.python=python3env/py37/bin/python \
  ./lr_df_demo.py "$inputs" "$outputs"