#!/bin/bash

# Select the model type: pass "df" as the first argument to run the DataFrame variant.
ModelType=lrDemo
if [ $# -gt 0 ]; then
    if [ "$1" = "df" ]; then
        ModelType=lrDemo_df
    fi
fi

echo -e "\n--------------------\nModelType: ${ModelType}\n--------------------\n"

CUR_PATH=$(cd "$(dirname "$0")"; pwd)
echo "$CUR_PATH"

#
#input_path_1=hdfs://192.168.199.27:9000/user/sxkj/train.txt
#input_path_2=hdfs://192.168.199.27:9000/user/sxkj/test.txt
#output_path=hdfs://192.168.199.27:9000/tmp/sparkDemo/${ModelType}

# Input and output locations, passed to the driver script as JSON strings.
inputs='{"train_df": "hdfs://192.168.199.27:9000/user/sxkj/train.txt", "test_df": "hdfs://192.168.199.27:9000/user/sxkj/test.txt"}'
outputs='["hdfs://192.168.199.27:9000/tmp/sparkDemo/lrDemo/target/output1"]'

# hdfs dfs -rm -r $output_path

sourceDIR=/home/sxkj/bigdata
who=sxkj
# CUR_PATH=$(cd "$(dirname "$0")"; pwd)
# sourceDIR=${CUR_PATH}
YARN_QUEUE=            # optional YARN queue name (not passed to spark-submit below)
#DEPLOY_MODE=cluster
DEPLOY_MODE=client

# Submit the PySpark job to YARN. The py37.zip archive is unpacked on the
# cluster as "python3env" and supplies the executors' Python interpreter;
# in client mode the driver uses the local py37 interpreter under ${sourceDIR}.
${SPARK_HOME}/bin/spark-submit \
    --master yarn \
    --name "spark_${ModelType}_${who}" \
    --deploy-mode ${DEPLOY_MODE} \
    --driver-memory 1g \
    --driver-cores 1 \
    --executor-memory 1g \
    --executor-cores 1 \
    --num-executors 1 \
    --archives ${sourceDIR}/py37.zip#python3env \
    --conf spark.default.parallelism=1 \
    --conf spark.executor.memoryOverhead=1g \
    --conf spark.driver.memoryOverhead=1g \
    --conf spark.yarn.maxAppAttempts=3 \
    --conf spark.yarn.submit.waitAppCompletion=true \
    --conf spark.pyspark.driver.python=${sourceDIR}/py37/bin/python \
    --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3env/py37/bin/python \
    --conf spark.pyspark.python=python3env/py37/bin/python \
    ./lr_df_demo.py "$inputs" "$outputs"
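
# Usage (a sketch; the filename "run_lr_demo.sh" is assumed, not given in the source):
#   bash run_lr_demo.sh       # submit the default lrDemo job
#   bash run_lr_demo.sh df    # switch ModelType to lrDemo_df before submitting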