spark-submit_lr.sh

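# Usage: ./spark-submit_lr.sh [df]
# With no argument ModelType stays "lrDemo"; passing "df" switches it to
# "lrDemo_df". ModelType feeds the banner below and the YARN application name.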
ModelType=lrDemo
if [ $# -gt 0 ]; then
  if [ "$1" == "df" ]; then
    ModelType=lrDemo_df
  fi
fi
echo -e "\n--------------------\nModelType: ${ModelType}\n--------------------\n"
CUR_PATH=$(cd "$(dirname "$0")"; pwd)
echo "$CUR_PATH"
# Earlier single-path arguments, kept commented out for reference:
#input_path_1=hdfs://192.168.199.27:9000/user/sxkj/train.txt
#input_path_2=hdfs://192.168.199.27:9000/user/sxkj/test.txt
#output_path=hdfs://192.168.199.27:9000/tmp/sparkDemo/${ModelType}
# The demo script now takes its inputs and outputs as two JSON arguments:
inputs='{"train_df": "hdfs://192.168.199.27:9000/user/sxkj/train.txt", "test_df": "hdfs://192.168.199.27:9000/user/sxkj/test.txt"}'
outputs='["hdfs://192.168.199.27:9000/tmp/sparkDemo/lrDemo/target/output1"]'
# Uncomment to clear the output of a previous run:
# hdfs dfs -rm -r $output_path
sourceDIR=/home/sxkj/bigdata
who=sxkj
# CUR_PATH=$(cd "$(dirname "$0")"; pwd)
# sourceDIR=${CUR_PATH}
# Optional YARN queue; if set, pass it with --queue in the spark-submit call below.
YARN_QUEUE=
DEPLOY_MODE=cluster
#DEPLOY_MODE=client
# Submit the PySpark job. --archives ships py37.zip to the cluster, where YARN
# unpacks it under the alias "python3env"; the relative PYSPARK_PYTHON paths
# below point into that unpacked environment, while the driver python conf
# points at the full environment under ${sourceDIR}.
${SPARK_HOME}/bin/spark-submit \
  --master yarn \
  --name "spark_${ModelType}_${who}" \
  --deploy-mode ${DEPLOY_MODE} \
  --driver-memory 1g \
  --driver-cores 1 \
  --executor-memory 1g \
  --executor-cores 1 \
  --num-executors 1 \
  --archives ${sourceDIR}/py37.zip#python3env \
  --conf spark.default.parallelism=1 \
  --conf spark.executor.memoryOverhead=1g \
  --conf spark.driver.memoryOverhead=1g \
  --conf spark.yarn.maxAppAttempts=3 \
  --conf spark.yarn.submit.waitAppCompletion=true \
  --conf spark.pyspark.driver.python=${sourceDIR}/py37/bin/python \
  --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3env/py37/bin/python \
  --conf spark.pyspark.python=python3env/py37/bin/python \
  ./lr_df_demo.py "$inputs" "$outputs"
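
The submitted lr_df_demo.py is not shown on this page, but the submit script fixes its argument contract: sys.argv[1] is a JSON object mapping "train_df"/"test_df" to HDFS paths, and sys.argv[2] is a JSON array of output paths. Below is a minimal sketch of a compatible script; the LIBSVM input format, the LogisticRegression hyperparameters, and the parquet output are assumptions for illustration, not the demo's actual code.

# lr_df_demo.py -- hypothetical sketch; only the argv contract is taken from
# the submit script above, everything else is an assumed implementation.
import json
import sys

from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression

def main():
    inputs = json.loads(sys.argv[1])    # {"train_df": "...", "test_df": "..."}
    outputs = json.loads(sys.argv[2])   # ["hdfs://.../output1"]

    spark = SparkSession.builder.appName("lrDemo_df").getOrCreate()

    # Assumes train.txt / test.txt are LIBSVM-formatted ("label idx:val ...");
    # swap in a different reader if the demo data uses another layout.
    train = spark.read.format("libsvm").load(inputs["train_df"])
    test = spark.read.format("libsvm").load(inputs["test_df"])

    # Assumed hyperparameters for the demo model.
    model = LogisticRegression(maxIter=10, regParam=0.01).fit(train)

    # Write predictions to the first (and only) declared output path.
    model.transform(test).write.mode("overwrite").parquet(outputs[0])

    spark.stop()

if __name__ == "__main__":
    main()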