#!/usr/bin/env bash
# spark-submit_1009_hdfs.sh — submit spark_script_demo_1009.py to YARN in cluster mode,
# shipping a bundled Python 3.7 environment from HDFS via --archives.
  1. inputs='{"train_df": "train", "test_df": "test"}'
  2. outputs='["results_tmp_11_10_0"]' # results_tmp_{task_id}_{sub_node_id}_{output_pin_id}
  3. sourceDIR=/home/sxkj/bigdata
  4. ${SPARK_HOME}/bin/spark-submit \
  5. --master yarn \
  6. --name "spark_demo_1009" \
  7. --deploy-mode cluster \
  8. --driver-memory 1g \
  9. --driver-cores 1 \
  10. --executor-memory 1g \
  11. --executor-cores 1 \
  12. --num-executors 1 \
  13. --archives hdfs://192.168.199.27:9000/tmp/lyl/py37.zip#python3env \
  14. --conf spark.default.parallelism=1 \
  15. --conf spark.executor.memoryOverhead=1g \
  16. --conf spark.driver.memoryOverhead=1g \
  17. --conf spark.yarn.maxAppAttempts=3 \
  18. --conf spark.yarn.submit.waitAppCompletion=true \
  19. --conf spark.pyspark.driver.python=${sourceDIR}/py37/bin/python \
  20. --conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=python3env/py37/bin/python \
  21. --conf spark.pyspark.python=python3env/py37/bin/python \
  22. ./spark_script_demo_1009.py "$inputs" "$outputs"