#!/bin/bash
# table_model.sh
#
# Helper for the PaddleOCR table-recognition model: open its config in an
# editor, train it (single process or distributed), export it, and run
# inference. The sub-command is chosen by the first command-line argument.

# -e: stop on the first failing command; -u: unset variables are errors;
# -x: trace every command; pipefail: a pipeline fails if any stage fails.
set -eux
set -o pipefail

# SLANet_ch is currently the best pretrained Chinese table-recognition model
# in PaddleOCR.
# https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.7/doc/doc_ch/table_recognition.md
readonly MODEL_NAME="SLANet_ch"
readonly MODEL_CONF="configs/table/${MODEL_NAME}.yml"

# Inference parameters.
readonly INFER_IMG_DIR="train_data/table-dataset/conv.v16i/all"

# Unix epoch seconds at script start; infer_model appends it to the output
# directory name. Assigned before being marked readonly so that a failing
# `date` is caught by `set -e` instead of being masked by `readonly`'s own
# exit status (ShellCheck SC2155).
CUR_TIME=$(date "+%s")
readonly CUR_TIME
  # Open the model configuration file (MODEL_CONF) in vim.
  # Globals: MODEL_CONF (read)
  edit_model() {
    local conf_path="${MODEL_CONF}"
    vim "${conf_path}"
  }
  # Run tools/train.py in a single process with the config in MODEL_CONF.
  # Globals: MODEL_CONF (read)
  # Returns: the exit status of the python3 invocation
  train_model() {
    local -a train_cmd=(python3 tools/train.py -c "${MODEL_CONF}")
    "${train_cmd[@]}"
  }
  # Run tools/train.py through paddle.distributed.launch.
  # Globals:   MODEL_CONF (read); TRAIN_GPUS (read, optional)
  # Arguments: $1 - optional comma-separated GPU id list for --gpus.
  #                 Falls back to TRAIN_GPUS, then to "0,1,2,3,4,5,6,7"
  #                 (the value that used to be hard-coded here).
  # Returns:   the exit status of the python3 invocation
  train_model_distr() {
    # Nested defaults keep this safe under `set -u` when neither the
    # argument nor the environment variable is provided.
    local gpus="${1:-${TRAIN_GPUS:-0,1,2,3,4,5,6,7}}"
    python3 \
      -m paddle.distributed.launch \
      --gpus "${gpus}" \
      tools/train.py -c "${MODEL_CONF}"
  }
  # Run tools/export_model.py with the config in MODEL_CONF, overriding
  # Global.pretrained_model (best_accuracy under ./output/<model>) and
  # Global.save_inference_dir (./inference/<model>).
  # Globals: MODEL_CONF, MODEL_NAME (read)
  # Returns: the exit status of the python3 invocation
  export_model() {
    local -a overrides=(
      "Global.pretrained_model=./output/${MODEL_NAME}/best_accuracy"
      "Global.save_inference_dir=./inference/${MODEL_NAME}"
    )
    python3 tools/export_model.py -c "${MODEL_CONF}" -o "${overrides[@]}"
  }
  # Run ppstructure/table/predict_structure.py with the model directory
  # inference/<model>, the images in INFER_IMG_DIR, and an output directory
  # named inference_results/<model>_<CUR_TIME>.
  # Globals: MODEL_NAME, INFER_IMG_DIR, CUR_TIME (read)
  # Returns: the exit status of the python3 invocation
  infer_model() {
    local model_dir="inference/${MODEL_NAME}"
    local result_dir="inference_results/${MODEL_NAME}_${CUR_TIME}"
    local -a flags=(
      "--table_model_dir=${model_dir}"
      "--rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt"
      "--table_char_dict_path=./ppocr/utils/dict/table_structure_dict_ch.txt"
      "--image_dir=${INFER_IMG_DIR}"
      "--output=${result_dir}"
    )
    python3 ppstructure/table/predict_structure.py "${flags[@]}"
  }
  # Dispatch the sub-command given as the first command-line argument.
  # Arguments: $1 - one of: edit, train, train_distr, export, infer
  # Outputs:   for a missing or unknown sub-command, an error line and a
  #            usage line on stderr
  # Returns:   the status of the selected sub-command; exits 1 when the
  #            sub-command is missing or unknown
  main() {
    # Default to empty so a missing argument reaches the error branch below
    # instead of aborting with "unbound variable" under `set -u`.
    local action="${1:-}"

    case "${action}" in
      edit) edit_model ;;
      train) train_model ;;
      train_distr) train_model_distr ;;
      export) export_model ;;
      infer) infer_model ;;
      *)
        # Diagnostics belong on stderr so stdout stays clean for callers.
        printf 'Invalid option: %s\n' "${action}" >&2
        printf 'Usage: %s {edit|train|train_distr|export|infer}\n' "${0##*/}" >&2
        exit 1
        ;;
    esac
  }
  # Script entry point: forward all command-line arguments to main.
  main "$@"