#!/bin/bash
#
# Helper for a PaddleOCR table-recognition model: edit its config, train it
# (single process or distributed), export it for inference, and run inference.
#
# Usage: <this script> {edit|train|train_distr|export|infer}
#
# NOTE(review): every path below is relative, so this presumably has to be run
# from the root of a PaddleOCR checkout -- confirm.

# -e: stop on the first failing command; -u: unset variables are errors;
# -x: trace each command as it runs.
set -eux

# Start-up timestamp (seconds since the epoch). It is appended to the inference
# output directory name so that separate runs write to separate directories.
# Assignment and `readonly` are split so a failing `date` is not masked.
CUR_TIME=$(date "+%s")
readonly CUR_TIME

# SLANet_ch is currently PaddleOCR's best pretrained model for Chinese tables.
# https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.7/doc/doc_ch/table_recognition.md
readonly MODEL_NAME="SLANet_ch"

# Config file of the model above. This must be derived from MODEL_NAME: the
# previously referenced TABLE_NAME is never defined and aborts under `set -u`.
readonly MODEL_CONF="configs/table/${MODEL_NAME}.yml"

# Inference parameters.
readonly INFER_IMG_DIR="train_data/table-dataset/conv.v16i/all"
#######################################
# Open the model's config file in vim for manual editing.
# Globals:   MODEL_CONF (read) - path of the config file to open
# Arguments: none
# Returns:   exit status of vim
#######################################
edit_model() {
  vim "${MODEL_CONF}"
}
#######################################
# Run tools/train.py once, directly through python3, with the model's config.
# Globals:   MODEL_CONF (read) - config file passed via -c
# Arguments: none
# Returns:   exit status of the python3 process
#######################################
train_model() {
  python3 tools/train.py -c "${MODEL_CONF}"
}
#######################################
# Run tools/train.py through the paddle.distributed.launch module.
# Globals:   MODEL_CONF (read) - config file passed via -c
#            TRAIN_GPUS (read, optional) - comma-separated GPU ids handed to
#              --gpus; defaults to 0,1,2,3,4,5,6,7 when unset or empty
# Arguments: none
# Returns:   exit status of the python3 process
#######################################
train_model_distr() {
  python3 \
    -m paddle.distributed.launch \
    --gpus "${TRAIN_GPUS:-0,1,2,3,4,5,6,7}" \
    tools/train.py -c "${MODEL_CONF}"
}
#######################################
# Run tools/export_model.py for the configured model.
# Two config overrides are passed after -o:
#   Global.pretrained_model   = ./output/<MODEL_NAME>/best_accuracy
#   Global.save_inference_dir = ./inference/<MODEL_NAME>
# Globals:   MODEL_CONF (read), MODEL_NAME (read)
# Arguments: none
# Returns:   exit status of the python3 process
#######################################
export_model() {
  python3 tools/export_model.py \
    -c "${MODEL_CONF}" \
    -o Global.pretrained_model="./output/${MODEL_NAME}/best_accuracy" \
    Global.save_inference_dir="./inference/${MODEL_NAME}"
}
#######################################
# Run ppstructure/table/predict_structure.py on the images in INFER_IMG_DIR,
# using the model directory inference/<MODEL_NAME>.
# Results are directed to inference_results/<MODEL_NAME>_<CUR_TIME>, so the
# output directory name changes with the script's start-up timestamp.
# Globals:   MODEL_NAME (read), INFER_IMG_DIR (read), CUR_TIME (read)
# Arguments: none
# Returns:   exit status of the python3 process
#######################################
infer_model() {
  python3 ppstructure/table/predict_structure.py \
    --table_model_dir=inference/"${MODEL_NAME}" \
    --rec_char_dict_path="./ppocr/utils/ppocr_keys_v1.txt" \
    --table_char_dict_path="./ppocr/utils/dict/table_structure_dict_ch.txt" \
    --image_dir="${INFER_IMG_DIR}" \
    --output="inference_results/${MODEL_NAME}_${CUR_TIME}"
}
#######################################
# Dispatch the requested sub-command to its handler function.
# Arguments: $1 - one of: edit, train, train_distr, export, infer
# Outputs:   on a missing or unknown sub-command, an error and a usage line
#            on stderr
# Returns:   the handler's status; exits the shell with 1 on a missing or
#            unknown sub-command
#######################################
main() {
  # Default to empty so a missing argument reaches the error branch below
  # instead of dying with "unbound variable" under `set -u`.
  local action="${1:-}"

  case "${action}" in
    edit)
      edit_model
      ;;
    train)
      train_model
      ;;
    train_distr)
      train_model_distr
      ;;
    export)
      export_model
      ;;
    infer)
      infer_model
      ;;
    *)
      printf 'Invalid option: %s\n' "${action}" >&2
      printf 'Usage: %s {edit|train|train_distr|export|infer}\n' "${0##*/}" >&2
      exit 1
      ;;
  esac
}
# Script entry point: forward all command-line arguments to main.
main "$@"