# main.py
from itertools import chain
from pathlib import Path

import numpy as np
from mdutils.mdutils import MdUtils

from YQ_OCR.utils.datasets import Dataset
from YQ_OCR.utils.text2md import TableMD
# NOTE(review): wildcard import; presumably the source of imgs_path,
# open_true_json, send_request and parse_result -- confirm and import them
# explicitly.
from YQ_OCR.utils.utils import *
  7. # 1. xlsx -> 正确json文件(写入厂家信息)
  8. # 2. 发送图片(带正确json文件)
  9. # 3. 把返回的json 和正确的json 进行对比(有key--用返回结果与正确结果比对,无key--用正确结果与返回结果比对)
  10. if __name__ == '__main__':
  11. img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg', 'png', 'jpeg']])
  12. all_rate = []
  13. table_mean_acc = []
  14. for img_path in img_paths:
  15. print(img_path)
  16. true_d, true_json = open_true_json(img_path.with_suffix('.json'))
  17. result = send_request(img_path, true_json)
  18. res_d = parse_result(result)
  19. markdown = TableMD(img_path.name)
  20. markdown.write_header(title='推理结果', level=2)
  21. # json result
  22. rate, statistics = markdown.evaluate_one(true_d, res_d)
  23. all_rate.append(rate)
  24. print(f'文字识别正确率:{rate:.2f}%')
  25. # table gt result
  26. dataset = Dataset(gt_file=img_path.with_suffix('.txt'), img_name=img_path.name, results=res_d)
  27. markdown.write_table_accuracy(ds=dataset, key='new')
  28. table_acc = markdown.get_table_accuracy()
  29. table_mean_acc.append(table_acc)
  30. print(f'表格识别正确率:{table_acc:.2f}%')
  31. markdown.f.create_md_file()
  32. print('----------------------------------------')
  33. all_rate = "{:.2f}%".format(np.mean(all_rate))
  34. all_table_rate = "{:.2f}%".format(np.mean(table_mean_acc))
  35. print(f'文字识别总体正确率:{all_rate}')
  36. print(f'表格识别总体正确率:{all_table_rate}')