# ocr.py

import logging
from dataclasses import dataclass

# NOTE: the relative order of the imports below is kept as in the original
# file, because the wildcard imports make name resolution order-dependent.
import cv2
from core.line_parser import LineParser
from core.parser import *
from core.direction import *
import numpy as np
from paddleocr import PaddleOCR
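# Pipeline overview:
#   input:  the picture and its picture type
#   1. Rotate the picture upright.
#   2. Re-run recognition on the picture to get the OCR result.
#   3. Group the OCR result into lines.
#   4. Apply field-level logic to the lines to build a dict.
#   output: dict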
  14. @dataclass
  15. class IdCardOcr:
  16. ocr: PaddleOCR
  17. # 角度探测器
  18. angle_detector: AngleDetector
  19. # master
  20. def predict(self, image: np.ndarray, image_type: str):
  21. img_type = int(image_type)
  22. image, angle, result = self._rotate_img(image, img_type)
  23. print(f'---------- detect angle: {angle} 图片角度 ----------')
  24. if angle != 0:
  25. _, _, result = self._ocr(image)
  26. return self._post_process(result, angle, image_type)
  27. # 检测角度
  28. def _rotate_img(self, image, image_type) -> (np.ndarray, int):
  29. angle, result = self.angle_detector.detect_angle(image, image_type)
  30. if angle == 1:
  31. image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
  32. if angle == 2:
  33. image = cv2.rotate(image, cv2.ROTATE_180)
  34. if angle == 3:
  35. image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
  36. # print("图片的角度:",angle)
  37. return image, angle, result
  38. def _ocr(self, image):
  39. # 获取模型检测结果
  40. result = self.ocr.ocr(image, cls=True)
  41. print("------------------")
  42. print(result)
  43. if not result:
  44. raise Exception('无法识别')
  45. confs = [line[1][1] for line in result]
  46. # 将检测到的文字放到一个列表中
  47. txts = [line[1][0] for line in result]
  48. print("......................................")
  49. print(txts)
  50. print("......................................")
  51. return txts, confs, result
  52. def _post_process(self, result, angle: int, image_type: str):
  53. # 行处理
  54. line_parser = LineParser(result)
  55. line_result = line_parser.parse()
  56. conf = line_parser.confidence # 平均conf
  57. # 0:常驻人口面
  58. # 1:户口本首页
  59. if int(image_type) == 0:
  60. pass
  61. parser = PeopleRegBookParser(line_result)
  62. elif int(image_type) == 1:
  63. pass
  64. parser = FrontRegBookParser(line_result)
  65. else:
  66. raise Exception('未传入 image_type')
  67. # 字段逻辑处理后对res(dict)
  68. ocr_res = parser.parse()
  69. res = {
  70. "confidence": conf,
  71. "img_type": str(image_type),
  72. "orientation": angle, # 原angle是逆时针,转成顺时针
  73. **ocr_res
  74. }
  75. return res