ocr.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. from dataclasses import dataclass
  2. from typing import Any
  3. from core.line_parser import LineParser
  4. from core.parser import *
  5. from core.direction import *
  6. import numpy as np
  7. from paddleocr import PaddleOCR
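# Pipeline overview.
# Input: the picture (pic) and its pic_type.
#   1. Rotate the picture so that it is upright.
#   2. Re-run recognition on the upright picture to get the raw OCR result.
#   3. Group the raw OCR result into text lines.
#   4. Apply field-level logic to the text lines to build a dict.
# Output: dict.
# ID-card OCR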
  15. @dataclass
  16. class IdCardOcr:
  17. ocr: PaddleOCR
  18. # 角度探测器
  19. angle_detector: AngleDetector
  20. # 检测
  21. # <- 传入pic pic_type
  22. # -> dict
  23. def predict(self, image: np.ndarray, image_type) -> ():
  24. image_type = int(image_type)
  25. # 旋转后img angle result(生ocr)
  26. image, angle, result = self._pre_process(image, image_type)
  27. print(f'---------- detect angle: {angle} 角度 --------')
  28. if image_type == 0:
  29. if angle != 0:
  30. # 角度不为0需要重新识别,字面
  31. _, _, result = self._ocr(image)
  32. else:
  33. _, _, result = self._ocr(image)
  34. # ==> result(正向img-> 生ocr)
  35. return self._post_process(result, angle, image_type)
  36. # 预处理(旋转图片)
  37. # <- img(cv2) img_type
  38. # -> 正向的img(旋转后) 源img角度 result(ocr生)
  39. def _pre_process(self, image, image_type) -> (np.ndarray, int, Any):
  40. # pic角度 result(ocr生)
  41. angle, result = self.angle_detector.detect_angle(image, image_type)
  42. if angle == 1:
  43. image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
  44. print(angle) # 逆时针
  45. if angle == 2:
  46. image = cv2.rotate(image, cv2.ROTATE_180)
  47. if angle == 3:
  48. image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
  49. return image, angle, result
  50. # 获取模型检测结果
  51. def _ocr(self, image):
  52. result = self.ocr.ocr(image, cls=True)
  53. print("------------------")
  54. print(result)
  55. if not result:
  56. raise Exception('无法识别')
  57. confs = [line[1][1] for line in result]
  58. # 将检测到的文字放到一个列表中
  59. txts = [line[1][0] for line in result]
  60. # print("......................................")
  61. # print(txts)
  62. # print("......................................")
  63. return txts, confs, result
  64. # <- result(正向img_生ocr) angle img_type
  65. # == 对 正向img_res 进行[行处理]
  66. # -> 最后要返回的结果 dict
  67. def _post_process(self, result, angle: int, image_type):
  68. line_parser = LineParser(result)
  69. line_result = line_parser.parse()
  70. print('-------------')
  71. print(line_result)
  72. print('-------------')
  73. conf = line_parser.confidence
  74. if int(image_type) == 0:
  75. parser = FrontParser(line_result)
  76. elif int(image_type) == 1:
  77. parser = BackParser(line_result)
  78. else:
  79. raise Exception('无法识别')
  80. # 字段逻辑处理后对res(dict)
  81. ocr_res = parser.parse()
  82. res = {
  83. "confidence": conf,
  84. "card_type": str(image_type),
  85. "orientation": angle, # 原angle是逆时针,转成顺时针
  86. **ocr_res
  87. }
  88. print(res)
  89. return res