ocr.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. from dataclasses import dataclass
  2. import cv2
  3. from core.line_parser import LineParser
  4. from core.parser import *
  5. from core.direction import *
  6. import numpy as np
  7. from paddleocr import PaddleOCR
# <- input: pic, pic_type
# 1. Rotate pic (to the upright orientation)
# 2. Re-run recognition on pic (get res)
# 3. Line-process res (get res)
# 4. Field-level logical recognition on res (get dict)
# -> dict
  14. from core.ranks_parse import *
  15. @dataclass
  16. class IdCardOcr:
  17. ocr: PaddleOCR
  18. # 角度探测器
  19. angle_detector: AngleDetector
  20. # master
  21. def predict(self, image: np.ndarray, image_type: str):
  22. img_type = int(image_type)
  23. image, angle, result = self._rotate_img(image, img_type)
  24. print(f'---------- detect angle: {angle} 图片角度 ----------')
  25. if img_type == 0 and angle != 0 or img_type != 0:
  26. # 角度不为0需要重新识别,字面
  27. _, _, result = self._ocr(image)
  28. return self._post_process(result, angle, image_type)
  29. # 检测角度
  30. def _rotate_img(self, image, image_type) -> (np.ndarray, int):
  31. angle, result = self.angle_detector.detect_angle(image, image_type)
  32. if angle == 1:
  33. image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
  34. if angle == 2:
  35. image = cv2.rotate(image, cv2.ROTATE_180)
  36. if angle == 3:
  37. image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
  38. if image_type == 1:
  39. image = image[:image.shape[0] // 2]
  40. # image = cv2.resize(image, (int(image.shape[1] * 0.9999), int(image.shape[0] * 0.999)),interpolation=cv2.INTER_CUBIC)
  41. print(angle)
  42. return image, angle, result
  43. def _ocr(self, image):
  44. # 获取模型检测结果
  45. result = self.ocr.ocr(image, cls=True)
  46. print("------------------")
  47. print(result)
  48. if not result:
  49. raise Exception('无法识别')
  50. confs = [line[1][1] for line in result]
  51. # 将检测到的文字放到一个列表中
  52. txts = [line[1][0] for line in result]
  53. print("......................................")
  54. print(txts)
  55. print("......................................")
  56. return txts, confs, result
  57. def _post_process(self, result, angle: int, image_type: str):
  58. # sourcery skip: inline-immediately-returned-variable, remove-unnecessary-cast
  59. # 行处理
  60. # line_parser = LineParser(result)
  61. # line_result = line_parser.parse()
  62. # conf = line_parser.confidence
  63. # 0:常驻人口面
  64. # 1:户口本首页
  65. if int(image_type) == 0:
  66. ranks_result = PeopleParser(result).parse()
  67. conf = PeopleParser(result).confidence
  68. parser = PeopleRegBookParser(ranks_result, LineParser(result).parse())
  69. elif int(image_type) == 1:
  70. # result_ = FrontParser(result).parse_f() or LineParser(result).parse()
  71. conf = FrontParser(result).confidence
  72. parser = FrontRegBookParser(FrontParser(result).parse_f(), LineParser(result).parse())
  73. else:
  74. raise Exception('未传入 image_type')
  75. # 字段逻辑处理后对res(dict)
  76. ocr_res = parser.parse()
  77. res = {
  78. "confidence": conf,
  79. "img_type": str(image_type),
  80. "orientation": angle, # 原angle是逆时针,转成顺时针
  81. **ocr_res
  82. }
  83. return res