from dataclasses import dataclass
from typing import Any, Tuple

import cv2
from core.line_parser import LineParser
from core.parser import *
from core.direction import *
import numpy as np
from paddleocr import PaddleOCR
from core.ranks_parse import *

# Pipeline: (pic, pic_type) -> dict
#   1. Rotate the picture upright.
#   2. Re-run recognition on the corrected picture (get res).
#   3. Line-level processing of res (get res).
#   4. Field-level logic on res (get dict).


@dataclass
class IdCardOcr:
    """Run OCR on a document image and turn the result into a field dict.

    The two supported ``image_type`` values select the parser pair used in
    ``_post_process``: ``0`` -> ``PeopleParser`` / ``PeopleRegBookParser``,
    ``1`` -> ``FrontParser`` / ``FrontRegBookParser``.
    """

    # OCR engine; called as ``self.ocr.ocr(image, cls=True)``.
    ocr: PaddleOCR
    # Angle detector; called as ``detect_angle(image, image_type)``.
    angle_detector: AngleDetector

    def predict(self, image: np.ndarray, image_type: str) -> dict:
        """Main entry point: orient the image, OCR it and parse the fields.

        Args:
            image: Input image array.
            image_type: ``"0"`` or ``"1"`` (anything ``int()`` accepts).

        Returns:
            The dict built by ``_post_process``.

        Raises:
            ValueError: If ``image_type`` is not 0 or 1 (or not an integer).
            RuntimeError: If the OCR engine returns no result.
        """
        img_type = int(image_type)
        # Fail fast: ``_post_process`` rejects any other value anyway, so do
        # it before spending time on angle detection and OCR.
        if img_type not in (0, 1):
            raise ValueError('未传入 image_type')

        image, angle, result = self._rotate_img(image, img_type)
        print(f'---------- detect angle: {angle} 图片角度 ----------')

        # The detector's result only matches the image if the image was left
        # untouched, i.e. type 0 with angle 0. A rotated image, or a type-1
        # image (which is cropped in ``_rotate_img``), must be OCR'd again.
        if angle != 0 or img_type != 0:
            _, _, result = self._ocr(image)

        return self._post_process(result, angle, image_type)

    def _rotate_img(self, image, image_type) -> Tuple[np.ndarray, int, Any]:
        """Detect the image angle, rotate accordingly and crop type-1 images.

        Args:
            image: Input image array.
            image_type: Integer image type; ``1`` triggers the top-half crop.

        Returns:
            ``(image, angle, result)`` where ``angle`` and ``result`` are
            exactly what ``angle_detector.detect_angle`` returned.
        """
        angle, result = self.angle_detector.detect_angle(image, image_type)

        # Angle codes 1/2/3 map to the three cv2 rotations; any other value
        # (including 0) leaves the image as-is.
        if angle == 1:
            image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif angle == 2:
            image = cv2.rotate(image, cv2.ROTATE_180)
        elif angle == 3:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

        if image_type == 1:
            # Keep only the top half of the rows.
            image = image[:image.shape[0] // 2]

        print(angle)
        return image, angle, result

    def _ocr(self, image):
        """Run the OCR model and split its output into texts and confidences.

        Returns:
            ``(txts, confs, result)``: recognised strings, their confidence
            values, and the raw OCR result.

        Raises:
            RuntimeError: If the OCR engine returns an empty result.
        """
        # Fetch the model's detection result.
        result = self.ocr.ocr(image, cls=True)
        print("------------------")
        print(result)
        if not result:
            raise RuntimeError('无法识别')

        # NOTE(review): this indexing assumes every entry looks like
        # ``[box, (text, confidence)]``. Some PaddleOCR releases wrap the
        # lines in one list per image - confirm against the pinned version.
        confs = [line[1][1] for line in result]
        # Collect the recognised text into a list.
        txts = [line[1][0] for line in result]
        print("......................................")
        print(txts)
        print("......................................")
        return txts, confs, result

    def _post_process(self, result, angle: int, image_type: str):
        # sourcery skip: inline-immediately-returned-variable, remove-unnecessary-cast
        """Parse the raw OCR result into the final response dict.

        Args:
            result: Raw OCR result, handed to the parsers unchanged.
            angle: Detected angle code, reported as ``"orientation"``.
            image_type: ``"0"`` = resident-population page,
                ``"1"`` = first page of the household register.

        Returns:
            A dict with ``"confidence"``, ``"img_type"`` and ``"orientation"``
            followed by the parsed fields (parsed fields win on key clashes).

        Raises:
            ValueError: If ``image_type`` is neither 0 nor 1.
        """
        img_type = int(image_type)

        # One parser instance per branch: confidence is read from the same
        # object that did the parsing, instead of from a second, freshly
        # constructed instance that never parsed anything.
        if img_type == 0:
            people_parser = PeopleParser(result)
            ranks_result = people_parser.parse()
            conf = people_parser.confidence
            parser = PeopleRegBookParser(ranks_result, LineParser(result).parse())
        elif img_type == 1:
            front_parser = FrontParser(result)
            front_result = front_parser.parse_f()
            conf = front_parser.confidence
            parser = FrontRegBookParser(front_result, LineParser(result).parse())
        else:
            raise ValueError('未传入 image_type')

        # Field-level logic; produces the dict of recognised fields.
        ocr_res = parser.parse()

        res = {
            "confidence": conf,
            "img_type": str(image_type),
            # NOTE(review): the original comment said the angle is
            # counter-clockwise and gets converted to clockwise, but no
            # conversion is applied here - confirm what callers expect.
            "orientation": angle,
            **ocr_res
        }
        return res