from dataclasses import dataclass
from typing import Any, Dict, List, Tuple
from core.line_parser import LineParser
# NOTE(review): cv2, AngleDetector, CETParser and TEMParser are used below but
# never imported by name; presumably they arrive through the two star imports
# that follow -- TODO confirm.
from core.parser import *
from core.direction import *
import numpy as np
from paddleocr import PaddleOCR


# Pipeline overview (input: a picture; output: a dict):
#   1. rotate the picture so that it is upright
#   2. run recognition on the picture (raw OCR result)
#   3. group the raw OCR result into lines
#   4. apply the field-level logic to the lines (produces a dict)


# Original author comment: "ID-card OCR".
@dataclass
class CetOcr:
    """OCR pipeline that turns a picture into a dict of recognised fields.

    The picture's angle and type come from ``angle_detector``; the raw OCR
    result is grouped into lines by ``LineParser`` and then handed to
    ``CETParser`` (image type 0) or ``TEMParser`` (image type 1).
    """

    # OCR engine; within this class it is only used by ``_ocr``.
    ocr: PaddleOCR
    # Angle detector; must expose ``detect_angle(image)``.
    angle_detector: AngleDetector

    def predict(self, image: np.ndarray) -> Dict[str, Any]:
        """Run the whole pipeline on ``image`` and return the result dict.

        Args:
            image: The picture to analyse.

        Returns:
            The dict built by ``_post_process``: ``confidence``,
            ``card_type`` and ``orientation`` plus every key produced by
            the selected parser.

        Raises:
            Exception: If the detected image type is neither 0 nor 1.
        """
        # The rotated image is returned by _pre_process but not used further
        # here; only the angle, raw OCR result and image type are forwarded.
        image, angle, result, image_type = self._pre_process(image)
        print(f'---------- detect angle: {angle} 角度 --------')
        return self._post_process(result, angle, image_type)

    def _pre_process(self, image: np.ndarray) -> Tuple[np.ndarray, int, Any, Any]:
        """Detect the picture's angle and rotate the picture accordingly.

        Args:
            image: The picture (passed to ``cv2.rotate``).

        Returns:
            A 4-tuple ``(image, angle, result, image_type)`` where ``image``
            is the (possibly rotated) picture and the other three values are
            exactly what ``angle_detector.detect_angle`` returned.
        """
        # Per the original comment, `result` is the raw OCR output --
        # TODO confirm against AngleDetector.detect_angle.
        angle, result, image_type = self.angle_detector.detect_angle(image)

        # The angle codes are mutually exclusive; any other value leaves the
        # picture untouched.
        if angle == 1:
            image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
            # NOTE(review): debug print; the collapsed source does not make
            # clear whether it belonged inside this branch -- confirm.
            print(angle)  # counter-clockwise
        elif angle == 2:
            image = cv2.rotate(image, cv2.ROTATE_180)
        elif angle == 3:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

        return image, angle, result, image_type

    def _ocr(self, image) -> Tuple[List[Any], List[Any], Any]:
        """Run the OCR engine on ``image`` and split out texts and scores.

        Not called by any other method visible in this class.

        Args:
            image: The picture handed to ``self.ocr.ocr(image, cls=True)``.

        Returns:
            ``(txts, confs, result)``: ``txts`` holds ``line[1][0]`` and
            ``confs`` holds ``line[1][1]`` for every entry of the raw
            ``result``.

        Raises:
            Exception: If the engine returns an empty/falsy result.
        """
        # NOTE(review): this assumes each entry of `result` looks like
        # [box, (text, score)]; verify against the installed PaddleOCR
        # version.
        result = self.ocr.ocr(image, cls=True)
        print("------------------")
        print(result)
        if not result:
            raise Exception('无法识别')

        confs = [line[1][1] for line in result]
        # Collect the detected texts into a list.
        txts = [line[1][0] for line in result]
        return txts, confs, result

    def _post_process(self, result, angle: int, image_type) -> Dict[str, Any]:
        """Group the raw OCR result into lines and parse the fields.

        Args:
            result: Raw OCR result as returned by the angle detector.
            angle: Detected angle code; copied into ``orientation``.
            image_type: Detected picture type; must convert to int 0 or 1.

        Returns:
            A dict with ``confidence``, ``card_type`` (``str(image_type)``),
            ``orientation`` and every key of the parser's output. Parser
            keys come last, so they win on a name clash.

        Raises:
            Exception: If ``int(image_type)`` is neither 0 nor 1.
        """
        # Predicates handed to LineParser: one tests `is_slope`, the other
        # tests whether the text, with spaces removed, is purely ASCII
        # letters (bytes.isalpha on the UTF-8 encoding).
        filters = [
            lambda x: x.is_slope,
            lambda x: x.txt.replace(' ', '').encode('utf-8').isalpha(),
        ]
        line_parser = LineParser(result, filters)
        line_result = line_parser.parse()
        print('-------------')
        print(line_result)
        print('-------------')
        conf = line_parser.confidence

        # Convert once, then choose the field parser for this picture type.
        type_code = int(image_type)
        if type_code == 0:
            parser = CETParser(line_result)
        elif type_code == 1:
            parser = TEMParser(line_result)
        else:
            raise Exception('无法识别')

        # Field-level logic; yields the dict merged into the response.
        ocr_res = parser.parse()
        res = {
            "confidence": conf,
            "card_type": str(image_type),
            # NOTE(review): the original comment says the angle is
            # counter-clockwise and gets converted to clockwise, but no
            # conversion happens here -- confirm the intended value.
            "orientation": angle,
            **ocr_res
        }
        print(res)
        return res

    # def _get_type(self, image) -> int: