import math
import re
from dataclasses import dataclass

import cv2
import numpy as np
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

from core.direction import *
from core.line_parser import LineParser
from core.parser import *


@dataclass
class BankOcr:
    ocr: PaddleOCR
    angle_detector: AngleDetector

    def predict(self, image: np.ndarray):
        image, angle, ori_result = self._pre_process(image)
        print(f'---------- detected angle: {angle} --------')
        _, _, result = self._ocr(image)
        return self._post_process(result, angle)

    def imshow(self, image, result):
        # Visualize the OCR result and save it to disk.
        img = Image.fromarray(image).convert("RGB")
        boxes = [line[0] for line in result]
        txts = [line[1][0] for line in result]
        scores = [line[1][1] for line in result]
        im_show = draw_ocr(img, boxes, txts, scores, font_path="./simfang.ttf")
        im_show = Image.fromarray(im_show)
        im_show.save("./img.jpg")

    def _pre_process(self, image: np.ndarray):
        # Detect the card orientation and rotate the image upright.
        angle, result = self.angle_detector.detect_angle(image)
        if angle == 1:
            image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
        if angle == 2:
            image = cv2.rotate(image, cv2.ROTATE_180)
        if angle == 3:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

        # Downscale large images so that neither side exceeds roughly 1000 px.
        h, w, _ = image.shape
        h_ratio = 1 if h <= 1000 else h / 1000
        w_ratio = 1 if w <= 1000 else w / 1000
        if h_ratio != 1 or w_ratio != 1:
            ratio = h_ratio if h_ratio > w_ratio else w_ratio
            image = cv2.resize(image, (w // math.ceil(ratio), h // math.ceil(ratio)))
            print(image.shape)

        return image, angle, result

    def _ocr(self, image):
        # Run detection + recognition; the image is already upright,
        # so the direction classifier is not needed.
        result = self.ocr.ocr(image, cls=False)
        print("------------------")
        print("result:", result)
        print("------------------")

        # Empty result: fall back to the detector's original pipeline.
        if not result:
            result = self.angle_detector.origin_detect(image)
            # Still empty: nothing recognizable in the image.
            if not result:
                raise Exception('Recognition failed')
            confs = [line[1][1] for line in result]
            txts = [line[1][0] for line in result]
            return txts, confs, result

        confs = [line[1][1] for line in result]
        if len(result) == 1:
            # A single high-confidence line is accepted as-is.
            if confs[0] > 0.987:
                txts = [line[1][0] for line in result]
                return txts, confs, result
            else:
                result = self.angle_detector.origin_detect(image)
        elif len(result) == 2:
            is_oneline = self.angle_detector.det_oneline(result)
            if not is_oneline:
                txts = [line[1][0] for line in result]
                # Keep the result only if it is confident and contains a 16-20 digit card number.
                if not (any(map(lambda x: x > 0.987, confs))
                        and len(re.findall(r'\d{16,20}', ''.join(txts))) > 0):
                    result = self.angle_detector.origin_detect(image)
            else:
                if all(map(lambda x: x > 0.987, confs)):
                    # Two boxes on one line must not overlap horizontally.
                    l_box, r_box = [], []
                    l_box.extend(result[0][0])
                    r_box.extend(result[1][0])
                    l_max, _ = np.max(l_box, 0)
                    r_min, _ = np.min(r_box, 0)
                    if l_max > r_min:
                        result = self.angle_detector.origin_detect(image)
                else:
                    result = self.angle_detector.origin_detect(image)
        elif len(result) > 2:
            result = self.angle_detector.origin_detect(image)

        confs = [line[1][1] for line in result]
        txts = [line[1][0] for line in result]
        return txts, confs, result

    def _post_process(self, raw_result, angle: int):
        line_parser = LineParser(raw_result)
        line_results = line_parser.parse()
        conf = line_parser.confidence
        parser = Parser(line_results)
        content = parser.parse()

        return {
            "confidence": conf,
            "orientation": angle,
            "number": content["number"].to_dict(),
        }
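

# Minimal usage sketch (not part of the original module). The PaddleOCR
# constructor arguments and the AngleDetector constructor signature below
# are assumptions for illustration; adjust them to whatever core.direction
# actually defines.
if __name__ == "__main__":
    ocr = PaddleOCR(use_angle_cls=False, lang="ch")  # assumed arguments
    detector = AngleDetector(ocr)                    # assumed signature
    bank_ocr = BankOcr(ocr=ocr, angle_detector=detector)

    img = cv2.imread("./card.jpg")                   # hypothetical sample image
    print(bank_ocr.predict(img))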