# NOTE: the original import order is kept on purpose -- the star imports from
# ``core`` may export names that later imports are expected to override.
from dataclasses import dataclass
import cv2
import numpy as np
import math
import re
from paddleocr import PaddleOCR, draw_ocr
from core.direction import *
from core.line_parser import LineParser
from core.parser import *
from PIL import Image

# A run of 16-20 consecutive digits, used as the "looks like a card number"
# heuristic when deciding whether the primary detection can be trusted.
_CARD_NUMBER_RE = re.compile(r'\d{16,20}')


@dataclass
class BankOcr:
    """Bank-card OCR pipeline: orientation fix, text detection, number parsing.

    Attributes:
        ocr: Primary OCR engine; only its ``ocr(image, cls=False)`` method is
            used here.
        angle_detector: Helper providing ``detect_angle``, ``origin_detect``
            (the fallback detector) and ``det_oneline``.
    """

    ocr: PaddleOCR
    angle_detector: AngleDetector

    # Minimum recognition confidence for a primary-model box to be trusted.
    # Un-annotated on purpose so that it is not turned into a dataclass field.
    CONF_THRESHOLD = 0.987
    # Images with a side longer than this many pixels are down-scaled.
    MAX_SIDE = 1000

    def predict(self, image: np.ndarray):
        """Run the full pipeline on ``image`` and return the parsed result.

        Args:
            image: Input image as a NumPy array.

        Returns:
            The dictionary produced by ``_post_process`` (keys
            ``confidence``, ``orientation`` and ``number``).

        Raises:
            Exception: If neither the primary nor the fallback detector
                returns any text.
        """
        image, angle, _ = self._pre_process(image)
        print(f'---------- detect angle: {angle} 角度 --------')
        # From here on the custom-trained detection/recognition model is used;
        # ideally every card has already been rotated upright at this point.
        _, _, result = self._ocr(image)
        # Debug aid: call ``self.imshow(image, result)`` here to save an
        # annotated copy of the image.
        return self._post_process(result, angle)

    def imshow(self, image, result):
        """Draw the OCR ``result`` on ``image`` and save it to ``./img.jpg``.

        Args:
            image: Image array accepted by ``PIL.Image.fromarray``.
            result: OCR lines; each line is indexed as ``line[0]`` (box) and
                ``line[1]`` (``(text, score)``).
        """
        img = Image.fromarray(image).convert("RGB")
        boxes = [line[0] for line in result]
        txts = [line[1][0] for line in result]
        scores = [line[1][1] for line in result]
        im_show = draw_ocr(img, boxes, txts, scores, font_path="./simfang.ttf")
        im_show = Image.fromarray(im_show)
        im_show.save("./img.jpg")

    def _pre_process(self, image: np.ndarray):
        """Rotate ``image`` upright and shrink it if it is too large.

        Args:
            image: Input image as a NumPy array (2-D or 3-D).

        Returns:
            A tuple ``(image, angle, result)`` where ``angle`` and ``result``
            are exactly what ``angle_detector.detect_angle`` returned.
        """
        angle, result = self.angle_detector.detect_angle(image)
        # Angle codes 1/2/3 are undone with a 90-degree CCW / 180-degree /
        # 90-degree CW rotation; any other code leaves the image untouched.
        if angle == 1:
            image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif angle == 2:
            image = cv2.rotate(image, cv2.ROTATE_180)
        elif angle == 3:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)

        # Very large images yield poor detection boxes and therefore bad
        # recognition, so they are down-scaled first.
        # ``shape[:2]`` also accepts single-channel (2-D) images.
        h, w = image.shape[:2]
        longest = max(h, w)
        if longest <= self.MAX_SIDE:
            return image, angle, result

        # NOTE(review): the factor is rounded UP to an integer, so a side of
        # 1001 px is halved rather than scaled to 1000 px. Kept as-is because
        # the models may have been tuned against this behaviour -- confirm.
        factor = math.ceil(longest / self.MAX_SIDE)
        # Clamp to 1 px so extremely elongated images cannot yield a zero
        # dimension, which cv2.resize rejects.
        new_size = (max(1, w // factor), max(1, h // factor))
        image = cv2.resize(image, new_size)
        print(image.shape)
        return image, angle, result

    @staticmethod
    def _split_result(result):
        """Return ``(txts, confs)`` extracted from OCR ``result`` lines."""
        txts = [line[1][0] for line in result]
        confs = [line[1][1] for line in result]
        return txts, confs

    def _fallback_reason(self, result):
        """Decide whether a non-empty primary ``result`` must be discarded.

        Returns:
            The log message explaining why the fallback detector is needed,
            or ``None`` when the primary result is acceptable.
        """
        txts, confs = self._split_result(result)
        threshold = self.CONF_THRESHOLD

        if len(result) == 1:
            if confs[0] > threshold:
                return None
            return "len(result)=1时,再次用官方代码检测。。。。。。"

        if len(result) > 2:
            return "len(result)=3,直接换官方检测。。。。。。"

        # Exactly two boxes: the rule depends on whether they share a line.
        if not self.angle_detector.det_oneline(result):
            # Different lines: accept only if at least one box is confident
            # and one of the texts contains a card-number-like digit run.
            # (Each text is searched on its own; passing the whole list to
            # ``re`` raises TypeError.)
            confident = any(conf > threshold for conf in confs)
            has_number = any(_CARD_NUMBER_RE.search(txt) for txt in txts)
            if confident and has_number:
                return None
            return "len(result)=2,但是不在一行。。。。。。"

        # Same line: both halves must be confident ...
        if not all(conf > threshold for conf in confs):
            return "len(result)=2,在一行,但有一个检测不行。。。。。。"

        # ... and the first box must end before the second one starts
        # (compare the largest x of box 0 with the smallest x of box 1).
        left_max_x = np.max(result[0][0], 0)[0]
        right_min_x = np.min(result[1][0], 0)[0]
        if left_max_x > right_min_x:
            return "len(result)=2,在一行,但有重叠。。。。。。"
        return None

    def _ocr(self, image):
        """Detect and recognise text, falling back to the stock detector.

        The primary model is tried first without the direction classifier,
        because the image is expected to be upright already. If its output is
        empty or fails the heuristics in ``_fallback_reason``,
        ``angle_detector.origin_detect`` is used instead.

        Args:
            image: Upright image to run OCR on.

        Returns:
            A tuple ``(txts, confs, result)``: recognised strings, their
            confidences, and the raw OCR lines they were taken from.

        Raises:
            Exception: If the fallback detector is needed and returns nothing.
        """
        result = self.ocr.ocr(image, cls=False)
        print("------------------")
        print("result:", result)
        print("------------------")

        if not result:
            reason = "需要再次进行官方的检测代码。。。。。。。。。。。。"
        else:
            print("自己的检测模型得到的conf:", self._split_result(result)[1])
            reason = self._fallback_reason(result)

        if reason is not None:
            print(reason)
            result = self.angle_detector.origin_detect(image)
            # Still empty after the second attempt: nothing can be recognised.
            if not result:
                raise Exception('经过两次检测都无法识别!!!')

        txts, confs = self._split_result(result)
        return txts, confs, result

    def _post_process(self, raw_result, angle: int):
        """Parse raw OCR lines into the response dictionary.

        Args:
            raw_result: Raw OCR lines as returned by ``_ocr``.
            angle: Orientation code reported by the angle detector.

        Returns:
            A dict with ``confidence`` (from the line parser), ``orientation``
            (``angle``) and ``number`` (the parsed ``number`` entry converted
            with ``to_dict()``).
        """
        line_parser = LineParser(raw_result)
        line_results = line_parser.parse()
        conf = line_parser.confidence

        parser = Parser(line_results)
        content = parser.parse()

        return {
            "confidence": conf,
            "orientation": angle,
            "number": content["number"].to_dict(),
        }