123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- from dataclasses import dataclass
- import cv2
- import numpy as np
- import math
- from paddleocr import PaddleOCR, draw_ocr
- from core.direction import *
- from core.line_parser import LineParser
- from core.parser import *
- from PIL import Image
@dataclass
class BankOcr:
    """OCR pipeline: orient the image, run recognition, parse the number.

    ``predict`` is the entry point. It rotates and downsizes the image
    according to the angle detector, runs the OCR engine (falling back to
    ``angle_detector.origin_detect`` when the first pass looks unreliable)
    and hands the raw lines to ``LineParser`` / ``Parser``.
    """

    # OCR engine; called as ``ocr.ocr(image, cls=False)``.
    ocr: PaddleOCR
    # Provides ``detect_angle``, ``origin_detect`` and ``det_oneline``.
    angle_detector: AngleDetector

    # Confidence a recognized line must exceed to be trusted without
    # re-detection. Unannotated on purpose so it is not a dataclass field.
    _CONF_THRESHOLD = 0.987

    def predict(self, image: np.ndarray):
        """Run the full pipeline on ``image`` and return the parsed result.

        Returns:
            The dict built by ``_post_process`` with the keys
            ``"confidence"``, ``"orientation"`` and ``"number"``.

        Raises:
            Exception: propagated from ``_ocr`` when nothing is recognized.
        """
        image, angle, _ = self._pre_process(image)
        print(f'---------- detect angle: {angle} 角度 --------')
        _, _, result = self._ocr(image)
        return self._post_process(result, angle)

    def imshow(self, image, result,
               font_path: str = "./simfang.ttf",
               save_path: str = "./img.jpg"):
        """Draw ``result`` onto ``image`` and save the visualization.

        Args:
            image: Array accepted by ``PIL.Image.fromarray``.
            result: OCR lines; each entry is read as ``line[0]`` (box),
                ``line[1][0]`` (text) and ``line[1][1]`` (score).
            font_path: Font file passed to ``draw_ocr``.
            save_path: Where the rendered image is written.
        """
        img = Image.fromarray(image).convert("RGB")
        boxes = [line[0] for line in result]
        txts = [line[1][0] for line in result]
        scores = [line[1][1] for line in result]
        rendered = draw_ocr(img, boxes, txts, scores, font_path=font_path)
        Image.fromarray(rendered).save(save_path)

    def _pre_process(self, image: np.ndarray):
        """Rotate ``image`` by the detected angle code and cap its size.

        Angle codes 1, 2 and 3 rotate the image by 90 degrees
        counter-clockwise, 180 degrees and 90 degrees clockwise
        respectively; any other code leaves the image untouched.

        Returns:
            ``(image, angle, result)`` where ``angle`` and ``result`` are
            exactly what ``angle_detector.detect_angle`` returned.
        """
        angle, result = self.angle_detector.detect_angle(image)

        rotations = {
            1: cv2.ROTATE_90_COUNTERCLOCKWISE,
            2: cv2.ROTATE_180,
            3: cv2.ROTATE_90_CLOCKWISE,
        }
        rotate_code = rotations.get(angle)
        # Compare against None: a valid OpenCV rotate code may be 0.
        if rotate_code is not None:
            image = cv2.rotate(image, rotate_code)

        # shape[:2] also works for 2-D (single-channel) arrays.
        h, w = image.shape[:2]
        ratio = max(h, w) / 1000
        if ratio > 1:
            # The factor is rounded up to an integer, so the longer side
            # ends at or below 1000 px (e.g. 1500 -> 750).
            factor = math.ceil(ratio)
            image = cv2.resize(image, (w // factor, h // factor))
        print(image.shape)
        return image, angle, result

    def _ocr(self, image):
        """Recognize text in an already-upright image.

        The first pass skips the direction classifier because the image
        has been rotated upright beforehand. When that pass returns
        nothing, or ``_needs_fallback`` judges it unreliable, detection is
        redone with ``angle_detector.origin_detect``.

        Returns:
            ``(txts, confs, result)``: the texts, the confidences and the
            raw lines they were extracted from.

        Raises:
            Exception: when the fallback detection returns nothing either.
        """
        result = self.ocr.ocr(image, cls=False)
        print("------------------")
        print("result:", result)
        print("------------------")

        if not result or self._needs_fallback(result):
            result = self.angle_detector.origin_detect(image)
            # Still empty after the fallback: nothing can be recognized.
            if not result:
                raise Exception('识别出错')

        confs = [line[1][1] for line in result]
        txts = [line[1][0] for line in result]
        return txts, confs, result

    def _needs_fallback(self, result) -> bool:
        """Return True when a non-empty first-pass ``result`` is not trusted."""
        threshold = self._CONF_THRESHOLD
        confs = [line[1][1] for line in result]

        if len(result) == 1:
            return not (confs[0] > threshold)

        if len(result) == 2:
            if not self.angle_detector.det_oneline(result):
                # Two separate lines: keep them only if at least one is
                # confident and some text holds a 16-20 digit run.
                txts = [line[1][0] for line in result]
                confident = any(conf > threshold for conf in confs)
                return not (confident and self._has_digit_run(txts))
            if not all(conf > threshold for conf in confs):
                return True
            # Both boxes sit on one line: the first box must end (max x)
            # before the second one starts (min x).
            left_max_x, _ = np.max(result[0][0], 0)
            right_min_x, _ = np.min(result[1][0], 0)
            return bool(left_max_x > right_min_x)

        # More than two boxes are always re-detected.
        return True

    @staticmethod
    def _has_digit_run(txts) -> bool:
        """Return True if any text contains 16 to 20 consecutive digits."""
        import re  # local import: the module does not import re explicitly

        return any(re.search(r"\d{16,20}", text) for text in txts)

    def _post_process(self, raw_result, angle: int):
        """Parse raw OCR lines into the response dict.

        Returns:
            A dict with ``"confidence"`` (from ``LineParser.confidence``),
            ``"orientation"`` (the ``angle`` passed in) and ``"number"``
            (``to_dict()`` of the parsed ``"number"`` entry).
        """
        line_parser = LineParser(raw_result)
        line_results = line_parser.parse()
        conf = line_parser.confidence
        content = Parser(line_results).parse()
        return {
            "confidence": conf,
            "orientation": angle,
            "number": content["number"].to_dict(),
        }
|