# sxwl_DL / hr-ocr-bankcard
import math
import re
from dataclasses import dataclass

import cv2
import numpy as np
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image

from core.direction import *
from core.line_parser import LineParser
from core.parser import *


@dataclass
class BankOcr:
    """Bank-card OCR pipeline: orientation fix, detection/recognition, parsing.

    ``predict`` is the entry point. It rotates/downsizes the image, runs the
    primary OCR model, falls back to ``angle_detector.origin_detect`` when the
    primary result looks unreliable, and finally parses the card number.
    """

    # Primary OCR engine; only ``ocr.ocr(image, cls=False)`` is called on it.
    ocr: "PaddleOCR"
    # Provides ``detect_angle``, ``origin_detect`` and ``det_oneline``.
    angle_detector: "AngleDetector"

    # A recognition is trusted only when its confidence exceeds this value.
    _HIGH_CONF = 0.987
    # Images whose longer side exceeds this many pixels are downscaled.
    _MAX_SIDE = 1000
    # A run of 16-20 digits is treated as a card number.
    _CARD_NUMBER = r'\d{16,20}'

    def predict(self, image: np.ndarray):
        """Run the full pipeline on ``image`` and return the parsed result.

        Returns:
            dict with keys ``"confidence"``, ``"orientation"`` and ``"number"``
            (see ``_post_process``).

        Raises:
            Exception: if neither detector produces any result.
        """
        image, angle, _ = self._pre_process(image)
        print(f'---------- detect angle: {angle} 角度 --------')
        # The custom-trained detection/recognition model is used here; ideally
        # every card has already been rotated upright (angle 0) by this point.
        _, _, result = self._ocr(image)
        # self.imshow(image, result)  # save the annotated detection image
        return self._post_process(result, angle)

    def imshow(self, image, result, font_path="./simfang.ttf", save_path="./img.jpg"):
        """Draw the OCR boxes/texts/scores onto ``image`` and save it to disk.

        Args:
            image: array accepted by ``PIL.Image.fromarray``.
            result: OCR lines shaped like ``[box, (text, score)]``.
            font_path: font file handed to ``draw_ocr``.
            save_path: where the annotated image is written.
        """
        img = Image.fromarray(image).convert("RGB")
        boxes = [line[0] for line in result]
        txts = [line[1][0] for line in result]
        scores = [line[1][1] for line in result]
        im_show = draw_ocr(img, boxes, txts, scores, font_path=font_path)
        Image.fromarray(im_show).save(save_path)

    def _pre_process(self, image: np.ndarray):
        """Rotate ``image`` according to the detected angle and cap its size.

        Returns:
            ``(image, angle, result)`` where ``angle`` and ``result`` are the
            two values returned by ``angle_detector.detect_angle``.
        """
        angle, result = self.angle_detector.detect_angle(image)

        if angle == 1:
            image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif angle == 2:
            image = cv2.rotate(image, cv2.ROTATE_180)
        elif angle == 3:
            image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
        # NOTE: only the quarter-turn rotations above are applied; no
        # arbitrary-angle deskew is performed here.

        # Very large images give poor detection boxes and therefore poor
        # recognition, so shrink them. ``shape[:2]`` also accepts 2-D
        # (single-channel) arrays.
        h, w = image.shape[:2]
        ratio = max(h, w) / self._MAX_SIDE
        if ratio <= 1:
            return image, angle, result

        # Integer shrink factor: the longer side ends up at most _MAX_SIDE.
        factor = math.ceil(ratio)
        # Clamp to 1 px so an extreme aspect ratio cannot yield a zero size.
        image = cv2.resize(image, (max(1, w // factor), max(1, h // factor)))
        print(image.shape)
        return image, angle, result

    def _fallback_reason(self, result, confs):
        """Return the log message for rejecting ``result``, or None to accept it.

        ``result`` is a non-empty list of OCR lines from the primary model and
        ``confs`` the matching confidences.
        """
        high = [conf > self._HIGH_CONF for conf in confs]

        if len(result) == 1:
            return None if high[0] else "len(result)=1时，再次用官方代码检测。。。。。。"
        if len(result) > 2:
            return "len(result)=3,直接换官方检测。。。。。。"

        # Exactly two boxes: first check whether they sit on the same line.
        if not self.angle_detector.det_oneline(result):
            # Different lines: accept only if some box is confident and some
            # text contains a card-number-like digit run.
            txts = [line[1][0] for line in result]
            if any(high) and any(re.search(self._CARD_NUMBER, txt) for txt in txts):
                return None
            return "len(result)=2,但是不在一行。。。。。。"

        if not all(high):
            return "len(result)=2,在一行，但有一个检测不行。。。。。。"

        # Same line, both confident: reject if the boxes overlap horizontally.
        # NOTE(review): assumes result[0] is the left-hand box - confirm the
        # detector's output ordering.
        l_max, _ = np.max(result[0][0], 0)
        r_min, _ = np.min(result[1][0], 0)
        if l_max > r_min:
            return "len(result)=2,在一行，但有重叠。。。。。。"
        return None

    def _ocr(self, image):
        """Recognise text on an upright image, falling back when unreliable.

        The primary model is run without the direction classifier (the image
        is already upright). If it returns nothing, or its output fails the
        rules in ``_fallback_reason``, ``angle_detector.origin_detect`` is
        used instead.

        Returns:
            ``(txts, confs, result)``: recognised texts, their confidences and
            the raw OCR lines.

        Raises:
            Exception: if the final result is still empty.
        """
        result = self.ocr.ocr(image, cls=False)
        print("------------------")
        print("result:", result)
        print("------------------")

        if not result:
            # Primary model found nothing: retry with the stock detector.
            print("需要再次进行官方的检测代码。。。。。。。。。。。。")
            result = self.angle_detector.origin_detect(image)
        else:
            confs = [line[1][1] for line in result]
            print("自己的检测模型得到的conf：", confs)
            reason = self._fallback_reason(result, confs)
            if reason is not None:
                print(reason)
                result = self.angle_detector.origin_detect(image)

        # Still empty after both attempts: nothing can be recognised.
        if not result:
            raise Exception('经过两次检测都无法识别！！！')

        confs = [line[1][1] for line in result]
        txts = [line[1][0] for line in result]
        return txts, confs, result

    def _post_process(self, raw_result, angle: int):
        """Parse raw OCR lines into the response dictionary.

        Args:
            raw_result: OCR lines handed to ``LineParser``.
            angle: orientation value reported back under ``"orientation"``.

        Returns:
            dict with ``"confidence"`` (from ``LineParser.confidence``),
            ``"orientation"`` and ``"number"`` (``to_dict()`` of the parsed
            ``"number"`` entry).
        """
        line_parser = LineParser(raw_result)
        line_results = line_parser.parse()
        conf = line_parser.confidence
        content = Parser(line_results).parse()

        return {
            "confidence": conf,
            "orientation": angle,
            "number": content["number"].to_dict(),
        }