# sxwl_DL/hr-ocr-idcard
import re
from dataclasses import dataclass
from typing import Tuple

import cv2
import numpy as np
from paddleocr import PaddleOCR

from core.line_parser import LineParser


class OcrAnchor(object):
    """Base class for a text landmark ("anchor") used to infer card orientation.

    ``res`` arguments are iterated as rows of items, where each item exposes
    ``.txt`` (recognised text) and ``.box`` (a sequence of 2-D points).
    Subclasses override :meth:`is_anchor` to recognise their landmark.
    """

    def __init__(self, name: str):
        # Name of the anchor being recognised (e.g. the ID number); it is
        # only used in the "anchor not found" error message.
        self.name = name

    def get_rec_area(self, res) -> Tuple[float, float]:
        """Return the centre (x, y) of the box enclosing every OCR box in ``res``."""
        points = [pt for line in res for item in line for pt in item.box]
        corners = np.stack(points)
        left, top = np.min(corners, 0)
        right, bottom = np.max(corners, 0)
        return (left + right) / 2, (top + bottom) / 2

    def is_anchor(self, txt, box) -> bool:
        """Decide whether an OCR item is the anchor.

        The base implementation matches nothing (it returns ``None``);
        subclasses provide the real test.
        """
        return None

    def find_anchor(self, res) -> Tuple[bool, float, float]:
        """Locate the first item accepted by :meth:`is_anchor`.

        Dashes and spaces are stripped from the text before it is tested.

        Returns:
            ``(True, cx, cy)`` with the centre of the matching item's box, or
            ``(False, 0.0, 0.0)`` when no item matches.
        """
        for line in res:
            for item in line:
                cleaned = item.txt.replace('-', '').replace(' ', '')
                if not self.is_anchor(cleaned, item.box):
                    continue
                left, top = np.min(item.box, 0)
                right, bottom = np.max(item.box, 0)
                return True, (left + right) / 2, (top + bottom) / 2
        return False, 0., 0.

    def locate_anchor(self, res, is_horizontal) -> int:
        """Return the orientation code derived from where the anchor sits.

        The anchor centre is compared with the centre of the whole recognised
        area. Codes are quarter turns: 0 = 0 degrees, 1 = 90, 2 = 180, 3 = 270.

        Raises:
            Exception: if no item in ``res`` is recognised as the anchor.
        """
        found, anchor_x, anchor_y = self.find_anchor(res)
        if not found:
            raise Exception(f'识别不到anchor{self.name}')

        area_x, area_y = self.get_rec_area(res)
        if not is_horizontal:
            # Vertical layout: anchor left of centre means 90 degrees,
            # otherwise 270 degrees.
            return 1 if anchor_x < area_x else 3
        # Horizontal layout: anchor below centre means 0 degrees,
        # otherwise 180 degrees.
        return 0 if anchor_y > area_y else 2


class FrontSideAnchor(OcrAnchor):
    """Anchor for the front side of the card: a long run of digits.

    Construction and orientation logic are inherited unchanged from
    ``OcrAnchor``; only the anchor test is specialised here.
    """

    def is_anchor(self, txt, box) -> bool:
        """Return True when ``txt`` contains 10 to 18 consecutive digits.

        ``box`` is accepted for interface compatibility and is not used.
        """
        # Raw string: '\d' inside a plain literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        return re.search(r'\d{10,18}', txt) is not None

class BackSideAnchor(OcrAnchor):
    """Anchor for the back side of the card: the validity-period label.

    Construction and orientation logic come from ``OcrAnchor`` as-is; this
    class only supplies the anchor test.
    """

    def is_anchor(self, txt, box) -> bool:
        """Return True when ``txt``, with dots removed, contains the label.

        ``box`` is accepted for interface compatibility and is not used.
        """
        return '有效期' in txt.replace('.', '')


def detect_angle(result, ocr_anchor: OcrAnchor):
    """Parse raw OCR output into lines and ask the anchor for the orientation.

    Args:
        result: raw OCR output, handed unchanged to ``LineParser``.
        ocr_anchor: anchor strategy whose ``locate_anchor`` yields the code.

    Returns:
        Whatever ``ocr_anchor.locate_anchor`` returns for the parsed lines.
    """
    parser = LineParser(result)
    lines = parser.parse()

    # Debug dump of the parsed lines, framed by a banner.
    banner = '------ angle ocr -------'
    print(banner)
    print(lines)
    print(banner)

    return ocr_anchor.locate_anchor(lines, parser.is_horizontal)


@dataclass
class AngleDetector(object):
    """Angle detector: works out how many quarter turns an ID-card image is rotated."""

    # OCR engine; its ``ocr(img, cls=True)`` method is the only thing used.
    ocr: PaddleOCR

    def detect_angle(self, img, image_type):
        """Run OCR on ``img`` and derive the orientation code from an anchor.

        Args:
            img: image passed to the OCR engine (and to ``cv2.rotate`` on retry).
            image_type: anything ``int()`` accepts; 0 selects the front-side
                anchor, any other value selects the back-side anchor.

        Returns:
            ``(angle, result)`` where ``angle`` is the orientation code and
            ``result`` is the OCR output the code was derived from.

        Raises:
            Exception: whatever the second attempt raises, if it also fails.
        """
        if int(image_type) == 0:
            ocr_anchor = FrontSideAnchor('身份证号')
        else:
            ocr_anchor = BackSideAnchor('有效期')

        result = self.ocr.ocr(img, cls=True)
        try:
            return detect_angle(result, ocr_anchor), result
        except Exception as e:
            print(e)

        # First attempt failed: rotate 90 degrees clockwise and try again.
        rotated = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        result = self.ocr.ocr(rotated, cls=True)
        angle = detect_angle(result, ocr_anchor)
        # Undo the extra quarter turn so the code refers to the original image.
        return (angle + 3) % 4, result

    def _detect_back(self, image):
        """Estimate the orientation from where the contour mass lies.

        Contours with an area strictly between 20 and 45000 are filled into a
        mask, and the non-zero pixel counts of the two halves are compared.
        NOTE(review): the colour conversions suggest ``image`` is expected to
        be a 3-channel array; confirm against callers.

        Returns:
            ``(angle, None)``: 0 or 2 when the mask is wider than tall,
            otherwise 1 or 3.
        """
        mask = np.zeros(image.shape, dtype=np.uint8)
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
        binary = cv2.adaptiveThreshold(
            blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 4
        )

        # findContours returns 2 or 3 values depending on the OpenCV version.
        found = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        contours = found[0] if len(found) == 2 else found[1]

        for contour in contours:
            if 20 < cv2.contourArea(contour) < 45000:
                cv2.drawContours(mask, [contour], -1, (255, 255, 255), -1)

        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        h, w = mask.shape

        if w > h:
            # Horizontal: compare the left and right halves.
            half = w // 2
            left_pixels = cv2.countNonZero(mask[:, :half])
            right_pixels = cv2.countNonZero(mask[:, half:])
            print(f'left: {left_pixels}, right: {right_pixels}')
            angle = 2 if right_pixels > left_pixels else 0
        else:
            # Vertical: compare the top and bottom halves.
            half = h // 2
            top_pixels = cv2.countNonZero(mask[:half, :])
            bottom_pixels = cv2.countNonZero(mask[half:, :])
            print(f'top: {top_pixels}, bottom: {bottom_pixels}')
            angle = 3 if bottom_pixels > top_pixels else 1
        return angle, None