|
@@ -0,0 +1,206 @@
|
|
|
+import re
|
|
|
+from dataclasses import dataclass
|
|
|
+from enum import Enum
|
|
|
+from typing import Tuple, List
|
|
|
+
|
|
|
+import cv2
|
|
|
+import numpy as np
|
|
|
+from paddleocr import PaddleOCR
|
|
|
+
|
|
|
+from core.line_parser import LineParser
|
|
|
+
|
|
|
+
|
|
|
class Direction(Enum):
    """Side of the upright document on which an anchor text is expected."""

    # Values follow clockwise order starting from the top edge.
    TOP = 0
    RIGHT = 1
    BOTTOM = 2
    LEFT = 3
|
|
|
+
|
|
|
+
|
|
|
# Base class
class OcrAnchor(object):
    """Derive a page rotation code from the position of an anchor text.

    An anchor is a piece of recognized text that is expected on a known side
    of the upright document. Comparing the anchor's center with the center of
    the whole recognized area yields a rotation code in {0, 1, 2, 3}; the
    original comments describe these as 0, 90, 180 and 270 degrees.

    Subclasses override ``is_anchor`` to decide which text is the anchor.
    """

    # name: label of the anchor, used in error messages (e.g. an ID number)
    # d: side(s) of the upright document where the anchor is expected
    def __init__(self, name: str, d: List[Direction]):
        self.name = name
        # Expected anchor position(s)
        self.direction = d

        # Each function takes the anchor center, the center of the recognized
        # area (both as (x, y)) and the is_horizontal flag, and returns a
        # rotation code. Horizontal layouts can only yield 0 or 2, vertical
        # layouts only 1 or 3. Smaller y is treated as "up".

        def t_func(anchor, c, is_horizontal):
            # Anchor expected at the top.
            if is_horizontal:
                return 0 if anchor[1] < c[1] else 2
            else:
                return 1 if anchor[0] > c[0] else 3

        def l_func(anchor, c, is_horizontal):
            # Anchor expected on the left.
            if is_horizontal:
                return 0 if anchor[0] < c[0] else 2
            else:
                return 1 if anchor[1] < c[1] else 3

        def b_func(anchor, c, is_horizontal):
            # Anchor expected at the bottom.
            if is_horizontal:
                return 0 if anchor[1] > c[1] else 2
            else:
                return 1 if anchor[0] < c[0] else 3

        def r_func(anchor, c, is_horizontal):
            # Anchor expected on the right.
            if is_horizontal:
                return 0 if anchor[0] > c[0] else 2
            else:
                return 1 if anchor[1] > c[1] else 3

        self.direction_funcs = {
            Direction.TOP: t_func,
            Direction.BOTTOM: b_func,
            Direction.LEFT: l_func,
            Direction.RIGHT: r_func,
        }

    # Center of the recognized area -> (x, y)
    def get_rec_area(self, res) -> Tuple[float, float]:
        """Return the center of the bounding rectangle of all OCR boxes.

        ``res`` is iterated as rows of items, each exposing a ``box``
        (assumed to be a sequence of (x, y) points -- TODO confirm against
        the line parser's output type).

        Raises:
            ValueError: if ``res`` contains no box points.
        """
        points = [pt for row in res for item in row for pt in item.box]
        if not points:
            # np.stack would raise ValueError here as well; say why explicitly.
            raise ValueError('no OCR boxes to compute the recognition area from')
        stacked = np.stack(points)
        left, top = np.min(stacked, 0)
        right, bottom = np.max(stacked, 0)
        return (left + right) / 2, (top + bottom) / 2

    # Decide whether a piece of text is the anchor
    def is_anchor(self, txt, box) -> bool:
        """Return whether ``txt``/``box`` is the anchor.

        The base class recognizes nothing; subclasses override this.
        """
        return False

    # Find the anchor -> anchor coordinates
    def find_anchor(self, res) -> Tuple[bool, float, float]:
        """Return ``(found, cx, cy)`` for the first item accepted by ``is_anchor``.

        Hyphens and spaces are stripped from the text before it is tested.
        ``(cx, cy)`` is the center of the item's bounding rectangle. When no
        item matches, ``(False, 0., 0.)`` is returned.
        """
        for row in res:
            for item in row:
                txt = item.txt.replace('-', '').replace(' ', '')
                box = item.box
                if self.is_anchor(txt, box):
                    # Distinct names: do not rebind the loop variable.
                    left, top = np.min(box, 0)
                    right, bottom = np.max(box, 0)
                    return True, (left + right) / 2, (top + bottom) / 2
        return False, 0., 0.

    # Locate the anchor -> rotation code
    def locate_anchor(self, res, is_horizontal) -> int:
        """Return the rotation code (0-3) implied by the anchor position.

        Every configured direction is evaluated and all of them must agree.

        Raises:
            Exception: if no anchor is found, or the configured directions
                yield different rotation codes.
            ValueError: if no direction is configured, or a configured
                direction has no registered function.
        """
        found, id_cx, id_cy = self.find_anchor(res)

        # The anchor could not be recognized
        if not found:
            raise Exception(f'识别不到anchor{self.name}')
        if not self.direction:
            # Previously this fell through and returned None.
            raise ValueError(f'anchor {self.name} has no expected direction configured')

        cx, cy = self.get_rec_area(res)
        angle = None
        for d in self.direction:
            func = self.direction_funcs.get(d)
            if func is None:
                raise ValueError(f'unsupported direction: {d!r}')
            current = func((id_cx, id_cy), (cx, cy), is_horizontal)
            if angle is None:
                angle = current
            elif current != angle:
                raise Exception('angle is not compatiable')
        return angle
|
|
|
+
|
|
|
+
|
|
|
# Subclass 1: anchor keyed on the text '全国大学英语'
# NOTE(review): the original comment labeled this "portrait side", which looks
# like a leftover from ID-card code -- confirm.
class CETAnchor(OcrAnchor):
    """Anchor that matches any text containing '全国大学英语'."""

    def __init__(self, name: str, d: List[Direction]):
        super().__init__(name, d)

    def is_anchor(self, txt, box) -> bool:
        """Return True when ``txt`` contains the anchor phrase; ``box`` is unused."""
        return re.search('全国大学英语', txt) is not None

    def locate_anchor(self, res, is_horizontal) -> int:
        """Delegate unchanged to the base-class implementation."""
        return super().locate_anchor(res, is_horizontal)
|
|
|
+
|
|
|
+
|
|
|
# Subclass 2: anchor keyed on the text '证书编号'
# NOTE(review): the original comment labeled this "national-emblem side", which
# looks like a leftover from ID-card code -- confirm.
class TEMAnchor(OcrAnchor):
    """Anchor that matches any text containing '证书编号'."""

    def __init__(self, name: str, d: List[Direction]):
        super().__init__(name, d)

    def is_anchor(self, txt, box) -> bool:
        """Return True when ``txt`` contains the anchor phrase; ``box`` is unused."""
        return re.search('证书编号', txt) is not None

    def locate_anchor(self, res, is_horizontal) -> int:
        """Delegate unchanged to the base-class implementation."""
        return super().locate_anchor(res, is_horizontal)
|
|
|
+
|
|
|
+
|
|
|
# Ties the tools above together
# in:  raw OCR data
# mid: processed OCR data (after line parsing)
# out: rotation code 0/1/2/3
def detect_angle(result, ocr_anchor: OcrAnchor):
    """Parse raw OCR output into lines and return the anchor's rotation code.

    Two predicates are handed to ``LineParser``: one reads an item's
    ``is_slope`` flag, the other is true when the item's text, with spaces
    removed, consists only of ASCII letters. How ``LineParser`` applies them
    is not visible here -- presumably matching items are discarded; confirm.

    The parsed lines are printed to stdout between two marker lines.
    Exceptions raised by ``ocr_anchor.locate_anchor`` propagate to the caller.
    """

    def is_sloped(item):
        return item.is_slope

    def is_ascii_letters(item):
        # bytes.isalpha() is true only for ASCII alphabetic bytes.
        compact = item.txt.replace(' ', '')
        return compact.encode('utf-8').isalpha()

    lp = LineParser(result, [is_sloped, is_ascii_letters])
    res = lp.parse()

    banner = '------ angle ocr -------'
    print(banner)
    print(res)
    print(banner)

    # Read the orientation flag only after parse() has run, as the original does.
    is_horizontal = lp.is_horizontal
    return ocr_anchor.locate_anchor(res, is_horizontal)
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class AngleDetector(object):
    """
    Angle detector: runs OCR on an image and derives its rotation code.
    """
    # OCR engine; called as ocr.ocr(img, cls=True)
    ocr: PaddleOCR

    # in:  img (cv2 format)
    # mid: raw OCR result for img
    # out: (angle, raw OCR result, image_type)
    def detect_angle(self, img):
        """Return ``(angle, result, image_type)`` for ``img``.

        The image type is determined first; an exception from ``detect_img``
        propagates. If angle detection then fails, the error is printed, the
        image is rotated 90 degrees clockwise, OCR is run again, and the
        detected angle is shifted back by one step. In that case the returned
        ``result`` is the OCR output of the rotated image. A failure on the
        second attempt propagates.
        """
        image_type, result = self.detect_img(img)

        if image_type == 0:
            ocr_anchor = CETAnchor('CET', [Direction.TOP])
        else:
            ocr_anchor = TEMAnchor('TEM', [Direction.BOTTOM])

        try:
            # Resolves to the module-level detect_angle function.
            first_angle = detect_angle(result, ocr_anchor)
            return first_angle, result, image_type
        except Exception as e:
            print(e)
            # First attempt failed: rotate by 90 degrees and recognize again.
            rotated = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
            retry_result = self.ocr.ocr(rotated, cls=True)
            retry_angle = detect_angle(retry_result, ocr_anchor)
            # Undo the extra rotation: one step back, modulo 4.
            return (retry_angle + 3) % 4, retry_result, image_type

    def detect_img(self, img):
        """Run OCR on ``img`` and return ``(0, result)`` if '报告单' is recognized.

        Each entry of the OCR result is read as ``entry[1][0]`` for its text.

        Raises:
            Exception: when no entry contains '报告单'. Type 1 is never
                returned; the original left that return commented out.
        """
        result = self.ocr.ocr(img, cls=True)
        if any("报告单" in entry[1][0] for entry in result):
            return 0, result
        raise Exception("不支持专四专八")
|