123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- import re
- from dataclasses import dataclass
- from enum import Enum
- from typing import Tuple, List
- import cv2
- import numpy as np
- from paddleocr import PaddleOCR
- from core.line_parser import LineParser
- from utils.time import timeit
class Direction(Enum):
    """Side of the page on which an anchor text is expected to sit.

    The members are used as keys of ``OcrAnchor.direction_funcs``; their
    integer values are listed clockwise starting from the top.
    """

    TOP = 0
    RIGHT = 1
    BOTTOM = 2
    LEFT = 3
class OcrAnchor(object):
    """Base class that derives a rotation code from the position of an anchor text.

    An "anchor" is a piece of recognised text that subclasses identify via
    ``is_anchor``. Its centre is compared with the centre of the area covered
    by all recognised boxes, and one helper per expected ``Direction`` turns
    that comparison into a code in ``{0, 1, 2, 3}``.

    The parsed OCR result (``res``) is consumed as an iterable of rows, each
    row being an iterable of items exposing ``txt``, ``box`` (an iterable of
    ``(x, y)`` points) and ``center`` (``(x, y)``).
    """

    def __init__(self, name: str, d: "List[Direction]"):
        """Store the anchor label and the side(s) where the anchor is expected.

        Args:
            name: Label used in the "anchor not found" error message.
            d: Expected anchor positions; every entry must yield the same
                code, otherwise ``locate_anchor`` raises.
        """
        self.name = name
        # Expected anchor position(s) on the page.
        self.direction = d
        # Maps each Direction to a callable
        # ``(anchor_xy, center_xy, is_horizontal) -> int``.
        self.direction_funcs = {
            Direction.TOP: self._top_angle,
            Direction.BOTTOM: self._bottom_angle,
            Direction.LEFT: self._left_angle,
            Direction.RIGHT: self._right_angle,
        }

    # The four helpers below share one contract: they return 0 or 2 when the
    # text lines are horizontal and 1 or 3 otherwise, depending on which side
    # of the picture centre ``c`` the anchor centre lies.
    # NOTE(review): 0 presumably means "already upright" and each step a
    # quarter turn -- confirm against the caller that applies the rotation.

    @staticmethod
    def _top_angle(anchor, c, is_horizontal):
        """Code for an anchor that is expected at the top of the page."""
        if is_horizontal:
            return 0 if anchor[1] < c[1] else 2
        return 1 if anchor[0] > c[0] else 3

    @staticmethod
    def _left_angle(anchor, c, is_horizontal):
        """Code for an anchor that is expected on the left of the page."""
        if is_horizontal:
            return 0 if anchor[0] < c[0] else 2
        return 1 if anchor[1] < c[1] else 3

    @staticmethod
    def _bottom_angle(anchor, c, is_horizontal):
        """Code for an anchor that is expected at the bottom of the page."""
        if is_horizontal:
            return 0 if anchor[1] > c[1] else 2
        return 1 if anchor[0] < c[0] else 3

    @staticmethod
    def _right_angle(anchor, c, is_horizontal):
        """Code for an anchor that is expected on the right of the page."""
        if is_horizontal:
            return 0 if anchor[0] > c[0] else 2
        return 1 if anchor[1] > c[1] else 3

    def get_pic_center(self, res) -> Tuple[float, float]:
        """Return the ``(x, y)`` centre of the area covered by all boxes.

        The area is the axis-aligned bounding rectangle of every box point
        found in ``res``.

        Raises:
            ValueError: If ``res`` contains no box points.
        """
        points = [point for row in res for item in row for point in item.box]
        if not points:
            # np.stack would raise ValueError as well, with a vaguer message.
            raise ValueError('no OCR boxes available to compute the picture center')
        points = np.stack(points)
        left, top = np.min(points, 0)
        right, bottom = np.max(points, 0)
        return (left + right) / 2, (top + bottom) / 2

    def is_anchor(self, txt, box) -> bool:
        """Tell whether a recognised item is the anchor.

        The base class recognises nothing; subclasses override this.
        """
        return False

    def find_anchor(self, res) -> Tuple[bool, float, float]:
        """Find the first item accepted by ``is_anchor``.

        Returns:
            ``(True, cx, cy)`` with the item's centre, or ``(False, 0., 0.)``
            when no item qualifies.
        """
        for row in res:
            for item in row:
                if self.is_anchor(item.txt, item.box):
                    return True, item.center[0], item.center[1]
        return False, 0., 0.

    def locate_anchor(self, res, is_horizontal):
        """Derive the rotation code from the anchor position.

        Args:
            res: Parsed OCR result (see the class docstring for its shape).
            is_horizontal: Whether the recognised text lines are horizontal.

        Returns:
            The code all configured directions agree on, or ``None`` when
            ``self.direction`` is empty.

        Raises:
            Exception: If no anchor is found, or the configured directions
                yield different codes.
            TypeError: If a configured direction has no helper.
        """
        found, a_cx, a_cy = self.find_anchor(res)
        # Check for the anchor before computing the centre: with an empty
        # result the centre computation fails and would hide this error.
        if not found:
            raise Exception(f'识别不到anchor{self.name}')
        center = self.get_pic_center(res)

        agreed = None
        for d in self.direction:
            angle_func = self.direction_funcs.get(d)
            if angle_func is None:
                raise TypeError(f'unsupported anchor direction: {d!r}')
            angle = angle_func((a_cx, a_cy), center, is_horizontal)
            if agreed is None:
                agreed = angle
            elif agreed != angle:
                raise Exception('angle is not compatible')
        return agreed
# Subclass 0: Ministry of Education online verification report of student status
class ReportAnchor(OcrAnchor):
    """Anchor whose text contains '查看该' or '更新日期'."""

    def __init__(self, name: str, d: List[Direction]):
        super().__init__(name, d)

    def is_anchor(self, txt, box):
        """Return True when ``txt`` contains either marker phrase; ``box`` is unused."""
        for marker in ('查看该', '更新日期'):
            if re.search(marker, txt) is not None:
                return True
        return False

    def locate_anchor(self, res, is_horizontal):
        """Delegate unchanged to ``OcrAnchor.locate_anchor``."""
        return super().locate_anchor(res, is_horizontal)
# Subclass 1: Ministry of Education electronic registration record form for
# academic certificates
class RecordAnchor(OcrAnchor):
    """Anchor whose text contains '注册备案表'."""

    def __init__(self, name: str, d: List[Direction]):
        super().__init__(name, d)

    def is_anchor(self, txt, box):
        """Return True when ``txt`` contains the marker phrase; ``box`` is unused."""
        return re.search('注册备案表', txt) is not None

    def locate_anchor(self, res, is_horizontal):
        """Delegate unchanged to ``OcrAnchor.locate_anchor``."""
        return super().locate_anchor(res, is_horizontal)
# Subclass 2: China higher-education certificate query result (ad-hoc query)
class ScattedAnchor(OcrAnchor):
    """Anchor whose text contains '教育学历'."""

    def __init__(self, name: str, d: List[Direction]):
        super().__init__(name, d)

    def is_anchor(self, txt, box):
        """Return True when ``txt`` contains the marker phrase; ``box`` is unused."""
        hit = re.search('教育学历', txt)
        return hit is not None

    def locate_anchor(self, res, is_horizontal):
        """Delegate unchanged to ``OcrAnchor.locate_anchor``."""
        return super().locate_anchor(res, is_horizontal)
@timeit
def detect_angle(result, ocr_anchor: OcrAnchor):
    """Parse a raw OCR result into lines and ask the anchor for the rotation code.

    Args:
        result: Raw OCR output, handed as-is to ``LineParser``.
        ocr_anchor: Anchor strategy used to locate the reference text.

    Returns:
        Whatever ``ocr_anchor.locate_anchor`` returns for the parsed lines.
    """
    parser = LineParser(result)
    parsed_rows = parser.parse()

    # Debug dump of the parsed lines, framed by a banner.
    banner = '------ angle ocr -------'
    print(banner)
    print(parsed_rows)
    print(banner)

    # Read the orientation flag only after parse() has run.
    horizontal = parser.is_horizontal
    return ocr_anchor.locate_anchor(parsed_rows, horizontal)
@dataclass
class AngleDetector(object):
    """Angle detector: runs OCR on an image and derives its rotation code."""

    # OCR engine; called as ``self.ocr.ocr(img, cls=True)``.
    ocr: PaddleOCR

    def detect_angle(self, img, image_type):
        """Detect the rotation code of ``img`` for a given document type.

        Args:
            img: Image passed to the OCR engine and, on retry, to ``cv2.rotate``.
            image_type: Document type, convertible with ``int()``:
                0 -> ``ReportAnchor``, 1 -> ``RecordAnchor``,
                2 -> ``ScattedAnchor``.

        Returns:
            ``(angle, result)``. ``result`` is the OCR output of the image the
            angle was finally computed on: the original one, or the copy
            rotated 90 degrees clockwise when the first attempt failed.

        Raises:
            Exception: If ``image_type`` is not 0, 1 or 2, or if detection
                also fails on the rotated image.
        """
        # Labels only appear in the "anchor not found" error. Each one names
        # the document its anchor class actually matches (types 0 and 1 used
        # to carry each other's label).
        # NOTE(review): confirm callers number the document types this way.
        anchors = {
            0: (ReportAnchor, '0:教育部学籍在线验证报告'),
            1: (RecordAnchor, '1:教育部学历证书电子注册备案表'),
            2: (ScattedAnchor, '2:中国高等教育证书查询结果(零散查询)'),
        }
        image_type = int(image_type)
        if image_type not in anchors:
            raise Exception('未传入 image_type')
        anchor_cls, label = anchors[image_type]
        ocr_anchor = anchor_cls(label, [Direction.TOP])

        result = self.ocr.ocr(img, cls=True)
        try:
            angle = detect_angle(result, ocr_anchor)
            return angle, result
        except Exception as e:
            # Deliberate best-effort fallback: report the failure, then retry
            # on a copy rotated by a quarter turn.
            print(e)
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
            result = self.ocr.ocr(img, cls=True)
            angle = detect_angle(result, ocr_anchor)
            # Undo the extra clockwise quarter turn applied above.
            return (angle - 1 + 4) % 4, result
|