# chenguilong / hr-ocr-cet

import base64
import json
import operator
import time
from dataclasses import dataclass
from itertools import chain
from pathlib import Path
from typing import List

import cv2
import numpy as np
import requests
from mdutils.mdutils import MdUtils


class MarkdownTable:
    """Accumulates the cells of three report tables next to an ``MdUtils`` file.

    Each table is kept as one flat list of cells that already starts with its
    two header cells:

    * ``field_table`` -- '字段' (field) / '正确率' (accuracy)
    * ``true_table``  -- '图片' (image) / '识别结果' (recognition result)
    * ``false_table`` -- '图片' (image) / '识别结果' (recognition result)
    """

    def __init__(self, name):
        """Create the markdown file object and the three header-only tables.

        Args:
            name: Report name; it is appended to the current local date
                formatted as ``MM-DD`` to form the markdown file name.
        """
        self.name = name
        # strftime() without an explicit time tuple formats the current local time.
        date_prefix = time.strftime("%m-%d")
        self.mdFile = MdUtils(file_name=date_prefix + name)
        self.field_table = ['字段', '正确率']
        self.true_table = ['图片', '识别结果']
        self.false_table = ['图片', '识别结果']

    def add_field_table(self, fields: List):
        """Append the cells in ``fields`` to the per-field accuracy table."""
        self.field_table += fields

    def add_true_table(self, image_and_field: List):
        """Append the cells in ``image_and_field`` to the table of correct results."""
        self.true_table += image_and_field

    def add_false_table(self, image_and_field: List):
        """Append the cells in ``image_and_field`` to the table of wrong results."""
        self.false_table += image_and_field


@dataclass
class Image:
    """An image file on disk paired with a rotation value."""

    # Location of the image file.
    path: Path
    # Rotation value stored with the image; nothing in this class reads it.
    rotate: int

    @property
    def fn(self):
        """File name of the image without its extension."""
        image_path = self.path
        return image_path.stem

    @property
    def json_path(self):
        """Path of the ``.json`` file that shares the image's directory and stem."""
        folder, stem = self.path.parent, self.path.stem
        return folder / f'{stem}.json'

    def get_base64(self, rotate=0):
        """Return the fixed string ``'dsf'``; ``rotate`` is accepted but unused.

        NOTE(review): looks like a stub for the real base64 encoding -- confirm.
        """
        placeholder = 'dsf'
        return placeholder


class DataSet(object):
    """Test images found under a directory plus per-field accuracy bookkeeping.

    Every ``*.jpg`` below the image directory (searched recursively) is one
    sample. Its expected values are read from a ``.json`` file that has the
    same stem and lives in the same directory as the image.
    """

    def __init__(self, image_path, rotate=False, del_field=None):
        """Scan ``image_path`` and initialise the counters.

        Args:
            image_path: Directory (``str`` or ``Path``) searched recursively
                for ``*.jpg`` files.
            rotate: When true, ``image_2_base64`` returns rotated copies and
                ``count`` is multiplied by four.
            del_field: Optional key dropped from both the response and the
                ground truth before they are compared.
        """
        self.image_path = image_path
        # The helper methods below read these attributes, so every instance
        # has to carry them.
        self.image_paths = Path(image_path)
        self.is_rotate = rotate
        self.del_field = del_field

        self.image_list = [Image(p, 0) for p in self.image_paths.rglob('*.jpg')]

        self.attrs = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']

        # Per-field counters, all starting at zero.
        # NOTE(review): presumably true positives -- confirm.
        self.tp = {k: 0 for k in self.attrs}

        # Every field starts at the sample total; revise_field_rate() takes
        # one off for each miss.
        total = self.count
        self.field_rate = {k: total for k in self.attrs}

    def _evaluate_one(self, image: "Image"):
        """Evaluate a single image (not implemented yet).

        Raises:
            NotImplementedError: Always. The method previously had no body,
                which made the whole module unparseable.
        """
        raise NotImplementedError("DataSet._evaluate_one is not implemented yet")

    @property
    def count(self):
        """Number of samples: one per ``*.jpg``, times four when rotating.

        NOTE(review): ``image_2_base64`` yields three rotated copies per image,
        not four -- confirm which factor is intended.
        """
        n_images = sum(1 for _ in Path(self.image_paths).rglob('*.jpg'))
        # Parenthesised on purpose: ``n * 4 if rotate else 1`` evaluates to 1
        # whenever rotation is off.
        return n_images * (4 if self.is_rotate else 1)

    @property
    def images(self):
        """Map image file name -> ``[image path, json path]``, sorted by file name.

        Both paths are strings. Files with the same name in different
        sub-directories share one key; the one visited last wins.
        """
        by_name = {}
        for jpg in Path(self.image_paths).rglob('*.jpg'):
            by_name[jpg.name] = [str(jpg), str(jpg.parent / f'{jpg.stem}.json')]
        return dict(sorted(by_name.items()))

    def revise_field_rate(self, field):
        """Take one off the counter of ``field`` in ``field_rate``."""
        self.field_rate[field] -= 1

    @property
    def field_rate_2_list(self):
        """Flatten ``field_rate`` into ``[field, 'NN.NN%', field, 'NN.NN%', ...]``.

        An empty data set reports ``'0.00%'`` for every field instead of
        dividing by zero.
        """
        if not self.field_rate:
            return []
        total = self.count  # walks the directory tree, so look it up only once
        table = []
        for field, hits in self.field_rate.items():
            rate = hits / total * 100 if total else 0.0
            table.extend((field, f"{rate:.2f}%"))
        return table

    @staticmethod
    def _encode_file(path):
        """Return the content of ``path`` as base64 text (line-wrapped by ``encodebytes``)."""
        return base64.encodebytes(path.read_bytes()).decode('utf-8')

    def image_2_base64(self, image):
        """Return the image as a list of base64 strings.

        Args:
            image: Key into ``self.images`` (the image file name).

        Returns:
            Without rotation, a one-element list with the encoded file.
            With rotation, three strings: the image rotated 90 degrees
            clockwise, 180 degrees and 90 degrees counter-clockwise, in that
            order. The rotated copies are written to a ``.ro_dire`` directory
            next to the image directory.

        Raises:
            ValueError: If OpenCV cannot read the image.
            OSError: If a rotated copy cannot be written.
        """
        image_path = Path(self.images[image][0])

        if not self.is_rotate:
            return [self._encode_file(image_path)]

        rotated_dir = Path(self.image_paths).parent / ".ro_dire"
        rotated_dir.mkdir(exist_ok=True)

        # imread() yields BGR, which is exactly what imwrite() expects, so no
        # colour conversion is needed in between.
        img = cv2.imread(str(image_path))
        if img is None:
            raise ValueError(f"cannot read image: {image_path}")

        base64_imgs = []
        # A tuple (not a set) keeps the order of the returned strings fixed.
        for rotate in (cv2.ROTATE_90_CLOCKWISE, cv2.ROTATE_180, cv2.ROTATE_90_COUNTERCLOCKWISE):
            rotated_path = rotated_dir / f"{image_path.stem}_{rotate + 1}.jpg"
            if not cv2.imwrite(str(rotated_path), cv2.rotate(img, rotate)):
                raise OSError(f"cannot write rotated image: {rotated_path}")
            # append(): extend() would add the string one character at a time.
            base64_imgs.append(self._encode_file(rotated_path))
        return base64_imgs

    def res_2_dict(self, r):
        """Reduce a response dict to ``{field: value}``.

        Args:
            r: Response with a ``'status'`` key, plus ``'result'`` when the
                status is ``'000'`` or ``'msg'`` when it is ``'101'``.

        Returns:
            * status ``'000'``: a new dict built from ``r['result']`` without
              ``'confidence'`` and without ``del_field``; values that are dicts
              are replaced by their ``'text'`` entry. An empty or ``None``
              result gives ``{}``. ``r`` itself is left untouched.
            * status ``'101'``: ``r['msg']``.
            * any other status: ``None``.
        """
        status = r['status']
        if status == '101':
            return r['msg']
        if status != '000':
            return None

        skipped = {'confidence'}
        if self.del_field is not None:
            skipped.add(self.del_field)

        result = r['result'] or {}
        return {
            k: v['text'] if isinstance(v, dict) else v
            for k, v in result.items()
            if k not in skipped
        }

    def json_2_dict(self, image):
        """Load the ground-truth JSON of ``image`` and drop ``del_field`` from it.

        Args:
            image: Key into ``self.images`` (the image file name).
        """
        json_path = Path(self.images[image][1])
        # Handing bytes to json.loads() lets it detect UTF-8/16/32 on its own,
        # independent of the platform's default text encoding.
        json_dict = json.loads(json_path.read_bytes())
        if self.del_field is not None:
            json_dict.pop(self.del_field, None)
        return json_dict

    def compare_dict(self, MT: "MarkdownTable", res_dict, json_dict, image_path):
        """Record one image in the "true" or "false" table of ``MT``.

        Args:
            MT: Report the row is added to.
            res_dict: Output of ``res_2_dict`` -- a dict of fields, or a
                message string when the request failed.
            json_dict: Ground truth from ``json_2_dict``.
            image_path: Path used for the inline image in the report.

        A dict equal to the ground truth goes to the true table. Otherwise
        every field of ``res_dict`` that differs from the ground truth is
        listed as ``正确`` (expected) / ``返回`` (returned) and costs that field
        one point in ``field_rate``. A message string goes to the false table
        as is. Any other ``res_dict`` records nothing.
        """
        image_mark = MT.mdFile.new_inline_image(text='', path=image_path)
        if res_dict == json_dict:
            MT.add_true_table([image_mark, json_dict])
        elif isinstance(res_dict, dict):
            mismatches = []
            for key, returned in res_dict.items():
                # Compare against the ground truth, not against res_dict itself.
                expected = json_dict.get(key)
                if returned != expected:
                    mismatches.append(f"正确:{expected}<br>返回:{returned}<br>")
                    self.revise_field_rate(key)
            MT.add_false_table([image_mark, "".join(mismatches)])
        elif isinstance(res_dict, str):
            MT.add_false_table([image_mark, res_dict])