2 роки тому · 6571a67e62
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 
				 .DS_Store
			
 
				 .idea
			
 
				+.vscode
			
 
				 __pycache__/
			
 
				-*.json
			
 
				-*.md
			
 
				+#*.json
			
 
				+#*.md
			
--- a/HR_OCR/TestAllOcr/config.py
+++ b/HR_OCR/TestAllOcr/config.py
@@ -1,3 +1,11 @@
 
				+'''
			
 
				+Author: zeke-chin zeke-chin@icloud.com
			
 
				+Date: 2022-09-26 14:58:10
			
 
				+LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				+LastEditTime: 2022-09-30 09:59:43
			
 
				+FilePath: /to_md/HR_OCR/TestAllOcr/config.py
			
 
				+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				+'''
			
 
				 import base64
			
 
				 import requests
			
 
				 
			
--- a/HR_OCR/TestAllOcr/test_interface.py
+++ b/HR_OCR/TestAllOcr/test_interface.py
@@ -1,3 +1,11 @@
 
				+'''
			
 
				+Author: zeke-chin zeke-chin@icloud.com
			
 
				+Date: 2022-09-28 20:28:41
			
 
				+LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				+LastEditTime: 2022-09-30 10:06:35
			
 
				+FilePath: /to_md/HR_OCR/TestAllOcr/test_interface.py
			
 
				+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				+'''
			
 
				 from pathlib import Path
			
 
				 import unittest
			
 
				 import config
			
@@ -6,7 +14,7 @@ from config import send_request
 
				 image_path = 'image'
			
 
				 
			
 
				 # DX_test, DX_sb, DX_test, DX_sb
			
 
				-envl = 'DX_test'
			
 
				+envl = 'TX_sb'
			
 
				 url = config.URL[envl]
			
 
				 token = config.TOKEN[envl]
			
 
				 
			
--- a/HR_OCR/test_script/to_md/README.md
+++ b/HR_OCR/test_script/to_md/README.md
@@ -0,0 +1,46 @@
 
				+# 人力OCR
			
 
				+
			
 
				+## 生成markdown测试报告脚本
			
 
				+
			
 
				+1. 生成**目的文件夹**下图片的**json文件**
			
 
				+
			
 
				+- **目的文件夹**: 
			
 
				+  - 存放所需测试**图片文件夹**
			
 
				+  - 对**文件夹**内图片进行标准化
			
 
				+    - 运行`suffix.py`脚本
			
 
				+    - 产生*.jpg
			
 
				+  - 生成<u>算法推理json文件</u>
			
 
				+    - 运行`convert_json.py`脚本
			
 
				+    - 产生对应jpg 文件的json文件
			
 
				+  - 修改<u>算法推理json文件</u>成**正确的json文件**
			
 
				+
			
 
				+2. 跑生成md报告脚本
			
 
				+
			
 
				+- 修改use.py 并运行
			
 
				+
			
 
				+  ```python
			
 
				+  # config
			
 
				+  
			
 
				+  # 目的文件夹
			
 
				+  image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
			
 
				+  # 图片type(如果接口不存在传0不影响结果)
			
 
				+  image_type = 0
			
 
				+  # 是否旋转
			
 
				+  image_rotate = False
			
 
				+  
			
 
				+  # ocr地址选择
			
 
				+  # 本地环境、腾讯云测试环境、腾讯云生产环境、电信云测试环境、电信云生产环境
			
 
				+  ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				+  # ocr能力选择
			
 
				+  ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				+  # 生成MD文件名
			
 
				+  md_name = 'RegBook'
			
 
				+  # ocr能力对应字段（ocr_config.py 中 Filed 的键）
			
 
				+  filed = 'regbook'
			
 
				+  
			
 
				+  # 若md_path为None 则默认使用图片父路径为markdown保存路径
			
 
				+  # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
 
				+  md_path = None or image_path.parent
			
 
				+  ```
			
 
				+
			
 
				+  
			
--- a/HR_OCR/test_script/to_md/example/img/1_img.jpg
+++ b/HR_OCR/test_script/to_md/example/img/1_img.jpg
--- a/HR_OCR/test_script/to_md/example/img/1_img.json
+++ b/HR_OCR/test_script/to_md/example/img/1_img.json
@@ -0,0 +1,9 @@
 
				+{
			
 
				+    "orientation": 0,
			
 
				+    "name": "鉴康",
			
 
				+    "id": "152801200003178527",
			
 
				+    "language": "英语",
			
 
				+    "level": "CET4",
			
 
				+    "exam_time": "2021年6月",
			
 
				+    "score": "451"
			
 
				+}
			
--- a/HR_OCR/test_script/to_md/example/img/2_img.jpg
+++ b/HR_OCR/test_script/to_md/example/img/2_img.jpg
--- a/HR_OCR/test_script/to_md/example/img/2_img.json
+++ b/HR_OCR/test_script/to_md/example/img/2_img.json
@@ -0,0 +1,9 @@
 
				+{
			
 
				+    "orientation": 0,
			
 
				+    "name": "张鑫",
			
 
				+    "id": "140227199809282317",
			
 
				+    "language": "英语",
			
 
				+    "level": "CET4",
			
 
				+    "exam_time": "2021年6月",
			
 
				+    "score": "445"
			
 
				+}
			
--- a/HR_OCR/test_script/to_md/new.py
+++ b/HR_OCR/test_script/to_md/new.py
@@ -0,0 +1,270 @@
 
				+from pathlib import Path
			
 
				+from typing import List, Optional
			
 
				+import cv2
			
 
				+import requests
			
 
				+from mdutils.mdutils import MdUtils
			
 
				+from dataclasses import dataclass
			
 
				+import json
			
 
				+import time
			
 
				+import base64
			
 
				+from itertools import chain
			
 
				+from tqdm import tqdm
			
 
				+from ocr_config import OCR_CONFIGS, Filed
			
 
				+
			
 
				+
			
 
				+class Image:
			
 
				+    def __init__(self, path: Path, rotate, is_rotate):
			
 
				+        self._path = path
			
 
				+        self.rotate = rotate
			
 
				+        self._ocr_result = None
			
 
				+        self.category = True
			
 
				+        self.is_rotate = is_rotate
			
 
				+        try:
			
 
				+            self.gt_result = self.get_json()
			
 
				+        except Exception as e:
			
 
				+            print(self.json_path)
			
 
				+            raise e
			
 
				+
			
 
				+    def __repr__(self):
			
 
				+        return f'path: {self.path}, rotate: {self.rotate}, gt_result: {self.gt_result}, cate: {self.category}'
			
 
				+
			
 
				+    # 将方法转换为相同名称的只读属性
			
 
				+    @property
			
 
				+    def path(self):
			
 
				+        return self._path
			
 
				+
			
 
				+    @path.setter
			
 
				+    def path(self, path):
			
 
				+        self._path = path
			
 
				+
			
 
				+    @property
			
 
				+    def fn(self):
			
 
				+        return self._path.stem
			
 
				+
			
 
				+    @property
			
 
				+    def ocr_result(self):
			
 
				+        return self._ocr_result
			
 
				+
			
 
				+    @ocr_result.setter
			
 
				+    def ocr_result(self, value):
			
 
				+        self._ocr_result = value
			
 
				+
			
 
				+    def get_gt_result(self, key):# sourcery skip: merge-duplicate-blocks, remove-redundant-if
			
 
				+        if key == 'orientation':
			
 
				+            if self.is_rotate:
			
 
				+                return self.rotate + 1 if self.rotate is not None else 0
			
 
				+            else:
			
 
				+                return self.gt_result[key]
			
 
				+        elif key in self.gt_result:
			
 
				+            return self.gt_result[key]
			
 
				+        else:
			
 
				+            return None
			
 
				+
			
 
				+    @property
			
 
				+    def json_path(self):
			
 
				+        return self.path.parent / f'{self.path.stem}.json'
			
 
				+
			
 
				+    def save_image(self, img, rotate):
			
 
				+        dst = self.path.parent.parent / (".ro_dst")
			
 
				+        if not dst.exists(): dst.mkdir()
			
 
				+        self.path = dst / f'{self.path.stem}-{rotate + 1}.jpg'
			
 
				+        # print('save image', self.path)
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
			
 
				+        cv2.imwrite(str(self.path), img)
			
 
				+        return self.path
			
 
				+
			
 
				+    def get_base64(self, rotate=None):
			
 
				+        # print(self.path)
			
 
				+        img = cv2.imread(str(self.path))
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
			
 
				+        path = self.path
			
 
				+        if rotate is not None:
			
 
				+            img = cv2.rotate(img, rotate)
			
 
				+            path = self.save_image(img, rotate)
			
 
				+            # imencode 将图片编码到缓存，并保存到本地
			
 
				+        with open(path, 'rb') as f:
			
 
				+            return base64.encodebytes(f.read()).decode('utf-8')
			
 
				+
			
 
				+    def get_json(self):
			
 
				+        with open(self.json_path, 'r') as f:
			
 
				+            return json.load(f)
			
 
				+
			
 
				+
			
 
				+def send_request(image: Image, ocr_name, ocr_address, image_type=None):
			
 
				+    base64_str = image.get_base64(image.rotate)
			
 
				+    config = OCR_CONFIGS[ocr_name][ocr_address]
			
 
				+    headers = {
			
 
				+        'Content-Type': 'application/json',
			
 
				+        'Authorization': config.token
			
 
				+    }
			
 
				+    data = {
			
 
				+        'image': base64_str,
			
 
				+    }
			
 
				+    if image_type is not None:
			
 
				+        data['image_type'] = image_type
			
 
				+    response = requests.post(config.url, headers=headers, json=data)
			
 
				+    return response.json()
			
 
				+
			
 
				+
			
 
				+def parser_path(path: Path, rotate: bool):
			
 
				+    name = time.strftime("%m-%d_", time.localtime()) + path.name
			
 
				+    if rotate:
			
 
				+        name = f'{name}_R.md'
			
 
				+    return path.parent / name
			
 
				+
			
 
				+
			
 
				+class Dataset(object):
			
 
				+    def __init__(self, images_path, image_type, ocr_name, ocr_address, field, rotate=False):
			
 
				+        self.image_type = image_type
			
 
				+        self.ocr_name = ocr_name
			
 
				+        self.ocr_address = ocr_address
			
 
				+        self.images_path = images_path
			
 
				+        self.image_list = []
			
 
				+        # chain 迭代器，首先返回第一个可迭代对象中所有元素，接着返回下一个可迭代对象中所有元素，直到耗尽所有可迭代对象中的元素
			
 
				+        # eg：chain('ABC', 'DEF') --> A B C D E F
			
 
				+
			
 
				+        for p in chain(*[Path(self.images_path).rglob('*.jpg')]):
			
 
				+            if rotate:
			
 
				+                self.image_list.extend(Image(p, r, rotate) for r in [None, 0, 1, 2])
			
 
				+            else:
			
 
				+                self.image_list.append(Image(p, None, rotate))
			
 
				+
			
 
				+        self.field = Filed.get(field)
			
 
				+
			
 
				+        self.correct = {k: 0 for k in self.field}
			
 
				+        self.error = {k: 0 for k in self.field}
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        return len(self.image_list)
			
 
				+
			
 
				+    def _evaluate_one(self, image: Image):
			
 
				+        def _get_predict(r, key):
			
 
				+            # isinstance() 函数来判断一个对象是否是一个已知的类型
			
 
				+            if isinstance(r[key], dict):
			
 
				+                return r[key]['text']
			
 
				+            else:
			
 
				+                return r[key]
			
 
				+
			
 
				+        if image.rotate is not None: image.gt_result['orientation'] = image.rotate + 1
			
 
				+        r = send_request(image, self.ocr_name, self.ocr_address, self.image_type)
			
 
				+        err_str = ''
			
 
				+        if r['status'] == '000':
			
 
				+            res = r['result']
			
 
				+            for key in self.field:
			
 
				+                # print('attr: ', key)
			
 
				+                if key in res:
			
 
				+                    gt = image.get_gt_result(key)
			
 
				+                    predict = _get_predict(res, key)
			
 
				+                    # print(f'gt: {gt}, predict: {predict}')
			
 
				+                    if predict == gt:
			
 
				+                        self.correct[key] += 1
			
 
				+                    else:
			
 
				+                        image.category = False
			
 
				+                        self.error[key] += 1
			
 
				+                        err_str += f'-------{key}-------<br>正确:{gt}<br>返回:{predict}<br>'
			
 
				+            if image.category:
			
 
				+                image.ocr_result = image.gt_result
			
 
				+            else:
			
 
				+                image.ocr_result = err_str
			
 
				+        else:
			
 
				+            image.ocr_result = r['msg']
			
 
				+            image.category = False
			
 
				+            for key in self.field:
			
 
				+                self.error[key] += 1
			
 
				+
			
 
				+    def __call__(self):  # sourcery skip: yield-from
			
 
				+        # yield 返回一个生成器
			
 
				+        for image in self.image_list:
			
 
				+            yield image
			
 
				+
			
 
				+    # 比较
			
 
				+    def evaluate(self):
			
 
				+        for image in tqdm(self.image_list):
			
 
				+            self._evaluate_one(image)
			
 
				+
			
 
				+    # 计算总体准确度
			
 
				+    @property
			
 
				+    def accuracy(self):
			
 
				+        return sum(list(self.correct.values())) / sum(list(self.correct.values()) + list(self.error.values()))
			
 
				+
			
 
				+    # 计算元素准确度
			
 
				+    @property
			
 
				+    def attrs_accuracy(self):
			
 
				+        return {k: self.correct[k] / (self.correct[k] + self.error[k]) for k in self.field}
			
 
				+
			
 
				+
			
 
				+class MD(object):
			
 
				+    def __init__(self, file_path: Path):
			
 
				+        self.name = file_path.name
			
 
				+        self.f = MdUtils(file_name=str(file_path))
			
 
				+        self.field_table: List = ['字段', '正确率']
			
 
				+        self.true_table: List = ['图片', '识别结果']
			
 
				+        self.false_table: List = ['图片', '识别结果']
			
 
				+        self.write_header(f'{self.name}测试报告')
			
 
				+
			
 
				+    def write_header(self, title, level=1):
			
 
				+        self.f.new_header(level=level, title=title)
			
 
				+
			
 
				+    def write_total_accuracy(self, ds: Dataset):
			
 
				+        def get_format_total_accuracy(ds: Dataset):
			
 
				+            acc = ds.accuracy * 100
			
 
				+            return "{:.2f}%".format(acc)
			
 
				+
			
 
				+        # 1. 拿到format之后的百分数
			
 
				+        res = get_format_total_accuracy(ds)
			
 
				+
			
 
				+        # 2. 写入
			
 
				+        self.f.new_paragraph(res)
			
 
				+
			
 
				+    def write_table_accuracy(self, ds: Dataset, columns=2, text_align='center'):
			
 
				+        def format_table_accuracy(ds: Dataset):
			
 
				+            table = ds.attrs_accuracy
			
 
				+            for k, v in table.items():
			
 
				+                acc = v * 100
			
 
				+                table[k] = "{:.2f}%".format(acc)
			
 
				+            return table
			
 
				+
			
 
				+        def dict_2_list(dic: dict):
			
 
				+            l = []
			
 
				+            for k, v in dic.items():
			
 
				+                l.extend((k, v))
			
 
				+            return l
			
 
				+
			
 
				+        table_dict = format_table_accuracy(ds)
			
 
				+        table_list = dict_2_list(table_dict)
			
 
				+        self.field_table.extend(table_list)
			
 
				+
			
 
				+        rows = len(self.field_table) // columns
			
 
				+        self.f.new_table(columns=columns, rows=rows, text=self.field_table, text_align=text_align)
			
 
				+
			
 
				+    def write_table_result(self, ds: Dataset, columns=2, text_align='center'):
			
 
				+        for image in ds.image_list:
			
 
				+            md_image = self.f.new_inline_image(text='', path=f'{image.path.parent.name}/{image.path.name}')
			
 
				+            if image.category:
			
 
				+                self.true_table.extend([md_image, image.ocr_result])
			
 
				+            else:
			
 
				+                self.false_table.extend([md_image, image.ocr_result])
			
 
				+
			
 
				+        true_rows = len(self.true_table) // columns
			
 
				+        false_rows = len(self.false_table) // columns
			
 
				+        self.write_header('True')
			
 
				+        self.f.new_table(columns=columns, rows=true_rows, text=self.true_table, text_align=text_align)
			
 
				+        self.write_header('False')
			
 
				+        self.f.new_table(columns=columns, rows=false_rows, text=self.false_table, text_align='left')
			
 
				+
			
 
				+# if __name__ == '__main__':
			
 
				+#     markdown = MD('英语等级证书')
			
 
				+#
			
 
				+#     dataset = Dataset(Path(''), 'cet', 'local', False)
			
 
				+#     print(len(dataset))
			
 
				+#     for d in dataset():
			
 
				+#         print(d)
			
 
				+#
			
 
				+#     dataset.evaluate()
			
 
				+#     print(dataset.accuracy)
			
 
				+#
			
 
				+#     markdown.write_total_accuracy(dataset)
			
 
				+#     markdown.write_table_accuracy(dataset)
			
 
				+#     markdown.write_table_result(dataset)
			
 
				+#
			
 
				+#     markdown.f.create_md_file()
			
--- a/HR_OCR/test_script/to_md/ocr_config.py
+++ b/HR_OCR/test_script/to_md/ocr_config.py
@@ -0,0 +1,109 @@
 
				+from dataclasses import dataclass
			
 
				+from typing import List
			
 
				+
			
 
				+
			
 
@dataclass
class Type:
    """Image type id paired with a list of field names."""
    # Numeric image type.
    image_type: int
    # Field names associated with that image type.
    image_field: List
			
 
				+
			
 
				+
			
 
@dataclass
class RequestConfig:
    """Endpoint URL and access token for one OCR service in one environment."""
    # Full URL the image is POSTed to.
    url: str
    # Token for that endpoint; empty for the local endpoints defined in this file.
    token: str
			
 
				+
			
 
				+
			
 
@dataclass
class Configs:
    """Bundle of a request configuration and an image type description."""
    # Where and with which token to send requests.
    request: RequestConfig
    # Image type / field description.
    type: Type
			
 
				+
			
 
				+
			
 
# Endpoint tables: one RequestConfig per OCR capability and environment.
# Environment keys used below: 'local', 'TXtest', 'TXsb', 'DXtest', 'DXsb'.
# NOTE(review): access tokens are committed in plain text here — consider
# loading them from the environment or an untracked file.
# NOTE(review): the tokens carry no 'Bearer ' prefix, while convert_json.py
# sends 'Bearer <token>' for the same token value — confirm which form the
# gateway expects.

# cet
cet_local_config = RequestConfig(url='http://192.168.199.27:18050/ocr_system/cet', token='')
# NOTE(review): this URL contains a double slash ('rlocrxm//cettest') — confirm it is intended.
cet_TXtest_config = RequestConfig(
    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm//cettest/cet',
    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
cet_TXsb_config = RequestConfig(
    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/cet/cet',
    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
cet_DXtest_config = RequestConfig(
    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cettest/cet',
    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
cet_DXsb_config = RequestConfig(
    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cet/cet',
    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')

CET_CONFIGS = {
    'local': cet_local_config,
    'TXtest': cet_TXtest_config,
    'TXsb': cet_TXsb_config,
    'DXtest': cet_DXtest_config,
    'DXsb': cet_DXsb_config
}

# regbook
regbook_local_config = RequestConfig(url='http://192.168.199.27:18040/ocr_system/regbook', token='')
# NOTE(review): this URL contains a double slash ('rlocrxm//hkbsbtest') — confirm it is intended.
regbook_TXtest_config = RequestConfig(
    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm//hkbsbtest/regbook',
    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
regbook_TXsb_config = RequestConfig(
    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsb/regbook',
    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
regbook_DXtest_config = RequestConfig(
    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsbtest/regbook',
    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
regbook_DXsb_config = RequestConfig(
    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsb/regbook',
    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')

REGBOOK_CONFIGS = {
    'local': regbook_local_config,
    'TXtest': regbook_TXtest_config,
    'TXsb': regbook_TXsb_config,
    'DXtest': regbook_DXtest_config,
    'DXsb': regbook_DXsb_config
}

# business_license
blfe_local_config = RequestConfig(url='http://192.168.199.27:18060/ocr_system/business_license', token='')
blfe_TXtest_config = RequestConfig(
    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfetest/blfe',
    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
blfe_TXsb_config = RequestConfig(
    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfe/blfe',
    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
blfe_DXtest_config = RequestConfig(
    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfetest/blfe',
    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
blfe_DXsb_config = RequestConfig(
    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfe/blfe',
    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')

BLFE_CONFIGS = {
    'local': blfe_local_config,
    'TXtest': blfe_TXtest_config,
    'TXsb': blfe_TXsb_config,
    'DXtest': blfe_DXtest_config,
    'DXsb': blfe_DXsb_config
}

# Capability name -> environment name -> RequestConfig.
OCR_CONFIGS = {
    'cet': CET_CONFIGS,
    'regbook': REGBOOK_CONFIGS,
    'business_license': BLFE_CONFIGS
}

# Fields: the response keys that are scored for each capability.
cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
regbook_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
                 'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
                 'native_place_region', 'blood_type', 'religion']
business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
                    'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
# Field-set name -> list of field names.
# NOTE(review): 'Filed' looks like a misspelling of 'Field'; it is imported
# under this name elsewhere, so it cannot be renamed here alone.
Filed = {
    'cet': cet_field,
    'regbook': regbook_field,
    'business_license': business_license
}
			
--- a/HR_OCR/test_script/to_md/use.py
+++ b/HR_OCR/test_script/to_md/use.py
@@ -0,0 +1,49 @@
 
				+'''
			
 
				+Author: zeke-chin zeke-chin@icloud.com
			
 
				+Date: 2022-09-28 20:28:41
			
 
				+LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				+LastEditTime: 2022-09-30 15:08:48
			
 
				+FilePath: /to_md/HR_OCR/to_md/use.py
			
 
				+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				+'''
			
 
				+
			
 
				+from pathlib import Path
			
 
				+
			
 
				+from new import MD, Image, Dataset, parser_path
			
 
				+
			
 
# config
# Image directory: searched recursively for *.jpg files; every image needs a
# ground-truth JSON file with the same stem next to it.
image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
# Sent to the OCR service as 'image_type' with every request.
image_type = 0
# Whether to also test rotated copies of every image
image_rotate = False
# Environment key into the endpoint tables of ocr_config.py.
ocr_address = 'DXtest'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'

# NOTE(review): the comment below lists 'idcard', 'bankcard' and 'schoolcert',
# but OCR_CONFIGS only defines 'cet', 'regbook' and 'business_license'.
ocr_name = 'cet'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
# Prefix of the generated report's file name.
md_name = 'CET'
# Name of the field set to score (a key of ocr_config.Filed).
filed = 'cet'

# If md_path is None, the parent of the image directory is used as the markdown output directory
# md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
md_path = None or image_path.parent

# Report path: <md_path>/<MM-DD_><md_name><image dir name>, plus '_R.md' for rotation runs.
md_file = parser_path(Path(md_path) / Path(md_name + image_path.stem), image_rotate)


if __name__ == '__main__':
    markdown = MD(md_file)

    dataset = Dataset(image_path, image_type, ocr_name, ocr_address, filed, image_rotate)
    # Show what was collected before any request is sent.
    print(len(dataset))
    for d in dataset():
        print(d)

    # Sends every image to the OCR service and fills the accuracy counters.
    dataset.evaluate()
    print(dataset.accuracy)

    markdown.write_total_accuracy(dataset)
    markdown.write_table_accuracy(dataset)
    markdown.write_table_result(dataset)

    print(md_file)
    markdown.f.create_md_file()
			
--- a/HR_OCR/test_script/tools/README.md
+++ b/HR_OCR/test_script/tools/README.md
@@ -0,0 +1,34 @@
 
				+# 人力OCR
			
 
				+
			
 
				+## convert_json.py
			
 
				+
			
 
				+```python
			
 
				+# 项目url
			
 
				+url = 'http://192.168.199.27:18040'
			
 
				+# 目标文件夹
			
 
				+imgs_path = './HR_OCR/to_md/example/img'
			
 
				+
			
 
				+def send_request(img_path, image_type = 0):
			
 
				+    with open(img_path, 'rb') as f:
			
 
				+        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				+        data = {
			
 
				+            'image': img_str,
			
 
				+            'image_type': image_type
			
 
				+        }
			
 
				+        idc_header = {
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				+        }
			
 
				+        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
			
 
				+        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				+        print(r.json())
			
 
				+        return r.json()
			
 
				+```
			
 
				+
			
 
				+## suffix.py
			
 
				+
			
 
				+```python
			
 
				+# 需要格式化的目的文件夹路径
			
 
				+target_path = './HR_OCR/to_md/example/img'
			
 
				+```
			
 
				+
			
--- a/HR_OCR/test_script/tools/convert_json.py
+++ b/HR_OCR/test_script/tools/convert_json.py
@@ -0,0 +1,53 @@
 
				+from pathlib import Path
			
 
				+
			
 
				+import requests
			
 
				+import json
			
 
				+import base64
			
 
				+from itertools import chain
			
 
				+
			
 
# Base URL of the OCR gateway; send_request() appends the service path to it.
url = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
# Directory searched recursively for *.jpg files to send.
imgs_path = './HR_OCR/to_md/example/img'
			
 
				+
			
 
				+def send_request(img_path, image_type = 0):
			
 
				+    with open(img_path, 'rb') as f:
			
 
				+        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				+        data = {
			
 
				+            'image': img_str,
			
 
				+            'image_type': image_type
			
 
				+        }
			
 
				+        idc_header = {
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				+        }
			
 
				+        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
			
 
				+        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				+        print(r.json())
			
 
				+        return r.json()
			
 
				+
			
 
				+
			
 
				+def _parse_result(r):
			
 
				+    if r['status'] == '000':
			
 
				+        r = r['result']
			
 
				+        if r:
			
 
				+            del r['confidence']
			
 
				+        return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
			
 
				+    elif r['status'] == '101':
			
 
				+        return "101"
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+
			
 
				+    # 0
			
 
				+    # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
			
 
				+    img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				+    for img_path in img_paths:
			
 
				+        print(img_path)
			
 
				+        r = send_request(img_path)
			
 
				+        res = _parse_result(r)
			
 
				+        print(res)
			
 
				+        img_path: Path = img_path
			
 
				+        d = img_path.parent
			
 
				+        fn = f'{img_path.stem}.json'
			
 
				+
			
 
				+        with (d / fn).open('w', encoding='utf-8') as f:
			
 
				+            json.dump(res, f, ensure_ascii=False, indent=4)
			
--- a/HR_OCR/test_script/tools/suffix.py
+++ b/HR_OCR/test_script/tools/suffix.py
@@ -0,0 +1,28 @@
 
				+import os
			
 
				+from pathlib import Path
			
 
				+from itertools import chain
			
 
				+import sys
			
 
				+
			
 
# conf
# Directory scanned recursively for the image files to rename.
target_path = './HR_OCR/to_md/example/img'
#suffix = sys.argv[2]
# Extension given to the renamed files: 'jpg' unless exactly two command-line
# arguments are passed, in which case the second one is used.
# NOTE(review): sys.argv[1] is never read in the visible code — confirm intent.
suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
			
 
				+
			
 
				+def get_range(n):
			
 
				+    len_n = len(str(n))
			
 
				+    for num in range(1, n + 1):
			
 
				+        output = str(num)
			
 
				+        while len(output) < len_n:
			
 
				+            output = f'0{output}'
			
 
				+        yield output
			
 
				+
			
 
				+
			
 
				+file_paths = list(chain(*[Path(target_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']]))
			
 
				+print(len(file_paths))
			
 
				+num = len(file_paths)
			
 
				+file_name_list = list(get_range(num))
			
 
				+for i in range(num):
			
 
				+    file = file_paths[i]
			
 
				+    print(file)
			
 
				+    new = file.parent / f'{file_name_list[i]}_img.{suffix}'
			
 
				+    file.rename(new)
			
--- a/HR_OCR/to_md/README.md
+++ b/HR_OCR/to_md/README.md
@@ -0,0 +1,46 @@
 
				+# 人力OCR
			
 
				+
			
 
				+## 生成markdown测试报告脚本
			
 
				+
			
 
				+1. 生成**目的文件夹**下图片的**json文件**
			
 
				+
			
 
				+- **目的文件夹**: 
			
 
				+  - 存放所需测试**图片文件夹**
			
 
				+  - 对**文件夹**内图片进行标准化
			
 
				+    - 运行`suffix.py`脚本
			
 
				+    - 产生*.jpg
			
 
				+  - 生成<u>算法推理json文件</u>
			
 
				+    - 运行`convert_json.py`脚本
			
 
				+    - 产生对应jpg 文件的json文件
			
 
				+  - 修改<u>算法推理json文件</u>成**正确的json文件**
			
 
				+
			
 
				+2. 跑生成md报告脚本
			
 
				+
			
 
				+- 修改use.py 并运行
			
 
				+
			
 
				+  ```python
			
 
				+  # config
			
 
				+  
			
 
				+  # 目的文件夹
			
 
				+  image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
			
 
				+  # 图片type(如果接口不存在传0不影响结果)
			
 
				+  image_type = 0
			
 
				+  # 是否旋转
			
 
				+  image_rotate = False
			
 
				+  
			
 
				+  # ocr地址选择
			
 
				+  # 本地环境、腾讯云测试环境、腾讯云生产环境、电信云测试环境、电信云生产环境
			
 
				+  ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				+  # ocr能力选择
			
 
				+  ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				+  # 生成MD文件名
			
 
				+  md_name = 'RegBook'
			
 
				+  # ocr能力对应字段（ocr_config.py 中 Filed 的键）
			
 
				+  filed = 'regbook'
			
 
				+  
			
 
				+  # 若md_path为None 则默认使用图片父路径为markdown保存路径
			
 
				+  # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
 
				+  md_path = None or image_path.parent
			
 
				+  ```
			
 
				+
			
 
				+  
			
--- a/HR_OCR/to_md/example/img/1_img.jpg
+++ b/HR_OCR/to_md/example/img/1_img.jpg
--- a/HR_OCR/to_md/example/img/1_img.json
+++ b/HR_OCR/to_md/example/img/1_img.json
@@ -0,0 +1,9 @@
 
				+{
			
 
				+    "orientation": 0,
			
 
				+    "name": "鉴康",
			
 
				+    "id": "152801200003178527",
			
 
				+    "language": "英语",
			
 
				+    "level": "CET4",
			
 
				+    "exam_time": "2021年6月",
			
 
				+    "score": "451"
			
 
				+}
			
--- a/HR_OCR/to_md/example/img/2_img.jpg
+++ b/HR_OCR/to_md/example/img/2_img.jpg
--- a/HR_OCR/to_md/example/img/2_img.json
+++ b/HR_OCR/to_md/example/img/2_img.json
@@ -0,0 +1,9 @@
 
				+{
			
 
				+    "orientation": 0,
			
 
				+    "name": "张鑫",
			
 
				+    "id": "140227199809282317",
			
 
				+    "language": "英语",
			
 
				+    "level": "CET4",
			
 
				+    "exam_time": "2021年6月",
			
 
				+    "score": "445"
			
 
				+}
			
--- a/HR_OCR/to_md/ocr_config.py
+++ b/HR_OCR/to_md/ocr_config.py
@@ -1,4 +1,3 @@
 
				-
			
 
				 from dataclasses import dataclass
			
 
				 from typing import List
			
 
				 
			
@@ -14,6 +13,7 @@ class RequestConfig:
 
				     url: str
			
 
				     token: str
			
 
				 
			
 
				+
			
 
				 @dataclass
			
 
				 class Configs:
			
 
				     request: RequestConfig
			
@@ -98,11 +98,12 @@ OCR_CONFIGS = {
 
				 # 字段
			
 
				 cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
			
 
				 regbook_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
			
 
				-                          'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
			
 
				-                          'native_place_region', 'blood_type', 'religion']
			
 
				-business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type', 'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
			
 
				+                 'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
			
 
				+                 'native_place_region', 'blood_type', 'religion']
			
 
				+business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
			
 
				+                    'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
			
 
				 Filed = {
			
 
				     'cet': cet_field,
			
 
				     'regbook': regbook_field,
			
 
				     'business_license': business_license
			
 
				-}
			
 
				+}
			
--- a/HR_OCR/to_md/use.py
+++ b/HR_OCR/to_md/use.py
@@ -1,3 +1,11 @@
 
				+'''
			
 
				+Author: zeke-chin zeke-chin@icloud.com
			
 
				+Date: 2022-09-28 20:28:41
			
 
				+LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				+LastEditTime: 2022-09-30 15:08:48
			
 
				+FilePath: /to_md/HR_OCR/to_md/use.py
			
 
				+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				+'''
			
 
				 
			
 
				 from pathlib import Path
			
 
				 
			
@@ -5,15 +13,17 @@ from new import MD, Image, Dataset, parser_path
 
				 
			
 
				 # config
			
 
				 # 图片路径
			
 
				+
			
 
				 image_path = Path('/Users/sxkj/Desktop/9.29/1/img')
			
 
				 image_type = 1
			
 
				+
			
 
				 # 是否旋转
			
 
				 image_rotate = False
			
 
				-ocr_address = 'local'  # 'local' 'TXtest' 'TXsb''DXtest' 'DXsb'
			
 
				+ocr_address = 'DXtest'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				 
			
 
				-ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert''business_license'
			
 
				-md_name = 'RegBook'
			
 
				-filed = 'regbook'
			
 
				+ocr_name = 'cet'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				+md_name = 'CET'
			
 
				+filed = 'cet'
			
 
				 
			
 
				 # 若md_path为None 则默认使用图片父路径为markdown保存路径
			
 
				 # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
--- a/HR_OCR/tools/README.md
+++ b/HR_OCR/tools/README.md
@@ -0,0 +1,34 @@
 
				+# 人力OCR
			
 
				+
			
 
				+## convert_json.py
			
 
				+
			
 
				+```python
			
 
				+# 项目url
			
 
				+url = 'http://192.168.199.27:18040'
			
 
				+# 目标文件夹
			
 
				+imgs_path = './HR_OCR/to_md/example/img'
			
 
				+
			
 
				+def send_request(img_path, image_type = 0):
			
 
				+    with open(img_path, 'rb') as f:
			
 
				+        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				+        data = {
			
 
				+            'image': img_str,
			
 
				+            'image_type': image_type
			
 
				+        }
			
 
				+        idc_header = {
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Authorization': 'Bearer <YOUR_API_TOKEN>'
			
 
				+        }
			
 
				+        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
			
 
				+        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				+        print(r.json())
			
 
				+        return r.json()
			
 
				+```
			
 
				+
			
 
				+## suffix.py
			
 
				+
			
 
				+```python
			
 
				+# 需要格式化的目的文件夹路径
			
 
				+target_path = './HR_OCR/to_md/example/img'
			
 
				+```
			
 
				+
			
--- a/HR_OCR/tools/convert_json.py
+++ b/HR_OCR/tools/convert_json.py
@@ -14,11 +14,12 @@ def send_request(img_path):
 
				         data = {
			
 
				             'image': img_str
			
 
				         }
			
 
				-        # idc_header = {
			
 
				-        #     'Content-Type': 'application/json',
			
 
				-        #     'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				-        # }
			
 
				-        r = requests.post(f'{url}/ocr_system/regbook', json=data)
			
 
				+        idc_header = {
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				+        }
			
 
				+        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
			
 
				+        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				         print(r.json())
			
 
				         return r.json()
			
 
				 
			
@@ -36,10 +37,8 @@ def _parse_result(r):
 
				 if __name__ == '__main__':
			
 
				 
			
 
				     # 0
			
 
				-    root = Path(__file__).parent
			
 
				-    print(root)
			
 
				     # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
			
 
				-    img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				+    img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				     for img_path in img_paths:
			
 
				         print(img_path)
			
 
				         r = send_request(img_path)
			
--- a/HR_OCR/tools/suffix.py
+++ b/HR_OCR/tools/suffix.py
@@ -4,7 +4,7 @@ from itertools import chain
 
				 import sys
			
 
				 
			
 
				 # conf
			
 
				-target_path = '/Users/sxkj/to_md/9.29/1'
			
 
				+target_path = './HR_OCR/to_md/example/img'
			
 
				 #suffix = sys.argv[2]
			
 
				 suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
			
 
				 
			
@@ -18,6 +18,7 @@ def get_range(n):
 
				 
			
 
				 
			
 
				 file_paths = list(chain(*[Path(target_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']]))
			
 
				+print(len(file_paths))
			
 
				 num = len(file_paths)
			
 
				 file_name_list = list(get_range(num))
			
 
				 for i in range(num):
			
--- a/内包—植选豆乳以团之名形象定制包装周艺轩版.json
+++ b/内包—植选豆乳以团之名形象定制包装周艺轩版.json
@@ -0,0 +1,25 @@
 
				+{
			
 
				+    "productCategory": "产品种类：调制豆乳",
			
 
				+    "ingredients": "配料：饮用水、大豆（非转基因）、白砂糖",
			
 
				+    "proStanCode": "产品标准代号：GB/T30885",
			
 
				+    "productionDate": "生产日期：见瓶盖",
			
 
				+    "shelfLife": "保质期：常温密闭条件下9个月",
			
 
				+    "storageConditions": "贮存条件：请保存于阴凉干燥处，避免阳光直晒、高温",
			
 
				+    "conSerHotline": "消费者服务热线：4008169999",
			
 
				+    "tips": "温馨提示：请勿带包装置于微波炉中加热。",
			
 
				+    "welcome": "欢迎访问：www.yili.com",
			
 
				+    "noKeyList": [
			
 
				+        "植选",
			
 
				+        "浓香豆乳畅饮系列",
			
 
				+        "大豆添加量：44g/瓶",
			
 
				+        "原味",
			
 
				+        "全程非转基因可追溯大豆",
			
 
				+        "3.0g/100mL",
			
 
				+        "优质植物蛋白",
			
 
				+        "保持环境清洁请勿乱抛空瓶",
			
 
				+        "为保证产品风味，开启后需冷藏并尽快饮用完毕。",
			
 
				+        "可能会有少量蛋白沉淀和脂肪上浮，属正常现象，请放心饮用。如发现涨瓶，请勿开启。",
			
 
				+        "净含量:315mL",
			
 
				+        "6907992515007"
			
 
				+    ]
			
 
				+}
			
--- a/内包—植选豆乳以团之名形象定制包装周艺轩版.md
+++ b/内包—植选豆乳以团之名形象定制包装周艺轩版.md
@@ -0,0 +1,33 @@
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 测试结果
			
 
				+
			
 
				+## 正确率：71.43%
			
 
				+
			
 
				+### 共21个字段,正确15个,错误6个
			
 
				+
			
 
				+|key值|正确答案|ocr返回结果|是否正确|
			
 
				+| :---: | :---: | :---: | :---: |
			
 
				+|productCategory|产品种类：调制豆乳|产品种类：调制豆乳|✅|
			
 
				+|ingredients|配料：饮用水、大豆（非转基因）、白砂糖|配料：饮用水、大豆(非转基因)白砂糖大豆添加量：44g/瓶|❌|
			
 
				+|proStanCode|产品标准代号：GB/T30885|产品标准代号：GB/T30885|✅|
			
 
				+|productionDate|生产日期：见瓶盖|生产日期：见瓶盖|✅|
			
 
				+|shelfLife|保质期：常温密闭条件下9个月|保质期：常温密闭条件下9个月|✅|
			
 
				+|storageConditions|贮存条件：请保存于阴凉干燥处，避免阳光直晒、高温|贮存条件：请保存于阴凉干燥处，避免阳光直晒、高温。|❌|
			
 
				+|conSerHotline|消费者服务热线：4008169999|消费者服务热线：4008169999|✅|
			
 
				+|tips|温馨提示：请勿带包装置于微波炉中加热。|温馨提示：请勿带包装置于微波炉中加热。|✅|
			
 
				+|welcome|欢迎访问：www.yili.com|欢迎访问：www.ili.com|❌|
			
 
				+|无key值|植选|植选|✅|
			
 
				+|无key值|浓香豆乳畅饮系列|浓香豆乳畅饮系列|✅|
			
 
				+|无key值|大豆添加量：44g/瓶|生产日期：见瓶盖|❌|
			
 
				+|无key值|原味|原味|✅|
			
 
				+|无key值|全程非转基因可追溯大豆|全程非转基因可追溯大豆|✅|
			
 
				+|无key值|3.0g/100mL|3.0g|❌|
			
 
				+|无key值|优质植物蛋白|优质植物蛋白|✅|
			
 
				+|无key值|保持环境清洁请勿乱抛空瓶|保持环境清洁请勿乱抛空瓶|✅|
			
 
				+|无key值|为保证产品风味，开启后需冷藏并尽快饮用完毕。|为保证产品风味，开启后需冷藏并尽快饮用完毕。|✅|
			
 
				+|无key值|可能会有少量蛋白沉淀和脂肪上浮，属正常现象，请放心饮用。如发现涨瓶，请勿开启。|可能会有少量蛋白沉淀和脂肪上浮属正常现象，请放心饮用。如发现胀瓶，请勿开启。|❌|
			
 
				+|无key值|净含量:315mL|净含量:315mL|✅|
			
 
				+|无key值|6907992515007|6907992515007|✅|
			
--- a/YQ_OCR/img/巧克力味牛奶饮品.json
+++ b/YQ_OCR/img/巧克力味牛奶饮品.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "productCategory": "产品种类：配制型含乳饮料",
			
 
				+    "ingredients": "配料：生牛乳、饮用水、白砂糖、可可粉、食品添加剂(微晶纤维素、单，双甘油脂肪酸酯、蔗糖脂肪酸酯、柠檬酸钠、结冷胶、安赛蜜、三氯蔗糖、食品用香精)",
			
 
				+    "proStanCode": "产品标准代号：GB/T21732",
			
 
				+    "productionDate": "生产日期：见盒顶部",
			
 
				+    "shelfLife": "保质期：常温密闭条件下6个月",
			
 
				+    "storageConditions": "贮存条件：未开启前，无需冷藏，开启之后，立即饮用。",
			
 
				+    "conSerHotline": "消费者服务热线：4008169999",
			
 
				+    "tips": "友情提示：喝前摇一摇",
			
 
				+    "welcome": "欢迎访问：www.yili.com",
			
 
				+    "noKeyList": [
			
 
				+        "牛奶饮品",
			
 
				+        "产品名称：巧克力味牛奶饮品",
			
 
				+        "生产日期：见箱体",
			
 
				+        "切勿带包装置于微波炉中加热",
			
 
				+        "清真",
			
 
				+        "保持环境清洁请勿乱抛空包",
			
 
				+        "伊利",
			
 
				+        "（具体生产商/产地见生产日期末端代码）",
			
 
				+        "净含量:250mL",
			
 
				+        "6907992500102"
			
 
				+    ]
			
 
				+}
			
--- a/YQ_OCR/img/巧克力味牛奶饮品.md
+++ b/YQ_OCR/img/巧克力味牛奶饮品.md
@@ -0,0 +1,30 @@
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 测试结果
			
 
				+
			
 
				+## 正确率：66.67%
			
 
				+
			
 
				+### 共18个字段,正确12个,错误6个
			
 
				+
			
 
				+|key值|正确答案|ocr返回结果|是否正确|
			
 
				+| :---: | :---: | :---: | :---: |
			
 
				+|productCategory|产品种类：配制型含乳饮料|产品种类：配制型含乳饮料|✅|
			
 
				+|ingredients|配料：生牛乳、饮用水、白砂糖、可可粉、食品添加剂(微晶纤维素、单，双甘油脂肪酸酯、蔗糖脂肪酸酯、柠檬酸钠、结冷胶、安赛蜜、三氯蔗糖、食品用香精)|配料：生牛乳、饮用水、白砂糖可可粉、食品添加剂(微晶纤维素、单，双甘油脂肪酸酯、蔗糖脂肪酸酯柠檬酸钠、结冷胶、安赛蜜、三氯蔗糖、食品用香精)|❌|
			
 
				+|proStanCode|产品标准代号：GB/T21732|产品标准代号：GB/T21732|✅|
			
 
				+|productionDate|生产日期：见盒顶部|生产日期：见盒顶部|✅|
			
 
				+|storageConditions|贮存条件：未开启前，无需冷藏，开启之后，立即饮用。|贮存条件：未开启前无需冷藏开启之后 立即饮用|❌|
			
 
				+|conSerHotline|消费者服务热线：4008169999|消费者服务热线：4008169999|✅|
			
 
				+|tips|友情提示：喝前摇一摇|友情提示：喝前摇一摇|✅|
			
 
				+|welcome|欢迎访问：www.yili.com|欢迎访问：www.yli.com|❌|
			
 
				+|无key值|牛奶饮品|牛奶饮品|✅|
			
 
				+|无key值|产品名称：巧克力味牛奶饮品|产品名称：巧克力味牛奶饮品|✅|
			
 
				+|无key值|生产日期：见箱体|生产日期：见盒顶部|❌|
			
 
				+|无key值|切勿带包装置于微波炉中加热|切勿带包装置于微波炉中加热|✅|
			
 
				+|无key值|清真|清真|✅|
			
 
				+|无key值|保持环境清洁请勿乱抛空包|保持环境清洁请勿乱抛空包|✅|
			
 
				+|无key值|伊利|伊利|✅|
			
 
				+|无key值|（具体生产商/产地见生产日期末端代码）|(具体生产商/产地见生产日期末端代码)|❌|
			
 
				+|无key值|净含量:250mL|净含量：250mL|❌|
			
 
				+|无key值|6907992500102|6907992500102|✅|
			
--- a/YQ_OCR/img/餐饮纯牛奶内包.json
+++ b/YQ_OCR/img/餐饮纯牛奶内包.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "productCategory": "产品种类：全脂灭菌纯牛乳",
			
 
				+    "ingredients": "配料：生牛乳",
			
 
				+    "proStanCode": "产品标准代号：GB25190",
			
 
				+    "productionDate": "生产日期：见盒顶部",
			
 
				+    "shelfLife": "保质期：常温密闭条件下6个月",
			
 
				+    "storageConditions": "贮存条件：未开启前无需冷藏开启之后请贮存于2-6℃并于2日内饮用完毕",
			
 
				+    "conSerHotline": "消费者服务热线：4008169999",
			
 
				+    "welcome": "欢迎访问：www.yili.com",
			
 
				+    "noKeyList": [
			
 
				+        "纯牛奶",
			
 
				+        "餐饮之选",
			
 
				+        "非脂乳固体≥8.5%",
			
 
				+        "保持环境清洁请勿乱抛空包",
			
 
				+        "切勿带包装置于微波炉中加热。",
			
 
				+        "净含量:1L",
			
 
				+        "6907992513621",
			
 
				+        "内蒙古伊利实业集团股份有限公司出品 地址：内蒙古自治区呼和浩特市金山开发区金山大街1号",
			
 
				+        "宁夏伊利乳业有限责任公司(A12) 产地及地址：宁夏吴忠市利通区金积工业园区 食品生产许可证编号：SC10564030200130",
			
 
				+        "阜新伊利乳品有限责任公司(B6) 产地及地址：辽宁省阜新市阜蒙县园区路2号 食品生产许可证编号：SC10521090000011",
			
 
				+        "定州伊利乳业有限责任公司(C1) 产地及地址：河北省定州市伊利工业园区 食品生产许可证编号：SC10613068200020"
			
 
				+    ]
			
 
				+}
			
--- a/YQ_OCR/img/餐饮纯牛奶内包.md
+++ b/YQ_OCR/img/餐饮纯牛奶内包.md
@@ -0,0 +1,31 @@
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 测试结果
			
 
				+
			
 
				+## 正确率：94.74%
			
 
				+
			
 
				+### 共19个字段,正确18个,错误1个
			
 
				+
			
 
				+|key值|正确答案|ocr返回结果|是否正确|
			
 
				+| :---: | :---: | :---: | :---: |
			
 
				+|productCategory|产品种类：全脂灭菌纯牛乳|产品种类：全脂灭菌纯牛乳|✅|
			
 
				+|ingredients|配料：生牛乳|配料：生牛乳|✅|
			
 
				+|proStanCode|产品标准代号：GB25190|产品标准代号：GB25190|✅|
			
 
				+|productionDate|生产日期：见盒顶部|生产日期：见盒顶部|✅|
			
 
				+|shelfLife|保质期：常温密闭条件下6个月|保质期：常温密闭条件下6个月|✅|
			
 
				+|storageConditions|贮存条件：未开启前无需冷藏开启之后请贮存于2-6℃并于2日内饮用完毕|贮存条件：未开启前无需冷藏开启之后请贮存于2-6℃并于2日内饮用完毕|✅|
			
 
				+|conSerHotline|消费者服务热线：4008169999|消费者服务热线：4008169999|✅|
			
 
				+|welcome|欢迎访问：www.yili.com|欢迎访问：www.yili.com|✅|
			
 
				+|无key值|纯牛奶|纯牛奶|✅|
			
 
				+|无key值|餐饮之选|餐饮之选|✅|
			
 
				+|无key值|非脂乳固体≥8.5%|非脂乳固体≥8.5%|✅|
			
 
				+|无key值|保持环境清洁请勿乱抛空包|保持环境清洁请勿乱抛空包|✅|
			
 
				+|无key值|切勿带包装置于微波炉中加热。|切勿带包装置于微波炉中加热|❌|
			
 
				+|无key值|净含量:1L|净含量:1L|✅|
			
 
				+|无key值|6907992513621|6907992513621|✅|
			
 
				+|无key值|内蒙古伊利实业集团股份有限公司出品 地址：内蒙古自治区呼和浩特市金山开发区金山大街1号|内蒙古伊利实业集团股份有限公司出品地址：内蒙古自治区呼和浩特市金山开发区金山大街1号|✅|
			
 
				+|无key值|宁夏伊利乳业有限责任公司(A12) 产地及地址：宁夏吴忠市利通区金积工业园区 食品生产许可证编号：SC10564030200130|宁夏伊利乳业有限责任公司(A12)产地及地址：宁夏吴忠市利通区金积工业园区食品生产许可证编号：SC10564030200130|✅|
			
 
				+|无key值|阜新伊利乳品有限责任公司(B6) 产地及地址：辽宁省阜新市阜蒙县园区路2号 食品生产许可证编号：SC10521090000011|阜新伊利乳品有限责任公司(B6)产地及地址：辽宁省阜新市阜蒙县园区路2号食品生产许可证编号：SC10521090000011|✅|
			
 
				+|无key值|定州伊利乳业有限责任公司(C1) 产地及地址：河北省定州市伊利工业园区 食品生产许可证编号：SC10613068200020|定州伊利乳业有限责任公司(C1)产地及地址：河北省定州市伊利工业园区食品生产许可证编号：SC10613068200020|✅|