2 rokov pred · 6f1a65c588
--- a/HR_OCR/test_script/to_md/example/img/1_img.jpg
+++ b/HR_OCR/test_script/to_md/example/img/1_img.jpg
--- a/HR_OCR/test_script/to_md/example/img/2_img.jpg
+++ b/HR_OCR/test_script/to_md/example/img/2_img.jpg
--- a/HR_OCR/test_script/to_md/new.py
+++ b/HR_OCR/test_script/to_md/new.py
@@ -0,0 +1,270 @@
 
				+from pathlib import Path
			
 
				+from typing import List, Optional
			
 
				+import cv2
			
 
				+import requests
			
 
				+from mdutils.mdutils import MdUtils
			
 
				+from dataclasses import dataclass
			
 
				+import json
			
 
				+import time
			
 
				+import base64
			
 
				+from itertools import chain
			
 
				+from tqdm import tqdm
			
 
				+from ocr_config import OCR_CONFIGS, Filed
			
 
				+
			
 
				+
			
 
				+class Image:
			
 
				+    def __init__(self, path: Path, rotate, is_rotate):
			
 
				+        self._path = path
			
 
				+        self.rotate = rotate
			
 
				+        self._ocr_result = None
			
 
				+        self.category = True
			
 
				+        self.is_rotate = is_rotate
			
 
				+        try:
			
 
				+            self.gt_result = self.get_json()
			
 
				+        except Exception as e:
			
 
				+            print(self.json_path)
			
 
				+            raise e
			
 
				+
			
 
				+    def __repr__(self):
			
 
				+        return f'path: {self.path}, rotate: {self.rotate}, gt_result: {self.gt_result}, cate: {self.category}'
			
 
				+
			
 
				+    # 将方法转换为相同名称的只读属性
			
 
				+    @property
			
 
				+    def path(self):
			
 
				+        return self._path
			
 
				+
			
 
				+    @path.setter
			
 
				+    def path(self, path):
			
 
				+        self._path = path
			
 
				+
			
 
				+    @property
			
 
				+    def fn(self):
			
 
				+        return self._path.stem
			
 
				+
			
 
				+    @property
			
 
				+    def ocr_result(self):
			
 
				+        return self._ocr_result
			
 
				+
			
 
				+    @ocr_result.setter
			
 
				+    def ocr_result(self, value):
			
 
				+        self._ocr_result = value
			
 
				+
			
 
				+    def get_gt_result(self, key):# sourcery skip: merge-duplicate-blocks, remove-redundant-if
			
 
				+        if key == 'orientation':
			
 
				+            if self.is_rotate:
			
 
				+                return self.rotate + 1 if self.rotate is not None else 0
			
 
				+            else:
			
 
				+                return self.gt_result[key]
			
 
				+        elif key in self.gt_result:
			
 
				+            return self.gt_result[key]
			
 
				+        else:
			
 
				+            return None
			
 
				+
			
 
				+    @property
			
 
				+    def json_path(self):
			
 
				+        return self.path.parent / f'{self.path.stem}.json'
			
 
				+
			
 
				+    def save_image(self, img, rotate):
			
 
				+        dst = self.path.parent.parent / (".ro_dst")
			
 
				+        if not dst.exists(): dst.mkdir()
			
 
				+        self.path = dst / f'{self.path.stem}-{rotate + 1}.jpg'
			
 
				+        # print('save image', self.path)
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
			
 
				+        cv2.imwrite(str(self.path), img)
			
 
				+        return self.path
			
 
				+
			
 
				+    def get_base64(self, rotate=None):
			
 
				+        # print(self.path)
			
 
				+        img = cv2.imread(str(self.path))
			
 
				+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
			
 
				+        path = self.path
			
 
				+        if rotate is not None:
			
 
				+            img = cv2.rotate(img, rotate)
			
 
				+            path = self.save_image(img, rotate)
			
 
				+            # imencode 将图片编码到缓存，并保存到本地
			
 
				+        with open(path, 'rb') as f:
			
 
				+            return base64.encodebytes(f.read()).decode('utf-8')
			
 
				+
			
 
				+    def get_json(self):
			
 
				+        with open(self.json_path, 'r') as f:
			
 
				+            return json.load(f)
			
 
				+
			
 
				+
			
 
				+def send_request(image: Image, ocr_name, ocr_address, image_type=None):
			
 
				+    base64_str = image.get_base64(image.rotate)
			
 
				+    config = OCR_CONFIGS[ocr_name][ocr_address]
			
 
				+    headers = {
			
 
				+        'Content-Type': 'application/json',
			
 
				+        'Authorization': config.token
			
 
				+    }
			
 
				+    data = {
			
 
				+        'image': base64_str,
			
 
				+    }
			
 
				+    if image_type is not None:
			
 
				+        data['image_type'] = image_type
			
 
				+    response = requests.post(config.url, headers=headers, json=data)
			
 
				+    return response.json()
			
 
				+
			
 
				+
			
 
				+def parser_path(path: Path, rotate: bool):
			
 
				+    name = time.strftime("%m-%d_", time.localtime()) + path.name
			
 
				+    if rotate:
			
 
				+        name = f'{name}_R.md'
			
 
				+    return path.parent / name
			
 
				+
			
 
				+
			
 
				+class Dataset(object):
			
 
				+    def __init__(self, images_path, image_type, ocr_name, ocr_address, field, rotate=False):
			
 
				+        self.image_type = image_type
			
 
				+        self.ocr_name = ocr_name
			
 
				+        self.ocr_address = ocr_address
			
 
				+        self.images_path = images_path
			
 
				+        self.image_list = []
			
 
				+        # chain 迭代器，首先返回第一个可迭代对象中所有元素，接着返回下一个可迭代对象中所有元素，直到耗尽所有可迭代对象中的元素
			
 
				+        # eg：chain('ABC', 'DEF') --> A B C D E F
			
 
				+
			
 
				+        for p in chain(*[Path(self.images_path).rglob('*.jpg')]):
			
 
				+            if rotate:
			
 
				+                self.image_list.extend(Image(p, r, rotate) for r in [None, 0, 1, 2])
			
 
				+            else:
			
 
				+                self.image_list.append(Image(p, None, rotate))
			
 
				+
			
 
				+        self.field = Filed.get(field)
			
 
				+
			
 
				+        self.correct = {k: 0 for k in self.field}
			
 
				+        self.error = {k: 0 for k in self.field}
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        return len(self.image_list)
			
 
				+
			
 
				+    def _evaluate_one(self, image: Image):
			
 
				+        def _get_predict(r, key):
			
 
				+            # isinstance() 函数来判断一个对象是否是一个已知的类型
			
 
				+            if isinstance(r[key], dict):
			
 
				+                return r[key]['text']
			
 
				+            else:
			
 
				+                return r[key]
			
 
				+
			
 
				+        if image.rotate is not None: image.gt_result['orientation'] = image.rotate + 1
			
 
				+        r = send_request(image, self.ocr_name, self.ocr_address, self.image_type)
			
 
				+        err_str = ''
			
 
				+        if r['status'] == '000':
			
 
				+            res = r['result']
			
 
				+            for key in self.field:
			
 
				+                # print('attr: ', key)
			
 
				+                if key in res:
			
 
				+                    gt = image.get_gt_result(key)
			
 
				+                    predict = _get_predict(res, key)
			
 
				+                    # print(f'gt: {gt}, predict: {predict}')
			
 
				+                    if predict == gt:
			
 
				+                        self.correct[key] += 1
			
 
				+                    else:
			
 
				+                        image.category = False
			
 
				+                        self.error[key] += 1
			
 
				+                        err_str += f'-------{key}-------<br>正确:{gt}<br>返回:{predict}<br>'
			
 
				+            if image.category:
			
 
				+                image.ocr_result = image.gt_result
			
 
				+            else:
			
 
				+                image.ocr_result = err_str
			
 
				+        else:
			
 
				+            image.ocr_result = r['msg']
			
 
				+            image.category = False
			
 
				+            for key in self.field:
			
 
				+                self.error[key] += 1
			
 
				+
			
 
				+    def __call__(self):  # sourcery skip: yield-from
			
 
				+        # yield 返回一个生成器
			
 
				+        for image in self.image_list:
			
 
				+            yield image
			
 
				+
			
 
				+    # 比较
			
 
				+    def evaluate(self):
			
 
				+        for image in tqdm(self.image_list):
			
 
				+            self._evaluate_one(image)
			
 
				+
			
 
				+    # 计算总体准确度
			
 
				+    @property
			
 
				+    def accuracy(self):
			
 
				+        return sum(list(self.correct.values())) / sum(list(self.correct.values()) + list(self.error.values()))
			
 
				+
			
 
				+    # 计算元素准确度
			
 
				+    @property
			
 
				+    def attrs_accuracy(self):
			
 
				+        return {k: self.correct[k] / (self.correct[k] + self.error[k]) for k in self.field}
			
 
				+
			
 
				+
			
 
				+class MD(object):
			
 
				+    def __init__(self, file_path: Path):
			
 
				+        self.name = file_path.name
			
 
				+        self.f = MdUtils(file_name=str(file_path))
			
 
				+        self.field_table: List = ['字段', '正确率']
			
 
				+        self.true_table: List = ['图片', '识别结果']
			
 
				+        self.false_table: List = ['图片', '识别结果']
			
 
				+        self.write_header(f'{self.name}测试报告')
			
 
				+
			
 
				+    def write_header(self, title, level=1):
			
 
				+        self.f.new_header(level=level, title=title)
			
 
				+
			
 
				+    def write_total_accuracy(self, ds: Dataset):
			
 
				+        def get_format_total_accuracy(ds: Dataset):
			
 
				+            acc = ds.accuracy * 100
			
 
				+            return "{:.2f}%".format(acc)
			
 
				+
			
 
				+        # 1. 拿到format之后的百分数
			
 
				+        res = get_format_total_accuracy(ds)
			
 
				+
			
 
				+        # 2. 写入
			
 
				+        self.f.new_paragraph(res)
			
 
				+
			
 
				+    def write_table_accuracy(self, ds: Dataset, columns=2, text_align='center'):
			
 
				+        def format_table_accuracy(ds: Dataset):
			
 
				+            table = ds.attrs_accuracy
			
 
				+            for k, v in table.items():
			
 
				+                acc = v * 100
			
 
				+                table[k] = "{:.2f}%".format(acc)
			
 
				+            return table
			
 
				+
			
 
				+        def dict_2_list(dic: dict):
			
 
				+            l = []
			
 
				+            for k, v in dic.items():
			
 
				+                l.extend((k, v))
			
 
				+            return l
			
 
				+
			
 
				+        table_dict = format_table_accuracy(ds)
			
 
				+        table_list = dict_2_list(table_dict)
			
 
				+        self.field_table.extend(table_list)
			
 
				+
			
 
				+        rows = len(self.field_table) // columns
			
 
				+        self.f.new_table(columns=columns, rows=rows, text=self.field_table, text_align=text_align)
			
 
				+
			
 
				+    def write_table_result(self, ds: Dataset, columns=2, text_align='center'):
			
 
				+        for image in ds.image_list:
			
 
				+            md_image = self.f.new_inline_image(text='', path=f'{image.path.parent.name}/{image.path.name}')
			
 
				+            if image.category:
			
 
				+                self.true_table.extend([md_image, image.ocr_result])
			
 
				+            else:
			
 
				+                self.false_table.extend([md_image, image.ocr_result])
			
 
				+
			
 
				+        true_rows = len(self.true_table) // columns
			
 
				+        false_rows = len(self.false_table) // columns
			
 
				+        self.write_header('True')
			
 
				+        self.f.new_table(columns=columns, rows=true_rows, text=self.true_table, text_align=text_align)
			
 
				+        self.write_header('False')
			
 
				+        self.f.new_table(columns=columns, rows=false_rows, text=self.false_table, text_align='left')
			
 
				+
			
 
				+# if __name__ == '__main__':
			
 
				+#     markdown = MD('英语等级证书')
			
 
				+#
			
 
				+#     dataset = Dataset(Path(''), 'cet', 'local', False)
			
 
				+#     print(len(dataset))
			
 
				+#     for d in dataset():
			
 
				+#         print(d)
			
 
				+#
			
 
				+#     dataset.evaluate()
			
 
				+#     print(dataset.accuracy)
			
 
				+#
			
 
				+#     markdown.write_total_accuracy(dataset)
			
 
				+#     markdown.write_table_accuracy(dataset)
			
 
				+#     markdown.write_table_result(dataset)
			
 
				+#
			
 
				+#     markdown.f.create_md_file()
			
--- a/HR_OCR/test_script/to_md/ocr_config.py
+++ b/HR_OCR/test_script/to_md/ocr_config.py
@@ -0,0 +1,109 @@
 
				+from dataclasses import dataclass
			
 
				+from typing import List
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class Type:
			
 
				+    image_type: int
			
 
				+    image_field: List
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class RequestConfig:
			
 
				+    url: str
			
 
				+    token: str
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class Configs:
			
 
				+    request: RequestConfig
			
 
				+    type: Type
			
 
				+
			
 
				+
			
 
				+# cet
			
 
				+cet_local_config = RequestConfig(url='http://192.168.199.27:18050/ocr_system/cet', token='')
			
 
				+cet_TXtest_config = RequestConfig(
			
 
				+    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm//cettest/cet',
			
 
				+    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
			
 
				+cet_TXsb_config = RequestConfig(
			
 
				+    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/cet/cet',
			
 
				+    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
			
 
				+cet_DXtest_config = RequestConfig(
			
 
				+    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cettest/cet',
			
 
				+    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
			
 
				+cet_DXsb_config = RequestConfig(
			
 
				+    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cet/cet',
			
 
				+    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')
			
 
				+
			
 
				+CET_CONFIGS = {
			
 
				+    'local': cet_local_config,
			
 
				+    'TXtest': cet_TXtest_config,
			
 
				+    'TXsb': cet_TXsb_config,
			
 
				+    'DXtest': cet_DXtest_config,
			
 
				+    'DXsb': cet_DXsb_config
			
 
				+}
			
 
				+
			
 
				+# regbook
			
 
				+regbook_local_config = RequestConfig(url='http://192.168.199.27:18040/ocr_system/regbook', token='')
			
 
				+regbook_TXtest_config = RequestConfig(
			
 
				+    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm//hkbsbtest/regbook',
			
 
				+    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
			
 
				+regbook_TXsb_config = RequestConfig(
			
 
				+    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsb/regbook',
			
 
				+    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
			
 
				+regbook_DXtest_config = RequestConfig(
			
 
				+    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsbtest/regbook',
			
 
				+    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
			
 
				+regbook_DXsb_config = RequestConfig(
			
 
				+    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsb/regbook',
			
 
				+    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')
			
 
				+
			
 
				+REGBOOK_CONFIGS = {
			
 
				+    'local': regbook_local_config,
			
 
				+    'TXtest': regbook_TXtest_config,
			
 
				+    'TXsb': regbook_TXsb_config,
			
 
				+    'DXtest': regbook_DXtest_config,
			
 
				+    'DXsb': regbook_DXsb_config
			
 
				+}
			
 
				+
			
 
				+# business_license
			
 
				+blfe_local_config = RequestConfig(url='http://192.168.199.27:18060/ocr_system/business_license', token='')
			
 
				+blfe_TXtest_config = RequestConfig(
			
 
				+    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfetest/blfe',
			
 
				+    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
			
 
				+blfe_TXsb_config = RequestConfig(
			
 
				+    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfe/blfe',
			
 
				+    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
			
 
				+blfe_DXtest_config = RequestConfig(
			
 
				+    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfetest/blfe',
			
 
				+    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
			
 
				+blfe_DXsb_config = RequestConfig(
			
 
				+    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfe/blfe',
			
 
				+    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')
			
 
				+
			
 
				+BLFE_CONFIGS = {
			
 
				+    'local': blfe_local_config,
			
 
				+    'TXtest': blfe_TXtest_config,
			
 
				+    'TXsb': blfe_TXsb_config,
			
 
				+    'DXtest': blfe_DXtest_config,
			
 
				+    'DXsb': blfe_DXsb_config
			
 
				+}
			
 
				+
			
 
				+OCR_CONFIGS = {
			
 
				+    'cet': CET_CONFIGS,
			
 
				+    'regbook': REGBOOK_CONFIGS,
			
 
				+    'business_license': BLFE_CONFIGS
			
 
				+}
			
 
				+
			
 
				+# 字段
			
 
				+cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
			
 
				+regbook_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
			
 
				+                 'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
			
 
				+                 'native_place_region', 'blood_type', 'religion']
			
 
				+business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
			
 
				+                    'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
			
 
				+Filed = {
			
 
				+    'cet': cet_field,
			
 
				+    'regbook': regbook_field,
			
 
				+    'business_license': business_license
			
 
				+}
			
--- a/HR_OCR/test_script/to_md/use.py
+++ b/HR_OCR/test_script/to_md/use.py
@@ -0,0 +1,49 @@
 
				+'''
			
 
				+Author: zeke-chin zeke-chin@icloud.com
			
 
				+Date: 2022-09-28 20:28:41
			
 
				+LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				+LastEditTime: 2022-09-30 15:08:48
			
 
				+FilePath: /to_md/HR_OCR/to_md/use.py
			
 
				+Description: Evaluate an OCR service against labelled images and write the results to a Markdown report.
			
 
				+'''
			
 
				+
			
 
				+from pathlib import Path
			
 
				+
			
 
				+from new import MD, Image, Dataset, parser_path
			
 
				+
			
 
				+# config
			
 
				+# 图片路径
			
 
				+image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
			
 
				+image_type = 0
			
 
				+# 是否旋转
			
 
				+image_rotate = False
			
 
				+ocr_address = 'DXtest'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				+
			
 
				+ocr_name = 'cet'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				+md_name = 'CET'
			
 
				+filed = 'cet'
			
 
				+
			
 
				+# 若md_path为None 则默认使用图片父路径为markdown保存路径
			
 
				+# md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
 
				+md_path = None or image_path.parent
			
 
				+
			
 
				+md_file = parser_path(Path(md_path) / Path(md_name + image_path.stem), image_rotate)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    markdown = MD(md_file)
			
 
				+
			
 
				+    dataset = Dataset(image_path, image_type, ocr_name, ocr_address, filed, image_rotate)
			
 
				+    print(len(dataset))
			
 
				+    for d in dataset():
			
 
				+        print(d)
			
 
				+
			
 
				+    dataset.evaluate()
			
 
				+    print(dataset.accuracy)
			
 
				+
			
 
				+    markdown.write_total_accuracy(dataset)
			
 
				+    markdown.write_table_accuracy(dataset)
			
 
				+    markdown.write_table_result(dataset)
			
 
				+
			
 
				+    print(md_file)
			
 
				+    markdown.f.create_md_file()
			
--- a/HR_OCR/test_script/tools/convert_json.py
+++ b/HR_OCR/test_script/tools/convert_json.py
@@ -0,0 +1,53 @@
 
				+from pathlib import Path
			
 
				+
			
 
				+import requests
			
 
				+import json
			
 
				+import base64
			
 
				+from itertools import chain
			
 
				+
			
 
				+url = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
			
 
				+imgs_path = './HR_OCR/to_md/example/img'
			
 
				+
			
 
				+def send_request(img_path, image_type = 0):
			
 
				+    with open(img_path, 'rb') as f:
			
 
				+        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				+        data = {
			
 
				+            'image': img_str,
			
 
				+            'image_type': image_type
			
 
				+        }
			
 
				+        idc_header = {
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				+        }
			
 
				+        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
			
 
				+        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				+        print(r.json())
			
 
				+        return r.json()
			
 
				+
			
 
				+
			
 
				+def _parse_result(r):
			
 
				+    if r['status'] == '000':
			
 
				+        r = r['result']
			
 
				+        if r:
			
 
				+            del r['confidence']
			
 
				+        return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
			
 
				+    elif r['status'] == '101':
			
 
				+        return "101"
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+
			
 
				+    # 0
			
 
				+    # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
			
 
				+    img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				+    for img_path in img_paths:
			
 
				+        print(img_path)
			
 
				+        r = send_request(img_path)
			
 
				+        res = _parse_result(r)
			
 
				+        print(res)
			
 
				+        img_path: Path = img_path
			
 
				+        d = img_path.parent
			
 
				+        fn = f'{img_path.stem}.json'
			
 
				+
			
 
				+        with (d / fn).open('w', encoding='utf-8') as f:
			
 
				+            json.dump(res, f, ensure_ascii=False, indent=4)
			
--- a/HR_OCR/test_script/tools/suffix.py
+++ b/HR_OCR/test_script/tools/suffix.py
@@ -0,0 +1,28 @@
 
				+import os
			
 
				+from pathlib import Path
			
 
				+from itertools import chain
			
 
				+import sys
			
 
				+
			
 
				+# conf
			
 
				+target_path = './HR_OCR/to_md/example/img'
			
 
				+#suffix = sys.argv[2]
			
 
				+suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
			
 
				+
			
 
				+def get_range(n):
			
 
				+    len_n = len(str(n))
			
 
				+    for num in range(1, n + 1):
			
 
				+        output = str(num)
			
 
				+        while len(output) < len_n:
			
 
				+            output = f'0{output}'
			
 
				+        yield output
			
 
				+
			
 
				+
			
 
				+file_paths = list(chain(*[Path(target_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']]))
			
 
				+print(len(file_paths))
			
 
				+num = len(file_paths)
			
 
				+file_name_list = list(get_range(num))
			
 
				+for i in range(num):
			
 
				+    file = file_paths[i]
			
 
				+    print(file)
			
 
				+    new = file.parent / f'{file_name_list[i]}_img.{suffix}'
			
 
				+    file.rename(new)
			
--- a/HR_OCR/to_md/example/img/1_img.jpg
+++ b/HR_OCR/to_md/example/img/1_img.jpg
--- a/HR_OCR/to_md/example/img/2_img.jpg
+++ b/HR_OCR/to_md/example/img/2_img.jpg
--- a/HR_OCR/to_md/use.py
+++ b/HR_OCR/to_md/use.py
@@ -17,11 +17,11 @@ image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
 
				 image_type = 0
			
 
				 # 是否旋转
			
 
				 image_rotate = False
			
 
				-ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				+ocr_address = 'DXtest'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				 
			
 
				-ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert''business_license'
			
 
				-md_name = 'RegBook'
			
 
				-filed = 'regbook'
			
 
				+ocr_name = 'cet'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				+md_name = 'CET'
			
 
				+filed = 'cet'
			
 
				 
			
 
				 # 若md_path为None 则默认使用图片父路径为markdown保存路径
			
 
				 # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
--- a/HR_OCR/tools/convert_json.py
+++ b/HR_OCR/tools/convert_json.py
@@ -5,8 +5,8 @@ import json
 
				 import base64
			
 
				 from itertools import chain
			
 
				 
			
 
				-url = 'http://192.168.199.27:18040'
			
 
				-imgs_path = '../img'
			
 
				+url = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
			
 
				+imgs_path = './HR_OCR/to_md/example/img'
			
 
				 
			
 
				 def send_request(img_path, image_type = 0):
			
 
				     with open(img_path, 'rb') as f:
			
@@ -15,11 +15,12 @@ def send_request(img_path, image_type = 0):
 
				             'image': img_str,
			
 
				             'image_type': image_type
			
 
				         }
			
 
				-        # idc_header = {
			
 
				-        #     'Content-Type': 'application/json',
			
 
				-        #     'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				-        # }
			
 
				-        r = requests.post(f'{url}/ocr_system/regbook', json=data)
			
 
				+        idc_header = {
			
 
				+            'Content-Type': 'application/json',
			
 
				+            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				+        }
			
 
				+        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
			
 
				+        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				         print(r.json())
			
 
				         return r.json()
			
 
				 
			
@@ -37,10 +38,8 @@ def _parse_result(r):
 
				 if __name__ == '__main__':
			
 
				 
			
 
				     # 0
			
 
				-    root = Path(__file__).parent
			
 
				-    print(root)
			
 
				     # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
			
 
				-    img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				+    img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				     for img_path in img_paths:
			
 
				         print(img_path)
			
 
				         r = send_request(img_path)
			
--- a/HR_OCR/tools/suffix.py
+++ b/HR_OCR/tools/suffix.py
@@ -4,7 +4,7 @@ from itertools import chain
 
				 import sys
			
 
				 
			
 
				 # conf
			
 
				-target_path = '../户口本/0'
			
 
				+target_path = './HR_OCR/to_md/example/img'
			
 
				 #suffix = sys.argv[2]
			
 
				 suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
			
 
				 
			
@@ -18,6 +18,7 @@ def get_range(n):
 
				 
			
 
				 
			
 
				 file_paths = list(chain(*[Path(target_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']]))
			
 
				+print(len(file_paths))
			
 
				 num = len(file_paths)
			
 
				 file_name_list = list(get_range(num))
			
 
				 for i in range(num):