hace 2 años · b5ca515636
--- a/TestAllOcr/config.py
+++ b/TestAllOcr/config.py
@@ -0,0 +1,39 @@
 
				+import base64
			
 
				+import requests
			
 
				+
			
 
				+# 测试url
			
 
				+test_url = 'http://aihub-test.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm'
			
 
				+sb_url = 'http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm'
			
 
				+idc_url = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
			
 
				+
			
 
				+URL = {
			
 
				+    'test': test_url,
			
 
				+    'sb': sb_url,
			
 
				+    'idc': idc_url
			
 
				+}
			
 
				+
			
 
				+# 测试token
			
 
				+test_header = {
			
 
				+    'Content-Type': 'application/json',
			
 
				+    'Authorization': 'Bearer 9679c2b3-b90b-4029-a3c7-f347b4d242f7'
			
 
				+}
			
 
				+sb_header = {
			
 
				+    'Content-Type': 'application/json',
			
 
				+    'Authorization': 'Bearer dcae8cc6-0e49-4db8-a2d2-94ef84da3636'
			
 
				+}
			
 
				+idc_header = {
			
 
				+    'Content-Type': 'application/json',
			
 
				+    'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				+}
			
 
				+
			
 
				+TOKEN = {
			
 
				+    'test': test_header,
			
 
				+    'sb': sb_header,
			
 
				+    'idc': idc_header
			
 
				+}
			
 
				+
			
 
				+def send_request(image_path, suffix, URL, TOKEN):
			
 
				+    with open(image_path, 'rb') as f:
			
 
				+        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				+        r = requests.post(f'{URL}{suffix}', json={'image': img_str, 'image_type': '0'}, headers=TOKEN)
			
 
				+        return r.json()
			
--- a/TestAllOcr/image/bankcard.jpg
+++ b/TestAllOcr/image/bankcard.jpg
--- a/TestAllOcr/image/bankcard.json
+++ b/TestAllOcr/image/bankcard.json
@@ -0,0 +1 @@
 
				+"101"
			
--- a/TestAllOcr/image/cet.jpg
+++ b/TestAllOcr/image/cet.jpg
--- a/TestAllOcr/image/cet.json
+++ b/TestAllOcr/image/cet.json
@@ -0,0 +1,9 @@
 
				+{
			
 
				+    "orientation": 0,
			
 
				+    "name": "鉴康",
			
 
				+    "id": "152801200003178527",
			
 
				+    "language": "英语",
			
 
				+    "level": "CET4",
			
 
				+    "exam_time": "2021年6月",
			
 
				+    "score": "451"
			
 
				+}
			
--- a/TestAllOcr/image/idcard.jpg
+++ b/TestAllOcr/image/idcard.jpg
--- a/TestAllOcr/image/idcard.json
+++ b/TestAllOcr/image/idcard.json
@@ -0,0 +1 @@
 
				+"101"
			
--- a/TestAllOcr/image/regbook.jpg
+++ b/TestAllOcr/image/regbook.jpg
--- a/TestAllOcr/image/regbook.json
+++ b/TestAllOcr/image/regbook.json
@@ -0,0 +1 @@
 
				+"101"
			
--- a/TestAllOcr/image/schoolcert.jpg
+++ b/TestAllOcr/image/schoolcert.jpg
--- a/TestAllOcr/image/schoolcert.json
+++ b/TestAllOcr/image/schoolcert.json
@@ -0,0 +1 @@
 
				+"101"
			
--- a/TestAllOcr/test_interface.py
+++ b/TestAllOcr/test_interface.py
@@ -0,0 +1,44 @@
 
				+import base64
			
 
				+from pathlib import Path
			
 
				+import unittest
			
 
				+import config
			
 
				+from config import send_request
			
 
				+
			
 
				+image_path = 'image'
			
 
				+
			
 
				+# test, sb ,idc
			
 
				+url = config.URL['idc']
			
 
				+token = config.TOKEN['idc']
			
 
				+
			
 
				+class TestOcr(unittest.TestCase):
			
 
				+
			
 
				+    def _helper(self, image_path, suffix):
			
 
				+        root = Path(__file__).parent
			
 
				+        image_path = str(root / image_path)
			
 
				+        r = send_request(image_path, suffix, url, token)
			
 
				+        self.assertIn('result', r, f'{suffix} test error')
			
 
				+
			
 
				+    def test_bank(self):
			
 
				+        image_path = './image/bankcard.jpg'
			
 
				+        suffix = '/yhksb/bankcard'
			
 
				+        self._helper(image_path, suffix)
			
 
				+
			
 
				+    def test_idcard(self):
			
 
				+        image_path = './image/idcard.jpg'
			
 
				+        suffix = '/sfzsb/idcard'
			
 
				+        self._helper(image_path, suffix)
			
 
				+
			
 
				+    def test_regbook(self):
			
 
				+        image_path = './image/regbook.jpg'
			
 
				+        suffix = '/hkbsb/regbook'
			
 
				+        self._helper(image_path, suffix)
			
 
				+
			
 
				+    def test_schoolcert(self):
			
 
				+        image_path = './image/schoolcert.jpg'
			
 
				+        suffix = '/xxw/schoolcert'
			
 
				+        self._helper(image_path, suffix)
			
 
				+
			
 
				+    def test_cet(self):
			
 
				+        image_path = './image/cet.jpg'
			
 
				+        suffix = '/cet/cet'
			
 
				+        self._helper(image_path, suffix)
			
--- a/convert_json.py
+++ b/convert_json.py
@@ -0,0 +1,54 @@
 
				+from pathlib import Path
			
 
				+
			
 
				+import requests
			
 
				+import json
			
 
				+import base64
			
 
				+from itertools import chain
			
 
				+
			
 
				+url = 'http://192.168.199.27:18040'
			
 
				+imgs_path = '户口本/首页'
			
 
				+
			
 
				+def send_request(img_path, image_type = 1):
			
 
				+    with open(img_path, 'rb') as f:
			
 
				+        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				+        data = {
			
 
				+            'image': img_str,
			
 
				+            'image_type': image_type
			
 
				+        }
			
 
				+        # idc_header = {
			
 
				+        #     'Content-Type': 'application/json',
			
 
				+        #     'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				+        # }
			
 
				+        r = requests.post(f'{url}/ocr_system/regbook', json=data)
			
 
				+        print(r.json())
			
 
				+        return r.json()
			
 
				+
			
 
				+
			
 
				+def _parse_result(r):
			
 
				+    if r['status'] == '000':
			
 
				+        r = r['result']
			
 
				+        if r:
			
 
				+            del r['confidence']
			
 
				+        return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
			
 
				+    elif r['status'] == '101':
			
 
				+        return "101"
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+
			
 
				+    # 0
			
 
				+    root = Path(__file__).parent
			
 
				+    print(root)
			
 
				+    # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
			
 
				+    img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				+    for img_path in img_paths:
			
 
				+        print(img_path)
			
 
				+        r = send_request(img_path)
			
 
				+        res = _parse_result(r)
			
 
				+        print(res)
			
 
				+        img_path: Path = img_path
			
 
				+        d = img_path.parent
			
 
				+        fn = f'{img_path.stem}.json'
			
 
				+
			
 
				+        with (d / fn).open('w', encoding='utf-8') as f:
			
 
				+            json.dump(res, f, ensure_ascii=False, indent=4)
			
--- a/example/.ro_dst/20-1.jpg
+++ b/example/.ro_dst/20-1.jpg
--- a/example/.ro_dst/20-2.jpg
+++ b/example/.ro_dst/20-2.jpg
--- a/example/.ro_dst/20-3.jpg
+++ b/example/.ro_dst/20-3.jpg
--- a/example/.ro_dst/25_img-1.jpg
+++ b/example/.ro_dst/25_img-1.jpg
--- a/example/.ro_dst/25_img-2.jpg
+++ b/example/.ro_dst/25_img-2.jpg
--- a/example/.ro_dst/25_img-3.jpg
+++ b/example/.ro_dst/25_img-3.jpg
--- a/example/08-23_CET-tem_R.md
+++ b/example/08-23_CET-tem_R.md
@@ -1,35 +0,0 @@
 
				-
			
 
				-
			
 
				-
			
 
				-
			
 
				-# 08-23_CET-tem_R.md测试报告
			
 
				-
			
 
				-
			
 
				-92.86%
			
 
				-|字段|正确率|
			
 
				-| :---: | :---: |
			
 
				-|orientation|100.00%|
			
 
				-|name|50.00%|
			
 
				-|id|100.00%|
			
 
				-|language|100.00%|
			
 
				-|level|100.00%|
			
 
				-|exam_time|100.00%|
			
 
				-|score|100.00%|
			
 
				-
			
 
				-# True
			
 
				-
			
 
				-|图片|识别结果|
			
 
				-| :---: | :---: |
			
 
				-|![](img/20.jpg)|{'orientation': 0, 'name': '于海龙', 'id': '150424199401132415', 'language': '英语', 'level': 'CET4', 'exam_time': '2013年6月', 'score': '438'}|
			
 
				-|![](.ro_dst/20-1.jpg)|{'orientation': 1, 'name': '于海龙', 'id': '150424199401132415', 'language': '英语', 'level': 'CET4', 'exam_time': '2013年6月', 'score': '438'}|
			
 
				-|![](.ro_dst/20-2.jpg)|{'orientation': 2, 'name': '于海龙', 'id': '150424199401132415', 'language': '英语', 'level': 'CET4', 'exam_time': '2013年6月', 'score': '438'}|
			
 
				-|![](.ro_dst/20-3.jpg)|{'orientation': 3, 'name': '于海龙', 'id': '150424199401132415', 'language': '英语', 'level': 'CET4', 'exam_time': '2013年6月', 'score': '438'}|
			
 
				-
			
 
				-# False
			
 
				-
			
 
				-|图片|识别结果|
			
 
				-| :---: | :---: |
			
 
				-|![](img/25_img.jpg)|正确:徐潇然<br>返回:徐然<br>|
			
 
				-|![](.ro_dst/25_img-1.jpg)|正确:徐潇然<br>返回:徐然<br>|
			
 
				-|![](.ro_dst/25_img-2.jpg)|正确:徐潇然<br>返回:徐然<br>|
			
 
				-|![](.ro_dst/25_img-3.jpg)|正确:徐潇然<br>返回:徐然<br>|
			
--- a/example/img/20.jpg
+++ b/example/img/20.jpg
--- a/example/img/20.json
+++ b/example/img/20.json
@@ -1,9 +0,0 @@
 
				-{
			
 
				-    "orientation": 0,
			
 
				-    "name": "于海龙",
			
 
				-    "id": "150424199401132415",
			
 
				-    "language": "英语",
			
 
				-    "level": "CET4",
			
 
				-    "exam_time": "2013年6月",
			
 
				-    "score": "438"
			
 
				-}
			
--- a/example/img/25_img.jpg
+++ b/example/img/25_img.jpg
--- a/example/img/25_img.json
+++ b/example/img/25_img.json
@@ -1,9 +0,0 @@
 
				-{
			
 
				-    "orientation": 0,
			
 
				-    "name": "徐潇然",
			
 
				-    "id": "210404199610292117",
			
 
				-    "language": "英语",
			
 
				-    "level": "CET6",
			
 
				-    "exam_time": "2018年06月",
			
 
				-    "score": "435"
			
 
				-}
			
--- a/ocr_config.py
+++ b/ocr_config.py
@@ -20,7 +20,7 @@ class Configs:
 
				 
			
 
				 
			
 
				 # cet
			
 
				-cet_local_config = RequestConfig(url='http://192.168.199.249:18050/ocr_system/cet', token='')
			
 
				+cet_local_config = RequestConfig(url='http://192.168.199.27:18050/ocr_system/cet', token='')
			
 
				 cet_test_config = RequestConfig(
			
 
				     url='http://aihub-test.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/cettest/cet',
			
 
				     token='9679c2b3-b90b-4029-a3c7-f347b4d242f7')
			
@@ -35,7 +35,7 @@ CET_CONFIGS = {
 
				 }
			
 
				 
			
 
				 # regbook
			
 
				-regbook_local_config = RequestConfig(url='http://192.168.199.249:18040/ocr_system/regbook', token='')
			
 
				+regbook_local_config = RequestConfig(url='http://192.168.199.27:18040/ocr_system/regbook', token='')
			
 
				 regbook_test_config = RequestConfig(
			
 
				     url='http://aihub-test.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsb/regbook',
			
 
				     token='9679c2b3-b90b-4029-a3c7-f347b4d242f7')
			
--- a/suffix.py
+++ b/suffix.py
@@ -0,0 +1,27 @@
 
				+import os
			
 
				+from pathlib import Path
			
 
				+from itertools import chain
			
 
				+import sys
			
 
				+
			
 
				+# Configuration: directory to scan and the extension given to renamed files
			
 
				+target_path = '户口本'
			
 
				+#suffix = sys.argv[2]
			
 
				+suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
			
 
				+
			
 
				+def get_range(n):
			
 
				+    len_n = len(str(n))
			
 
				+    for num in range(1, n + 1):
			
 
				+        output = str(num)
			
 
				+        while len(output) < len_n:
			
 
				+            output = f'0{output}'
			
 
				+        yield output
			
 
				+
			
 
				+
			
 
				+file_paths = list(chain(*[Path(target_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']]))
			
 
				+num = len(file_paths)
			
 
				+file_name_list = list(get_range(num))
			
 
				+for i in range(num):
			
 
				+    file = file_paths[i]
			
 
				+    print(file)
			
 
				+    new = file.parent / f'{file_name_list[i]}_img.{suffix}'
			
 
				+    file.rename(new)
			
--- a/use.py
+++ b/use.py
@@ -5,14 +5,14 @@ from new import MD, Image, Dataset, parser_path
 
				 
			
 
				 # config
			
 
				 # Image directory
			
 
				-image_path = Path('/Users/zeke/Downloads/9.1/专四/img')
			
 
				-image_type = 0
			
 
				+image_path = Path('/Users/sxkj/Desktop/测试文档/四六级/9.8/身份证/img')
			
 
				+image_type = 1
			
 
				 # Whether to rotate the images
			
 
				-image_rotate = False
			
 
				+image_rotate = True
			
 
				 ocr_address = 'local'  # 'local' 'test' 'sb'
			
 
				-ocr_name = 'cet'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert'
			
 
				-md_name = 'CET'
			
 
				-filed = 'cet'
			
 
				+ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert'
			
 
				+md_name = 'RegBook'
			
 
				+filed = 'regbook'
			
 
				 # If md_path is None, the parent of the image directory is used as the markdown output path
			
 
				 # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
 
				 md_path = None or image_path.parent