Explorar el Código

等待被修复: 户口本多类型导致 to_md 不准确

zeke-chin hace 2 años
padre
commit
0fd86add65
Se han modificado 4 ficheros con 37 adiciones y 15 borrados
  1. HR_OCR/to_md/ocr_config.py (+6 -2)
  2. HR_OCR/to_md/use.py (+6 -4)
  3. HR_OCR/tools/convert_json.py (+16 -8)
  4. HR_OCR/tools/suffix.py (+9 -1)

+ 6 - 2
HR_OCR/to_md/ocr_config.py

@@ -46,7 +46,7 @@ CET_CONFIGS = {
 # regbook
 regbook_local_config = RequestConfig(url='http://192.168.199.27:18040/ocr_system/regbook', token='')
 regbook_TXtest_config = RequestConfig(
-    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm//hkbsbtest/regbook',
+    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsbtest/regbook',
     token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
 regbook_TXsb_config = RequestConfig(
     url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsb/regbook',
@@ -97,11 +97,15 @@ OCR_CONFIGS = {
 
 # 字段
 cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
-regbook_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
+regbook_field = ['orientation', 'type', 'address', 'address_province', 'address_city', 'address_region',
+                 'address_detail', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
                  'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
                  'native_place_region', 'blood_type', 'religion']
+
 business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
                     'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
+# business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
+#                     'start_date', 'business_scope', 'expire_date', 'address']
 Filed = {
     'cet': cet_field,
     'regbook': regbook_field,

+ 6 - 4
HR_OCR/to_md/use.py

@@ -2,24 +2,26 @@
 Author: zeke-chin zeke-chin@icloud.com
 Date: 2022-09-28 20:28:41
 LastEditors: zeke-chin zeke-chin@icloud.com
-LastEditTime: 2022-10-10 21:14:29
+LastEditTime: 2022-10-12 21:15:26
 FilePath: /to_md/HR_OCR/to_md/use.py
 Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
 '''
 
 from pathlib import Path
+from tkinter import image_types
 
 from new import MD, Image, Dataset, parser_path
 
 # config
 # 图片路径
-image_path = Path('/Users/zeke/Downloads/户口本测试样本1011-常住人口页')
+image_path = Path('/Users/zeke/work/sx/OCR/image_data/RE_1012/户口本测试样本1011-首页')
+image_type = 1
 # 是否旋转
 image_rotate = False
-ocr_address = 'DXtest'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
+ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
 
 ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
-md_name = 'RBook'
+md_name = 'RE_1.7'
 filed = 'regbook'
 
 # 若md_path为None 则默认使用图片父路径为markdown保存路径

+ 16 - 8
HR_OCR/tools/convert_json.py

@@ -1,3 +1,11 @@
+'''
+Author: zeke-chin zeke-chin@icloud.com
+Date: 2022-10-11 16:38:18
+LastEditors: zeke-chin zeke-chin@icloud.com
+LastEditTime: 2022-10-12 17:43:29
+FilePath: /test_script/HR_OCR/tools/convert_json.py
+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
+'''
 from pathlib import Path
 
 import requests
@@ -5,8 +13,8 @@ import json
 import base64
 from itertools import chain
 
-url = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
-imgs_path = '/Users/zeke/Downloads/户口本测试样本1011-常住人口页'
+url = 'http://192.168.199.27:18060'
+imgs_path = '/Users/zeke/work/sx/OCR/image_data/营业执照90'
 
 
 def send_request(img_path):
@@ -15,12 +23,12 @@ def send_request(img_path):
         data = {
             'image': img_str
         }
-        idc_header = {
-            'Content-Type': 'application/json',
-            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
-        }
-        r = requests.post(f'{url}/hkbsbtest/regbook', json=data, headers=idc_header)
-        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
+        # idc_header = {
+        #     'Content-Type': 'application/json',
+        #     'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
+        # }
+        # r = requests.post(f'{url}/ocr_system/business_license', json=data, headers=idc_header)
+        r = requests.post(f'{url}/ocr_system/business_license', json=data)
         print(r.json())
         return r.json()
 

+ 9 - 1
HR_OCR/tools/suffix.py

@@ -1,10 +1,18 @@
+'''
+Author: zeke-chin zeke-chin@icloud.com
+Date: 2022-10-11 16:38:18
+LastEditors: zeke-chin zeke-chin@icloud.com
+LastEditTime: 2022-10-12 17:53:30
+FilePath: /test_script/HR_OCR/tools/suffix.py
+Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
+'''
 import os
 from pathlib import Path
 from itertools import chain
 import sys
 
 # conf
-target_path = '/Users/zeke/Downloads/户口本测试样本1011-常住人口页'
+target_path = '/Users/zeke/work/sx/OCR/image_data/营业执照90'
 #suffix = sys.argv[2]
 suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]