1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- from pathlib import Path
- import requests
- import json
- import base64
- from itertools import chain
# Base URL of the OCR reasoning service; send_request() appends the endpoint path.
url: str = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
# Root directory that the script searches recursively for sample images.
imgs_path: str = '/Users/zeke/Downloads/户口本测试样本1011-常住人口页'
def send_request(img_path, timeout=60):
    """POST one image to the ``/hkbsbtest/regbook`` endpoint and return its JSON reply.

    The file is read in binary mode, base64-encoded and sent as the
    ``image`` field of a JSON body.

    Args:
        img_path: Path (``str`` or path-like) of the image file to upload.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.post``. Without it a stalled connection would block
            the whole batch indefinitely.

    Returns:
        The response body decoded from JSON.

    Raises:
        OSError: If the image file cannot be opened or read.
        requests.exceptions.RequestException: On connection errors or timeout.
        ValueError: If the response body is not valid JSON.
    """
    with open(img_path, 'rb') as f:
        # encodebytes() yields MIME-style base64 (a newline every 76 chars);
        # kept unchanged so the payload matches what the service already receives.
        img_str: str = base64.encodebytes(f.read()).decode('utf-8')

    data = {'image': img_str}
    # NOTE(review): the bearer token is committed in source; consider loading
    # it from the environment or a config file instead.
    idc_header = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e',
    }
    r = requests.post(
        f'{url}/hkbsbtest/regbook',
        json=data,
        headers=idc_header,
        timeout=timeout,
    )
    # Decode the body once and reuse it for both the log line and the return value.
    payload = r.json()
    print(payload)
    return payload
- def _parse_result(r):
- if r['status'] == '000':
- r = r['result']
- if r:
- del r['confidence']
- return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
- elif r['status'] == '101':
- return "101"
if __name__ == '__main__':
    # Walk imgs_path recursively and gather every file matching the listed
    # extensions (currently only 'jpg'; extend the list to cover more).
    extensions = ['jpg']
    img_paths = chain.from_iterable(
        Path(imgs_path).rglob(f'*.{ext}') for ext in extensions
    )

    for img_path in img_paths:
        print(img_path)
        res = _parse_result(send_request(img_path))
        print(res)

        # Store the parsed result next to the image as "<image stem>.json".
        out_path = img_path.parent / f'{img_path.stem}.json'
        with out_path.open('w', encoding='utf-8') as f:
            json.dump(res, f, ensure_ascii=False, indent=4)
|