# convert_json.py
  1. from pathlib import Path
  2. import requests
  3. import json
  4. import base64
  5. from itertools import chain
# Base URL of the remote service; send_request() appends the endpoint path to it.
url = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
# Directory that the __main__ block searches recursively for images to submit.
imgs_path = '/Users/zeke/Downloads/户口本测试样本1011-常住人口页'
  8. def send_request(img_path):
  9. with open(img_path, 'rb') as f:
  10. img_str: str = base64.encodebytes(f.read()).decode('utf-8')
  11. data = {
  12. 'image': img_str
  13. }
  14. idc_header = {
  15. 'Content-Type': 'application/json',
  16. 'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
  17. }
  18. r = requests.post(f'{url}/hkbsbtest/regbook', json=data, headers=idc_header)
  19. # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
  20. print(r.json())
  21. return r.json()
  22. def _parse_result(r):
  23. if r['status'] == '000':
  24. r = r['result']
  25. if r:
  26. del r['confidence']
  27. return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
  28. elif r['status'] == '101':
  29. return "101"
  30. if __name__ == '__main__':
  31. # 0
  32. # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
  33. img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
  34. for img_path in img_paths:
  35. print(img_path)
  36. r = send_request(img_path)
  37. res = _parse_result(r)
  38. print(res)
  39. img_path: Path = img_path
  40. d = img_path.parent
  41. fn = f'{img_path.stem}.json'
  42. with (d / fn).open('w', encoding='utf-8') as f:
  43. json.dump(res, f, ensure_ascii=False, indent=4)