# convert_json.py (1.7 KB)
  1. from pathlib import Path
  2. import requests
  3. import json
  4. import base64
  5. from itertools import chain
  6. url = 'http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr'
  7. imgs_path = '/Users/zeke/Downloads/户口本测试样本1011-常住人口页'
  8. def send_request(img_path, image_type = 0):
  9. with open(img_path, 'rb') as f:
  10. img_str: str = base64.encodebytes(f.read()).decode('utf-8')
  11. data = {
  12. 'image': img_str,
  13. 'image_type': image_type
  14. }
  15. idc_header = {
  16. 'Content-Type': 'application/json',
  17. 'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
  18. }
  19. r = requests.post(f'{url}/hkbsbtest/regbook', json=data, headers=idc_header)
  20. # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
  21. print(r.json())
  22. return r.json()
  23. def _parse_result(r):
  24. if r['status'] == '000':
  25. r = r['result']
  26. if r:
  27. del r['confidence']
  28. return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
  29. elif r['status'] == '101':
  30. return "101"
  31. if __name__ == '__main__':
  32. # 0
  33. # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
  34. img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
  35. for img_path in img_paths:
  36. print(img_path)
  37. r = send_request(img_path)
  38. res = _parse_result(r)
  39. print(res)
  40. img_path: Path = img_path
  41. d = img_path.parent
  42. fn = f'{img_path.stem}.json'
  43. with (d / fn).open('w', encoding='utf-8') as f:
  44. json.dump(res, f, ensure_ascii=False, indent=4)