convert_json.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. '''
  2. Author: zeke-chin zeke-chin@icloud.com
  3. Date: 2022-10-11 16:38:18
  4. LastEditors: zeke-chin zeke-chin@icloud.com
  5. LastEditTime: 2022-10-12 17:43:29
  6. FilePath: /test_script/HR_OCR/tools/convert_json.py
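  7. Description: Sends business-license images to the OCR service and saves each parsed result as a JSON file next to the image. (koroFileHeader default placeholder: set `customMade`; open koroFileHeader to view and change the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE)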
  8. '''
  9. from pathlib import Path
  10. import requests
  11. import json
  12. import base64
  13. from itertools import chain
# Base URL of the OCR service; send_request() appends the endpoint path to it.
url = 'http://192.168.199.27:18060'
# Directory that the script entry point searches recursively for images.
imgs_path = '/Users/zeke/work/sx/OCR/image_data/营业执照90'
  16. def send_request(img_path):
  17. with open(img_path, 'rb') as f:
  18. img_str: str = base64.encodebytes(f.read()).decode('utf-8')
  19. data = {
  20. 'image': img_str
  21. }
  22. # idc_header = {
  23. # 'Content-Type': 'application/json',
  24. # 'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
  25. # }
  26. # r = requests.post(f'{url}/ocr_system/business_license', json=data, headers=idc_header)
  27. r = requests.post(f'{url}/ocr_system/business_license', json=data)
  28. print(r.json())
  29. return r.json()
  30. def _parse_result(r):
  31. if r['status'] == '000':
  32. r = r['result']
  33. if r:
  34. del r['confidence']
  35. return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
  36. elif r['status'] == '101':
  37. return "101"
  38. if __name__ == '__main__':
  39. # 0
  40. # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
  41. img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
  42. for img_path in img_paths:
  43. print(img_path)
  44. r = send_request(img_path)
  45. res = _parse_result(r)
  46. print(res)
  47. img_path: Path = img_path
  48. d = img_path.parent
  49. fn = f'{img_path.stem}.json'
  50. with (d / fn).open('w', encoding='utf-8') as f:
  51. json.dump(res, f, ensure_ascii=False, indent=4)