Bladeren bron

整理目录

yan chuanli 2 jaren geleden
bovenliggende
commit
935255ab04
46 gewijzigde bestanden met toevoegingen van 435 en 662 verwijderingen
  1. 42 0
      HR_OCR/img2/10-14_CETimg.md
  2. 49 0
      HR_OCR/img2/10-14_REimg.md
  3. BIN
      HR_OCR/img2/img/样本-000002.jpg
  4. 23 0
      HR_OCR/img2/img/样本-000002.json
  5. BIN
      HR_OCR/img2/img/样本-0010002.jpg
  6. 23 0
      HR_OCR/img2/img/样本-0010002.json
  7. BIN
      HR_OCR/img2/img/样本-001002.jpg
  8. 23 0
      HR_OCR/img2/img/样本-001002.json
  9. BIN
      HR_OCR/img2/img/样本-0011002.jpg
  10. 23 0
      HR_OCR/img2/img/样本-0011002.json
  11. BIN
      HR_OCR/img2/img/样本-0012002.jpg
  12. 23 0
      HR_OCR/img2/img/样本-0012002.json
  13. BIN
      HR_OCR/img2/img/样本-0013002.jpg
  14. 23 0
      HR_OCR/img2/img/样本-0013002.json
  15. BIN
      HR_OCR/img2/img/样本-0014002.jpg
  16. 23 0
      HR_OCR/img2/img/样本-0014002.json
  17. BIN
      HR_OCR/img2/img/样本-002002.jpg
  18. 1 0
      HR_OCR/img2/img/样本-002002.json
  19. BIN
      HR_OCR/img2/img/样本-003002.jpg
  20. 23 0
      HR_OCR/img2/img/样本-003002.json
  21. BIN
      HR_OCR/img2/img/样本-004002.jpg
  22. 23 0
      HR_OCR/img2/img/样本-004002.json
  23. BIN
      HR_OCR/img2/img/样本-005002.jpg
  24. 23 0
      HR_OCR/img2/img/样本-005002.json
  25. BIN
      HR_OCR/img2/img/样本-006002.jpg
  26. 23 0
      HR_OCR/img2/img/样本-006002.json
  27. BIN
      HR_OCR/img2/img/样本-007002.jpg
  28. 23 0
      HR_OCR/img2/img/样本-007002.json
  29. BIN
      HR_OCR/img2/img/样本-008002.jpg
  30. 23 0
      HR_OCR/img2/img/样本-008002.json
  31. BIN
      HR_OCR/img2/img/样本-009002.jpg
  32. 23 0
      HR_OCR/img2/img/样本-009002.json
  33. 5 2
      HR_OCR/test_script/to_md/new.py
  34. 4 2
      HR_OCR/test_script/to_md/ocr_config.py
  35. 12 8
      HR_OCR/test_script/to_md/use.py
  36. 0 46
      HR_OCR/to_md/README.md
  37. BIN
      HR_OCR/to_md/example/img/1_img.jpg
  38. 0 9
      HR_OCR/to_md/example/img/1_img.json
  39. BIN
      HR_OCR/to_md/example/img/2_img.jpg
  40. 0 9
      HR_OCR/to_md/example/img/2_img.json
  41. 0 273
      HR_OCR/to_md/new.py
  42. 0 114
      HR_OCR/to_md/ocr_config.py
  43. 0 53
      HR_OCR/to_md/use.py
  44. 0 34
      HR_OCR/tools/README.md
  45. 0 76
      HR_OCR/tools/convert_json.py
  46. 0 36
      HR_OCR/tools/suffix.py

+ 42 - 0
HR_OCR/img2/10-14_CETimg.md

@@ -0,0 +1,42 @@
+
+
+
+
+# 10-14_CETimg测试报告
+
+
+0.00%
+|字段|正确率|
+| :---: | :---: |
+|orientation|0.00%|
+|name|0.00%|
+|id|0.00%|
+|language|0.00%|
+|level|0.00%|
+|exam_time|0.00%|
+|score|0.00%|
+
+# True
+
+|图片|识别结果|
+| :---: | :---: |
+
+# False
+
+|图片|识别结果|
+| :--- | :--- |
+|![](img/样本-002002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-0014002.jpg)|Exception: 识别不到anchorTEM|
+|![](img/样本-000002.jpg)|Exception: 识别不到anchorTEM|
+|![](img/样本-004002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-008002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-0010002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-006002.jpg)|Exception: 识别不到anchorTEM|
+|![](img/样本-0012002.jpg)|Exception: 识别不到anchorTEM|
+|![](img/样本-003002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-001002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-0011002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-005002.jpg)|Exception: 专四专八无法识别|
+|![](img/样本-009002.jpg)|Exception: 识别不到anchorTEM|
+|![](img/样本-0013002.jpg)|Exception: 识别不到anchorTEM|
+|![](img/样本-007002.jpg)|Exception: 专四专八无法识别|

+ 49 - 0
HR_OCR/img2/10-14_REimg.md

@@ -0,0 +1,49 @@
+
+
+
+
+# 10-14_REimg测试报告
+
+
+78.10%
+|字段|正确率|
+| :---: | :---: |
+|orientation|86.67%|
+|name|73.33%|
+|id|80.00%|
+|gender|80.00%|
+|birthplace|66.67%|
+|birthplace_province|80.00%|
+|birthplace_city|73.33%|
+|birthplace_region|60.00%|
+|native_place|73.33%|
+|native_place_province|86.67%|
+|native_place_city|80.00%|
+|native_place_region|80.00%|
+|blood_type|86.67%|
+|religion|86.67%|
+
+# True
+
+|图片|识别结果|
+| :---: | :---: |
+|![](img/样本-008002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '郝良君', 'id': '152122199409064819', 'gender': '男', 'birthplace': '内蒙古自治区呼伦贝尔市阿荣旗', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '呼伦贝尔市', 'birthplace_region': '阿荣旗', 'native_place': '山东省菏泽市巨野县', 'native_place_province': '山东省', 'native_place_city': '菏泽市', 'native_place_region': '巨野县', 'blood_type': '不明', 'religion': '无宗教信仰'}|
+|![](img/样本-0010002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '张颖', 'id': '152122200012102421', 'gender': '女', 'birthplace': '内蒙古自治区呼伦贝尔市阿荣旗', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '呼伦贝尔市', 'birthplace_region': '阿荣旗', 'native_place': '吉林省长春市德惠市', 'native_place_province': '吉林省', 'native_place_city': '长春市', 'native_place_region': '德惠市', 'blood_type': '不明', 'religion': '无宗教信仰'}|
+|![](img/样本-006002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '王凯', 'id': '320826199502090214', 'gender': '男', 'birthplace': '江苏省淮安市涟水县', 'birthplace_province': '江苏省', 'birthplace_city': '淮安市', 'birthplace_region': '涟水县', 'native_place': '江苏省淮安市涟水县', 'native_place_province': '江苏省', 'native_place_city': '淮安市', 'native_place_region': '涟水县', 'blood_type': '', 'religion': '无宗教信仰'}|
+|![](img/样本-003002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '刘书玉', 'id': '210623199804014764', 'gender': '女', 'birthplace': '辽宁省丹东市东港市', 'birthplace_province': '辽宁省', 'birthplace_city': '丹东市', 'birthplace_region': '东港市', 'native_place': '辽宁省丹东市东港市', 'native_place_province': '辽宁省', 'native_place_city': '丹东市', 'native_place_region': '东港市', 'blood_type': '不明', 'religion': '无宗教信仰'}|
+|![](img/样本-001002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '王文强', 'id': '152223199706015513', 'gender': '男', 'birthplace': '内蒙古自治区兴安盟扎赉特旗', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '兴安盟', 'birthplace_region': '扎赉特旗', 'native_place': '内蒙古自治区兴安盟扎赉特旗', 'native_place_province': '内蒙古自治区', 'native_place_city': '兴安盟', 'native_place_region': '扎赉特旗', 'blood_type': '不明', 'religion': '无宗教信仰'}|
+|![](img/样本-005002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '王敏', 'id': '15222419990401602X', 'gender': '女', 'birthplace': '内蒙古自治区兴安盟突泉县', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '兴安盟', 'birthplace_region': '突泉县', 'native_place': '内蒙古自治区兴安盟突泉县', 'native_place_province': '内蒙古自治区', 'native_place_city': '兴安盟', 'native_place_region': '突泉县', 'blood_type': '', 'religion': '无宗教信仰'}|
+|![](img/样本-007002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '刘昕', 'id': '152101200003040634', 'gender': '男', 'birthplace': '内蒙古自治区呼伦贝尔市海拉尔区', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '呼伦贝尔市', 'birthplace_region': '海拉尔区', 'native_place': '吉林省四平市', 'native_place_province': '吉林省', 'native_place_city': '四平市', 'native_place_region': '', 'blood_type': '不明', 'religion': '无宗教信仰'}|
+
+# False
+
+|图片|识别结果|
+| :--- | :--- |
+|![](img/样本-002002.jpg)|Exception: 识别不到anchor常住|
+|![](img/样本-0014002.jpg)|-------birthplace-------<br>正确:河南省新乡市封丘县<br>返回:用名杨乐<br>-------birthplace_province-------<br>正确:河南省<br>返回:<br>-------birthplace_city-------<br>正确:新乡市<br>返回:<br>-------birthplace_region-------<br>正确:封丘县<br>返回:<br>-------native_place-------<br>正确:河南省新乡市封丘县<br>返回:河南省新乡市封丘县河南省封丘县<br>|
+|![](img/样本-000002.jpg)|-------birthplace-------<br>正确::黑龙江省大庆市杜尔伯特蒙古族自治县<br>返回:黑龙江省哈尔滨市市辖区<br>-------birthplace_city-------<br>正确:大庆市<br>返回:哈尔滨市<br>-------birthplace_region-------<br>正确:杜尔伯特蒙古族自治县<br>返回:市辖区<br>-------native_place-------<br>正确:黑龙江省大庆市杜尔伯特蒙古族自治县<br>返回:黑龙江省哈尔滨市市辖区<br>-------native_place_city-------<br>正确:大庆市<br>返回:哈尔滨市<br>-------native_place_region-------<br>正确:杜尔伯特蒙古族自治县<br>返回:市辖区<br>|
+|![](img/样本-004002.jpg)|-------birthplace-------<br>正确:内蒙古自治区兴安盟乌兰浩特市<br>返回:内蒙古自治区兴安盟科尔沁右翼前旗<br>-------birthplace_region-------<br>正确:乌兰浩特市<br>返回:科尔沁右翼前旗<br>|
+|![](img/样本-0012002.jpg)|-------name-------<br>正确:阚宏鹏<br>返回:闽宏鹏<br>-------birthplace_region-------<br>正确:额尔古纳市<br>返回:左旗<br>|
+|![](img/样本-0011002.jpg)|Exception: 没有找到身份证号|
+|![](img/样本-009002.jpg)|-------name-------<br>正确:周东旭<br>返回:内蒙古莫力达瓦自治旗<br>|
+|![](img/样本-0013002.jpg)|-------id-------<br>正确:370125199912104257<br>返回:37012519991210425<br>-------gender-------<br>正确:男<br>返回:<br>|

BIN
HR_OCR/img2/img/样本-000002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-000002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "孔德鑫",
+    "id": "230624199809052655",
+    "gender": "男",
+    "birthplace": ":黑龙江省大庆市杜尔伯特蒙古族自治县",
+    "birthplace_province": "黑龙江省",
+    "birthplace_city": "大庆市",
+    "birthplace_region": "杜尔伯特蒙古族自治县",
+    "native_place": "黑龙江省大庆市杜尔伯特蒙古族自治县",
+    "native_place_province": "黑龙江省",
+    "native_place_city": "大庆市",
+    "native_place_region": "杜尔伯特蒙古族自治县",
+    "blood_type": "",
+    "religion": ""
+}

BIN
HR_OCR/img2/img/样本-0010002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-0010002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "张颖",
+    "id": "152122200012102421",
+    "gender": "女",
+    "birthplace": "内蒙古自治区呼伦贝尔市阿荣旗",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "呼伦贝尔市",
+    "birthplace_region": "阿荣旗",
+    "native_place": "吉林省长春市德惠市",
+    "native_place_province": "吉林省",
+    "native_place_city": "长春市",
+    "native_place_region": "德惠市",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-001002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-001002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "王文强",
+    "id": "152223199706015513",
+    "gender": "男",
+    "birthplace": "内蒙古自治区兴安盟扎赉特旗",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "兴安盟",
+    "birthplace_region": "扎赉特旗",
+    "native_place": "内蒙古自治区兴安盟扎赉特旗",
+    "native_place_province": "内蒙古自治区",
+    "native_place_city": "兴安盟",
+    "native_place_region": "扎赉特旗",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-0011002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-0011002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "常龙江",
+    "id": "152201200004234014",
+    "gender": "女",
+    "birthplace": "内蒙古自治区兴安盟乌兰浩特市",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "兴安盟",
+    "birthplace_region": "乌兰浩特市",
+    "native_place": "内蒙古自治区兴安盟乌兰浩特市",
+    "native_place_province": "内蒙古自治区",
+    "native_place_city": "兴安盟",
+    "native_place_region": "乌兰浩特市",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-0012002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-0012002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "阚宏鹏",
+    "id": "152122199205140314",
+    "gender": "男",
+    "birthplace": "内蒙古自治区呼伦贝尔市额尔古纳市左旗",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "呼伦贝尔市",
+    "birthplace_region": "额尔古纳市",
+    "native_place": "黑龙江省齐齐哈尔市泰来县",
+    "native_place_province": "黑龙江省",
+    "native_place_city": "齐齐哈尔市",
+    "native_place_region": "泰来县",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-0013002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-0013002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "孙富豪",
+    "id": "370125199912104257",
+    "gender": "男",
+    "birthplace": "山东省济南市济阳县",
+    "birthplace_province": "山东省",
+    "birthplace_city": "济南市",
+    "birthplace_region": "济阳县",
+    "native_place": "山东省济南市济阳县",
+    "native_place_province": "山东省",
+    "native_place_city": "济南市",
+    "native_place_region": "济阳县",
+    "blood_type": "",
+    "religion": ""
+}

BIN
HR_OCR/img2/img/样本-0014002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-0014002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "杨杭升",
+    "id": "410727199205222316",
+    "gender": "男",
+    "birthplace": "河南省新乡市封丘县",
+    "birthplace_province": "河南省",
+    "birthplace_city": "新乡市",
+    "birthplace_region": "封丘县",
+    "native_place": "河南省新乡市封丘县",
+    "native_place_province": "河南省",
+    "native_place_city": "新乡市",
+    "native_place_region": "封丘县",
+    "blood_type": "",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-002002.jpg


+ 1 - 0
HR_OCR/img2/img/样本-002002.json

@@ -0,0 +1 @@
+"101"

BIN
HR_OCR/img2/img/样本-003002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-003002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "刘书玉",
+    "id": "210623199804014764",
+    "gender": "女",
+    "birthplace": "辽宁省丹东市东港市",
+    "birthplace_province": "辽宁省",
+    "birthplace_city": "丹东市",
+    "birthplace_region": "东港市",
+    "native_place": "辽宁省丹东市东港市",
+    "native_place_province": "辽宁省",
+    "native_place_city": "丹东市",
+    "native_place_region": "东港市",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-004002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-004002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "张佳宝",
+    "id": "152201199906262529",
+    "gender": "女",
+    "birthplace": "内蒙古自治区兴安盟乌兰浩特市",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "兴安盟",
+    "birthplace_region": "乌兰浩特市",
+    "native_place": "内蒙古自治区兴安盟科尔沁右翼前旗",
+    "native_place_province": "内蒙古自治区",
+    "native_place_city": "兴安盟",
+    "native_place_region": "科尔沁右翼前旗",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-005002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-005002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "王敏",
+    "id": "15222419990401602X",
+    "gender": "女",
+    "birthplace": "内蒙古自治区兴安盟突泉县",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "兴安盟",
+    "birthplace_region": "突泉县",
+    "native_place": "内蒙古自治区兴安盟突泉县",
+    "native_place_province": "内蒙古自治区",
+    "native_place_city": "兴安盟",
+    "native_place_region": "突泉县",
+    "blood_type": "",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-006002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-006002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "王凯",
+    "id": "320826199502090214",
+    "gender": "男",
+    "birthplace": "江苏省淮安市涟水县",
+    "birthplace_province": "江苏省",
+    "birthplace_city": "淮安市",
+    "birthplace_region": "涟水县",
+    "native_place": "江苏省淮安市涟水县",
+    "native_place_province": "江苏省",
+    "native_place_city": "淮安市",
+    "native_place_region": "涟水县",
+    "blood_type": "",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-007002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-007002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "刘昕",
+    "id": "152101200003040634",
+    "gender": "男",
+    "birthplace": "内蒙古自治区呼伦贝尔市海拉尔区",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "呼伦贝尔市",
+    "birthplace_region": "海拉尔区",
+    "native_place": "吉林省四平市",
+    "native_place_province": "吉林省",
+    "native_place_city": "四平市",
+    "native_place_region": "",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-008002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-008002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "郝良君",
+    "id": "152122199409064819",
+    "gender": "男",
+    "birthplace": "内蒙古自治区呼伦贝尔市阿荣旗",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "呼伦贝尔市",
+    "birthplace_region": "阿荣旗",
+    "native_place": "山东省菏泽市巨野县",
+    "native_place_province": "山东省",
+    "native_place_city": "菏泽市",
+    "native_place_region": "巨野县",
+    "blood_type": "不明",
+    "religion": "无宗教信仰"
+}

BIN
HR_OCR/img2/img/样本-009002.jpg


+ 23 - 0
HR_OCR/img2/img/样本-009002.json

@@ -0,0 +1,23 @@
+{
+    "img_type": "0",
+    "orientation": 0,
+    "type": "",
+    "address": "",
+    "address_province": "",
+    "address_city": "",
+    "address_region": "",
+    "address_detail": "",
+    "name": "周东旭",
+    "id": "152123199910317230",
+    "gender": "男",
+    "birthplace": "内蒙古自治区呼伦贝尔市莫力达瓦达斡尔族自治旗",
+    "birthplace_province": "内蒙古自治区",
+    "birthplace_city": "呼伦贝尔市",
+    "birthplace_region": "莫力达瓦达斡尔族自治旗",
+    "native_place": "内蒙古自治区呼伦贝尔市莫力达瓦达斡尔族自治旗",
+    "native_place_province": "内蒙古自治区",
+    "native_place_city": "呼伦贝尔市",
+    "native_place_region": "莫力达瓦达斡尔族自治旗",
+    "blood_type": "",
+    "religion": ""
+}

+ 5 - 2
HR_OCR/test_script/to_md/new.py

@@ -95,7 +95,7 @@ def send_request(image: Image, ocr_name, ocr_address, image_type=None):
     config = OCR_CONFIGS[ocr_name][ocr_address]
     headers = {
         'Content-Type': 'application/json',
-        'Authorization': config.token
+        'Authorization': f'Bearer {config.token}'
     }
     data = {
         'image': base64_str,
@@ -129,7 +129,10 @@ class Dataset(object):
             else:
                 self.image_list.append(Image(p, None, rotate))
 
-        self.field = Filed.get(field)
+        if ocr_name == 'regbook':
+            self.field = Filed.get(field + str(image_type))
+        else:
+            self.field = Filed.get(field)
 
         self.correct = {k: 0 for k in self.field}
         self.error = {k: 0 for k in self.field}

+ 4 - 2
HR_OCR/test_script/to_md/ocr_config.py

@@ -172,14 +172,16 @@ idcard_fieid = ['orientation', 'name', 'id', 'ethnicity', 'gender', 'birthday',
 bankcard_fieid = ['orientation', 'number']
 schoolcert_fieid = ['orientation', 'name', 'gender', 'admission_time', 'education_time', 'education_level', 'education_type', 'learning_type', 'school', 'major', 'number']
 cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
-regbook_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
+regbook_0_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
                  'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
                  'native_place_region', 'blood_type', 'religion']
+regbook_1_field = ['orientation', 'type', 'address', 'address_province', 'address_city', 'address_region', 'address_detail']
 business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
                     'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
 Filed = {
     'cet': cet_field,
-    'regbook': regbook_field,
+    'regbook0': regbook_0_field,
+    'regbook1': regbook_1_field,
     'business_license': business_license,
     'schoolcert': schoolcert_fieid,
     'idcard': idcard_fieid,

+ 12 - 8
HR_OCR/test_script/to_md/use.py

@@ -2,35 +2,39 @@
 Author: zeke-chin zeke-chin@icloud.com
 Date: 2022-09-28 20:28:41
 LastEditors: zeke-chin zeke-chin@icloud.com
-LastEditTime: 2022-09-30 15:08:48
-FilePath: /utils/HR_OCR/utils/use.py
+LastEditTime: 2022-10-12 21:15:26
+FilePath: /to_md/HR_OCR/to_md/use.py
 Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
 '''
 
 from pathlib import Path
+from tkinter import image_types
 
 from new import MD, Image, Dataset, parser_path
 
 # config
 # 图片路径
-image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
+
+image_path = Path('/Users/sxkj/test_script/HR_OCR/img2/img')
 image_type = 0
 # 是否旋转
 image_rotate = False
-ocr_address = 'DXtest'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
+ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
+
+ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
+md_name = 'RE'
 
-ocr_name = 'cet'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
-md_name = 'CET'
-filed = 'cet'
+filed = 'regbook' # 'cet' 'idcard' 'bankcard' 'regbook_0' 'regbook_1' 'schoolcert' 'business_license'
 
 # 若md_path为None 则默认使用图片父路径为markdown保存路径
-# md_path = '/Users/zeke/work/sx/OCR/HROCR/utils/example' or image_path.parent
+# md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
 md_path = None or image_path.parent
 
 md_file = parser_path(Path(md_path) / Path(md_name + image_path.stem), image_rotate)
 
 
 if __name__ == '__main__':
+
     markdown = MD(md_file)
 
     dataset = Dataset(image_path, image_type, ocr_name, ocr_address, filed, image_rotate)

+ 0 - 46
HR_OCR/to_md/README.md

@@ -1,46 +0,0 @@
-# 人力OCR
-
-## 生成markdown测试报告脚本
-
-1. 生成**目的文件夹**下图片的**json文件**
-
-- **目的文件夹**: 
-  - 存放所需测试**图片文件夹**
-  - 对**文件夹**内图片进行标准化
-    - 运行`suffix.py`脚本
-    - 产生*.jpg
-  - 生成<u>算法推理json文件</u>
-    - 运行`convert_json.py`脚本
-    - 产生对应jpg 文件的json文件
-  - 修改<u>算法推理json文件</u>成**正确的json文件**
-
-2. 跑生成md报告脚本
-
-- 修改use.py 并运行
-
-  ```python
-  # config
-  
-  # 目的文件夹
-  image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
-  # 图片type(如果接口不存在传0不影响结果)
-  image_type = 0
-  # 是否旋转
-  image_rotate = False
-  
-  # ocr地址选择
-  # 本地环境、腾讯云测试环境、腾讯云生产环境、电信云测试环境、电信云生产环境
-  ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
-  # ocr能力选择
-  ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
-  # 生产MD文件名
-  md_name = 'RegBook'
-  # ocr能力对应字段()
-  filed = 'regbook'
-  
-  # 若md_path为None 则默认使用图片父路径为markdown保存路径
-  # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
-  md_path = None or image_path.parent
-  ```
-
-  

BIN
HR_OCR/to_md/example/img/1_img.jpg


+ 0 - 9
HR_OCR/to_md/example/img/1_img.json

@@ -1,9 +0,0 @@
-{
-    "orientation": 0,
-    "name": "鉴康",
-    "id": "152801200003178527",
-    "language": "英语",
-    "level": "CET4",
-    "exam_time": "2021年6月",
-    "score": "451"
-}

BIN
HR_OCR/to_md/example/img/2_img.jpg


+ 0 - 9
HR_OCR/to_md/example/img/2_img.json

@@ -1,9 +0,0 @@
-{
-    "orientation": 0,
-    "name": "张鑫",
-    "id": "140227199809282317",
-    "language": "英语",
-    "level": "CET4",
-    "exam_time": "2021年6月",
-    "score": "445"
-}

+ 0 - 273
HR_OCR/to_md/new.py

@@ -1,273 +0,0 @@
-from pathlib import Path
-from typing import List, Optional
-import cv2
-import requests
-from mdutils.mdutils import MdUtils
-from dataclasses import dataclass
-import json
-import time
-import base64
-from itertools import chain
-from tqdm import tqdm
-from ocr_config import OCR_CONFIGS, Filed
-
-
-class Image:
-    def __init__(self, path: Path, rotate, is_rotate):
-        self._path = path
-        self.rotate = rotate
-        self._ocr_result = None
-        self.category = True
-        self.is_rotate = is_rotate
-        try:
-            self.gt_result = self.get_json()
-        except Exception as e:
-            print(self.json_path)
-            raise e
-
-    def __repr__(self):
-        return f'path: {self.path}, rotate: {self.rotate}, gt_result: {self.gt_result}, cate: {self.category}'
-
-    # 将方法转换为相同名称的只读属性
-    @property
-    def path(self):
-        return self._path
-
-    @path.setter
-    def path(self, path):
-        self._path = path
-
-    @property
-    def fn(self):
-        return self._path.stem
-
-    @property
-    def ocr_result(self):
-        return self._ocr_result
-
-    @ocr_result.setter
-    def ocr_result(self, value):
-        self._ocr_result = value
-
-    def get_gt_result(self, key):# sourcery skip: merge-duplicate-blocks, remove-redundant-if
-        if key == 'orientation':
-            if self.is_rotate:
-                return self.rotate + 1 if self.rotate is not None else 0
-            else:
-                return self.gt_result[key]
-        elif key in self.gt_result:
-            return self.gt_result[key]
-        else:
-            return None
-
-    @property
-    def json_path(self):
-        return self.path.parent / f'{self.path.stem}.json'
-
-    def save_image(self, img, rotate):
-        dst = self.path.parent.parent / (".ro_dst")
-        if not dst.exists(): dst.mkdir()
-        self.path = dst / f'{self.path.stem}-{rotate + 1}.jpg'
-        # print('save image', self.path)
-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        cv2.imwrite(str(self.path), img)
-        return self.path
-
-    def get_base64(self, rotate=None):
-        # print(self.path)
-        img = cv2.imread(str(self.path))
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        path = self.path
-        if rotate is not None:
-            img = cv2.rotate(img, rotate)
-            path = self.save_image(img, rotate)
-            # imencode 将图片编码到缓存,并保存到本地
-        with open(path, 'rb') as f:
-            return base64.encodebytes(f.read()).decode('utf-8')
-
-    def get_json(self):
-        with open(self.json_path, 'r') as f:
-            return json.load(f)
-
-
-def send_request(image: Image, ocr_name, ocr_address, image_type=None):
-    base64_str = image.get_base64(image.rotate)
-    config = OCR_CONFIGS[ocr_name][ocr_address]
-    headers = {
-        'Content-Type': 'application/json',
-        'Authorization': f'Bearer {config.token}'
-    }
-    data = {
-        'image': base64_str,
-    }
-    if image_type is not None:
-        data['image_type'] = image_type
-    response = requests.post(config.url, headers=headers, json=data)
-    return response.json()
-
-
-def parser_path(path: Path, rotate: bool):
-    name = time.strftime("%m-%d_", time.localtime()) + path.name
-    if rotate:
-        name = f'{name}_R.md'
-    return path.parent / name
-
-
-class Dataset(object):
-    def __init__(self, images_path, image_type, ocr_name, ocr_address, field, rotate=False):
-        self.image_type = image_type
-        self.ocr_name = ocr_name
-        self.ocr_address = ocr_address
-        self.images_path = images_path
-        self.image_list = []
-        # chain 迭代器,首先返回第一个可迭代对象中所有元素,接着返回下一个可迭代对象中所有元素,直到耗尽所有可迭代对象中的元素
-        # eg:chain('ABC', 'DEF') --> A B C D E F
-
-        for p in chain(*[Path(self.images_path).rglob('*.jpg')]):
-            if rotate:
-                self.image_list.extend(Image(p, r, rotate) for r in [None, 0, 1, 2])
-            else:
-                self.image_list.append(Image(p, None, rotate))
-
-        if ocr_name == 'regbook':
-            self.field = Filed.get(field + str(image_type))
-        else:
-            self.field = Filed.get(field)
-
-        self.correct = {k: 0 for k in self.field}
-        self.error = {k: 0 for k in self.field}
-
-    def __len__(self):
-        return len(self.image_list)
-
-    def _evaluate_one(self, image: Image):
-        def _get_predict(r, key):
-            # isinstance() 函数来判断一个对象是否是一个已知的类型
-            if isinstance(r[key], dict):
-                return r[key]['text']
-            else:
-                return r[key]
-
-        if image.rotate is not None: image.gt_result['orientation'] = image.rotate + 1
-        r = send_request(image, self.ocr_name, self.ocr_address, self.image_type)
-        err_str = ''
-        if r['status'] == '000':
-            res = r['result']
-            for key in self.field:
-                # print('attr: ', key)
-                if key in res:
-                    gt = image.get_gt_result(key)
-                    predict = _get_predict(res, key)
-                    # print(f'gt: {gt}, predict: {predict}')
-                    if predict == gt:
-                        self.correct[key] += 1
-                    else:
-                        image.category = False
-                        self.error[key] += 1
-                        err_str += f'-------{key}-------<br>正确:{gt}<br>返回:{predict}<br>'
-            if image.category:
-                image.ocr_result = image.gt_result
-            else:
-                image.ocr_result = err_str
-        else:
-            image.ocr_result = r['msg']
-            image.category = False
-            for key in self.field:
-                self.error[key] += 1
-
-    def __call__(self):  # sourcery skip: yield-from
-        # yield 返回一个生成器
-        for image in self.image_list:
-            yield image
-
-    # 比较
-    def evaluate(self):
-        for image in tqdm(self.image_list):
-            self._evaluate_one(image)
-
-    # 计算总体准确度
-    @property
-    def accuracy(self):
-        return sum(list(self.correct.values())) / sum(list(self.correct.values()) + list(self.error.values()))
-
-    # 计算元素准确度
-    @property
-    def attrs_accuracy(self):
-        return {k: self.correct[k] / (self.correct[k] + self.error[k]) for k in self.field}
-
-
-class MD(object):
-    def __init__(self, file_path: Path):
-        self.name = file_path.name
-        self.f = MdUtils(file_name=str(file_path))
-        self.field_table: List = ['字段', '正确率']
-        self.true_table: List = ['图片', '识别结果']
-        self.false_table: List = ['图片', '识别结果']
-        self.write_header(f'{self.name}测试报告')
-
-    def write_header(self, title, level=1):
-        self.f.new_header(level=level, title=title)
-
-    def write_total_accuracy(self, ds: Dataset):
-        def get_format_total_accuracy(ds: Dataset):
-            acc = ds.accuracy * 100
-            return "{:.2f}%".format(acc)
-
-        # 1. 拿到format之后的百分数
-        res = get_format_total_accuracy(ds)
-
-        # 2. 写入
-        self.f.new_paragraph(res)
-
-    def write_table_accuracy(self, ds: Dataset, columns=2, text_align='center'):
-        def format_table_accuracy(ds: Dataset):
-            table = ds.attrs_accuracy
-            for k, v in table.items():
-                acc = v * 100
-                table[k] = "{:.2f}%".format(acc)
-            return table
-
-        def dict_2_list(dic: dict):
-            l = []
-            for k, v in dic.items():
-                l.extend((k, v))
-            return l
-
-        table_dict = format_table_accuracy(ds)
-        table_list = dict_2_list(table_dict)
-        self.field_table.extend(table_list)
-
-        rows = len(self.field_table) // columns
-        self.f.new_table(columns=columns, rows=rows, text=self.field_table, text_align=text_align)
-
-    def write_table_result(self, ds: Dataset, columns=2, text_align='center'):
-        for image in ds.image_list:
-            md_image = self.f.new_inline_image(text='', path=f'{image.path.parent.name}/{image.path.name}')
-            if image.category:
-                self.true_table.extend([md_image, image.ocr_result])
-            else:
-                self.false_table.extend([md_image, image.ocr_result])
-
-        true_rows = len(self.true_table) // columns
-        false_rows = len(self.false_table) // columns
-        self.write_header('True')
-        self.f.new_table(columns=columns, rows=true_rows, text=self.true_table, text_align=text_align)
-        self.write_header('False')
-        self.f.new_table(columns=columns, rows=false_rows, text=self.false_table, text_align='left')
-
-# if __name__ == '__main__':
-#     markdown = MD('英语等级证书')
-#
-#     dataset = Dataset(Path(''), 'cet', 'local', False)
-#     print(len(dataset))
-#     for d in dataset():
-#         print(d)
-#
-#     dataset.evaluate()
-#     print(dataset.accuracy)
-#
-#     markdown.write_total_accuracy(dataset)
-#     markdown.write_table_accuracy(dataset)
-#     markdown.write_table_result(dataset)
-#
-#     markdown.f.create_md_file()

+ 0 - 114
HR_OCR/to_md/ocr_config.py

@@ -1,114 +0,0 @@
-from dataclasses import dataclass
-from typing import List
-
-
-@dataclass
-class Type:
-    image_type: int
-    image_field: List
-
-
-@dataclass
-class RequestConfig:
-    url: str
-    token: str
-
-
-@dataclass
-class Configs:
-    request: RequestConfig
-    type: Type
-
-
-# cet
-cet_local_config = RequestConfig(url='http://192.168.199.27:18050/ocr_system/cet', token='')
-cet_TXtest_config = RequestConfig(
-    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm//cettest/cet',
-    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
-cet_TXsb_config = RequestConfig(
-    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/cet/cet',
-    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
-cet_DXtest_config = RequestConfig(
-    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cettest/cet',
-    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
-cet_DXsb_config = RequestConfig(
-    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cet/cet',
-    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')
-
-CET_CONFIGS = {
-    'local': cet_local_config,
-    'TXtest': cet_TXtest_config,
-    'TXsb': cet_TXsb_config,
-    'DXtest': cet_DXtest_config,
-    'DXsb': cet_DXsb_config
-}
-
-# regbook
-regbook_local_config = RequestConfig(url='http://192.168.199.27:18040/ocr_system/regbook', token='')
-regbook_TXtest_config = RequestConfig(
-    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsbtest/regbook',
-    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
-regbook_TXsb_config = RequestConfig(
-    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsb/regbook',
-    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
-regbook_DXtest_config = RequestConfig(
-    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsbtest/regbook',
-    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
-regbook_DXsb_config = RequestConfig(
-    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsb/regbook',
-    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')
-
-REGBOOK_CONFIGS = {
-    'local': regbook_local_config,
-    'TXtest': regbook_TXtest_config,
-    'TXsb': regbook_TXsb_config,
-    'DXtest': regbook_DXtest_config,
-    'DXsb': regbook_DXsb_config
-}
-
-# business_license
-blfe_local_config = RequestConfig(url='http://192.168.199.27:18060/ocr_system/business_license', token='')
-blfe_TXtest_config = RequestConfig(
-    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfetest/blfe',
-    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
-blfe_TXsb_config = RequestConfig(
-    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfe/blfe',
-    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
-blfe_DXtest_config = RequestConfig(
-    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfetest/blfe',
-    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
-blfe_DXsb_config = RequestConfig(
-    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfe/blfe',
-    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')
-
-BLFE_CONFIGS = {
-    'local': blfe_local_config,
-    'TXtest': blfe_TXtest_config,
-    'TXsb': blfe_TXsb_config,
-    'DXtest': blfe_DXtest_config,
-    'DXsb': blfe_DXsb_config
-}
-
-OCR_CONFIGS = {
-    'cet': CET_CONFIGS,
-    'regbook': REGBOOK_CONFIGS,
-    'business_license': BLFE_CONFIGS
-}
-
-# 字段
-cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
-regbook_0_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
-                 'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
-                 'native_place_region', 'blood_type', 'religion']
-regbook_1_field = ['orientation', 'type', 'address', 'address_province', 'address_city', 'address_region', 'address_detail']
-
-business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
-                    'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
-# business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
-#                     'start_date', 'business_scope', 'expire_date', 'address']
-Filed = {
-    'cet': cet_field,
-    'regbook0': regbook_0_field,
-    'regbook1': regbook_1_field,
-    'business_license': business_license
-}

+ 0 - 53
HR_OCR/to_md/use.py

@@ -1,53 +0,0 @@
-'''
-Author: zeke-chin zeke-chin@icloud.com
-Date: 2022-09-28 20:28:41
-LastEditors: zeke-chin zeke-chin@icloud.com
-LastEditTime: 2022-10-12 21:15:26
-FilePath: /to_md/HR_OCR/to_md/use.py
-Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-'''
-
-from pathlib import Path
-from tkinter import image_types
-
-from new import MD, Image, Dataset, parser_path
-
-# config
-# 图片路径
-
-image_path = Path('/Users/sxkj/Desktop/1/img')
-image_type = 1
-# 是否旋转
-image_rotate = False
-ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
-
-ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
-md_name = 'RE_1.7'
-
-filed = 'regbook' # 'cet' 'idcard' 'bankcard' 'regbook_0' 'regbook_1' 'schoolcert' 'business_license'
-
-# 若md_path为None 则默认使用图片父路径为markdown保存路径
-# md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
-md_path = None or image_path.parent
-
-md_file = parser_path(Path(md_path) / Path(md_name + image_path.stem), image_rotate)
-
-
-if __name__ == '__main__':
-
-    markdown = MD(md_file)
-
-    dataset = Dataset(image_path, image_type, ocr_name, ocr_address, filed, image_rotate)
-    print(len(dataset))
-    for d in dataset():
-        print(d)
-
-    dataset.evaluate()
-    print(dataset.accuracy)
-
-    markdown.write_total_accuracy(dataset)
-    markdown.write_table_accuracy(dataset)
-    markdown.write_table_result(dataset)
-
-    print(md_file)
-    markdown.f.create_md_file()

+ 0 - 34
HR_OCR/tools/README.md

@@ -1,34 +0,0 @@
-# 人力OCR
-
-## convert_json.py
-
-```python
-# 项目url
-url = 'http://192.168.199.27:18040'
-# 目标文件夹
-imgs_path = './HR_OCR/to_md/example/img'
-
-def send_request(img_path, image_type = 0):
-    with open(img_path, 'rb') as f:
-        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
-        data = {
-            'image': img_str,
-            'image_type': image_type
-        }
-        idc_header = {
-            'Content-Type': 'application/json',
-            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
-        }
-        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
-        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
-        print(r.json())
-        return r.json()
-```
-
-## suffix.py
-
-```python
-# 需要格式化的目的文件夹路径
-target_path = './HR_OCR/to_md/example/img'
-```
-

+ 0 - 76
HR_OCR/tools/convert_json.py

@@ -1,76 +0,0 @@
-'''
-Author: zeke-chin zeke-chin@icloud.com
-Date: 2022-10-11 16:38:18
-LastEditors: zeke-chin zeke-chin@icloud.com
-LastEditTime: 2022-10-12 17:43:29
-FilePath: /test_script/HR_OCR/tools/convert_json.py
-Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-'''
-from pathlib import Path
-
-import requests
-import json
-import base64
-from itertools import chain
-
-<<<<<<< Updated upstream
-url = 'http://192.168.199.27:18060'
-imgs_path = '/Users/zeke/work/sx/OCR/image_data/营业执照90'
-=======
-url = 'http://192.168.199.27:18040'
-imgs_path = '/Users/sxkj/test_script/HR_OCR/户口本测试样本1011-常住人口页'
->>>>>>> Stashed changes
-
-
-def send_request(img_path):
-    with open(img_path, 'rb') as f:
-        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
-        data = {
-            'image': img_str,
-            'image_type': 0
-        }
-<<<<<<< Updated upstream
-        # idc_header = {
-        #     'Content-Type': 'application/json',
-        #     'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
-        # }
-        # r = requests.post(f'{url}/ocr_system/business_license', json=data, headers=idc_header)
-        r = requests.post(f'{url}/ocr_system/business_license', json=data)
-=======
-        idc_header = {
-            'Content-Type': 'application/json',
-            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
-        }
-        r = requests.post(f'{url}/ocr_system/regbook', json=data, headers=idc_header)
-        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
->>>>>>> Stashed changes
-        print(r.json())
-        return r.json()
-
-
-def _parse_result(r):
-    if r['status'] == '000':
-        r = r['result']
-        if r:
-            del r['confidence']
-        return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
-    elif r['status'] == '101':
-        return "101"
-
-
-if __name__ == '__main__':
-
-    # 0
-    # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
-    img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
-    for img_path in img_paths:
-        print(img_path)
-        r = send_request(img_path)
-        res = _parse_result(r)
-        print(res)
-        img_path: Path = img_path
-        d = img_path.parent
-        fn = f'{img_path.stem}.json'
-
-        with (d / fn).open('w', encoding='utf-8') as f:
-            json.dump(res, f, ensure_ascii=False, indent=4)

+ 0 - 36
HR_OCR/tools/suffix.py

@@ -1,36 +0,0 @@
-'''
-Author: zeke-chin zeke-chin@icloud.com
-Date: 2022-10-11 16:38:18
-LastEditors: zeke-chin zeke-chin@icloud.com
-LastEditTime: 2022-10-12 17:53:30
-FilePath: /test_script/HR_OCR/tools/suffix.py
-Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-'''
-import os
-from pathlib import Path
-from itertools import chain
-import sys
-
-# conf
-target_path = '/Users/zeke/work/sx/OCR/image_data/营业执照90'
-#suffix = sys.argv[2]
-suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
-
-def get_range(n):
-    len_n = len(str(n))
-    for num in range(1, n + 1):
-        output = str(num)
-        while len(output) < len_n:
-            output = f'0{output}'
-        yield output
-
-
-file_paths = list(chain(*[Path(target_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']]))
-print(len(file_paths))
-num = len(file_paths)
-file_name_list = list(get_range(num))
-for i in range(num):
-    file = file_paths[i]
-    print(file)
-    new = file.parent / f'{file_name_list[i]}_img.{suffix}'
-    file.rename(new)