пре 2 година · 935255ab04
--- a/HR_OCR/img2/10-14_CETimg.md
+++ b/HR_OCR/img2/10-14_CETimg.md
@@ -0,0 +1,42 @@
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 10-14_CETimg测试报告
			
 
				+
			
 
				+
			
 
				+0.00%
			
 
				+|字段|正确率|
			
 
				+| :---: | :---: |
			
 
				+|orientation|0.00%|
			
 
				+|name|0.00%|
			
 
				+|id|0.00%|
			
 
				+|language|0.00%|
			
 
				+|level|0.00%|
			
 
				+|exam_time|0.00%|
			
 
				+|score|0.00%|
			
 
				+
			
 
				+# True
			
 
				+
			
 
				+|图片|识别结果|
			
 
				+| :---: | :---: |
			
 
				+
			
 
				+# False
			
 
				+
			
 
				+|图片|识别结果|
			
 
				+| :--- | :--- |
			
 
				+|![](img/样本-002002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-0014002.jpg)|Exception: 识别不到anchorTEM|
			
 
				+|![](img/样本-000002.jpg)|Exception: 识别不到anchorTEM|
			
 
				+|![](img/样本-004002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-008002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-0010002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-006002.jpg)|Exception: 识别不到anchorTEM|
			
 
				+|![](img/样本-0012002.jpg)|Exception: 识别不到anchorTEM|
			
 
				+|![](img/样本-003002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-001002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-0011002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-005002.jpg)|Exception: 专四专八无法识别|
			
 
				+|![](img/样本-009002.jpg)|Exception: 识别不到anchorTEM|
			
 
				+|![](img/样本-0013002.jpg)|Exception: 识别不到anchorTEM|
			
 
				+|![](img/样本-007002.jpg)|Exception: 专四专八无法识别|
			
--- a/HR_OCR/img2/10-14_REimg.md
+++ b/HR_OCR/img2/10-14_REimg.md
@@ -0,0 +1,49 @@
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 10-14_REimg测试报告
			
 
				+
			
 
				+
			
 
				+78.10%
			
 
				+|字段|正确率|
			
 
				+| :---: | :---: |
			
 
				+|orientation|86.67%|
			
 
				+|name|73.33%|
			
 
				+|id|80.00%|
			
 
				+|gender|80.00%|
			
 
				+|birthplace|66.67%|
			
 
				+|birthplace_province|80.00%|
			
 
				+|birthplace_city|73.33%|
			
 
				+|birthplace_region|60.00%|
			
 
				+|native_place|73.33%|
			
 
				+|native_place_province|86.67%|
			
 
				+|native_place_city|80.00%|
			
 
				+|native_place_region|80.00%|
			
 
				+|blood_type|86.67%|
			
 
				+|religion|86.67%|
			
 
				+
			
 
				+# True
			
 
				+
			
 
				+|图片|识别结果|
			
 
				+| :---: | :---: |
			
 
				+|![](img/样本-008002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '郝良君', 'id': '152122199409064819', 'gender': '男', 'birthplace': '内蒙古自治区呼伦贝尔市阿荣旗', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '呼伦贝尔市', 'birthplace_region': '阿荣旗', 'native_place': '山东省菏泽市巨野县', 'native_place_province': '山东省', 'native_place_city': '菏泽市', 'native_place_region': '巨野县', 'blood_type': '不明', 'religion': '无宗教信仰'}|
			
 
				+|![](img/样本-0010002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '张颖', 'id': '152122200012102421', 'gender': '女', 'birthplace': '内蒙古自治区呼伦贝尔市阿荣旗', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '呼伦贝尔市', 'birthplace_region': '阿荣旗', 'native_place': '吉林省长春市德惠市', 'native_place_province': '吉林省', 'native_place_city': '长春市', 'native_place_region': '德惠市', 'blood_type': '不明', 'religion': '无宗教信仰'}|
			
 
				+|![](img/样本-006002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '王凯', 'id': '320826199502090214', 'gender': '男', 'birthplace': '江苏省淮安市涟水县', 'birthplace_province': '江苏省', 'birthplace_city': '淮安市', 'birthplace_region': '涟水县', 'native_place': '江苏省淮安市涟水县', 'native_place_province': '江苏省', 'native_place_city': '淮安市', 'native_place_region': '涟水县', 'blood_type': '', 'religion': '无宗教信仰'}|
			
 
				+|![](img/样本-003002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '刘书玉', 'id': '210623199804014764', 'gender': '女', 'birthplace': '辽宁省丹东市东港市', 'birthplace_province': '辽宁省', 'birthplace_city': '丹东市', 'birthplace_region': '东港市', 'native_place': '辽宁省丹东市东港市', 'native_place_province': '辽宁省', 'native_place_city': '丹东市', 'native_place_region': '东港市', 'blood_type': '不明', 'religion': '无宗教信仰'}|
			
 
				+|![](img/样本-001002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '王文强', 'id': '152223199706015513', 'gender': '男', 'birthplace': '内蒙古自治区兴安盟扎赉特旗', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '兴安盟', 'birthplace_region': '扎赉特旗', 'native_place': '内蒙古自治区兴安盟扎赉特旗', 'native_place_province': '内蒙古自治区', 'native_place_city': '兴安盟', 'native_place_region': '扎赉特旗', 'blood_type': '不明', 'religion': '无宗教信仰'}|
			
 
				+|![](img/样本-005002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '王敏', 'id': '15222419990401602X', 'gender': '女', 'birthplace': '内蒙古自治区兴安盟突泉县', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '兴安盟', 'birthplace_region': '突泉县', 'native_place': '内蒙古自治区兴安盟突泉县', 'native_place_province': '内蒙古自治区', 'native_place_city': '兴安盟', 'native_place_region': '突泉县', 'blood_type': '', 'religion': '无宗教信仰'}|
			
 
				+|![](img/样本-007002.jpg)|{'img_type': '0', 'orientation': 0, 'type': '', 'address': '', 'address_province': '', 'address_city': '', 'address_region': '', 'address_detail': '', 'name': '刘昕', 'id': '152101200003040634', 'gender': '男', 'birthplace': '内蒙古自治区呼伦贝尔市海拉尔区', 'birthplace_province': '内蒙古自治区', 'birthplace_city': '呼伦贝尔市', 'birthplace_region': '海拉尔区', 'native_place': '吉林省四平市', 'native_place_province': '吉林省', 'native_place_city': '四平市', 'native_place_region': '', 'blood_type': '不明', 'religion': '无宗教信仰'}|
			
 
				+
			
 
				+# False
			
 
				+
			
 
				+|图片|识别结果|
			
 
				+| :--- | :--- |
			
 
				+|![](img/样本-002002.jpg)|Exception: 识别不到anchor常住|
			
 
				+|![](img/样本-0014002.jpg)|-------birthplace-------<br>正确:河南省新乡市封丘县<br>返回:用名杨乐<br>-------birthplace_province-------<br>正确:河南省<br>返回:<br>-------birthplace_city-------<br>正确:新乡市<br>返回:<br>-------birthplace_region-------<br>正确:封丘县<br>返回:<br>-------native_place-------<br>正确:河南省新乡市封丘县<br>返回:河南省新乡市封丘县河南省封丘县<br>|
			
 
				+|![](img/样本-000002.jpg)|-------birthplace-------<br>正确::黑龙江省大庆市杜尔伯特蒙古族自治县<br>返回:黑龙江省哈尔滨市市辖区<br>-------birthplace_city-------<br>正确:大庆市<br>返回:哈尔滨市<br>-------birthplace_region-------<br>正确:杜尔伯特蒙古族自治县<br>返回:市辖区<br>-------native_place-------<br>正确:黑龙江省大庆市杜尔伯特蒙古族自治县<br>返回:黑龙江省哈尔滨市市辖区<br>-------native_place_city-------<br>正确:大庆市<br>返回:哈尔滨市<br>-------native_place_region-------<br>正确:杜尔伯特蒙古族自治县<br>返回:市辖区<br>|
			
 
				+|![](img/样本-004002.jpg)|-------birthplace-------<br>正确:内蒙古自治区兴安盟乌兰浩特市<br>返回:内蒙古自治区兴安盟科尔沁右翼前旗<br>-------birthplace_region-------<br>正确:乌兰浩特市<br>返回:科尔沁右翼前旗<br>|
			
 
				+|![](img/样本-0012002.jpg)|-------name-------<br>正确:阚宏鹏<br>返回:闽宏鹏<br>-------birthplace_region-------<br>正确:额尔古纳市<br>返回:左旗<br>|
			
 
				+|![](img/样本-0011002.jpg)|Exception: 没有找到身份证号|
			
 
				+|![](img/样本-009002.jpg)|-------name-------<br>正确:周东旭<br>返回:内蒙古莫力达瓦自治旗<br>|
			
 
				+|![](img/样本-0013002.jpg)|-------id-------<br>正确:370125199912104257<br>返回:37012519991210425<br>-------gender-------<br>正确:男<br>返回:<br>|
			
--- a/HR_OCR/img2/img/样本-000002.jpg
+++ b/HR_OCR/img2/img/样本-000002.jpg
--- a/HR_OCR/img2/img/样本-000002.json
+++ b/HR_OCR/img2/img/样本-000002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "孔德鑫",
			
 
				+    "id": "230624199809052655",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "黑龙江省大庆市杜尔伯特蒙古族自治县",
			
 
				+    "birthplace_province": "黑龙江省",
			
 
				+    "birthplace_city": "大庆市",
			
 
				+    "birthplace_region": "杜尔伯特蒙古族自治县",
			
 
				+    "native_place": "黑龙江省大庆市杜尔伯特蒙古族自治县",
			
 
				+    "native_place_province": "黑龙江省",
			
 
				+    "native_place_city": "大庆市",
			
 
				+    "native_place_region": "杜尔伯特蒙古族自治县",
			
 
				+    "blood_type": "",
			
 
				+    "religion": ""
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-0010002.jpg
+++ b/HR_OCR/img2/img/样本-0010002.jpg
--- a/HR_OCR/img2/img/样本-0010002.json
+++ b/HR_OCR/img2/img/样本-0010002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "张颖",
			
 
				+    "id": "152122200012102421",
			
 
				+    "gender": "女",
			
 
				+    "birthplace": "内蒙古自治区呼伦贝尔市阿荣旗",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "呼伦贝尔市",
			
 
				+    "birthplace_region": "阿荣旗",
			
 
				+    "native_place": "吉林省长春市德惠市",
			
 
				+    "native_place_province": "吉林省",
			
 
				+    "native_place_city": "长春市",
			
 
				+    "native_place_region": "德惠市",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-001002.jpg
+++ b/HR_OCR/img2/img/样本-001002.jpg
--- a/HR_OCR/img2/img/样本-001002.json
+++ b/HR_OCR/img2/img/样本-001002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "王文强",
			
 
				+    "id": "152223199706015513",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "内蒙古自治区兴安盟扎赉特旗",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "兴安盟",
			
 
				+    "birthplace_region": "扎赉特旗",
			
 
				+    "native_place": "内蒙古自治区兴安盟扎赉特旗",
			
 
				+    "native_place_province": "内蒙古自治区",
			
 
				+    "native_place_city": "兴安盟",
			
 
				+    "native_place_region": "扎赉特旗",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-0011002.jpg
+++ b/HR_OCR/img2/img/样本-0011002.jpg
--- a/HR_OCR/img2/img/样本-0011002.json
+++ b/HR_OCR/img2/img/样本-0011002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "常龙江",
			
 
				+    "id": "152201200004234014",
			
 
				+    "gender": "女",
			
 
				+    "birthplace": "内蒙古自治区兴安盟乌兰浩特市",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "兴安盟",
			
 
				+    "birthplace_region": "乌兰浩特市",
			
 
				+    "native_place": "内蒙古自治区兴安盟乌兰浩特市",
			
 
				+    "native_place_province": "内蒙古自治区",
			
 
				+    "native_place_city": "兴安盟",
			
 
				+    "native_place_region": "乌兰浩特市",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-0012002.jpg
+++ b/HR_OCR/img2/img/样本-0012002.jpg
--- a/HR_OCR/img2/img/样本-0012002.json
+++ b/HR_OCR/img2/img/样本-0012002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "阚宏鹏",
			
 
				+    "id": "152122199205140314",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "内蒙古自治区呼伦贝尔市额尔古纳市左旗",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "呼伦贝尔市",
			
 
				+    "birthplace_region": "额尔古纳市",
			
 
				+    "native_place": "黑龙江省齐齐哈尔市泰来县",
			
 
				+    "native_place_province": "黑龙江省",
			
 
				+    "native_place_city": "齐齐哈尔市",
			
 
				+    "native_place_region": "泰来县",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-0013002.jpg
+++ b/HR_OCR/img2/img/样本-0013002.jpg
--- a/HR_OCR/img2/img/样本-0013002.json
+++ b/HR_OCR/img2/img/样本-0013002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "孙富豪",
			
 
				+    "id": "370125199912104257",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "山东省济南市济阳县",
			
 
				+    "birthplace_province": "山东省",
			
 
				+    "birthplace_city": "济南市",
			
 
				+    "birthplace_region": "济阳县",
			
 
				+    "native_place": "山东省济南市济阳县",
			
 
				+    "native_place_province": "山东省",
			
 
				+    "native_place_city": "济南市",
			
 
				+    "native_place_region": "济阳县",
			
 
				+    "blood_type": "",
			
 
				+    "religion": ""
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-0014002.jpg
+++ b/HR_OCR/img2/img/样本-0014002.jpg
--- a/HR_OCR/img2/img/样本-0014002.json
+++ b/HR_OCR/img2/img/样本-0014002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "杨杭升",
			
 
				+    "id": "410727199205222316",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "河南省新乡市封丘县",
			
 
				+    "birthplace_province": "河南省",
			
 
				+    "birthplace_city": "新乡市",
			
 
				+    "birthplace_region": "封丘县",
			
 
				+    "native_place": "河南省新乡市封丘县",
			
 
				+    "native_place_province": "河南省",
			
 
				+    "native_place_city": "新乡市",
			
 
				+    "native_place_region": "封丘县",
			
 
				+    "blood_type": "",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-002002.jpg
+++ b/HR_OCR/img2/img/样本-002002.jpg
--- a/HR_OCR/img2/img/样本-002002.json
+++ b/HR_OCR/img2/img/样本-002002.json
@@ -0,0 +1 @@
 
				+"101"
			
--- a/HR_OCR/img2/img/样本-003002.jpg
+++ b/HR_OCR/img2/img/样本-003002.jpg
--- a/HR_OCR/img2/img/样本-003002.json
+++ b/HR_OCR/img2/img/样本-003002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "刘书玉",
			
 
				+    "id": "210623199804014764",
			
 
				+    "gender": "女",
			
 
				+    "birthplace": "辽宁省丹东市东港市",
			
 
				+    "birthplace_province": "辽宁省",
			
 
				+    "birthplace_city": "丹东市",
			
 
				+    "birthplace_region": "东港市",
			
 
				+    "native_place": "辽宁省丹东市东港市",
			
 
				+    "native_place_province": "辽宁省",
			
 
				+    "native_place_city": "丹东市",
			
 
				+    "native_place_region": "东港市",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-004002.jpg
+++ b/HR_OCR/img2/img/样本-004002.jpg
--- a/HR_OCR/img2/img/样本-004002.json
+++ b/HR_OCR/img2/img/样本-004002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "张佳宝",
			
 
				+    "id": "152201199906262529",
			
 
				+    "gender": "女",
			
 
				+    "birthplace": "内蒙古自治区兴安盟乌兰浩特市",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "兴安盟",
			
 
				+    "birthplace_region": "乌兰浩特市",
			
 
				+    "native_place": "内蒙古自治区兴安盟科尔沁右翼前旗",
			
 
				+    "native_place_province": "内蒙古自治区",
			
 
				+    "native_place_city": "兴安盟",
			
 
				+    "native_place_region": "科尔沁右翼前旗",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-005002.jpg
+++ b/HR_OCR/img2/img/样本-005002.jpg
--- a/HR_OCR/img2/img/样本-005002.json
+++ b/HR_OCR/img2/img/样本-005002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "王敏",
			
 
				+    "id": "15222419990401602X",
			
 
				+    "gender": "女",
			
 
				+    "birthplace": "内蒙古自治区兴安盟突泉县",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "兴安盟",
			
 
				+    "birthplace_region": "突泉县",
			
 
				+    "native_place": "内蒙古自治区兴安盟突泉县",
			
 
				+    "native_place_province": "内蒙古自治区",
			
 
				+    "native_place_city": "兴安盟",
			
 
				+    "native_place_region": "突泉县",
			
 
				+    "blood_type": "",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-006002.jpg
+++ b/HR_OCR/img2/img/样本-006002.jpg
--- a/HR_OCR/img2/img/样本-006002.json
+++ b/HR_OCR/img2/img/样本-006002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "王凯",
			
 
				+    "id": "320826199502090214",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "江苏省淮安市涟水县",
			
 
				+    "birthplace_province": "江苏省",
			
 
				+    "birthplace_city": "淮安市",
			
 
				+    "birthplace_region": "涟水县",
			
 
				+    "native_place": "江苏省淮安市涟水县",
			
 
				+    "native_place_province": "江苏省",
			
 
				+    "native_place_city": "淮安市",
			
 
				+    "native_place_region": "涟水县",
			
 
				+    "blood_type": "",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-007002.jpg
+++ b/HR_OCR/img2/img/样本-007002.jpg
--- a/HR_OCR/img2/img/样本-007002.json
+++ b/HR_OCR/img2/img/样本-007002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "刘昕",
			
 
				+    "id": "152101200003040634",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "内蒙古自治区呼伦贝尔市海拉尔区",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "呼伦贝尔市",
			
 
				+    "birthplace_region": "海拉尔区",
			
 
				+    "native_place": "吉林省四平市",
			
 
				+    "native_place_province": "吉林省",
			
 
				+    "native_place_city": "四平市",
			
 
				+    "native_place_region": "",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-008002.jpg
+++ b/HR_OCR/img2/img/样本-008002.jpg
--- a/HR_OCR/img2/img/样本-008002.json
+++ b/HR_OCR/img2/img/样本-008002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "郝良君",
			
 
				+    "id": "152122199409064819",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "内蒙古自治区呼伦贝尔市阿荣旗",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "呼伦贝尔市",
			
 
				+    "birthplace_region": "阿荣旗",
			
 
				+    "native_place": "山东省菏泽市巨野县",
			
 
				+    "native_place_province": "山东省",
			
 
				+    "native_place_city": "菏泽市",
			
 
				+    "native_place_region": "巨野县",
			
 
				+    "blood_type": "不明",
			
 
				+    "religion": "无宗教信仰"
			
 
				+}
			
--- a/HR_OCR/img2/img/样本-009002.jpg
+++ b/HR_OCR/img2/img/样本-009002.jpg
--- a/HR_OCR/img2/img/样本-009002.json
+++ b/HR_OCR/img2/img/样本-009002.json
@@ -0,0 +1,23 @@
 
				+{
			
 
				+    "img_type": "0",
			
 
				+    "orientation": 0,
			
 
				+    "type": "",
			
 
				+    "address": "",
			
 
				+    "address_province": "",
			
 
				+    "address_city": "",
			
 
				+    "address_region": "",
			
 
				+    "address_detail": "",
			
 
				+    "name": "周东旭",
			
 
				+    "id": "152123199910317230",
			
 
				+    "gender": "男",
			
 
				+    "birthplace": "内蒙古自治区呼伦贝尔市莫力达瓦达斡尔族自治旗",
			
 
				+    "birthplace_province": "内蒙古自治区",
			
 
				+    "birthplace_city": "呼伦贝尔市",
			
 
				+    "birthplace_region": "莫力达瓦达斡尔族自治旗",
			
 
				+    "native_place": "内蒙古自治区呼伦贝尔市莫力达瓦达斡尔族自治旗",
			
 
				+    "native_place_province": "内蒙古自治区",
			
 
				+    "native_place_city": "呼伦贝尔市",
			
 
				+    "native_place_region": "莫力达瓦达斡尔族自治旗",
			
 
				+    "blood_type": "",
			
 
				+    "religion": ""
			
 
				+}
			
--- a/HR_OCR/test_script/to_md/new.py
+++ b/HR_OCR/test_script/to_md/new.py
@@ -95,7 +95,7 @@ def send_request(image: Image, ocr_name, ocr_address, image_type=None):
 
				     config = OCR_CONFIGS[ocr_name][ocr_address]
			
 
				     headers = {
			
 
				         'Content-Type': 'application/json',
			
 
				-        'Authorization': config.token
			
 
				+        'Authorization': f'Bearer {config.token}'
			
 
				     }
			
 
				     data = {
			
 
				         'image': base64_str,
			
@@ -129,7 +129,10 @@ class Dataset(object):
 
				             else:
			
 
				                 self.image_list.append(Image(p, None, rotate))
			
 
				 
			
 
				-        self.field = Filed.get(field)
			
 
				+        if ocr_name == 'regbook':
			
 
				+            self.field = Filed.get(field + str(image_type))
			
 
				+        else:
			
 
				+            self.field = Filed.get(field)
			
 
				 
			
 
				         self.correct = {k: 0 for k in self.field}
			
 
				         self.error = {k: 0 for k in self.field}
			
--- a/HR_OCR/test_script/to_md/ocr_config.py
+++ b/HR_OCR/test_script/to_md/ocr_config.py
@@ -172,14 +172,16 @@ idcard_fieid = ['orientation', 'name', 'id', 'ethnicity', 'gender', 'birthday',
 
				 bankcard_fieid = ['orientation', 'number']
			
 
				 schoolcert_fieid = ['orientation', 'name', 'gender', 'admission_time', 'education_time', 'education_level', 'education_type', 'learning_type', 'school', 'major', 'number']
			
 
				 cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
			
 
				-regbook_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
			
 
				+regbook_0_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
			
 
				                  'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
			
 
				                  'native_place_region', 'blood_type', 'religion']
			
 
				+regbook_1_field = ['orientation', 'type', 'address', 'address_province', 'address_city', 'address_region', 'address_detail']
			
 
				 business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
			
 
				                     'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
			
 
				 Filed = {
			
 
				     'cet': cet_field,
			
 
				-    'regbook': regbook_field,
			
 
				+    'regbook0': regbook_0_field,
			
 
				+    'regbook1': regbook_1_field,
			
 
				     'business_license': business_license,
			
 
				     'schoolcert': schoolcert_fieid,
			
 
				     'idcard': idcard_fieid,
			
--- a/HR_OCR/test_script/to_md/use.py
+++ b/HR_OCR/test_script/to_md/use.py
@@ -2,35 +2,39 @@
 
				 Author: zeke-chin zeke-chin@icloud.com
			
 
				 Date: 2022-09-28 20:28:41
			
 
				 LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				-LastEditTime: 2022-09-30 15:08:48
			
 
				-FilePath: /utils/HR_OCR/utils/use.py
			
 
				+LastEditTime: 2022-10-12 21:15:26
			
 
				+FilePath: /to_md/HR_OCR/to_md/use.py
			
 
				 Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				 '''
			
 
				 
			
 
				 from pathlib import Path
			
 
				+from tkinter import image_types
			
 
				 
			
 
				 from new import MD, Image, Dataset, parser_path
			
 
				 
			
 
				 # config
			
 
				 # 图片路径
			
 
				-image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
			
 
				+
			
 
				+image_path = Path('/Users/sxkj/test_script/HR_OCR/img2/img')
			
 
				 image_type = 0
			
 
				 # 是否旋转
			
 
				 image_rotate = False
			
 
				-ocr_address = 'DXtest'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				+ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				+
			
 
				+ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				+md_name = 'RE'
			
 
				 
			
 
				-ocr_name = 'cet'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				-md_name = 'CET'
			
 
				-filed = 'cet'
			
 
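				+filed = 'regbook' # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'; for ocr_name 'regbook' pass plain 'regbook' - Dataset appends image_type to select the Filed key 'regbook0' or 'regbook1'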
			
 
				 
			
 
				 # 若md_path为None 则默认使用图片父路径为markdown保存路径
			
 
				-# md_path = '/Users/zeke/work/sx/OCR/HROCR/utils/example' or image_path.parent
			
 
				+# md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
 
				 md_path = None or image_path.parent
			
 
				 
			
 
				 md_file = parser_path(Path(md_path) / Path(md_name + image_path.stem), image_rotate)
			
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				+
			
 
				     markdown = MD(md_file)
			
 
				 
			
 
				     dataset = Dataset(image_path, image_type, ocr_name, ocr_address, filed, image_rotate)
			
--- a/HR_OCR/to_md/README.md
+++ b/HR_OCR/to_md/README.md
@@ -1,46 +0,0 @@
 
				-# 人力OCR
			
 
				-
			
 
				-## 生成markdown测试报告脚本
			
 
				-
			
 
				-1. 生成**目的文件夹**下图片的**json文件**
			
 
				-
			
 
				-- **目的文件夹**: 
			
 
				-  - 存放所需测试**图片文件夹**
			
 
				-  - 对**文件夹**内图片进行标准化
			
 
				-    - 运行`suffix.py`脚本
			
 
				-    - 产生*.jpg
			
 
				-  - 生成<u>算法推理json文件</u>
			
 
				-    - 运行`convert_json.py`脚本
			
 
				-    - 产生对应jpg 文件的json文件
			
 
				-  - 修改<u>算法推理json文件</u>成**正确的json文件**
			
 
				-
			
 
				-2. 跑生成md报告脚本
			
 
				-
			
 
				-- 修改use.py 并运行
			
 
				-
			
 
				-  ```python
			
 
				-  # config
			
 
				-  
			
 
				-  # 目的文件夹
			
 
				-  image_path = Path('/Users/zeke/work/sx/OCR/image_data/户口本9.30/0/img/')
			
 
				-  # 图片type(如果接口不存在传0不影响结果)
			
 
				-  image_type = 0
			
 
				-  # 是否旋转
			
 
				-  image_rotate = False
			
 
				-  
			
 
				-  # ocr地址选择
			
 
				-  # 本地环境、腾讯云测试环境、腾讯云生产环境、电信云测试环境、电信云生产环境
			
 
				-  ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'
			
 
				-  # ocr能力选择
			
 
				-  ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
			
 
				-  # 生产MD文件名
			
 
				-  md_name = 'RegBook'
			
 
				-  # ocr能力对应字段（）
			
 
				-  filed = 'regbook'
			
 
				-  
			
 
				-  # 若md_path为None 则默认使用图片父路径为markdown保存路径
			
 
				-  # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
			
 
				-  md_path = None or image_path.parent
			
 
				-  ```
			
 
				-
			
 
				-  
			
--- a/HR_OCR/to_md/example/img/1_img.jpg
+++ b/HR_OCR/to_md/example/img/1_img.jpg
--- a/HR_OCR/to_md/example/img/1_img.json
+++ b/HR_OCR/to_md/example/img/1_img.json
@@ -1,9 +0,0 @@
 
				-{
			
 
				-    "orientation": 0,
			
 
				-    "name": "鉴康",
			
 
				-    "id": "152801200003178527",
			
 
				-    "language": "英语",
			
 
				-    "level": "CET4",
			
 
				-    "exam_time": "2021年6月",
			
 
				-    "score": "451"
			
 
				-}
			
--- a/HR_OCR/to_md/example/img/2_img.jpg
+++ b/HR_OCR/to_md/example/img/2_img.jpg
--- a/HR_OCR/to_md/example/img/2_img.json
+++ b/HR_OCR/to_md/example/img/2_img.json
@@ -1,9 +0,0 @@
 
				-{
			
 
				-    "orientation": 0,
			
 
				-    "name": "张鑫",
			
 
				-    "id": "140227199809282317",
			
 
				-    "language": "英语",
			
 
				-    "level": "CET4",
			
 
				-    "exam_time": "2021年6月",
			
 
				-    "score": "445"
			
 
				-}
			
--- a/HR_OCR/to_md/new.py
+++ b/HR_OCR/to_md/new.py
@@ -1,273 +0,0 @@
 
				-from pathlib import Path
			
 
				-from typing import List, Optional
			
 
				-import cv2
			
 
				-import requests
			
 
				-from mdutils.mdutils import MdUtils
			
 
				-from dataclasses import dataclass
			
 
				-import json
			
 
				-import time
			
 
				-import base64
			
 
				-from itertools import chain
			
 
				-from tqdm import tqdm
			
 
				-from ocr_config import OCR_CONFIGS, Filed
			
 
				-
			
 
				-
			
 
				-class Image:
			
 
				-    def __init__(self, path: Path, rotate, is_rotate):
			
 
				-        self._path = path
			
 
				-        self.rotate = rotate
			
 
				-        self._ocr_result = None
			
 
				-        self.category = True
			
 
				-        self.is_rotate = is_rotate
			
 
				-        try:
			
 
				-            self.gt_result = self.get_json()
			
 
				-        except Exception as e:
			
 
				-            print(self.json_path)
			
 
				-            raise e
			
 
				-
			
 
				-    def __repr__(self):
			
 
				-        return f'path: {self.path}, rotate: {self.rotate}, gt_result: {self.gt_result}, cate: {self.category}'
			
 
				-
			
 
				-    # 将方法转换为相同名称的只读属性
			
 
				-    @property
			
 
				-    def path(self):
			
 
				-        return self._path
			
 
				-
			
 
				-    @path.setter
			
 
				-    def path(self, path):
			
 
				-        self._path = path
			
 
				-
			
 
				-    @property
			
 
				-    def fn(self):
			
 
				-        return self._path.stem
			
 
				-
			
 
				-    @property
			
 
				-    def ocr_result(self):
			
 
				-        return self._ocr_result
			
 
				-
			
 
				-    @ocr_result.setter
			
 
				-    def ocr_result(self, value):
			
 
				-        self._ocr_result = value
			
 
				-
			
 
				-    def get_gt_result(self, key):# sourcery skip: merge-duplicate-blocks, remove-redundant-if
			
 
				-        if key == 'orientation':
			
 
				-            if self.is_rotate:
			
 
				-                return self.rotate + 1 if self.rotate is not None else 0
			
 
				-            else:
			
 
				-                return self.gt_result[key]
			
 
				-        elif key in self.gt_result:
			
 
				-            return self.gt_result[key]
			
 
				-        else:
			
 
				-            return None
			
 
				-
			
 
				-    @property
			
 
				-    def json_path(self):
			
 
				-        return self.path.parent / f'{self.path.stem}.json'
			
 
				-
			
 
				-    def save_image(self, img, rotate):
			
 
				-        dst = self.path.parent.parent / (".ro_dst")
			
 
				-        if not dst.exists(): dst.mkdir()
			
 
				-        self.path = dst / f'{self.path.stem}-{rotate + 1}.jpg'
			
 
				-        # print('save image', self.path)
			
 
				-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
			
 
				-        cv2.imwrite(str(self.path), img)
			
 
				-        return self.path
			
 
				-
			
 
				-    def get_base64(self, rotate=None):
			
 
				-        # print(self.path)
			
 
				-        img = cv2.imread(str(self.path))
			
 
				-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
			
 
				-        path = self.path
			
 
				-        if rotate is not None:
			
 
				-            img = cv2.rotate(img, rotate)
			
 
				-            path = self.save_image(img, rotate)
			
 
				-            # imencode 将图片编码到缓存，并保存到本地
			
 
				-        with open(path, 'rb') as f:
			
 
				-            return base64.encodebytes(f.read()).decode('utf-8')
			
 
				-
			
 
				-    def get_json(self):
			
 
				-        with open(self.json_path, 'r') as f:
			
 
				-            return json.load(f)
			
 
				-
			
 
				-
			
 
				-def send_request(image: Image, ocr_name, ocr_address, image_type=None):
			
 
				-    base64_str = image.get_base64(image.rotate)
			
 
				-    config = OCR_CONFIGS[ocr_name][ocr_address]
			
 
				-    headers = {
			
 
				-        'Content-Type': 'application/json',
			
 
				-        'Authorization': f'Bearer {config.token}'
			
 
				-    }
			
 
				-    data = {
			
 
				-        'image': base64_str,
			
 
				-    }
			
 
				-    if image_type is not None:
			
 
				-        data['image_type'] = image_type
			
 
				-    response = requests.post(config.url, headers=headers, json=data)
			
 
				-    return response.json()
			
 
				-
			
 
				-
			
 
				-def parser_path(path: Path, rotate: bool):
			
 
				-    name = time.strftime("%m-%d_", time.localtime()) + path.name
			
 
				-    if rotate:
			
 
				-        name = f'{name}_R.md'
			
 
				-    return path.parent / name
			
 
				-
			
 
				-
			
 
				-class Dataset(object):
			
 
				-    def __init__(self, images_path, image_type, ocr_name, ocr_address, field, rotate=False):
			
 
				-        self.image_type = image_type
			
 
				-        self.ocr_name = ocr_name
			
 
				-        self.ocr_address = ocr_address
			
 
				-        self.images_path = images_path
			
 
				-        self.image_list = []
			
 
				-        # chain 迭代器，首先返回第一个可迭代对象中所有元素，接着返回下一个可迭代对象中所有元素，直到耗尽所有可迭代对象中的元素
			
 
				-        # eg：chain('ABC', 'DEF') --> A B C D E F
			
 
				-
			
 
				-        for p in chain(*[Path(self.images_path).rglob('*.jpg')]):
			
 
				-            if rotate:
			
 
				-                self.image_list.extend(Image(p, r, rotate) for r in [None, 0, 1, 2])
			
 
				-            else:
			
 
				-                self.image_list.append(Image(p, None, rotate))
			
 
				-
			
 
				-        if ocr_name == 'regbook':
			
 
				-            self.field = Filed.get(field + str(image_type))
			
 
				-        else:
			
 
				-            self.field = Filed.get(field)
			
 
				-
			
 
				-        self.correct = {k: 0 for k in self.field}
			
 
				-        self.error = {k: 0 for k in self.field}
			
 
				-
			
 
				-    def __len__(self):
			
 
				-        return len(self.image_list)
			
 
				-
			
 
				-    def _evaluate_one(self, image: Image):
			
 
				-        def _get_predict(r, key):
			
 
				-            # isinstance() 函数来判断一个对象是否是一个已知的类型
			
 
				-            if isinstance(r[key], dict):
			
 
				-                return r[key]['text']
			
 
				-            else:
			
 
				-                return r[key]
			
 
				-
			
 
				-        if image.rotate is not None: image.gt_result['orientation'] = image.rotate + 1
			
 
				-        r = send_request(image, self.ocr_name, self.ocr_address, self.image_type)
			
 
				-        err_str = ''
			
 
				-        if r['status'] == '000':
			
 
				-            res = r['result']
			
 
				-            for key in self.field:
			
 
				-                # print('attr: ', key)
			
 
				-                if key in res:
			
 
				-                    gt = image.get_gt_result(key)
			
 
				-                    predict = _get_predict(res, key)
			
 
				-                    # print(f'gt: {gt}, predict: {predict}')
			
 
				-                    if predict == gt:
			
 
				-                        self.correct[key] += 1
			
 
				-                    else:
			
 
				-                        image.category = False
			
 
				-                        self.error[key] += 1
			
 
				-                        err_str += f'-------{key}-------<br>正确:{gt}<br>返回:{predict}<br>'
			
 
				-            if image.category:
			
 
				-                image.ocr_result = image.gt_result
			
 
				-            else:
			
 
				-                image.ocr_result = err_str
			
 
				-        else:
			
 
				-            image.ocr_result = r['msg']
			
 
				-            image.category = False
			
 
				-            for key in self.field:
			
 
				-                self.error[key] += 1
			
 
				-
			
 
				-    def __call__(self):  # sourcery skip: yield-from
			
 
				-        # yield 返回一个生成器
			
 
				-        for image in self.image_list:
			
 
				-            yield image
			
 
				-
			
 
				-    # 比较
			
 
				-    def evaluate(self):
			
 
				-        for image in tqdm(self.image_list):
			
 
				-            self._evaluate_one(image)
			
 
				-
			
 
				-    # 计算总体准确度
			
 
				-    @property
			
 
				-    def accuracy(self):
			
 
				-        return sum(list(self.correct.values())) / sum(list(self.correct.values()) + list(self.error.values()))
			
 
				-
			
 
				-    # 计算元素准确度
			
 
				-    @property
			
 
				-    def attrs_accuracy(self):
			
 
				-        return {k: self.correct[k] / (self.correct[k] + self.error[k]) for k in self.field}
			
 
				-
			
 
				-
			
 
				-class MD(object):
			
 
				-    def __init__(self, file_path: Path):
			
 
				-        self.name = file_path.name
			
 
				-        self.f = MdUtils(file_name=str(file_path))
			
 
				-        self.field_table: List = ['字段', '正确率']
			
 
				-        self.true_table: List = ['图片', '识别结果']
			
 
				-        self.false_table: List = ['图片', '识别结果']
			
 
				-        self.write_header(f'{self.name}测试报告')
			
 
				-
			
 
				-    def write_header(self, title, level=1):
			
 
				-        self.f.new_header(level=level, title=title)
			
 
				-
			
 
				-    def write_total_accuracy(self, ds: Dataset):
			
 
				-        def get_format_total_accuracy(ds: Dataset):
			
 
				-            acc = ds.accuracy * 100
			
 
				-            return "{:.2f}%".format(acc)
			
 
				-
			
 
				-        # 1. 拿到format之后的百分数
			
 
				-        res = get_format_total_accuracy(ds)
			
 
				-
			
 
				-        # 2. 写入
			
 
				-        self.f.new_paragraph(res)
			
 
				-
			
 
				-    def write_table_accuracy(self, ds: Dataset, columns=2, text_align='center'):
			
 
				-        def format_table_accuracy(ds: Dataset):
			
 
				-            table = ds.attrs_accuracy
			
 
				-            for k, v in table.items():
			
 
				-                acc = v * 100
			
 
				-                table[k] = "{:.2f}%".format(acc)
			
 
				-            return table
			
 
				-
			
 
				-        def dict_2_list(dic: dict):
			
 
				-            l = []
			
 
				-            for k, v in dic.items():
			
 
				-                l.extend((k, v))
			
 
				-            return l
			
 
				-
			
 
				-        table_dict = format_table_accuracy(ds)
			
 
				-        table_list = dict_2_list(table_dict)
			
 
				-        self.field_table.extend(table_list)
			
 
				-
			
 
				-        rows = len(self.field_table) // columns
			
 
				-        self.f.new_table(columns=columns, rows=rows, text=self.field_table, text_align=text_align)
			
 
				-
			
 
				-    def write_table_result(self, ds: Dataset, columns=2, text_align='center'):
			
 
				-        for image in ds.image_list:
			
 
				-            md_image = self.f.new_inline_image(text='', path=f'{image.path.parent.name}/{image.path.name}')
			
 
				-            if image.category:
			
 
				-                self.true_table.extend([md_image, image.ocr_result])
			
 
				-            else:
			
 
				-                self.false_table.extend([md_image, image.ocr_result])
			
 
				-
			
 
				-        true_rows = len(self.true_table) // columns
			
 
				-        false_rows = len(self.false_table) // columns
			
 
				-        self.write_header('True')
			
 
				-        self.f.new_table(columns=columns, rows=true_rows, text=self.true_table, text_align=text_align)
			
 
				-        self.write_header('False')
			
 
				-        self.f.new_table(columns=columns, rows=false_rows, text=self.false_table, text_align='left')
			
 
				-
			
 
				-# if __name__ == '__main__':
			
 
				-#     markdown = MD('英语等级证书')
			
 
				-#
			
 
				-#     dataset = Dataset(Path(''), 'cet', 'local', False)
			
 
				-#     print(len(dataset))
			
 
				-#     for d in dataset():
			
 
				-#         print(d)
			
 
				-#
			
 
				-#     dataset.evaluate()
			
 
				-#     print(dataset.accuracy)
			
 
				-#
			
 
				-#     markdown.write_total_accuracy(dataset)
			
 
				-#     markdown.write_table_accuracy(dataset)
			
 
				-#     markdown.write_table_result(dataset)
			
 
				-#
			
 
				-#     markdown.f.create_md_file()
			
--- a/HR_OCR/to_md/ocr_config.py
+++ b/HR_OCR/to_md/ocr_config.py
@@ -1,114 +0,0 @@
 
				-from dataclasses import dataclass
			
 
				-from typing import List
			
 
				-
			
 
				-
			
 
# Pairs an image-type code with a list of field names.
# NOTE(review): not referenced anywhere else in this module - confirm it is still needed.
@dataclass
class Type:
    image_type: int
    image_field: List


# Connection settings for one OCR endpoint: the URL to POST to and the
# bearer token for it (an empty string for the local endpoints below).
@dataclass
class RequestConfig:
    url: str
    token: str


# Bundles a RequestConfig with a Type.
# NOTE(review): not referenced anywhere else in this module - confirm it is still needed.
@dataclass
class Configs:
    request: RequestConfig
    type: Type
			
 
				-
			
 
				-
			
 
# Endpoint settings per OCR capability and environment. Each *_CONFIGS dict
# is keyed by environment name: 'local', 'TXtest', 'TXsb', 'DXtest', 'DXsb'.
# NOTE(review): bearer tokens are committed in plain text and every URL uses
# plain http - consider loading the tokens from the environment and rotating them.

# cet
cet_local_config = RequestConfig(url='http://192.168.199.27:18050/ocr_system/cet', token='')
# NOTE(review): this URL contains a double slash ("rlocrxm//cettest") that the
# other URLs do not have - possibly a typo; confirm against the gateway.
cet_TXtest_config = RequestConfig(
    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm//cettest/cet',
    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
cet_TXsb_config = RequestConfig(
    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/cet/cet',
    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
cet_DXtest_config = RequestConfig(
    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cettest/cet',
    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
cet_DXsb_config = RequestConfig(
    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/cet/cet',
    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')

CET_CONFIGS = {
    'local': cet_local_config,
    'TXtest': cet_TXtest_config,
    'TXsb': cet_TXsb_config,
    'DXtest': cet_DXtest_config,
    'DXsb': cet_DXsb_config
}

# regbook
regbook_local_config = RequestConfig(url='http://192.168.199.27:18040/ocr_system/regbook', token='')
regbook_TXtest_config = RequestConfig(
    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsbtest/regbook',
    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
regbook_TXsb_config = RequestConfig(
    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/hkbsb/regbook',
    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
regbook_DXtest_config = RequestConfig(
    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsbtest/regbook',
    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
regbook_DXsb_config = RequestConfig(
    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/hkbsb/regbook',
    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')

REGBOOK_CONFIGS = {
    'local': regbook_local_config,
    'TXtest': regbook_TXtest_config,
    'TXsb': regbook_TXsb_config,
    'DXtest': regbook_DXtest_config,
    'DXsb': regbook_DXsb_config
}

# business_license
blfe_local_config = RequestConfig(url='http://192.168.199.27:18060/ocr_system/business_license', token='')
blfe_TXtest_config = RequestConfig(
    url='http://aihubtest.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfetest/blfe',
    token='8ae1e5f1-1337-4f22-8d46-ff4c110d68fd')
blfe_TXsb_config = RequestConfig(
    url='http://aihub.digitalyili.com/aiSquare/openApi/reasoning-services/rlocrxm/blfe/blfe',
    token='dcae8cc6-0e49-4db8-a2d2-94ef84da3636')
blfe_DXtest_config = RequestConfig(
    url='http://aihubpre-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfetest/blfe',
    token='4e00c444-620b-4d3c-85f4-777e64276f0e')
blfe_DXsb_config = RequestConfig(
    url='http://aihub-idc.digitalyili.com/aiSquare/openApi/reasoning-services/hrocr/blfe/blfe',
    token='e045de0a-e97f-4f23-b4d5-6a032c39a81e')

BLFE_CONFIGS = {
    'local': blfe_local_config,
    'TXtest': blfe_TXtest_config,
    'TXsb': blfe_TXsb_config,
    'DXtest': blfe_DXtest_config,
    'DXsb': blfe_DXsb_config
}

# Top-level lookup: OCR capability name -> environment name -> RequestConfig.
OCR_CONFIGS = {
    'cet': CET_CONFIGS,
    'regbook': REGBOOK_CONFIGS,
    'business_license': BLFE_CONFIGS
}
			
 
				-
			
 
# Fields: the result keys that are compared for each OCR capability.
cet_field = ['orientation', 'name', 'id', 'language', 'level', 'exam_time', 'score']
regbook_0_field = ['orientation', 'name', 'id', 'gender', 'birthplace', 'birthplace_province', 'birthplace_city',
                 'birthplace_region', 'native_place', 'native_place_province', 'native_place_city',
                 'native_place_region', 'blood_type', 'religion']
regbook_1_field = ['orientation', 'type', 'address', 'address_province', 'address_city', 'address_region', 'address_detail']

business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
                    'start_date', 'business_scope', 'expire_date', 'address', 'stamp']
# Earlier variant of the list above, without the 'stamp' field:
# business_license = ['orientation', 'social_code', 'company_name', 'legal_person', 'registered_capital', 'type',
#                     'start_date', 'business_scope', 'expire_date', 'address']

# Lookup from field-set key to field list. The regbook keys carry the image
# type as a suffix ('regbook0', 'regbook1'). The name "Filed" (sic) is kept
# as spelled because other modules import it under this name.
Filed = {
    'cet': cet_field,
    'regbook0': regbook_0_field,
    'regbook1': regbook_1_field,
    'business_license': business_license
}
			
--- a/HR_OCR/to_md/use.py
+++ b/HR_OCR/to_md/use.py
@@ -1,53 +0,0 @@
 
				-'''
			
 
				-Author: zeke-chin zeke-chin@icloud.com
			
 
				-Date: 2022-09-28 20:28:41
			
 
				-LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				-LastEditTime: 2022-10-12 21:15:26
			
 
				-FilePath: /to_md/HR_OCR/to_md/use.py
			
 
				-Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				-'''
			
 
				-
			
 
from pathlib import Path
# NOTE(review): `image_types` is never used in this script; this looks like an
# accidental IDE auto-import and makes the script depend on tkinter - confirm and remove.
from tkinter import image_types

from new import MD, Image, Dataset, parser_path

# config
# Folder containing the images (and their ground-truth .json files)

image_path = Path('/Users/sxkj/Desktop/1/img')
# Value sent to the OCR service as image_type; for regbook it also selects the field list
image_type = 1
# Whether to also evaluate rotated variants of every image
image_rotate = False
# Environment to call
ocr_address = 'local'  # 'local' 'TXtest' 'TXsb' 'DXtest' 'DXsb'

# OCR capability to call
ocr_name = 'regbook'  # 'cet' 'idcard' 'bankcard' 'regbook' 'schoolcert' 'business_license'
# Prefix of the generated markdown file name
md_name = 'RE_1.7'

# Field-set key; for regbook, Dataset appends str(image_type) to it before the lookup
filed = 'regbook' # 'cet' 'idcard' 'bankcard' 'regbook_0' 'regbook_1' 'schoolcert' 'business_license'

# If md_path is None, the parent folder of the image folder is used as the markdown output folder
# md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
md_path = None or image_path.parent

# Report path: <md_path>/<MM-DD_><md_name><image folder name>, plus "_R.md" in rotation mode
md_file = parser_path(Path(md_path) / Path(md_name + image_path.stem), image_rotate)


if __name__ == '__main__':

    markdown = MD(md_file)

    dataset = Dataset(image_path, image_type, ocr_name, ocr_address, filed, image_rotate)
    # Show what was loaded before any request is sent
    print(len(dataset))
    for d in dataset():
        print(d)

    # Call the OCR service for every image and collect the counters
    dataset.evaluate()
    print(dataset.accuracy)

    # Report sections: overall accuracy, per-field table, per-image tables
    markdown.write_total_accuracy(dataset)
    markdown.write_table_accuracy(dataset)
    markdown.write_table_result(dataset)

    print(md_file)
    markdown.f.create_md_file()
			
--- a/HR_OCR/tools/README.md
+++ b/HR_OCR/tools/README.md
@@ -1,34 +0,0 @@
 
				-# 人力OCR
			
 
				-
			
 
				-## convert_json.py
			
 
				-
			
 
				-```python
			
 
				-# 项目url
			
 
				-url = 'http://192.168.199.27:18040'
			
 
				-# 目标文件夹
			
 
				-imgs_path = './HR_OCR/to_md/example/img'
			
 
				-
			
 
				-def send_request(img_path, image_type = 0):
			
 
				-    with open(img_path, 'rb') as f:
			
 
				-        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				-        data = {
			
 
				-            'image': img_str,
			
 
				-            'image_type': image_type
			
 
				-        }
			
 
				-        idc_header = {
			
 
				-            'Content-Type': 'application/json',
			
 
				-            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				-        }
			
 
				-        r = requests.post(f'{url}/cettest/cet', json=data, headers=idc_header)
			
 
				-        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				-        print(r.json())
			
 
				-        return r.json()
			
 
				-```
			
 
				-
			
 
				-## suffix.py
			
 
				-
			
 
				-```python
			
 
				-# 需要格式化的目的文件夹路径
			
 
				-target_path = './HR_OCR/to_md/example/img'
			
 
				-```
			
 
				-
			
--- a/HR_OCR/tools/convert_json.py
+++ b/HR_OCR/tools/convert_json.py
@@ -1,76 +0,0 @@
 
				-'''
			
 
				-Author: zeke-chin zeke-chin@icloud.com
			
 
				-Date: 2022-10-11 16:38:18
			
 
				-LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				-LastEditTime: 2022-10-12 17:43:29
			
 
				-FilePath: /test_script/HR_OCR/tools/convert_json.py
			
 
				-Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				-'''
			
 
				-from pathlib import Path
			
 
				-
			
 
				-import requests
			
 
				-import json
			
 
				-import base64
			
 
				-from itertools import chain
			
 
				-
			
 
				-<<<<<<< Updated upstream
			
 
				-url = 'http://192.168.199.27:18060'
			
 
				-imgs_path = '/Users/zeke/work/sx/OCR/image_data/营业执照90'
			
 
				-=======
			
 
				-url = 'http://192.168.199.27:18040'
			
 
				-imgs_path = '/Users/sxkj/test_script/HR_OCR/户口本测试样本1011-常住人口页'
			
 
				->>>>>>> Stashed changes
			
 
				-
			
 
				-
			
 
				-def send_request(img_path):
			
 
				-    with open(img_path, 'rb') as f:
			
 
				-        img_str: str = base64.encodebytes(f.read()).decode('utf-8')
			
 
				-        data = {
			
 
				-            'image': img_str,
			
 
				-            'image_type': 0
			
 
				-        }
			
 
				-<<<<<<< Updated upstream
			
 
				-        # idc_header = {
			
 
				-        #     'Content-Type': 'application/json',
			
 
				-        #     'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				-        # }
			
 
				-        # r = requests.post(f'{url}/ocr_system/business_license', json=data, headers=idc_header)
			
 
				-        r = requests.post(f'{url}/ocr_system/business_license', json=data)
			
 
				-=======
			
 
				-        idc_header = {
			
 
				-            'Content-Type': 'application/json',
			
 
				-            'Authorization': 'Bearer 4e00c444-620b-4d3c-85f4-777e64276f0e'
			
 
				-        }
			
 
				-        r = requests.post(f'{url}/ocr_system/regbook', json=data, headers=idc_header)
			
 
				-        # r = requests.post(f'{url}/hkbsbtest/regbook', json=data)
			
 
				->>>>>>> Stashed changes
			
 
				-        print(r.json())
			
 
				-        return r.json()
			
 
				-
			
 
				-
			
 
				-def _parse_result(r):
			
 
				-    if r['status'] == '000':
			
 
				-        r = r['result']
			
 
				-        if r:
			
 
				-            del r['confidence']
			
 
				-        return {k: v['text'] if isinstance(v, dict) else v for k, v in r.items()}
			
 
				-    elif r['status'] == '101':
			
 
				-        return "101"
			
 
				-
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-
			
 
				-    # 0
			
 
				-    # img_paths = chain(*[Path(root / imgs_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']])
			
 
				-    img_paths = chain(*[Path(imgs_path).rglob(f'*.{ext}') for ext in ['jpg']])
			
 
				-    for img_path in img_paths:
			
 
				-        print(img_path)
			
 
				-        r = send_request(img_path)
			
 
				-        res = _parse_result(r)
			
 
				-        print(res)
			
 
				-        img_path: Path = img_path
			
 
				-        d = img_path.parent
			
 
				-        fn = f'{img_path.stem}.json'
			
 
				-
			
 
				-        with (d / fn).open('w', encoding='utf-8') as f:
			
 
				-            json.dump(res, f, ensure_ascii=False, indent=4)
			
--- a/HR_OCR/tools/suffix.py
+++ b/HR_OCR/tools/suffix.py
@@ -1,36 +0,0 @@
 
				-'''
			
 
				-Author: zeke-chin zeke-chin@icloud.com
			
 
				-Date: 2022-10-11 16:38:18
			
 
				-LastEditors: zeke-chin zeke-chin@icloud.com
			
 
				-LastEditTime: 2022-10-12 17:53:30
			
 
				-FilePath: /test_script/HR_OCR/tools/suffix.py
			
 
				-Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
			
 
				-'''
			
 
				-import os
			
 
				-from pathlib import Path
			
 
				-from itertools import chain
			
 
				-import sys
			
 
				-
			
 
				-# conf
			
 
				-target_path = '/Users/zeke/work/sx/OCR/image_data/营业执照90'
			
 
				-#suffix = sys.argv[2]
			
 
				-suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
			
 
				-
			
 
				-def get_range(n):
			
 
				-    len_n = len(str(n))
			
 
				-    for num in range(1, n + 1):
			
 
				-        output = str(num)
			
 
				-        while len(output) < len_n:
			
 
				-            output = f'0{output}'
			
 
				-        yield output
			
 
				-
			
 
				-
			
 
				-file_paths = list(chain(*[Path(target_path).rglob(f'*.{ext}') for ext in ['jpeg', 'jpg', 'png', 'JPG', 'PNG']]))
			
 
				-print(len(file_paths))
			
 
				-num = len(file_paths)
			
 
				-file_name_list = list(get_range(num))
			
 
				-for i in range(num):
			
 
				-    file = file_paths[i]
			
 
				-    print(file)
			
 
				-    new = file.parent / f'{file_name_list[i]}_img.{suffix}'
			
 
				-    file.rename(new)