2 years ago · dca4c1face
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 
				 .DS_Store
			
 
				 .idea
			
 
				 __pycache__/
			
 
				-
			
 
				+*.json
			
 
				+*.md
			
--- a/HR_OCR/__init__.py
+++ b/HR_OCR/__init__.py
--- a/YQ_OCR/to_md/convert_MD.py
+++ b/YQ_OCR/to_md/convert_MD.py
@@ -13,7 +13,7 @@ from YQ_OCR.config import keyDict
 
				 
			
 
				 url = 'http://192.168.199.107:18087'
			
 
				 url_path = '/ocr_system/identify'
			
 
				-imgs_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/YQ_OCR/img'
			
 
				+imgs_path = '/Users/sxkj/to_md/YQ_OCR/img'
			
 
				 
			
 
				 
			
 
				 # 1. xlsx -> 正确json文件(写入厂家信息)
			
@@ -56,21 +56,34 @@ def _parse_result(r):  # sourcery skip: dict-comprehension
 
				 
			
 
				 
			
 
				 # 比较两个json文件 并在md文件中写入对比结果
			
 
				+# def evaluate_one(xlsx_dict, res_dict):
			
 
				+#     true_num = 0
			
 
				+#     xlsx_dict_no_space: dict = copy.deepcopy(xlsx_dict)
			
 
				+#     for index, text in xlsx_dict_no_space.items():
			
 
				+#         if type(xlsx_dict_no_space[index]) is str:
			
 
				+#             xlsx_dict_no_space[index] = text.replace(' ', '')
			
 
				+#         elif type(xlsx_dict_no_space[index]) is list:
			
 
				+#             for k, v in enumerate(xlsx_dict_no_space[index]):
			
 
				+#                 xlsx_dict_no_space[index][k] = v.replace(' ', '')
			
 
				+#     for key_yes in res_dict:
			
 
				+#         if type(res_dict[key_yes]) is str:
			
 
				+#             if Levenshtein_Distance(res_dict[key_yes], xlsx_dict_no_space[key_yes]) == 0:
			
 
				+#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '✅'])
			
 
				+#                 true_num += 1
			
 
				+#             else:
			
 
				+#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '❌'])
			
 
				+
			
 
				 def evaluate_one(xlsx_dict, res_dict):
			
 
				     true_num = 0
			
 
				-    xlsx_dict_no_space = copy.deepcopy(xlsx_dict)
			
 
				-    for index, text in enumerate(xlsx_dict_no_space):
			
 
				-        xlsx_dict_no_space[index] = text.replace(' ', '')
			
 
				     for key_yes in res_dict:
			
 
				         if type(res_dict[key_yes]) is str:
			
 
				-            if Levenshtein_Distance(res_dict[key_yes], xlsx_dict_no_space[key_yes]) == 0:
			
 
				+            if Levenshtein_Distance(res_dict[key_yes], xlsx_dict[key_yes]) == 0:
			
 
				                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '✅'])
			
 
				                 true_num += 1
			
 
				             else:
			
 
				                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '❌'])
			
 
				-
			
 
				     key_no_dict = {}
			
 
				-    for key_no_xlsx in xlsx_dict_no_space['noKeyList']:
			
 
				+    for key_no_xlsx in xlsx_dict['noKeyList']:
			
 
				         key_no_dict[key_no_xlsx] = []
			
 
				         for key_no_res in res_dict['noKeyList']:
			
 
				             key_no_dict[key_no_xlsx].append((Levenshtein_Distance(key_no_xlsx, key_no_res), key_no_res))
			
@@ -108,7 +121,7 @@ if __name__ == '__main__':
 
				         # md
			
 
				         md_file_path = img_path.parent / (img_path.with_suffix('.md'))
			
 
				         MD = MdUtils(file_name=str(md_file_path))
			
 
				-        table_result = ['key值', '正确答案', 'ocr返回答案', '是否正确']
			
 
				+        table_result = ['key值', '正确答案', 'ocr返回结果', '是否正确']
			
 
				         rate, statistics = evaluate_one(true_d, res_d)
			
 
				         MD.new_header(level=1, title='测试结果')
			
 
				         MD.new_header(level=2, title=f'正确率：{rate}')
			
--- a/YQ_OCR/to_md/xlsx_convert_json.py
+++ b/YQ_OCR/to_md/xlsx_convert_json.py
@@ -9,7 +9,7 @@ from YQ_OCR.config import keyDict
 
				 # 把xlsx转成json
			
 
				 
			
 
				 
			
 
				-excels_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/YQ_OCR/img'
			
 
				+excels_path = '/Users/sxkj/to_md/YQ_OCR/img'
			
 
				 
			
 
				 
			
 
				 # 返回文档里所有所需识别字符串