Sfoglia il codice sorgente

temp 带空格比对生产厂商

yan chuanli 2 anni fa
parent
commit
dca4c1face
4 file modificati con 24 aggiunte e 10 eliminazioni
  1. 2 1
      .gitignore
  2. 0 0
      HR_OCR/__init__.py
  3. 21 8
      YQ_OCR/to_md/convert_MD.py
  4. 1 1
      YQ_OCR/to_md/xlsx_convert_json.py

+ 2 - 1
.gitignore

@@ -1,4 +1,5 @@
 .DS_Store
 .idea
 __pycache__/
-
+*.json
+*.md

+ 0 - 0
HR_OCR/__init__.py


+ 21 - 8
YQ_OCR/to_md/convert_MD.py

@@ -13,7 +13,7 @@ from YQ_OCR.config import keyDict
 
 url = 'http://192.168.199.107:18087'
 url_path = '/ocr_system/identify'
-imgs_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/YQ_OCR/img'
+imgs_path = '/Users/sxkj/to_md/YQ_OCR/img'
 
 
 # 1. xlsx -> 正确json文件(写入厂家信息)
@@ -56,21 +56,34 @@ def _parse_result(r):  # sourcery skip: dict-comprehension
 
 
 # 比较两个json文件 并在md文件中写入对比结果
+# def evaluate_one(xlsx_dict, res_dict):
+#     true_num = 0
+#     xlsx_dict_no_space: dict = copy.deepcopy(xlsx_dict)
+#     for index, text in xlsx_dict_no_space.items():
+#         if type(xlsx_dict_no_space[index]) is str:
+#             xlsx_dict_no_space[index] = text.replace(' ', '')
+#         elif type(xlsx_dict_no_space[index]) is list:
+#             for k, v in enumerate(xlsx_dict_no_space[index]):
+#                 xlsx_dict_no_space[index][k] = v.replace(' ', '')
+#     for key_yes in res_dict:
+#         if type(res_dict[key_yes]) is str:
+#             if Levenshtein_Distance(res_dict[key_yes], xlsx_dict_no_space[key_yes]) == 0:
+#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '✅'])
+#                 true_num += 1
+#             else:
+#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '❌'])
+
 def evaluate_one(xlsx_dict, res_dict):
     true_num = 0
-    xlsx_dict_no_space = copy.deepcopy(xlsx_dict)
-    for index, text in enumerate(xlsx_dict_no_space):
-        xlsx_dict_no_space[index] = text.replace(' ', '')
     for key_yes in res_dict:
         if type(res_dict[key_yes]) is str:
-            if Levenshtein_Distance(res_dict[key_yes], xlsx_dict_no_space[key_yes]) == 0:
+            if Levenshtein_Distance(res_dict[key_yes], xlsx_dict[key_yes]) == 0:
                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '✅'])
                 true_num += 1
             else:
                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '❌'])
-
     key_no_dict = {}
-    for key_no_xlsx in xlsx_dict_no_space['noKeyList']:
+    for key_no_xlsx in xlsx_dict['noKeyList']:
         key_no_dict[key_no_xlsx] = []
         for key_no_res in res_dict['noKeyList']:
             key_no_dict[key_no_xlsx].append((Levenshtein_Distance(key_no_xlsx, key_no_res), key_no_res))
@@ -108,7 +121,7 @@ if __name__ == '__main__':
         # md
         md_file_path = img_path.parent / (img_path.with_suffix('.md'))
         MD = MdUtils(file_name=str(md_file_path))
-        table_result = ['key值', '正确答案', 'ocr返回答案', '是否正确']
+        table_result = ['key值', '正确答案', 'ocr返回结果', '是否正确']
         rate, statistics = evaluate_one(true_d, res_d)
         MD.new_header(level=1, title='测试结果')
         MD.new_header(level=2, title=f'正确率:{rate}')

+ 1 - 1
YQ_OCR/to_md/xlsx_convert_json.py

@@ -9,7 +9,7 @@ from YQ_OCR.config import keyDict
 # 把xlsx转成json
 
 
-excels_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/YQ_OCR/img'
+excels_path = '/Users/sxkj/to_md/YQ_OCR/img'
 
 
 # 返回文档里所有所需识别字符串