Browse Source

update 生产厂家比对

zeke-chin 2 years ago
parent
commit
08440daec3
1 changed file with 43 additions and 20 deletions
  1. 43 20
      YQ_OCR/to_md/convert_MD.py

+ 43 - 20
YQ_OCR/to_md/convert_MD.py

@@ -56,34 +56,26 @@ def _parse_result(r):  # sourcery skip: dict-comprehension
 
 
 # 比较两个json文件 并在md文件中写入对比结果
-# def evaluate_one(xlsx_dict, res_dict):
-#     true_num = 0
-#     xlsx_dict_no_space: dict = copy.deepcopy(xlsx_dict)
-#     for index, text in xlsx_dict_no_space.items():
-#         if type(xlsx_dict_no_space[index]) is str:
-#             xlsx_dict_no_space[index] = text.replace(' ', '')
-#         elif type(xlsx_dict_no_space[index]) is list:
-#             for k, v in enumerate(xlsx_dict_no_space[index]):
-#                 xlsx_dict_no_space[index][k] = v.replace(' ', '')
-#     for key_yes in res_dict:
-#         if type(res_dict[key_yes]) is str:
-#             if Levenshtein_Distance(res_dict[key_yes], xlsx_dict_no_space[key_yes]) == 0:
-#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '✅'])
-#                 true_num += 1
-#             else:
-#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '❌'])
-
 def evaluate_one(xlsx_dict, res_dict):
     true_num = 0
+    xlsx_dict_no_space: dict = copy.deepcopy(xlsx_dict)
+    for index, text in xlsx_dict_no_space.items():
+        if type(xlsx_dict_no_space[index]) is str:
+            xlsx_dict_no_space[index] = text.replace(' ', '')
+        elif type(xlsx_dict_no_space[index]) is list:
+            for k, v in enumerate(xlsx_dict_no_space[index]):
+                xlsx_dict_no_space[index][k] = v.replace(' ', '')
+    # 有key值的比较
     for key_yes in res_dict:
         if type(res_dict[key_yes]) is str:
-            if Levenshtein_Distance(res_dict[key_yes], xlsx_dict[key_yes]) == 0:
+            if Levenshtein_Distance(res_dict[key_yes], xlsx_dict_no_space[key_yes]) == 0:
                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '✅'])
                 true_num += 1
             else:
                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '❌'])
+    # 无key值的比较
     key_no_dict = {}
-    for key_no_xlsx in xlsx_dict['noKeyList']:
+    for key_no_xlsx in xlsx_dict_no_space['noKeyList']:
         key_no_dict[key_no_xlsx] = []
         for key_no_res in res_dict['noKeyList']:
             key_no_dict[key_no_xlsx].append((Levenshtein_Distance(key_no_xlsx, key_no_res), key_no_res))
@@ -94,13 +86,44 @@ def evaluate_one(xlsx_dict, res_dict):
             true_num += 1
         else:
             table_result.extend(['无key值', key_no_xlsx, sort_NoKey[0][1], '❌'])
+
+    # 算正确率
     rate = true_num / (len(table_result) / 4)
     all_rate.append(rate)
     statistics = f'共{len(table_result) // 4}个字段,正确{true_num}个,错误{len(table_result) // 4 - true_num}个'
-
     return "{:.2f}%".format(rate * 100), statistics
 
 
+# def evaluate_one(xlsx_dict, res_dict):
+#     true_num = 0
+#     # 有key值的比较
+#     for key_yes in res_dict:
+#         if type(res_dict[key_yes]) is str:
+#             if Levenshtein_Distance(res_dict[key_yes], xlsx_dict[key_yes]) == 0:
+#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '✅'])
+#                 true_num += 1
+#             else:
+#                 table_result.extend([key_yes, xlsx_dict[key_yes], res_dict[key_yes], '❌'])
+#     # 无key值的比较
+#     key_no_dict = {}
+#     for key_no_xlsx in xlsx_dict['noKeyList']:
+#         key_no_dict[key_no_xlsx] = []
+#         for key_no_res in res_dict['noKeyList']:
+#             key_no_dict[key_no_xlsx].append((Levenshtein_Distance(key_no_xlsx, key_no_res), key_no_res))
+#         sort_NoKey = sorted(key_no_dict[key_no_xlsx], key=lambda x: x[0])
+#         NoKey_min_distance = sort_NoKey[0][0]
+#         if NoKey_min_distance == 0:
+#             table_result.extend(['无key值', key_no_xlsx, sort_NoKey[0][1], '✅'])
+#             true_num += 1
+#         else:
+#             table_result.extend(['无key值', key_no_xlsx, sort_NoKey[0][1], '❌'])
+#     # 算正确率
+#     rate = true_num / (len(table_result) / 4)
+#     all_rate.append(rate)
+#     statistics = f'共{len(table_result) // 4}个字段,正确{true_num}个,错误{len(table_result) // 4 - true_num}个'
+#     return "{:.2f}%".format(rate * 100), statistics
+
+
 # 打开正确的json文件
 def open_true_json(j_path):
     with j_path.open('r') as f: