|
@@ -0,0 +1,118 @@
|
|
|
|
+import re
|
|
|
|
+from itertools import chain
|
|
|
|
+from pathlib import Path
|
|
|
|
+
|
|
|
|
+import numpy as np
|
|
|
|
+import pandas as pd
|
|
|
|
+import json
|
|
|
|
+from mdutils.mdutils import MdUtils
|
|
|
|
+import requests
|
|
|
|
+
|
|
|
|
+from config import keyDict
|
|
|
|
+
|
|
|
|
# Base address (scheme, host and port) of the OCR service that receives the images.
url = 'http://192.168.199.107:18087'
# Endpoint path appended to `url` when an image is posted.
url_path = '/ocr_system/identify'
# Root directory that the script entry point searches recursively for images.
imgs_path = '/Users/zeke/work/sx/OCR/image_data/样签图片'
|
|
|
|
+
|
|
|
|
+
|
|
|
|
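# 1. xlsx -> ground-truth JSON file (with the manufacturer information written in)
# 2. Send the image (together with the ground-truth JSON)
# 3. Compare the returned JSON with the ground-truth JSON
#    (keyed fields: check the returned result against the ground truth;
#     key-less values: check the ground truth against the returned result)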
|
|
|
|
+
|
|
|
|
def Levenshtein_Distance(str1, str2):
    """Return the edit distance between ``str1`` and ``str2``.

    The distance is the smallest number of single-character insertions,
    deletions and substitutions that turns one string into the other.
    """
    # Distances from the empty prefix of str1 to every prefix of str2.
    previous_row = list(range(len(str2) + 1))
    for row_index, left_char in enumerate(str1, start=1):
        # Reducing the first row_index characters of str1 to "" costs
        # row_index deletions.
        current_row = [row_index]
        for col_index, right_char in enumerate(str2, start=1):
            substitution_cost = 0 if left_char == right_char else 1
            current_row.append(min(
                previous_row[col_index] + 1,
                current_row[col_index - 1] + 1,
                previous_row[col_index - 1] + substitution_cost,
            ))
        previous_row = current_row
    return previous_row[len(str2)]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def send_request(img_path: Path, img_json: str, timeout=None):
    """POST one image together with its ground-truth JSON to the OCR service.

    Args:
        img_path: Path of the image file to upload.
        img_json: Ground-truth JSON string, sent as the ``docDataStr`` form
            field.
        timeout: Optional ``requests`` timeout in seconds. ``None`` (the
            default) waits indefinitely, which is the previous behaviour.

    Returns:
        The response body decoded from JSON.
    """
    import mimetypes  # local import: only needed to label the uploaded part

    # The third element of the files tuple is the part's Content-Type, so it
    # has to be a MIME string rather than the file path.
    content_type = mimetypes.guess_type(img_path.name)[0] or 'application/octet-stream'
    payload = {'docDataStr': img_json}
    # The context manager closes the image handle even when the request fails.
    with open(img_path, 'rb') as img_file:
        file = {'file': (img_path.name, img_file, content_type)}
        r = requests.post(url + url_path, files=file, data=payload, timeout=timeout)
    return r.json()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def _parse_result(r):
    """Reduce a raw service response to the fields that get evaluated.

    For status ``'000'`` the returned dict holds every ``keyDict`` field that
    is present in ``r['result']``, the ``noKeyList`` unchanged, and
    ``logoList`` reduced to its ``logoFileName`` values. For status ``'101'``
    the string ``"101"`` is returned; any other status yields ``None``.
    """
    status = r['status']
    if status == '101':
        return "101"
    if status != '000':
        return None

    payload = r['result']
    parsed = {name: payload[name] for name in keyDict if name in payload}
    parsed['noKeyList'] = payload['noKeyList']
    parsed['logoList'] = [entry['logoFileName'] for entry in payload['logoList']]
    return parsed
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def evaluate_one(xlsx_dict, res_dict):
    """Compare one OCR result with its ground truth and record the outcome.

    Keyed fields are driven by the OCR result: every string-valued entry of
    ``res_dict`` is checked against the same key in ``xlsx_dict``. Key-less
    values are driven by the ground truth: every entry of
    ``xlsx_dict['noKeyList']`` is paired with its closest entry (by edit
    distance) in ``res_dict['noKeyList']``. A field counts as correct only on
    an exact match.

    Side effects:
        Appends one four-cell row per compared field to the module-level
        ``table_result`` list, and appends this image's accuracy (a float
        between 0 and 1) to the module-level ``all_rate`` list.

    Args:
        xlsx_dict: Ground-truth fields loaded from the JSON file.
        res_dict: Parsed OCR fields.

    Returns:
        A ``(rate, statistics)`` tuple: the accuracy formatted as a percentage
        string with two decimals, and a summary line with the total, correct
        and wrong field counts.
    """
    true_num = 0
    # Count the compared fields here instead of deriving the count from
    # len(table_result): the caller seeds that list with a header row, which
    # used to be counted as one extra (wrong) field.
    total_num = 0

    for key_yes, ocr_value in res_dict.items():
        if not isinstance(ocr_value, str):
            continue
        # A key missing from the ground truth is shown with an empty expected
        # value instead of raising KeyError.
        expected = xlsx_dict.get(key_yes, '')
        # An edit distance of zero is exactly string equality.
        is_correct = ocr_value == expected
        table_result.extend([key_yes, expected, ocr_value, '✅' if is_correct else '❌'])
        total_num += 1
        true_num += is_correct

    ocr_no_key = res_dict.get('noKeyList', [])
    for key_no_xlsx in xlsx_dict.get('noKeyList', []):
        if ocr_no_key:
            # min() returns the first candidate with the smallest distance,
            # the same pick as a stable sort followed by [0].
            closest = min(
                ocr_no_key,
                key=lambda candidate: Levenshtein_Distance(key_no_xlsx, candidate),
            )
            is_correct = closest == key_no_xlsx
        else:
            # Nothing was recognised, so there is no candidate to pair with.
            closest, is_correct = '', False
        table_result.extend(['无key值', key_no_xlsx, closest, '✅' if is_correct else '❌'])
        total_num += 1
        true_num += is_correct

    # Guard against an image for which no field was compared at all.
    rate = true_num / total_num if total_num else 0.0
    all_rate.append(rate)
    statistics = f'共{total_num}个字段,正确{true_num}个,错误{total_num - true_num}个'

    return "{:.2f}%".format(rate * 100), statistics
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def open_true_json(j_path):
    """Load a ground-truth JSON file.

    Args:
        j_path: Location of the JSON file (a ``Path`` or a path string).

    Returns:
        A ``(j_dict, j_json_str)`` tuple: the decoded JSON value, and the same
        value serialized back to a string with non-ASCII characters left
        unescaped.
    """
    # Read as UTF-8 explicitly: the platform default encoding is not UTF-8
    # everywhere, and the files may hold non-ASCII text.
    with Path(j_path).open('r', encoding='utf-8') as f:
        j_dict = json.load(f)
    j_json_str = json.dumps(j_dict, ensure_ascii=False)
    return j_dict, j_json_str
|
|
|
|
+
|
|
|
|
+
|
|
|
|
if __name__ == '__main__':
    # Script entry point: evaluate every image under imgs_path against the
    # JSON file next to it, write one markdown report per image, and print the
    # mean accuracy. `table_result` and `all_rate` stay module-level names
    # because evaluate_one() appends to them.

    # One walk with a case-insensitive suffix check, so no image is visited
    # twice where glob matching ignores case.
    image_suffixes = {'.jpg', '.png', '.jpeg'}
    img_paths = sorted(
        p for p in Path(imgs_path).rglob('*')
        if p.is_file() and p.suffix.lower() in image_suffixes
    )
    all_rate = []
    for img_path in img_paths:
        print(img_path)
        # json result
        json_path = img_path.with_suffix('.json')
        if not json_path.is_file():
            # No ground truth for this image: nothing to compare against.
            print(f'跳过(缺少标注文件):{json_path}')
            continue
        true_d, true_json = open_true_json(json_path)
        result = send_request(img_path, true_json)
        res_d = _parse_result(result)
        if not isinstance(res_d, dict):
            # _parse_result returns "101" or None when the status is not
            # '000'; evaluate_one needs the parsed dict.
            print(f'跳过(识别未成功,返回:{res_d!r})')
            continue
        # md
        # with_suffix already keeps the image's directory; joining it onto
        # img_path.parent again would duplicate directories for relative paths.
        md_file_path = img_path.with_suffix('.md')
        MD = MdUtils(file_name=str(md_file_path))
        # Header row of the report table; evaluate_one appends the data rows.
        table_result = ['key值', '正确答案', 'ocr返回答案', '是否正确']
        rate, statistics = evaluate_one(true_d, res_d)
        MD.new_header(level=1, title='测试结果')
        MD.new_header(level=2, title=f'正确率:{rate}')
        MD.new_header(level=3, title=statistics)
        print(f'正确率:{rate}')
        MD.new_table(columns=4, rows=len(table_result) // 4, text=table_result, text_align='center')
        MD.create_md_file()

    print('-------------------------------')
    if all_rate:
        overall_rate = "{:.2f}%".format(np.mean(all_rate) * 100)
        print(f'总体正确率:{overall_rate}')
    else:
        # The mean of an empty list is NaN; report the situation instead.
        print('没有可统计的图片')
|