xujiayue hace 2 años
padre
commit
880b20a38f

+ 1 - 1
HR_OCR/TestAllOcr/config.py

@@ -3,7 +3,7 @@ Author: zeke-chin zeke-chin@icloud.com
 Date: 2022-09-26 14:58:10
 LastEditors: zeke-chin zeke-chin@icloud.com
 LastEditTime: 2022-09-30 09:59:43
-FilePath: /utils/HR_OCR/TestAllOcr/config.py
+FilePath: /to_md/HR_OCR/TestAllOcr/config.py
 Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
 '''
 import base64

+ 1 - 1
HR_OCR/TestAllOcr/test_interface.py

@@ -3,7 +3,7 @@ Author: zeke-chin zeke-chin@icloud.com
 Date: 2022-09-28 20:28:41
 LastEditors: zeke-chin zeke-chin@icloud.com
 LastEditTime: 2022-09-30 10:06:35
-FilePath: /utils/HR_OCR/TestAllOcr/test_interface.py
+FilePath: /to_md/HR_OCR/TestAllOcr/test_interface.py
 Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
 '''
 from pathlib import Path

+ 1 - 1
HR_OCR/to_md/README.md

@@ -39,7 +39,7 @@
   filed = 'regbook'
   
   # 若md_path为None 则默认使用图片父路径为markdown保存路径
-  # md_path = '/Users/zeke/work/sx/OCR/HROCR/utils/example' or image_path.parent
+  # md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
   md_path = None or image_path.parent
   ```
 

+ 2 - 2
HR_OCR/to_md/use.py

@@ -3,7 +3,7 @@ Author: zeke-chin zeke-chin@icloud.com
 Date: 2022-09-28 20:28:41
 LastEditors: zeke-chin zeke-chin@icloud.com
 LastEditTime: 2022-09-30 15:08:48
-FilePath: /utils/HR_OCR/utils/use.py
+FilePath: /to_md/HR_OCR/to_md/use.py
 Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
 '''
 
@@ -26,7 +26,7 @@ md_name = 'CET'
 filed = 'cet'
 
 # 若md_path为None 则默认使用图片父路径为markdown保存路径
-# md_path = '/Users/zeke/work/sx/OCR/HROCR/utils/example' or image_path.parent
+# md_path = '/Users/zeke/work/sx/OCR/HROCR/to_md/example' or image_path.parent
 md_path = None or image_path.parent
 
 md_file = parser_path(Path(md_path) / Path(md_name + image_path.stem), image_rotate)

+ 2 - 2
HR_OCR/tools/README.md

@@ -6,7 +6,7 @@
 # 项目url
 url = 'http://192.168.199.27:18040'
 # 目标文件夹
-imgs_path = './HR_OCR/utils/example/img'
+imgs_path = './HR_OCR/to_md/example/img'
 
 def send_request(img_path, image_type = 0):
     with open(img_path, 'rb') as f:
@@ -29,6 +29,6 @@ def send_request(img_path, image_type = 0):
 
 ```python
 # 需要格式化的目的文件夹路径
-target_path = './HR_OCR/utils/example/img'
+target_path = './HR_OCR/to_md/example/img'
 ```
 

+ 1 - 1
HR_OCR/tools/convert_json.py

@@ -6,7 +6,7 @@ import base64
 from itertools import chain
 
 url = 'http://192.168.199.27:18060'
-imgs_path = '/Users/sxkj/utils/img'
+imgs_path = '/Users/sxkj/to_md/img'
 
 def send_request(img_path):
     with open(img_path, 'rb') as f:

+ 2 - 5
HR_OCR/tools/suffix.py

@@ -4,11 +4,8 @@ from itertools import chain
 import sys
 
 # conf
-<<<<<<< HEAD
-target_path = '/Users/sxkj/utils/9.29/1'
-=======
-target_path = './HR_OCR/utils/example/img'
->>>>>>> a0b1c051142a2687d8ae2c63f543f021d3239f1f
+
+target_path = './HR_OCR/to_md/example/img'
 #suffix = sys.argv[2]
 suffix = 'jpg' if len(sys.argv) != 3 else sys.argv[2]
 

+ 6 - 6
YQ_OCR/output/餐饮纯牛奶 内包-表格识别结果.md

@@ -39,22 +39,22 @@
 | :---: | :---: | :---: | :---: |
 |1行|项目|项目|✅|
 |1行|每100mL|每100mL|✅|
-|1行|NRV%|NRV% ||
+|1行|NRV%|NRV% ||
 |2行|能量|能量|✅|
 |2行|280kJ|280kJ|✅|
-|2行|3%|3% ||
+|2行|3%|3% ||
 |3行|蛋白质|蛋白质|✅|
 |3行|3.2g|3.2g|✅|
-|3行|5%|5% ||
+|3行|5%|5% ||
 |4行|脂肪|脂肪|✅|
 |4行|3.8g|3.8g|✅|
-|4行|6%|6% ||
+|4行|6%|6% ||
 |5行|碳水化合物|碳水化合物|✅|
 |5行|5.0g|5.0g|✅|
-|5行|2%|2% ||
+|5行|2%|2% ||
 |6行|钠|钠|✅|
 |6行|53mg|53mg|✅|
-|6行|3%|3% ||
+|6行|3%|3% ||
 |7行|钙|钙|✅|
 |7行|100mg|100mg|✅|
 |7行|13%|13%|✅|

+ 4 - 2
YQ_OCR/utils/text2md.py

@@ -79,13 +79,15 @@ class TableMD(object):
                         [f'{x + 1}行',
                          gt_parse_list[j],
                          pre_parse_list[j],
-                         '✅' if gt_parse_list[j] == pre_parse_list[j] else '❌'])
+                         '✅' if gt_parse_list[j] == pre_parse_list[j] or gt_parse_list[j].replace(' ', '') ==
+                                pre_parse_list[j].replace(' ', '') else '❌'])
                 elif key == 'old':
                     self.old_table_text.extend(
                         [f'{x + 1}行',
                          gt_parse_list[j],
                          pre_parse_list[j],
-                         '✅' if gt_parse_list[j] == pre_parse_list[j] else '❌'])
+                         '✅' if gt_parse_list[j] == pre_parse_list[j] or gt_parse_list[j].replace(' ', '') ==
+                                pre_parse_list[j].replace(' ', '') else '❌'])
 
         acc = correct / count * 100
         self.acc = acc