Browse Source

Merge branch 'new' of http://gogsb.soaringnova.com/chenguilong/ocr-table into new

jingze_cheng 9 months ago
parent
commit
8535f56c44
1 changed files with 12 additions and 11 deletions
  1. 12 11
      cores/check_table.py

+ 12 - 11
cores/check_table.py

@@ -1,6 +1,7 @@
 import cv2
 import numpy as np
 
+hard_colors = [[[35, 43, 46], [77, 220, 255]]]
 
 class Table:
     def __init__(self, html, img=[]):
@@ -71,19 +72,20 @@ class Table:
                 if cell == '':
                     self.empty += 1
 
-    def change_green2white(self):
+    def change_hard2white(self):
         """
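         Repaint every pixel of self.img that falls inside one of the HSV ranges listed in hard_colors (currently only a green range) with the near-white color [248, 248, 255].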
 
         Returns:
             None
         """
-        hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
-        lower_green = np.array([35, 43, 46])
-        upper_green = np.array([77, 220, 255])
-        mask_green = cv2.inRange(hsv, lower_green, upper_green)
         color = [248, 248, 255]
-        self.img[mask_green != 0] = color
+        hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
+        for hard_color in hard_colors:
+            lower_green = np.array(hard_color[0])
+            upper_green = np.array(hard_color[1])
+            mask_green = cv2.inRange(hsv, lower_green, upper_green)
+            self.img[mask_green != 0] = color
 
     def get_str(self):
         """
@@ -107,11 +109,10 @@ class Table:
         """
         self.get_empty()
         html_str = self.get_str()
-
-        if (self.empty > 4 and self.empty > self.total // 4) or (
-                '项目' in html_str and '每份' in html_str and '营养素参考值' in html_str and np.max(
-                [len(a) for a in self.html_arr]) < 3):
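+        # Condition 1: more than four cells are empty AND the empty count exceeds a quarter of all cells (self.total // 4 is floor division).
+        # Condition 2: html_str contains '项目', '每份' and '营养素参考值' together, and the longest row in self.html_arr has fewer than 3 cells.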
+        if (self.empty > 4 and self.empty > self.total // 4) or ('项目' in html_str and '每份' in html_str and '营养素参考值' in html_str and np.max([len(a) for a in self.html_arr]) < 3):
             print('识别效果不佳,改变图片颜色!')
-            self.change_green2white()
+            self.change_hard2white()
             return 1
         return 0