chencheng 1 рік тому
батько
коміт
e31b4c90ec
2 змінених файлів з 86 додано та 3 видалено
  1. 76 0
      cores/check_table.py
  2. 10 3
      server.py

+ 76 - 0
cores/check_table.py

@@ -0,0 +1,76 @@
+import cv2
+import numpy as np
+
+
+class Table:
+    """Parse recognised table HTML and flag tables with many empty cells.
+
+    html is the table markup (cells are read from its <tbody>); img is the
+    table image array, which change_green2white() repaints in place.
+    """
+
+    def __init__(self, html, img=None):
+        # None replaces the shared mutable [] default; omitted img is still [].
+        self.img = [] if img is None else img
+        self.html = html
+        self.html_arr = []  # one list of cell texts per parsed row
+        self.total = 0  # cells counted by get_empty()
+        self.empty = 0  # counted cells whose text is ''
+
+    def get_body(self):
+        """Return the markup inside <tbody>...</tbody>, or '' without <tbody>."""
+        _, opened, rest = (self.html or '').partition('<tbody>')
+        if not opened:
+            print('<tbody> 识别失败')
+            return ''
+        body, closed, _ = rest.partition('</tbody>')
+        if not closed:
+            print('</tbody> 识别失败')
+        return body
+
+    def get_tr(self):
+        """Return the body split on '<tr>', or [] when it has no '<tr>'."""
+        pieces = self.get_body().split('<tr>')
+        return pieces if len(pieces) > 1 else []
+
+    def get_td(self):
+        """Fill self.html_arr with each row's cell texts, unless already filled."""
+        if self.html_arr:
+            return
+        for row_markup in self.get_tr():
+            if row_markup == '':
+                continue
+            cells = []
+            # The piece after the last '</td>' is the row tail, not a cell.
+            for chunk in row_markup.split('</td>')[:-1]:
+                _, found, rest = chunk.partition('<td')
+                # Any '<td>' or '<td attr...>' opening counts, not only colspan=3.
+                if found and rest[:1] in ('>', ' '):
+                    cells.append(rest.partition('>')[2])
+            self.html_arr.append(cells)
+
+    def get_empty(self):
+        """Count cells into self.total and blank cells into self.empty."""
+        self.get_td()
+        if self.total != 0:
+            return  # already counted by an earlier call
+        cells = [cell for row in self.html_arr for cell in row]
+        self.total = len(cells)
+        self.empty = sum(1 for cell in cells if cell == '')
+
+    def change_green2white(self):
+        """Overwrite the green pixels of self.img with near-white, in place."""
+        hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
+        # Lower/upper HSV bounds of the pixels treated as green.
+        lower_green = np.array([35, 43, 46])
+        upper_green = np.array([77, 220, 255])
+        mask_green = cv2.inRange(hsv, lower_green, upper_green)
+        self.img[mask_green != 0] = [248, 248, 255]
+
+    def check_html(self):
+        """Whiten green and return 1 if empty > 4 and empty > total // 4, else 0."""
+        self.get_empty()
+        if self.empty > 4 and self.empty > self.total // 4:
+            self.change_green2white()
+            return 1
+        return 0

+ 10 - 3
server.py

@@ -18,6 +18,7 @@ from sx_utils.sx_log import *
 import paddleclas
 
 from cores.post_hander import *
+from cores.check_table import *
 
 
 format_print()
@@ -147,13 +148,12 @@ def get_zero_degree_image(img):
 
 def table_res(im, ROTATE=-1):
     im = im.copy()
-    cv2.imwrite('before-rotate.jpg', im)
+    # cv2.imwrite('before-rotate.jpg', im)
     # 获取正向图片
     img = get_zero_degree_image(im)
-    cv2.imwrite('after-rotate.jpg', img)
+    # cv2.imwrite('after-rotate.jpg', img)
     try:
         table_engine_lock.acquire()
-        # cv2.imwrite('3.jpg', img)
         res = table_engine(img)
     finally:
         table_engine_lock.release()
@@ -175,6 +175,13 @@ def ping():
 def table(image: TableInfo):
     img = base64_to_np(image.image)
     res, html = table_res(img)
+    # print(html)
+    table=Table(html,img)
+    if table.check_html():
+        res, html = table_res(table.img)
+        cv2.imwrite('table.jpg', table.img)
+    # print(table.total)
+    # print(table.empty)
 
     if html:
         post_hander = PostHandler(html)