|
@@ -0,0 +1,76 @@
|
|
|
+import cv2
|
|
|
+import numpy as np
|
|
|
+
|
|
|
+
|
|
|
class Table:
    """Parse the cells of an HTML table string and flag sparsely filled tables.

    The HTML is parsed with plain string splitting (no HTML parser): the
    text between ``<tbody>`` and ``</tbody>`` is split into rows on ``<tr>``
    and into cells on ``</td>``.

    Attributes:
        img: Image associated with the table; an empty list when none was
            supplied.  ``change_green2white`` modifies it in place.
        html: The raw HTML string handed to the constructor.
        html_arr: Parsed cell texts, one inner list per row (filled lazily
            by ``get_td``).
        total: Number of parsed cells (filled lazily by ``get_empty``).
        empty: Number of parsed cells whose text is the empty string.
    """

    def __init__(self, html, img=None):
        """Store the HTML and the optional image.

        Args:
            html: HTML text expected to contain a ``<tbody>`` section.
            img: Optional image for ``change_green2white``.
                # NOTE(review): presumably a BGR ``numpy`` array as loaded
                # by OpenCV -- confirm against the caller.
        """
        # A fresh list per instance: a literal ``[]`` default would be one
        # object shared by every Table created without an image.
        self.img = [] if img is None else img
        self.html = html
        self.html_arr = []
        self.total = 0
        self.empty = 0

    def get_body(self):
        """Return the text between the first ``<tbody>`` and ``</tbody>``.

        Returns:
            The body text, or ``''`` when ``self.html`` is not a string or
            contains no ``<tbody>`` tag.  When the closing tag is missing
            the remainder after ``<tbody>`` is returned as-is.
        """
        html = self.html if isinstance(self.html, str) else ''
        parts = html.split('<tbody>')
        if len(parts) < 2:
            # Report the failure and degrade to an empty body instead of
            # falling through to an unbound local variable.
            print('<tbody> 识别失败')
            return ''
        body = parts[1]
        if '</tbody>' not in body:
            # str.split never raises here, so detect the missing closing
            # tag explicitly in order to report it.
            print('</tbody> 识别失败')
        return body.split('</tbody>')[0]

    def get_tr(self):
        """Split the table body on ``<tr>``.

        Returns:
            The list produced by splitting on ``<tr>`` (including the text
            before the first ``<tr>``, usually ``''``), or ``[]`` when the
            body contains no ``<tr>`` at all.
        """
        rows = self.get_body().split('<tr>')
        return rows if len(rows) > 1 else []

    def get_td(self):
        """Populate ``self.html_arr`` with the cell texts of every row.

        Does nothing when ``self.html_arr`` is already non-empty, so the
        HTML is parsed at most once for a table that has rows.
        """
        if self.html_arr:
            return
        # The literal contains backslashes: it matches ``colspan=\"3\"``.
        # NOTE(review): presumably the HTML arrives still JSON-escaped --
        # confirm.  Cells whose <td> carries any other attribute are not
        # recognised and therefore not counted.
        colspan_open = '<td colspan=\\"3\\">'
        plain_open = '<td>'
        for row in self.get_tr():
            if row == '':
                continue
            cells = []
            # Everything after the last </td> is row trailer (e.g. </tr>).
            for chunk in row.split('</td>')[:-1]:
                if colspan_open in chunk:
                    cells.append(chunk.split(colspan_open)[1])
                if plain_open in chunk:
                    cells.append(chunk.split(plain_open)[1])
            self.html_arr.append(cells)

    def get_empty(self):
        """Count parsed cells into ``self.total`` and ``self.empty``.

        Counting is skipped when ``self.total`` is already non-zero, so
        repeated calls do not double the counters.
        """
        self.get_td()
        if self.total != 0:
            return
        cells = [cell for row in self.html_arr for cell in row]
        self.total = len(cells)
        self.empty = sum(1 for cell in cells if cell == '')

    def change_green2white(self):
        """Overwrite green pixels of ``self.img`` with a near-white colour.

        The image is modified in place.  Nothing happens when no image was
        supplied (``None`` or an empty container).
        """
        if self.img is None or np.size(self.img) == 0:
            # No image to recolour; avoid handing an empty input to OpenCV.
            return
        # NOTE(review): assumes self.img is an 8-bit BGR image -- confirm.
        hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
        # Inclusive HSV bounds treated as "green".
        lower_green = np.array([35, 43, 46])
        upper_green = np.array([77, 220, 255])
        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        # Replacement colour, written in the image's own channel order.
        color = [248, 248, 255]
        self.img[mask_green != 0] = color

    def check_html(self):
        """Report whether the table is sparsely filled.

        A table counts as sparse when more than 4 cells are empty and the
        empty cells exceed a quarter (integer division) of all cells.  For
        a sparse table the green regions of the image are whitened first.

        Returns:
            ``1`` when the table is sparse, otherwise ``0``.
        """
        self.get_empty()
        if self.empty > 4 and self.empty > self.total // 4:
            self.change_green2white()
            return 1
        return 0
|