import cv2
import numpy as np


class Table:
    """Split a table's HTML string into rows of cell texts and clean its image.

    NOTE(review): every separator / marker literal used by the parsing
    methods is the empty string ``''`` in the reviewed copy of this file.
    ``str.split('')`` raises ``ValueError`` and ``'' in s`` is always true,
    so these look like markup tags that were stripped when the file was
    exported.  They are reproduced unchanged below; restore the intended
    tags before relying on the parser.
    """

    def __init__(self, html, img=None):
        """Store the inputs and reset the parse caches.

        Args:
            html: Markup of the table; the parsing methods call ``split``
                on it.
            img: Image passed to OpenCV by ``change_green2white`` (it is
                converted there with ``COLOR_BGR2HSV``).  Defaults to a
                fresh empty list.
        """
        # A literal ``[]`` default would be one list shared by all instances.
        self.img = [] if img is None else img
        self.html = html
        self.html_arr = []  # rows of cell strings, filled by get_td()
        self.total = 0  # number of cells, set by get_empty()
        self.empty = 0  # number of cells equal to '', set by get_empty()

    def get_body(self) -> str:
        """Return the part of ``self.html`` between the two body markers.

        Returns:
            The text after the opening marker and before the closing one.
            ``''`` is returned (after printing a failure message) when the
            opening marker cannot be split off; if only the closing split
            fails, the text after the opening marker is returned as is.
        """
        split_errors = (AttributeError, IndexError, TypeError, ValueError)
        try:
            # NOTE(review): empty separator, presumably the opening tag.
            body = self.html.split('')[1]
        except split_errors as err:
            print(' 识别失败')  # "recognition failed"
            print(err)
            # The original fell through with the local unbound and died
            # with UnboundLocalError; report "no body" instead.
            return ''
        try:
            # NOTE(review): empty separator, presumably the closing tag.
            body = body.split('')[0]
        except split_errors as err:
            print(' 识别失败')
            print(err)
        return body

    def get_tr(self) -> list:
        """Split the body into row chunks.

        Returns:
            The pieces produced by splitting on the row marker, or ``[]``
            when the marker does not occur (the split yields one piece).
        """
        # NOTE(review): empty separator, presumably the row tag.
        pieces = self.get_body().split('')
        return pieces if len(pieces) > 1 else []

    def get_td(self) -> None:
        """Fill ``self.html_arr`` with one list of cell texts per row chunk.

        Does nothing when ``self.html_arr`` is already populated, so the
        HTML is parsed at most once per instance.
        """
        if self.html_arr:
            return
        for row in self.get_tr():
            if row == '':
                continue
            # The piece after the last cell separator is dropped.
            # NOTE(review): empty separator, presumably the cell end tag.
            cells = row.split('')[:-1]
            texts = []
            for cell in cells:
                # NOTE(review): the two markers below are identical empty
                # strings here; presumably they were two different forms
                # of the cell opening tag.
                if '' in cell:
                    texts.append(cell.split('')[1])
                if '' in cell:
                    texts.append(cell.split('')[1])
            self.html_arr.append(texts)

    def get_empty(self) -> None:
        """Count all cells and the empty ones into ``total`` / ``empty``.

        Parses the HTML first if needed.  Skipped when ``self.total`` is
        already non-zero so repeated calls do not recount.
        """
        self.get_td()
        if self.total != 0:
            return
        cells = [cell for row in self.html_arr for cell in row]
        self.total = len(cells)
        self.empty = cells.count('')

    def change_green2white(self):
        """Recolour green pixels of ``self.img`` and save it as ``table.jpg``.

        The image is modified in place and written to ``table.jpg``
        relative to the current working directory.
        """
        hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
        # Inclusive HSV bounds used as the "green" range.
        lower_green = np.array([35, 43, 46])
        upper_green = np.array([77, 220, 255])
        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        # Replacement colour for every pixel inside the mask.
        color = [248, 248, 255]
        self.img[mask_green != 0] = color
        cv2.imwrite('table.jpg', self.img)

    def get_str(self) -> str:
        """Return all cell texts of ``self.html_arr`` concatenated in order."""
        return ''.join(cell for row in self.html_arr for cell in row)

    def check_html(self) -> int:
        """Decide whether the image should be recoloured, and do so if yes.

        The image is recoloured when either

        * more than 4 cells are empty and they exceed a quarter of all
          cells (integer division), or
        * the concatenated cell text contains all three keywords
          '项目' ("item"), '每份' ("per serving") and '营养素参考值'
          ("nutrient reference value") while no row has 3 or more cells.

        Returns:
            ``1`` if ``change_green2white`` was called, otherwise ``0``.
        """
        self.get_empty()
        text = self.get_str()

        mostly_empty = self.empty > 4 and self.empty > self.total // 4
        # The original tested the keywords against the builtin ``str`` type,
        # which raises TypeError; the concatenated cell text is what is meant.
        has_keywords = all(
            word in text for word in ('项目', '每份', '营养素参考值')
        )
        widest_row = max(map(len, self.html_arr), default=0)

        if mostly_empty or (has_keywords and widest_row < 3):
            self.change_green2white()
            return 1
        return 0