import cv2
import numpy as np


class Table:
    """Hold a table's HTML string and (optionally) the image it came from.

    The class splits ``html`` into rows and cells (``html_arr``), counts how
    many cells are empty, and, when the result looks poor, recolours the
    pixels of ``img`` that fall inside a fixed HSV range.

    Attributes:
        img: Image passed by the caller; an empty list when none was given.
            ``change_green2white`` hands it to ``cv2.cvtColor`` and assigns
            through a boolean mask, so it presumably has to be a BGR numpy
            array -- TODO confirm against callers.
        html: The raw HTML string.
        html_arr: List of rows, each a list of cell strings; filled lazily
            by ``get_td``.
        total: Number of cells counted by ``get_empty``.
        empty: Number of cells equal to ``''`` counted by ``get_empty``.
    """

    def __init__(self, html, img=None):
        """Store the inputs and reset the parsed state.

        Args:
            html: HTML string to parse.
            img: Optional image. Defaults to a fresh empty list per instance
                (a ``None`` sentinel is used so instances never share one
                mutable default object).
        """
        # `is None` rather than truthiness: a numpy array has no single
        # truth value.
        self.img = [] if img is None else img
        self.html = html
        self.html_arr = []
        self.total = 0
        self.empty = 0

    def get_tr(self):
        """Return the row fragments of ``self.html``.

        The HTML is split on the row delimiter and everything before the
        first delimiter is discarded; an input without any delimiter yields
        an empty list. A former ``get_body`` pre-processing step is disabled,
        so the split runs on ``self.html`` directly.

        NOTE(review): the delimiter literal is the empty string in this
        file, and ``str.split('')`` raises ``ValueError`` (empty separator).
        It looks like the original tag text was lost -- restore the real
        delimiter before relying on this method.
        """
        # Slicing from index 1 already yields [] when nothing was split off.
        return self.html.split('')[1:]

    def get_td(self):
        """Populate ``self.html_arr`` with the cell texts of every row.

        Does nothing when ``html_arr`` is already filled. Each parsed row is
        printed before it is stored.

        NOTE(review): every delimiter/marker literal below is the empty
        string. ``'' in cell`` is always true and ``cell.split('')`` raises
        ``ValueError``; presumably these were three distinct cell-opening
        markers and one cell-closing delimiter -- TODO restore them.
        """
        if self.html_arr:
            return
        for row in self.get_tr():
            if row == '':
                continue
            # Drop the trailing fragment after the last cell delimiter.
            fragments = row.split('')[:-1]
            cells = []
            for fragment in fragments:
                # One check per supported cell-opening marker.
                if '' in fragment:
                    cells.append(fragment.split('')[1])
                if '' in fragment:
                    cells.append(fragment.split('')[1])
                if '' in fragment:
                    cells.append(fragment.split('')[1])
            print(cells)
            self.html_arr.append(cells)

    def get_empty(self):
        """Count all cells and the empty ones into ``total`` and ``empty``.

        Parsing is triggered first via ``get_td``. Counting is skipped when
        ``total`` is already non-zero, so repeated calls do not double count.
        """
        self.get_td()
        if self.total != 0:
            return
        cells = [cell for row in self.html_arr for cell in row]
        self.total += len(cells)
        self.empty += sum(1 for cell in cells if cell == '')

    def change_green2white(self):
        """Overwrite, in place, the pixels of ``self.img`` inside an HSV range.

        The image is converted with ``cv2.COLOR_BGR2HSV``; pixels between
        ``[35, 43, 46]`` and ``[77, 220, 255]`` (the range the code names
        "green") are set to ``[248, 248, 255]`` in the image's own channel
        order.

        NOTE(review): with the default ``img`` (an empty list) the
        ``cv2.cvtColor`` call is expected to fail -- confirm callers always
        supply an image before ``check_html`` can reach this method.
        """
        hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
        lower_green = np.array([35, 43, 46])
        upper_green = np.array([77, 220, 255])
        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        replacement = [248, 248, 255]
        self.img[mask_green != 0] = replacement

    def get_str(self):
        """Return all cell texts of ``html_arr`` concatenated in row order."""
        return ''.join(cell for row in self.html_arr for cell in row)

    def check_html(self):
        """Decide whether the parsed table looks poor and, if so, recolour.

        The table is considered poor when either

        * more than 4 cells are empty and they exceed a quarter of all
          cells (integer division), or
        * the text contains the three keywords '项目' ("item"), '每份'
          ("per serving") and '营养素参考值' ("nutrient reference value")
          while the widest row has fewer than 3 cells.

        The raw HTML, the parsed rows and the empty-cell count are printed
        on every call.

        Returns:
            1 after calling ``change_green2white`` for a poor table,
            otherwise 0.
        """
        self.get_empty()
        html_str = self.get_str()
        print(self.html)
        print(self.html_arr)
        print(self.empty)

        too_many_empty = self.empty > 4 and self.empty > self.total // 4
        has_keywords = (
            '项目' in html_str
            and '每份' in html_str
            and '营养素参考值' in html_str
        )
        # default=0 keeps max() safe should html_arr ever be empty here.
        widest_row = max((len(row) for row in self.html_arr), default=0)

        if too_many_empty or (has_keywords and widest_row < 3):
            print('识别效果不佳,改变图片颜色!')
            self.change_green2white()
            return 1
        return 0