import re

import cv2
import numpy as np
class Table:
    """Cell-level view of a recognised HTML table, paired with its image.

    The HTML string is split into rows and cells by plain tag matching (no
    full HTML parser), and the share of empty cells is used by
    ``check_html`` to decide whether the image should be recoloured.

    NOTE(review): the row/cell separators below are the standard
    ``<tr>`` / ``<td ...>`` / ``</td>`` tags. The reviewed copy of this
    file had empty separators (which make ``str.split`` raise
    ``ValueError``), so confirm these match the recogniser's real output.

    Attributes:
        img: Image the table was recognised from. ``change_green2white``
            passes it to ``cv2.cvtColor`` as a BGR image and edits it in
            place.
        html: Raw HTML text of the table.
        html_arr: Parsed cell texts, one inner list per row. Filled lazily
            by ``get_td``.
        total: Number of parsed cells, filled by ``get_empty``.
        empty: Number of parsed cells whose text is ``''``.
    """

    # Opening tags, with or without attributes (e.g. ``<td colspan="2">``).
    _TR_OPEN = re.compile(r'<tr(?:\s[^>]*)?>')
    _TD_OPEN = re.compile(r'<td(?:\s[^>]*)?>')
    _TD_CLOSE = '</td>'

    def __init__(self, html, img=None):
        """Store the HTML text and the optional source image.

        Args:
            html: HTML text of the table.
            img: Source image; defaults to a fresh empty list so that
                instances never share one default object.
        """
        self.img = [] if img is None else img
        self.html = html
        self.html_arr = []
        self.total = 0
        self.empty = 0

    def get_tr(self):
        """Return the text following each ``<tr>`` opening tag.

        Returns:
            One string per row, each running up to the next ``<tr>`` (or
            the end of the HTML). Empty list when there is no ``<tr>``.
        """
        # Index 0 is whatever precedes the first row; drop it.
        return self._TR_OPEN.split(self.html)[1:]

    def get_td(self):
        """Parse ``self.html`` into ``self.html_arr`` (rows of cell texts).

        Does nothing when ``self.html_arr`` is already populated, so it is
        safe to call repeatedly.
        """
        if self.html_arr:
            return
        for row in self.get_tr():
            if row == '':
                continue
            cells = []
            # The piece after the last ``</td>`` is the row trailer
            # (e.g. ``</tr>``), never a cell, hence ``[:-1]``.
            for piece in row.split(self._TD_CLOSE)[:-1]:
                parts = self._TD_OPEN.split(piece)
                if len(parts) > 1:
                    # Text right after the opening tag is the cell content.
                    cells.append(parts[1])
            print(cells)
            self.html_arr.append(cells)

    def get_empty(self):
        """Count all cells and empty cells into ``total`` and ``empty``.

        Triggers parsing via ``get_td``. Skipped when ``self.total`` is
        already non-zero so repeated calls do not double count.
        """
        self.get_td()
        if self.total != 0:
            return
        for row in self.html_arr:
            self.total += len(row)
            self.empty += sum(1 for cell in row if cell == '')

    def change_green2white(self):
        """Overwrite green pixels of ``self.img`` in place with a light colour.

        ``self.img`` is converted with ``cv2.COLOR_BGR2HSV`` and every pixel
        inside the HSV range below is set to ``[248, 248, 255]``.
        NOTE(review): requires ``self.img`` to be an image array accepted by
        ``cv2.cvtColor``; the default empty list is not.
        """
        hsv = cv2.cvtColor(self.img, cv2.COLOR_BGR2HSV)
        lower_green = np.array([35, 43, 46])
        upper_green = np.array([77, 220, 255])
        mask_green = cv2.inRange(hsv, lower_green, upper_green)
        color = [248, 248, 255]
        self.img[mask_green != 0] = color

    def get_str(self):
        """Return the text of every parsed cell concatenated in row order."""
        return ''.join(cell for row in self.html_arr for cell in row)

    def check_html(self):
        """Decide whether recognition looks poor and, if so, recolour the image.

        Recognition is considered poor when either:
          * more than 4 cells are empty and they exceed a quarter of all
            cells, or
          * the text contains the headers '项目' (item), '每份' (per serving)
            and '营养素参考值' (nutrient reference value) but no row has 3 or
            more cells.

        Returns:
            1 if ``change_green2white`` was applied, otherwise 0.
        """
        self.get_empty()
        html_str = self.get_str()
        print(self.html)
        print(self.html_arr)
        print(self.empty)

        too_many_empty = self.empty > 4 and self.empty > self.total // 4
        has_headers = (
            '项目' in html_str
            and '每份' in html_str
            and '营养素参考值' in html_str
        )
        widest_row = max((len(row) for row in self.html_arr), default=0)

        if too_many_empty or (has_headers and widest_row < 3):
            print('识别效果不佳,改变图片颜色!')
            self.change_green2white()
            return 1
        return 0