|
@@ -23,11 +23,12 @@ class Table:
|
|
|
# print(r)
|
|
|
# return res
|
|
|
|
|
|
+
|
|
|
def get_tr(self):
    """Return the pieces of ``self.html`` that follow each ``'<tr>'`` marker.

    The text in front of the first ``'<tr>'`` is discarded, so every
    returned element starts right after a ``'<tr>'`` occurrence.

    Returns:
        A list of strings; empty when ``self.html`` contains no ``'<tr>'``.
    """
    # NOTE: a commented-out earlier variant split self.get_body() instead
    # of self.html.
    # Slicing from index 1 already yields [] when there is no '<tr>' (the
    # split then has a single element), so no separate length check is
    # needed and the builtin name `str` is no longer shadowed.
    return self.html.split('<tr>')[1:]
|
|
|
|
|
@@ -43,8 +44,11 @@ class Table:
|
|
|
for cell in tr:
|
|
|
if '<td colspan=\\"3\\">' in cell:
|
|
|
temp_list.append(cell.split('<td colspan=\\"3\\">')[1])
|
|
|
+ if '<td colspan="3">' in cell:
|
|
|
+ temp_list.append(cell.split('<td colspan="3">')[1])
|
|
|
if '<td>' in cell:
|
|
|
temp_list.append(cell.split('<td>')[1])
|
|
|
+ print(temp_list)
|
|
|
self.html_arr.append(temp_list)
|
|
|
|
|
|
def get_empty(self):
|
|
@@ -75,10 +79,12 @@ class Table:
|
|
|
def check_html(self):
    """Decide whether the recognised table looks poor and, if so, recolour.

    Calls ``self.get_empty()`` and ``self.get_str()`` first (presumably the
    former refreshes ``self.empty`` / ``self.total`` -- confirm against
    their definitions), prints ``self.html``, ``self.html_arr`` and
    ``self.empty`` for debugging, then applies two checks:

    * ``self.empty`` is greater than 4 and greater than a quarter of
      ``self.total`` (integer division), or
    * the string from ``self.get_str()`` contains all three header
      keywords while no row of ``self.html_arr`` has 3 or more entries.

    Returns:
        1 after calling ``self.change_green2white()`` when either check
        fires, otherwise 0.
    """
    self.get_empty()
    html_str = self.get_str()

    print(self.html)
    print(self.html_arr)
    print(self.empty)

    too_many_empty = self.empty > 4 and self.empty > self.total // 4
    header_keywords = ('项目', '每份', '营养素参考值')

    # `or` / `and` short-circuit, so the row widths are inspected only when
    # the empty-count check failed and every header keyword is present.
    # default=0 makes an empty html_arr count as "too narrow" instead of
    # raising ValueError, which a max over an empty sequence would do.
    poor_recognition = too_many_empty or (
        all(keyword in html_str for keyword in header_keywords)
        and max((len(row) for row in self.html_arr), default=0) < 3
    )

    if poor_recognition:
        print('识别效果不佳,改变图片颜色!')
        self.change_green2white()
        return 1
    return 0
|