|
@@ -10,21 +10,22 @@ class Table:
|
|
|
self.total = 0
|
|
|
self.empty = 0
|
|
|
|
|
|
- def get_body(self):
|
|
|
- try:
|
|
|
- res = self.html.split('<tbody>')[1]
|
|
|
- except Exception as r:
|
|
|
- print('<tbody> 识别失败')
|
|
|
- print(r)
|
|
|
- try:
|
|
|
- res = res.split('</tbody>')[0]
|
|
|
- except Exception as r:
|
|
|
- print('</tbody> 识别失败')
|
|
|
- print(r)
|
|
|
- return res
|
|
|
+ # def get_body(self):
|
|
|
+ # try:
|
|
|
+ # res = self.html.split('<tbody>')[1]
|
|
|
+ # except Exception as r:
|
|
|
+ # print('<tbody> 识别失败')
|
|
|
+ # print(r)
|
|
|
+ # try:
|
|
|
+ # res = res.split('</tbody>')[0]
|
|
|
+ # except Exception as r:
|
|
|
+ # print('</tbody> 识别失败')
|
|
|
+ # print(r)
|
|
|
+ # return res
|
|
|
|
|
|
def get_tr(self):
|
|
|
- str = self.get_body()
|
|
|
+ # str = self.get_body()
|
|
|
+ str = self.html
|
|
|
if len(str.split('<tr>')) > 1:
|
|
|
return str.split('<tr>')
|
|
|
else:
|
|
@@ -74,6 +75,9 @@ class Table:
|
|
|
def check_html(self):
|
|
|
self.get_empty()
|
|
|
html_str = self.get_str()
|
|
|
+ print(self.html_arr)
|
|
|
+ print(self.html)
|
|
|
+ print(self.empty)
|
|
|
if (self.empty > 4 and self.empty > self.total // 4) or ('项目' in html_str and '每份' in html_str and '营养素参考值' in html_str and np.max([len(a) for a in self.html_arr])<3):
|
|
|
self.change_green2white()
|
|
|
return 1
|