Przeglądaj źródła

modify check_table

chencheng 1 rok temu
rodzic
commit
e4ef67d486
1 zmieniony plik, 6 dodań i 1 usunięcie
  1. 6 1
      cores/check_table.py

+ 6 - 1
cores/check_table.py

@@ -26,8 +26,9 @@ class Table:
 
     def get_tr(self):
         # str = self.get_body()
+        str = self.html
         if len(str.split('<tr>')) > 1:
-            return str.split('<tr>')[1:-1]
+            return str.split('<tr>')[1:]
         else:
             return []
 
@@ -43,8 +44,11 @@ class Table:
             for cell in tr:
                 if '<td colspan=\\"3\\">' in cell:
                     temp_list.append(cell.split('<td colspan=\\"3\\">')[1])
+                if '<td colspan="3">' in cell:
+                    temp_list.append(cell.split('<td colspan="3">')[1])
                 if '<td>' in cell:
                     temp_list.append(cell.split('<td>')[1])
+            print(temp_list)
             self.html_arr.append(temp_list)
 
     def get_empty(self):
@@ -75,6 +79,7 @@ class Table:
     def check_html(self):
         self.get_empty()
         html_str = self.get_str()
+        print(self.html)
         print(self.html_arr)
         print(self.empty)
         if (self.empty > 4 and self.empty > self.total // 4) or ('项目' in html_str and '每份' in html_str and '营养素参考值' in html_str and np.max([len(a) for a in self.html_arr])<3):