1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- import re
- from typing import List
- from .post_decorators import combined_decorator
class PostHandler:
    """Post-process a predicted HTML table into a fixed three-column table.

    The predicted HTML is split into rows and cells, every row is passed
    through ``_format_predict_line`` (wrapped by ``combined_decorator``), and
    ``format_predict_html`` re-renders the result as a clean HTML table.
    """

    # Number of columns every rendered row is padded or truncated to.
    _COLS = 3
    # Marker text identifying a title cell in the first row; such a cell is
    # rendered spanning all columns.
    _TITLE_MARKER = '成分表'
    _HEADER = '<html><body><table><tbody>'
    _FOOTER = '</tbody></table></body></html>'

    # Row boundary: "<tr>" with optional attributes (but not e.g. "<track>").
    _ROW_SPLIT_RE = re.compile(r'<tr(?:\s[^>]*)?>')
    # Everything from the first "<td ...>" to the last "</td>" on one line.
    _CELLS_RE = re.compile(r'<td[^>]*>.*</td>')
    # A single opening "<td ...>" tag. The character class keeps the match
    # inside the tag, so cell text containing ">" is not swallowed.
    _TD_OPEN_RE = re.compile(r'<td[^>]*>')

    def __init__(self, predict_html):
        """Store the raw predicted HTML and eagerly compute its formatted rows.

        Args:
            predict_html: HTML string holding the predicted table.
        """
        self.predict_html = predict_html
        self.format_lines = self._get_format_lines()

    @property
    def format_predict_html(self):
        """Return the re-rendered table, or the raw input if no rows were found.

        Each row is rendered with exactly ``_COLS`` cells: missing cells are
        emitted empty and extra cells are dropped. In the first row, a cell
        containing the title marker is rendered with a ``colspan`` covering
        all columns, and the remaining cells of that row are skipped.
        """
        if not self.format_lines:
            return self.predict_html

        html = []
        for row_index, line in enumerate(self.format_lines):
            html.append('<tr>')
            for col_index in range(self._COLS):
                try:
                    cell = line[col_index]
                except IndexError:
                    # Row is shorter than the table width: pad with an empty cell.
                    html.extend(('<td>', '', '</td>'))
                    continue
                if row_index == 0 and self._TITLE_MARKER in cell:
                    html.extend((f'<td colspan="{self._COLS}">', cell, '</td>'))
                    break
                html.extend(('<td>', cell, '</td>'))
            html.append('</tr>')
        return f'{self._HEADER}{"".join(html)}{self._FOOTER}'

    @combined_decorator
    def _format_predict_line(self, predict_line):
        """Return the row unchanged; any formatting is applied by the decorator.

        NOTE(review): the actual transformation lives in ``combined_decorator``
        (defined in ``post_decorators``), which is not visible here.
        """
        return predict_line

    def _get_format_lines(self):
        """Split the predicted HTML into rows of cell texts and format each row."""
        return [
            self._format_predict_line(self._split_to_words(line))
            for line in self._get_lines(self.predict_html)
        ]

    def _get_lines(self, html) -> List[str]:
        """Extract the cell markup of every table row.

        Args:
            html: HTML string; ``None`` or an empty string yields no rows.

        Returns:
            One string per matched row line, e.g.
            ``['<td>Item</td><td>Per 100 g</td><td>NRV%</td>', ...]``.
        """
        if not html:
            return []
        res = []
        for row in self._ROW_SPLIT_RE.split(html):
            # "." does not cross newlines, so a row spread over several lines
            # contributes one entry per line that contains cells.
            res.extend(self._CELLS_RE.findall(row))
        return res

    def _split_to_words(self, line: str) -> List[str]:
        """Split one row's cell markup into the text of each cell.

        Args:
            line: e.g. ``'<td>Item</td><td>Per 100 g</td><td>NRV%</td>'``.

        Returns:
            The cell contents with the opening ``<td ...>`` tags removed, e.g.
            ``['Item', 'Per 100 g', 'NRV%', '']``. The trailing empty string
            comes from the text after the last ``</td>``.
        """
        return [self._TD_OPEN_RE.sub('', word) for word in line.split('</td>')]
|