import re
from typing import List

from .post_decorators import combined_decorator


class PostHandler:
    """Rebuild a predicted table string from its parsed rows.

    The raw prediction is split into rows and cells once, at construction
    time, and every row is passed through ``_format_predict_line`` (which is
    wrapped by the project's ``combined_decorator``).

    NOTE(review): every markup literal and regex pattern in this class is an
    empty string in the reviewed text, although the docstring examples show
    rows being split into separate cells. Presumably the HTML tags were lost
    before review -- confirm against the canonical file before merging. The
    literals are reproduced here exactly as seen.
    """

    def __init__(self, predict_html: str):
        """Store the raw prediction and parse it into formatted rows.

        Args:
            predict_html: Raw predicted string to post-process.
        """
        self.predict_html = predict_html
        self.format_lines = self._get_format_lines()

    @property
    def format_predict_html(self) -> str:
        """Return the rebuilt table string.

        Each parsed row contributes at most ``COLS`` cells. In the first row,
        a cell containing '成分表' is emitted and ends that row early. Rows
        with fewer than ``COLS`` cells are padded with empty placeholders.

        Returns:
            The rebuilt string, or ``self.predict_html`` unchanged when no
            rows were parsed.
        """
        if not self.format_lines:
            return self.predict_html

        header = ''
        # NOTE(review): the reviewed text has a raw line break inside this
        # literal (invalid in a single-quoted string); written as an escape.
        footer = '\n'
        COLS = 3
        html = []
        for i, line in enumerate(self.format_lines):
            html.append('')  # row-opening piece
            for j in range(COLS):
                # Look the cell up before emitting anything, so a short row
                # yields one clean placeholder instead of a half-written cell
                # followed by a placeholder.
                try:
                    cell = line[j]
                except IndexError:
                    # Expected for short rows: pad the missing column.
                    html.append('')
                    html.append('')
                    html.append('')
                    continue
                if i == 0 and '成分表' in cell:
                    # Title-like cell in the first row: emit it and stop
                    # filling the remaining columns of this row.
                    html.append('')
                    html.append(cell)
                    html.append('')
                    break
                html.append('')
                html.append(cell)
                html.append('')
            html.append('')  # row-closing piece
        return f'{header}{"".join(html)}{footer}'

    @combined_decorator
    def _format_predict_line(self, predict_line):
        """Return ``predict_line`` as given; ``combined_decorator`` wraps this.

        NOTE(review): ``combined_decorator`` lives in ``post_decorators`` and
        is not visible here; presumably it performs the actual per-row
        formatting -- confirm.
        """
        return predict_line

    def _get_format_lines(self):
        """Split the raw prediction into rows of cells and format each row.

        Returns:
            A list with one formatted entry per parsed row.
        """
        predict_lines = self._get_lines(self.predict_html)
        # Split every row first, then format, keeping the original call
        # order in case the decorator is order-sensitive.
        predict_words_list = [
            self._split_to_words(line) for line in predict_lines
        ]
        return [self._format_predict_line(words) for words in predict_words_list]

    def _get_lines(self, html) -> List[str]:
        """Collect the row fragments found in ``html``.

        Example result (from the original docstring):
            ['项目每100克营养素参考值%',...]

        NOTE(review): both patterns below are as seen in the reviewed text;
        the split pattern is empty -- confirm the intended row delimiter.
        """
        res = []
        for row in re.split('', html):
            # findall returns [] when nothing matches, so no guard is needed.
            res.extend(re.findall('.*', row))
        return res

    def _split_to_words(self, line):
        """Split one row fragment into its cell texts.

        Example (from the original docstring):
            line: '项目每100克营养素参考值%'
            res:  ['项目', '每100克', '营养素参考值%', '']

        NOTE(review): the split and substitution patterns are empty in the
        reviewed text -- confirm the intended cell delimiters.
        """
        return [re.sub('', '', word) for word in re.split('', line)]