import logging
import re
from typing import List

from .post_decorators import combined_decorator

logger = logging.getLogger(__name__)


class PostHandler:
    """Re-assemble a predicted markup string into rows of three cells.

    The input string is split into rows and cells once, at construction time;
    ``format_predict_html`` then joins the cells back together.

    NOTE(review): several string literals and regex patterns in this class are
    empty in the source under review, and ``footer`` holds only two newlines.
    They look like markup that was lost before review -- confirm against the
    authoritative copy of this file. They are reproduced unchanged here.
    """

    def __init__(self, predict_html):
        """Store the raw input and parse it into rows of cells.

        Args:
            predict_html: The string to post-process.
        """
        self.predict_html = predict_html
        self.format_lines = self._get_format_lines()

    # Turn the two-dimensional list of cells into the desired rich-text format.
    @property
    def format_predict_html(self):
        """Render the parsed rows as a single string.

        Each row contributes up to ``COLS`` cells; a row with fewer cells is
        padded with empty ones. In the first row only, as soon as a cell
        contains '成分表' that cell is emitted and the remaining columns of the
        row are skipped.

        Returns:
            The assembled string, or ``self.predict_html`` unchanged when no
            rows were parsed.
        """
        if not self.format_lines:
            return self.predict_html

        # NOTE(review): header/footer and the row/cell wrappers below are
        # presumably opening and closing tags -- confirm the intended values.
        header = ''
        footer = '\n\n'
        COLS = 3
        html = []
        for i, line in enumerate(self.format_lines):
            logger.debug('-=-=-=-= %s', line)
            html.append('')  # row opener
            for j in range(COLS):
                # Pad short rows explicitly rather than catching IndexError.
                cell = line[j] if j < len(line) else ''
                if i == 0 and '成分表' in cell:
                    # First-row cell holding '成分表': emit it with its own
                    # wrappers and drop the rest of the row.
                    html.extend(['', cell, ''])
                    break
                # Ordinary (or padded) cell.
                html.extend(['', cell, ''])
            html.append('')  # row closer
        return f'{header}{"".join(html)}{footer}'

    # Process a single row.
    @combined_decorator
    def _format_predict_line(self, predict_line):
        """Return ``predict_line`` as given.

        The body is an identity function; any transformation comes from
        ``combined_decorator``, which is defined in ``.post_decorators`` and is
        not visible from this file.
        """
        return predict_line

    # Process every row.
    def _get_format_lines(self):
        """Split ``self.predict_html`` into rows of cells and format each row.

        Returns:
            A list with one entry per row, each being whatever
            ``_format_predict_line`` returns for that row's list of cells.
        """
        predict_lines = self._get_lines(self.predict_html)
        return [
            self._format_predict_line(self._split_to_words(line))
            for line in predict_lines
        ]

    # Extract each row.
    def _get_lines(self, html) -> List[str]:
        """Split ``html`` into row strings.

        Example return value carried over from the original docstring:
            ['项目每100克营养素参考值%', ...]

        NOTE(review): both regex patterns here are empty (split) or bare
        ``.*`` (findall) in the source under review; they presumably contained
        row/cell tags -- confirm before relying on this method.
        """
        rows = re.split('', html)
        res = []
        for row in rows:
            # findall returns a list, so extending with no matches is a no-op.
            res.extend(re.findall('.*', row))
        return res

    # Split a row into its individual cells.
    def _split_to_words(self, line):
        """Split one row string into a list of cell texts.

        Example carried over from the original docstring:
            line: '项目每100克营养素参考值%'
            res: ['项目', '每100克', '营养素参考值%', '']

        NOTE(review): the split and substitution patterns are empty in the
        source under review, so the example cannot be reproduced from the
        visible code; the delimiters were presumably cell tags -- confirm.
        """
        return [re.sub('', '', word) for word in re.split('', line)]