import re
from typing import List
from .post_decorators import combined_decorator
class PostHandler:
def __init__(self, predict_html):
self.predict_html = predict_html
self.format_lines = self._get_format_lines()
# Convert the 2-D list in self.format_lines into the desired rich-text string.
# Returns self.predict_html unchanged when self.format_lines is empty/falsy;
# otherwise returns header + the joined row/cell fragments + footer.
# NOTE(review): this copy of the method has lost its indentation, and several
# string literals look truncated (two literals break across a line and many
# are empty). Presumably markup was stripped from them -- confirm against the
# original file before relying on the exact output described here.
@property
def format_predict_html(self):
if self.format_lines:
# NOTE(review): as written, this literal holds nothing but a line break.
header = '
'
# Number of cells emitted per row; each row is indexed at positions 0..COLS-1.
COLS = 3
# Flat list of string fragments, joined once at the end.
html = []
for i, line in enumerate(self.format_lines):
# Prints every row to stdout (looks like leftover debugging output).
print('-=-=-=-=', line)
# Row-opening fragment (empty in this copy).
html.append('')
for j in range(COLS):
col_html = []
try:
# First row only: a cell containing '成分表' ("composition table") is
# emitted immediately and `break` skips that row's remaining columns.
if i == 0 and '成分表' in line[j]:
col_html.append('')
col_html.append(line[j])
col_html.append(' | ')
html.extend(col_html)
break
else:
# Ordinary cell: opening fragment, cell text, closing fragment.
col_html.append('')
col_html.append(line[j])
col_html.append(' | ')
except IndexError as e:
# The row has fewer than j + 1 cells: print the error and substitute
# an empty cell.
print(e)
col_html = ['', '', ' | ']
# NOTE(review): presumably this runs after the try/except for every column
# that did not `break`; the original nesting level is lost -- confirm.
html.extend(col_html)
# NOTE(review): presumably the row-closing fragment, appended once per row.
html.append('
')
# NOTE(review): `footer` is not assigned anywhere in the visible file; as
# written this line would raise NameError. Its definition was presumably lost
# together with the stripped literals -- confirm.
res = f'{header}{"".join(html)}{footer}'
return res
else:
# Nothing was parsed: fall back to the raw input.
return self.predict_html
# Per-row formatting hook.
@combined_decorator
def _format_predict_line(self, predict_line):
    '''Return ``predict_line`` exactly as received.

    The body itself is an identity function. Any actual reformatting of the
    row is presumably performed by ``combined_decorator`` (imported from
    ``.post_decorators``, not visible here -- confirm there).
    '''
    unchanged = predict_line
    return unchanged
# 对每一行进行处理
def _get_format_lines(self):
format_lines = []
predict_lines = self._get_lines(self.predict_html)
predict_words_list = [self._split_to_words(line) for line in predict_lines]
for line in predict_words_list:
line = self._format_predict_line(line)
format_lines.append(line)
return format_lines
# 获取每一行
def _get_lines(self, html) -> List[str]:
'''
res: ['项目 | 每100克 | 营养素参考值% | ',...]
'''
rows = re.split('', html)
res = []
for row in rows:
m = re.findall('.*', row)
if m:
res.extend(m)
return res
# 切分每一个格子
def _split_to_words(self, line):
'''
line: '项目 | 每100克 | 营养素参考值% | '
res: ['项目', '每100克', '营养素参考值%', '']
'''
res = [re.sub('', '', word) for word in re.split('', line)]
return res