123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- import re
- from typing import List
- from .post_decorators import *
class PostHandler:
    """Post-process a predicted HTML table into a fixed-width HTML table.

    On construction the input HTML is split into rows and cells, every row is
    passed through the ``rule1`` .. ``rule6`` decorator chain that wraps
    :meth:`_format_predict_line`, and the result is stored in
    ``format_lines``. :attr:`format_predict_html` renders those rows back to
    HTML.

    NOTE(review): the rule decorators are defined in ``post_decorators`` and
    are not visible here; this class assumes each formatted row is an
    indexable sequence of ``str`` cells -- confirm against that module.
    """

    # Number of cells rendered per row; short rows are padded, extra cells dropped.
    _COLS = 3
    # A first-row cell containing this text is rendered as a single cell
    # spanning all columns.
    _TITLE_MARKER = '成分表'
    _HEADER = '<html><body><table><tbody>'
    _FOOTER = '</tbody></table></body></html>'

    # Opening row tag, with or without attributes.
    _ROW_OPEN_RE = re.compile(r'<tr\b[^>]*>')
    # Everything from the first '<td' to the last '</td>' on one text line.
    _ROW_CELLS_RE = re.compile(r'<td.*>.*</td>')
    # One opening cell tag. ``[^>]*`` (instead of a greedy ``.*``) stops at the
    # tag's own '>' so cell text containing '>' or inline markup is preserved.
    _CELL_OPEN_RE = re.compile(r'<td[^>]*>')

    def __init__(self, predict_html):
        """Store ``predict_html`` and eagerly compute ``format_lines`` from it."""
        self.predict_html = predict_html
        self.format_lines = self._get_format_lines()

    @property
    def format_predict_html(self):
        """Return the formatted rows rendered as an HTML table string.

        Each row is rendered with exactly ``_COLS`` cells: missing cells are
        emitted as empty ``<td></td>`` and cells beyond ``_COLS`` are ignored.
        In the first row, a cell containing ``_TITLE_MARKER`` is emitted with
        ``colspan`` equal to ``_COLS`` and ends that row.

        When no rows were extracted, the original ``predict_html`` is returned
        unchanged.
        """
        if not self.format_lines:
            return self.predict_html

        parts = [self._HEADER]
        for row_idx, row in enumerate(self.format_lines):
            parts.append('<tr>')
            for col_idx in range(self._COLS):
                # A short row is an expected case: pad it silently instead of
                # printing the IndexError to stdout.
                try:
                    cell = row[col_idx]
                except IndexError:
                    cell = ''
                if row_idx == 0 and self._TITLE_MARKER in cell:
                    parts.extend((f'<td colspan="{self._COLS}">', cell, '</td>'))
                    # The title cell spans the row; remaining cells are dropped.
                    break
                parts.extend(('<td>', cell, '</td>'))
            parts.append('</tr>')
        parts.append(self._FOOTER)
        return ''.join(parts)

    # Decorators are applied bottom-up: rule1 wraps the function first and
    # rule6 ends up outermost.
    @rule6_decorator
    @rule5_decorator
    @rule4_decorator
    @rule3_decorator
    @rule2_decorator
    @rule1_decorator
    def _format_predict_line(self, predict_line):
        """Return ``predict_line`` as is; all formatting is done by the decorators."""
        return predict_line

    def _get_format_lines(self):
        """Split ``predict_html`` into rows of cell texts and format each row."""
        return [
            self._format_predict_line(self._split_to_words(line))
            for line in self._get_lines(self.predict_html)
        ]

    def _get_lines(self, html) -> List[str]:
        """Extract the cell markup of every table row in ``html``.

        The HTML is split on opening ``<tr>`` tags and, for each piece, the
        span from the first ``<td`` to the last ``</td>`` on a text line is
        collected. Pieces without cells contribute nothing.

        Example result:
            ['<td>项目</td><td>每100克</td><td>营养素参考值%</td>', ...]
        """
        lines = []
        for row in self._ROW_OPEN_RE.split(html):
            lines.extend(self._ROW_CELLS_RE.findall(row))
        return lines

    def _split_to_words(self, line) -> List[str]:
        """Split one row's cell markup into the text of each cell.

        The row is split on ``</td>`` and the opening ``<td ...>`` tag is
        removed from every piece. Because the row ends with ``</td>``, the
        result carries a trailing empty string.

        Example:
            line: '<td>项目</td><td>每100克</td><td>营养素参考值%</td>'
            result: ['项目', '每100克', '营养素参考值%', '']
        """
        return [self._CELL_OPEN_RE.sub('', piece) for piece in line.split('</td>')]
|