post_hander.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. import re
  2. from typing import List
  3. from .post_decorators import combined_decorator
  4. class PostHandler:
  5. def __init__(self, predict_html):
  6. self.predict_html = predict_html
  7. self.format_lines = self._get_format_lines()
  8. @property
  9. def format_predict_html(self):
  10. if self.format_lines:
  11. header = '<html><body><table><tbody>'
  12. footer = '</tbody></table></body></html>'
  13. COLS = 3
  14. html = []
  15. for i, line in enumerate(self.format_lines):
  16. print('-=-=-=-=', line)
  17. html.append('<tr>')
  18. for j in range(COLS):
  19. col_html = []
  20. try:
  21. if i == 0 and '成分表' in line[j]:
  22. col_html.append('<td colspan="3">')
  23. col_html.append(line[j])
  24. col_html.append('</td>')
  25. html.extend(col_html)
  26. break
  27. else:
  28. col_html.append('<td>')
  29. col_html.append(line[j])
  30. col_html.append('</td>')
  31. except IndexError as e:
  32. print(e)
  33. col_html = ['<td>', '', '</td>']
  34. html.extend(col_html)
  35. html.append('</tr>')
  36. res = f'{header}{"".join(html)}{footer}'
  37. return res
  38. else:
  39. return self.predict_html
  40. @combined_decorator
  41. def _format_predict_line(self, predict_line):
  42. return predict_line
  43. def _get_format_lines(self):
  44. format_lines = []
  45. predict_lines = self._get_lines(self.predict_html)
  46. predict_words_list = [self._split_to_words(line) for line in predict_lines]
  47. for line in predict_words_list:
  48. line = self._format_predict_line(line)
  49. format_lines.append(line)
  50. return format_lines
  51. def _get_lines(self, html) -> List[str]:
  52. '''
  53. res: ['<td>项目</td><td>每100克</td><td>营养素参考值%</td>',...]
  54. '''
  55. rows = re.split('<tr>', html)
  56. res = []
  57. for row in rows:
  58. m = re.findall('<td.*>.*</td>', row)
  59. if m:
  60. res.extend(m)
  61. return res
  62. def _split_to_words(self, line):
  63. '''
  64. line: '<td>项目</td><td>每100克</td><td>营养素参考值%</td>'
  65. res: ['项目', '每100克', '营养素参考值%', '']
  66. '''
  67. res = [re.sub('<td.*>', '', word) for word in re.split('</td>', line)]
  68. return res