post_hander.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import re
  2. from typing import List
  3. from .post_decorators import *
  4. class PostHandler:
  5. def __init__(self, predict_html):
  6. self.predict_html = predict_html
  7. self.format_lines = self._get_format_lines()
  8. @property
  9. def format_predict_html(self):
  10. if self.format_lines:
  11. header = '<html><body><table><tbody>'
  12. footer = '</tbody></table></body></html>'
  13. COLS = 3
  14. html = []
  15. for i, line in enumerate(self.format_lines):
  16. html.append('<tr>')
  17. for j in range(COLS):
  18. try:
  19. if i == 0 and '成分表' in line[j]:
  20. html.append('<td colspan="3">')
  21. html.append(line[j])
  22. html.append('</td>')
  23. break
  24. else:
  25. html.append('<td>')
  26. html.append(line[j])
  27. html.append('</td>')
  28. except IndexError as e:
  29. print(e)
  30. html.append('<td>')
  31. html.append('')
  32. html.append('</td>')
  33. continue
  34. html.append('</tr>')
  35. res = f'{header}{"".join(html)}{footer}'
  36. return res
  37. else:
  38. return self.predict_html
  39. @rule6_decorator
  40. @rule5_decorator
  41. @rule4_decorator
  42. @rule3_decorator
  43. @rule2_decorator
  44. @rule1_decorator
  45. def _format_predict_line(self, predict_line):
  46. return predict_line
  47. def _get_format_lines(self):
  48. format_lines = []
  49. predict_lines = self._get_lines(self.predict_html)
  50. predict_words_list = [self._split_to_words(line) for line in predict_lines]
  51. for line in predict_words_list:
  52. line = self._format_predict_line(line)
  53. format_lines.append(line)
  54. return format_lines
  55. def _get_lines(self, html) -> List[str]:
  56. '''
  57. res: ['<td>项目</td><td>每100克</td><td>营养素参考值%</td>',...]
  58. '''
  59. rows = re.split('<tr>', html)
  60. res = []
  61. for row in rows:
  62. m = re.findall('<td.*>.*</td>', row)
  63. if m:
  64. res.extend(m)
  65. return res
  66. def _split_to_words(self, line):
  67. '''
  68. line: '<td>项目</td><td>每100克</td><td>营养素参考值%</td>'
  69. res: ['项目', '每100克', '营养素参考值%', '']
  70. '''
  71. res = [re.sub('<td.*>', '', word) for word in re.split('</td>', line)]
  72. return res