|
@@ -4,9 +4,10 @@ import re
|
|
|
|
|
|
@decorator
|
|
|
def rule1_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
- predict_line = ['项目 ', '', '每100克营养素参考值%', '']
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理表头第二格合并至第三格的情况
|
|
|
+ predict_line = ['项目 ', '', '每100克营养素参考值%', '']
|
|
|
+ """
|
|
|
predict_line = args[1]
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
idx = 0
|
|
@@ -26,9 +27,10 @@ def rule1_decorator(f, *args, **kwargs):
|
|
|
|
|
|
@decorator
|
|
|
def rule2_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
- predict_line = ['碳水化合物18.2克', '', '6%', '']
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理碳水化合物这一行,第二格合并至第一格的问题
|
|
|
+ predict_line = ['碳水化合物18.2克', '', '6%', '']
|
|
|
+ """
|
|
|
predict_line = args[1]
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
idx = 0
|
|
@@ -48,13 +50,14 @@ def rule2_decorator(f, *args, **kwargs):
|
|
|
|
|
|
@decorator
|
|
|
def rule3_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
- ['患直质', '1.6克', '3%', '']
|
|
|
- ['脂扇', '1.1', '19%', '']
|
|
|
- ['碳水化合物', '勿18.2克', '6%', '']
|
|
|
- ['能量.', '408千焦', '5%']
|
|
|
- ['——精', '2.9克']
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理易错字
|
|
|
+ ['患直质', '1.6克', '3%', '']
|
|
|
+ ['脂扇', '1.1', '19%', '']
|
|
|
+ ['碳水化合物', '勿18.2克', '6%', '']
|
|
|
+ ['能量.', '408千焦', '5%']
|
|
|
+ ['——精', '2.9克']
|
|
|
+ """
|
|
|
predict_line = args[1]
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
predict_line = [re.sub('患直质', '蛋白质', s) for s in predict_line]
|
|
@@ -67,9 +70,10 @@ def rule3_decorator(f, *args, **kwargs):
|
|
|
|
|
|
@decorator
|
|
|
def rule4_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
- ['', '项目每一百克', '营养素参考值']
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理表头第一格合并至第二格的问题
|
|
|
+ ['', '项目每100克', '营养素参考值']
|
|
|
+ """
|
|
|
predict_line = args[1]
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
try:
|
|
@@ -83,9 +87,10 @@ def rule4_decorator(f, *args, **kwargs):
|
|
|
|
|
|
@decorator
|
|
|
def rule5_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理表头第三格合并至第二格的问题
|
|
|
predict_line = ['项目 ', '每份(70g)营养素参考值%', '']
|
|
|
- '''
|
|
|
+ """
|
|
|
predict_line = args[1]
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
try:
|
|
@@ -102,9 +107,10 @@ def rule5_decorator(f, *args, **kwargs):
|
|
|
|
|
|
@decorator
|
|
|
def rule6_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
- predict_line = ['项目 ', '', '每份(70g)营养素参考值%', '']
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理表头第二格合并至第三格的问题
|
|
|
+ predict_line = ['项目 ', '', '每份(70g)营养素参考值%', '']
|
|
|
+ """
|
|
|
predict_line = args[1]
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
idx = 0
|
|
@@ -123,22 +129,24 @@ def rule6_decorator(f, *args, **kwargs):
|
|
|
|
|
|
@decorator
|
|
|
def rule7_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
- predict_line = ['项目 ', '', '每份(70g)营养素参考值%', '']
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理项目缺一个字未识别出的问题
|
|
|
+ predict_line = ['项', '每份(70g)', '营养素参考值%', '']
|
|
|
+ """
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
try:
|
|
|
if '项目' in predict_line[0] or '项' in predict_line[0] or '目' in predict_line[0]:
|
|
|
predict_line[0] = '项目'
|
|
|
except IndexError as e:
|
|
|
- print('rule6_decorator', e)
|
|
|
+ print('rule7_decorator', e)
|
|
|
return predict_line
|
|
|
|
|
|
@decorator
|
|
|
def rule8_decorator(f, *args, **kwargs):
|
|
|
- '''
|
|
|
- predict_line = ['项目 ', '', '每份(70g)营养素参考值%', '']
|
|
|
- '''
|
|
|
+ """
|
|
|
+ 处理表头数据集中在第三格的问题
|
|
|
+ predict_line = ['', '', '项目每份(70g)营养素参考值%', '']
|
|
|
+ """
|
|
|
predict_line = f(*args, **kwargs)
|
|
|
try:
|
|
|
if len(predict_line) >= 3 \
|
|
@@ -151,7 +159,7 @@ def rule8_decorator(f, *args, **kwargs):
|
|
|
predict_line[1] = '每100克'
|
|
|
predict_line[2] = '营养素参考值%'
|
|
|
except IndexError as e:
|
|
|
- print('rule6_decorator', e)
|
|
|
+ print('rule8_decorator', e)
|
|
|
return predict_line
|
|
|
|
|
|
|