import re

from decorator import decorator

# Substitutions applied by rule3_decorator, in this order, to every cell.
_RULE3_SUBSTITUTIONS = (
    (re.compile('患直质'), '蛋白质'),
    (re.compile('脂扇'), '脂肪'),
    # Drop a stray '勿' that precedes a value ending in '克'.
    (re.compile('勿(.*克)'), r'\1'),
    (re.compile('毫 克'), '毫克'),
)


def _first_empty_index(cells):
    """Return the index of the first ``''`` cell in *cells*, or -1 if none."""
    return cells.index('') if '' in cells else -1


@decorator
def rule1_decorator(f, *args, **kwargs):
    """Split a '每100克' header that was merged into the third cell.

    Calls the wrapped function and post-processes the row it returns.
    When the first empty cell is at index 1, cell 0 contains '项目' and
    cell 2 contains '每100克', cell 1 becomes '每100克' and cell 2 keeps only
    the text after it (if any).

    Example:
        ['项目 ', '', '每100克营养素参考值%', '']
        -> ['项目 ', '每100克', '营养素参考值%', '']

    Returns:
        The (possibly modified, in place) row returned by ``f``.
    """
    predict_line = f(*args, **kwargs)
    if _first_empty_index(predict_line) != 1:
        return predict_line
    try:
        if '项目' in predict_line[0] and '每100克' in predict_line[2]:
            predict_line[1] = '每100克'
            pieces = predict_line[2].split('每100克')
            # Only rewrite cell 2 when there is exactly one marker and
            # something follows it; otherwise leave the cell untouched.
            if len(pieces) == 2 and pieces[1]:
                predict_line[2] = pieces[1]
    except IndexError as e:
        print('rule1_decorator', e)
    return predict_line


@decorator
def rule2_decorator(f, *args, **kwargs):
    """Separate a value that was glued onto the '碳水化合物' label.

    Calls the wrapped function and post-processes the row it returns.
    When the first empty cell is at index 1 and cell 0 contains '化合物',
    cell 0 is normalised to '碳水化合物' and the text after '化合物' (if any)
    is moved to cell 1.

    A row without any empty cell is returned unchanged.

    Example:
        ['碳水化合物18.2克', '', '6%', '']
        -> ['碳水化合物', '18.2克', '6%', '']

    Returns:
        The (possibly modified, in place) row returned by ``f``.
    """
    predict_line = f(*args, **kwargs)
    # Guarded lookup: a bare ``.index('')`` raises ValueError when the row
    # has no empty cell, which the IndexError handler below would not catch.
    if _first_empty_index(predict_line) != 1:
        return predict_line
    try:
        if '化合物' in predict_line[0]:
            pieces = predict_line[0].split('化合物')
            predict_line[0] = '碳水化合物'
            if len(pieces) == 2 and pieces[1]:
                predict_line[1] = pieces[1]
    except IndexError as e:
        print('rule2_decorator', e)
    return predict_line


@decorator
def rule3_decorator(f, *args, **kwargs):
    """Replace a fixed set of known-bad strings in every cell.

    Calls the wrapped function and returns a new list in which each cell
    has had these substitutions applied, in order: '患直质' -> '蛋白质',
    '脂扇' -> '脂肪', a '勿' directly before text ending in '克' removed,
    and '毫 克' -> '毫克'.

    Examples of rows this targets:
        ['患直质', '1.6克', '3%', '']
        ['脂扇', '1.1', '19%', '']
        ['碳水化合物', '勿18.2克', '6%', '']

    Returns:
        A new list of corrected cells (the original row is not mutated).
    """
    predict_line = f(*args, **kwargs)
    fixed = []
    for cell in predict_line:
        for pattern, replacement in _RULE3_SUBSTITUTIONS:
            cell = pattern.sub(replacement, cell)
        fixed.append(cell)
    return fixed


@decorator
def rule4_decorator(f, *args, **kwargs):
    """Move a '项目' prefix from the second cell into an empty first cell.

    Calls the wrapped function and post-processes the row it returns.
    When cell 0 is empty and cell 1 contains '项目', cell 0 becomes '项目'
    and every occurrence of '项目' is removed from cell 1.

    Example:
        ['', '项目每一百克', '营养素参考值']
        -> ['项目', '每一百克', '营养素参考值']

    Returns:
        The (possibly modified, in place) row returned by ``f``.
    """
    predict_line = f(*args, **kwargs)
    try:
        if '项目' in predict_line[1] and predict_line[0] == '':
            predict_line[0] = '项目'
            predict_line[1] = predict_line[1].replace('项目', '')
    except IndexError as e:
        # Rows shorter than two cells are left as they are.
        print('rule4_decorator', e)
    return predict_line


@decorator
def rule5_decorator(f, *args, **kwargs):
    """Split '营养素参考值' out of the second cell into an empty third cell.

    Calls the wrapped function and post-processes the row it returns.
    Applies when cell 0 contains '项目', cell 2 is empty, and cell 1
    contains '营养素参考值' and is longer than 7 characters (the 6-character
    label plus at least two more). Cell 1 keeps the text before the label;
    cell 2 becomes the label followed by the text between the first and
    second occurrence of the label (normally: everything after it).

    Example:
        ['项目', '每100克营养素参考值%', '']
        -> ['项目', '每100克', '营养素参考值%']

    Returns:
        The (possibly modified, in place) row returned by ``f``.
    """
    predict_line = f(*args, **kwargs)
    try:
        if ('项目' in predict_line[0]
                and '营养素参考值' in predict_line[1]
                and len(predict_line[1]) > 7
                and predict_line[2] == ''):
            # The label is known to be present, so the split always yields
            # at least two pieces.
            pieces = predict_line[1].split('营养素参考值')
            predict_line[2] = '营养素参考值' + pieces[1]
            predict_line[1] = pieces[0]
    except IndexError as e:
        print('rule5_decorator', e)
    return predict_line


@decorator
def rule6_decorator(f, *args, **kwargs):
    """Split a '每份…' header that was merged with '营养素参考值'.

    Calls the wrapped function and post-processes the row it returns.
    When the first empty cell is at index 1, cell 0 contains '项目' and
    cell 2 contains both '每份' and '营养素参考值', cell 1 receives the text
    before '营养素参考值'. If the label occurs exactly once, cell 2 is
    reduced to the label plus whatever followed it.

    Example:
        ['项目 ', '', '每份(70g)营养素参考值%', '']
        -> ['项目 ', '每份(70g)', '营养素参考值%', '']

    Returns:
        The (possibly modified, in place) row returned by ``f``.
    """
    predict_line = f(*args, **kwargs)
    if _first_empty_index(predict_line) != 1:
        return predict_line
    try:
        if ('项目' in predict_line[0]
                and '每份' in predict_line[2]
                and '营养素参考值' in predict_line[2]):
            pieces = predict_line[2].split('营养素参考值')
            predict_line[1] = pieces[0]
            if len(pieces) == 2:
                predict_line[2] = '营养素参考值' + pieces[1]
    except IndexError as e:
        print('rule6_decorator', e)
    return predict_line