"""Post-processing rules for predicted nutrition-table rows.

Each ``ruleN_decorator`` wraps a callable that returns one table row as a
list of cell strings (``predict_line``) and repairs one specific kind of
mis-split or mis-recognised row in the returned value.  The rules are
collected in ``decorators`` and stacked by ``combined_decorator``.

NOTE(review): the rows look like OCR output of a nutrition-facts table;
confirm against the caller.
"""
import functools
import re

try:
    from decorator import decorator
except ImportError:
    def decorator(caller):
        """Minimal stand-in for ``decorator.decorator``.

        Turns ``caller(f, *args, **kwargs)`` into a decorator for ``f``.
        Unlike the real package it does not preserve the signature of the
        decorated function; it only forwards the call.
        """
        @functools.wraps(caller)
        def make_wrapper(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                return caller(func, *args, **kwargs)
            return wrapper
        return make_wrapper


_ITEM = '项目'
_PER_100G = '每100克'
_REFERENCE_VALUE = '营养素参考值'

# (pattern, replacement) pairs applied in order to every cell by rule 3.
_TYPO_FIXES = (
    (re.compile('患直质'), '蛋白质'),
    (re.compile('脂扇'), '脂肪'),
    (re.compile(r'勿(.*克)'), r'\1'),
    (re.compile('毫 克'), '毫克'),
    (re.compile('——精'), '——糖'),
)


def _first_empty_cell_is_second(cells):
    """Return True when the first empty-string cell of *cells* is at index 1."""
    return '' in cells and cells.index('') == 1


def _fix_typos(cell):
    """Apply every substitution in ``_TYPO_FIXES`` to one cell string."""
    for pattern, replacement in _TYPO_FIXES:
        cell = pattern.sub(replacement, cell)
    return cell


@decorator
def rule1_decorator(f, *args, **kwargs):
    """Split a header whose second cell was merged into the third one.

    Example: ``['项目 ', '', '每100克营养素参考值%', '']`` becomes
    ``['项目 ', '每100克', '营养素参考值%', '']``.

    The row returned by ``f(*args, **kwargs)`` is modified in place and
    returned.  A row that is too short is returned unchanged after the
    ``IndexError`` is printed.
    """
    predict_line = f(*args, **kwargs)
    if not _first_empty_cell_is_second(predict_line):
        return predict_line
    try:
        if _ITEM in predict_line[0] and _PER_100G in predict_line[2]:
            predict_line[1] = _PER_100G
            parts = predict_line[2].split(_PER_100G)
            # Only move the remainder when the marker occurs exactly once
            # and something follows it.
            if len(parts) == 2 and parts[1]:
                predict_line[2] = parts[1]
    except IndexError as e:
        print('rule1_decorator', e)
    return predict_line


@decorator
def rule2_decorator(f, *args, **kwargs):
    """Split a carbohydrate row whose second cell was merged into the first.

    Example: ``['碳水化合物18.2克', '', '6%', '']`` becomes
    ``['碳水化合物', '18.2克', '6%', '']``.

    The first cell is always normalised to ``'碳水化合物'`` once it contains
    ``'化合物'``; the second cell is only filled when the marker occurs
    exactly once and text follows it.  The row is modified in place.
    """
    predict_line = f(*args, **kwargs)
    if not _first_empty_cell_is_second(predict_line):
        return predict_line
    try:
        if '化合物' in predict_line[0]:
            parts = predict_line[0].split('化合物')
            predict_line[0] = '碳水化合物'
            if len(parts) == 2 and parts[1]:
                predict_line[1] = parts[1]
    except IndexError as e:
        print('rule2_decorator', e)
    return predict_line


@decorator
def rule3_decorator(f, *args, **kwargs):
    """Correct frequently mis-recognised characters in every cell.

    Substitutions performed, in order: ``'患直质'`` -> ``'蛋白质'``,
    ``'脂扇'`` -> ``'脂肪'``, a ``'勿'`` directly before text ending in
    ``'克'`` is dropped, ``'毫 克'`` -> ``'毫克'`` and ``'——精'`` -> ``'——糖'``.

    Example: ``['碳水化合物', '勿18.2克', '6%', '']`` becomes
    ``['碳水化合物', '18.2克', '6%', '']``.

    Returns a new list; the row returned by ``f`` is not mutated.
    """
    predict_line = f(*args, **kwargs)
    return [_fix_typos(cell) for cell in predict_line]


@decorator
def rule4_decorator(f, *args, **kwargs):
    """Split a header whose first cell was merged into the second one.

    Example: ``['', '项目每100克', '营养素参考值']`` becomes
    ``['项目', '每100克', '营养素参考值']``.

    The row is modified in place.  A row with fewer than two cells is
    returned unchanged after the ``IndexError`` is printed.
    """
    predict_line = f(*args, **kwargs)
    try:
        if _ITEM in predict_line[1] and predict_line[0] == '':
            predict_line[0] = _ITEM
            predict_line[1] = predict_line[1].replace(_ITEM, '')
    except IndexError as e:
        print('rule4_decorator', e)
    return predict_line


@decorator
def rule5_decorator(f, *args, **kwargs):
    """Split a header whose third cell was merged into the second one.

    Example: ``['项目 ', '每份(70g)营养素参考值%', '']`` becomes
    ``['项目 ', '每份(70g)', '营养素参考值%']``.

    The rule only fires when the second cell is longer than 7 characters
    and the third cell is empty.  The row is modified in place.
    """
    predict_line = f(*args, **kwargs)
    try:
        if (_ITEM in predict_line[0]
                and _REFERENCE_VALUE in predict_line[1]
                and len(predict_line[1]) > 7
                and predict_line[2] == ''):
            # The marker is known to be present, so the split always yields
            # at least two parts: text before it and text after it.
            parts = predict_line[1].split(_REFERENCE_VALUE)
            predict_line[2] = _REFERENCE_VALUE + parts[1]
            predict_line[1] = parts[0]
    except IndexError as e:
        print('rule5_decorator', e)
    return predict_line


@decorator
def rule6_decorator(f, *args, **kwargs):
    """Split a per-serving header whose second cell was merged into the third.

    Example: ``['项目 ', '', '每份(70g)营养素参考值%', '']`` becomes
    ``['项目 ', '每份(70g)', '营养素参考值%', '']``.

    The second cell always receives the text before the first
    ``'营养素参考值'``; the third cell is only rewritten when that marker
    occurs exactly once.  The row is modified in place.
    """
    predict_line = f(*args, **kwargs)
    if not _first_empty_cell_is_second(predict_line):
        return predict_line
    try:
        if (_ITEM in predict_line[0]
                and '每份' in predict_line[2]
                and _REFERENCE_VALUE in predict_line[2]):
            parts = predict_line[2].split(_REFERENCE_VALUE)
            predict_line[1] = parts[0]
            if len(parts) == 2:
                predict_line[2] = _REFERENCE_VALUE + parts[1]
    except IndexError as e:
        print('rule6_decorator', e)
    return predict_line


@decorator
def rule7_decorator(f, *args, **kwargs):
    """Restore the ``'项目'`` header cell when one character is missing.

    Example: ``['项', '每份(70g)', '营养素参考值%', '']`` becomes
    ``['项目', '每份(70g)', '营养素参考值%', '']``.

    Any first cell containing ``'项'`` or ``'目'`` is replaced by ``'项目'``.
    The row is modified in place; an empty row is returned unchanged after
    the ``IndexError`` is printed.
    """
    predict_line = f(*args, **kwargs)
    try:
        first_cell = predict_line[0]
        if '项' in first_cell or '目' in first_cell:
            predict_line[0] = _ITEM
    except IndexError as e:
        print('rule7_decorator', e)
    return predict_line


@decorator
def rule8_decorator(f, *args, **kwargs):
    """Rebuild a header whose three cells all ended up in the third one.

    Example: ``['', '', '项目每100克营养素参考值%', '']`` becomes
    ``['项目', '每100克', '营养素参考值%', '']``.

    The rule fires when the first two cells are empty and the third cell
    contains ``'项'`` or ``'目'``, ``'100'`` or ``'克'``, and ``'营养'``.  The
    replacement is always the fixed per-100-g header, whatever the third
    cell actually said.  The row is modified in place.
    """
    predict_line = f(*args, **kwargs)
    # The length guard makes the index accesses below safe.
    if len(predict_line) < 3:
        return predict_line
    merged = predict_line[2]
    # Both leading cells must be empty.  A substring test such as
    # ``'' in cell`` is true for every string and would not check this.
    if (predict_line[0] == ''
            and predict_line[1] == ''
            and ('项' in merged or '目' in merged)
            and ('100' in merged or '克' in merged)
            and '营养' in merged):
        predict_line[0] = _ITEM
        predict_line[1] = _PER_100G
        predict_line[2] = '营养素参考值%'
    return predict_line


# Registered rule decorators, in registration order.
decorators = []


def register_decorator(dtor):
    """Append *dtor* to the module-level ``decorators`` list."""
    decorators.append(dtor)


def combined_decorator(func):
    """Wrap *func* with every registered rule decorator.

    Decorators are applied in reverse registration order, so the first
    registered rule ends up outermost.  Because every rule post-processes
    the value returned by the callable it wraps, the last registered rule
    sees the raw result first and the first registered rule sees it last.
    """
    for dtor in reversed(decorators):
        func = dtor(func)
    return func


register_decorator(rule1_decorator)
register_decorator(rule2_decorator)
register_decorator(rule3_decorator)
register_decorator(rule4_decorator)
register_decorator(rule5_decorator)
register_decorator(rule6_decorator)
register_decorator(rule7_decorator)
register_decorator(rule8_decorator)