"""Extract a bank-card number from line-grouped OCR results."""

import logging
import re
import string
from collections import defaultdict
from dataclasses import dataclass
from typing import List

import cpca
import numpy as np

from core.line_parser import OcrResult

logger = logging.getLogger(__name__)

# A run of 15 to 20 consecutive digits is treated as a card-number candidate.
# Raw string: '\d' in a plain string literal is an invalid escape sequence.
_CARD_NUMBER_RE = re.compile(r'\d{15,20}')

# One-pass clean-up table: delete ' ', '-' and '/', and turn 'L' / 'l' into '1'.
_DIGIT_FIXES = str.maketrans({' ': None, '-': None, '/': None, 'L': '1', 'l': '1'})


class CardNumberNotFoundError(Exception):
    """Raised when no row yields a sufficiently confident card number.

    Subclasses ``Exception`` so callers that already catch ``Exception``
    keep working.
    """


@dataclass
class RecItem:
    """A recognised text value together with its confidence score."""

    text: str = ''
    confidence: float = 0.

    def to_dict(self):
        """Return the item as a dict with keys ``text`` and ``confidence``.

        The confidence is passed through ``np.nan_to_num``, so a NaN value is
        reported as ``0.0``.
        """
        return {"text": self.text, "confidence": np.nan_to_num(self.confidence)}


def find_card_row(line_results, min_item_conf=0.93, min_row_conf=0.95):
    """Find the first row whose text contains a 15-20 digit number.

    Args:
        line_results: Iterable of rows; each row is an iterable of OCR items
            exposing ``txt`` (string) and ``conf`` (number) attributes.
        min_item_conf: Items whose ``conf`` is not strictly greater than this
            are discarded before a row is examined.
        min_row_conf: The mean ``conf`` of the items kept in a row must be
            strictly greater than this for the row to be accepted.

    Returns:
        A tuple ``(row, number, conf)``: the kept items of the matching row,
        the first 15-20 digit run found in their concatenated text, and the
        mean confidence of those items.

    Raises:
        CardNumberNotFoundError: If no row satisfies both conditions.
    """
    for row in line_results:
        kept = [item for item in row if item.conf > min_item_conf]
        if not kept:
            # Rows with no confident item are skipped entirely.
            continue

        # Spaces inside each fragment are dropped so digit groups printed
        # with gaps can still form one contiguous run.
        joined = ''.join(item.txt.replace(' ', '') for item in kept)
        row_conf = np.mean([item.conf for item in kept])
        match = _CARD_NUMBER_RE.search(joined)

        # The digits themselves are deliberately left out of the log output.
        logger.debug(
            'row checked: candidate found=%s, mean conf=%s',
            match is not None,
            row_conf,
        )
        if match and row_conf > min_row_conf:
            return kept, match.group(), row_conf

    raise CardNumberNotFoundError('无法识别')


def handle_wrong_digits(s):
    """Clean up a recognised number string.

    Steps, in order:
      1. Remove every space, ``-`` and ``/``; replace ``L`` and ``l`` by ``1``.
      2. If the result starts with ``1``, drop that first character.
      3. If the result (still) starts with ``1``, replace that character
         with ``6``.

    NOTE(review): steps 2 and 3 look like corrections for typical OCR
    misreads at the start of a card number -- confirm the intended rule.

    Args:
        s: The raw number string.

    Returns:
        The cleaned string.
    """
    s = s.translate(_DIGIT_FIXES)
    if s.startswith('1'):
        s = s[1:]
    if s.startswith('1'):
        s = '6' + s[1:]
    return s


class Parser(object):
    """Collect recognised fields from line-grouped OCR results.

    Results are stored in ``self.res``, a ``defaultdict`` of ``RecItem``;
    the ``"number"`` entry is pre-populated with an empty ``RecItem``.
    """

    def __init__(self, line_results: List[List[OcrResult]]):
        """Store the OCR rows and initialise the result mapping."""
        self.line_results = line_results
        self.res = defaultdict(RecItem)
        self.res["number"] = RecItem()

    def bank_no(self):
        """Locate the card number and store it under ``res['number']``.

        Raises:
            CardNumberNotFoundError: Propagated from ``find_card_row`` when
                no row qualifies.
        """
        # Card number
        _row, digits, conf = find_card_row(self.line_results)
        self.res['number'] = RecItem(handle_wrong_digits(digits), conf)

    def parse(self):
        """Run all field extractors and return the result mapping."""
        self.bank_no()
        return self.res