1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import re
- import string
- from dataclasses import dataclass
- from collections import defaultdict
- import numpy as np
- import cpca
- from typing import List
- from core.line_parser import OcrResult
@dataclass
class RecItem:
    """A single recognised field: its text plus the recognition confidence."""

    text: str = ''
    confidence: float = 0.0

    def to_dict(self):
        """Return the item as a plain ``{"text", "confidence"}`` mapping.

        The confidence is passed through ``np.nan_to_num`` so that a NaN
        score is reported as ``0.0`` instead of leaking into the output.
        """
        safe_confidence = np.nan_to_num(self.confidence)
        return dict(text=self.text, confidence=safe_confidence)
def find_card_row(line_results, *, min_item_conf=0.93, min_row_conf=0.95):
    """Find the first OCR row that contains a plausible card number.

    Each row is an iterable of OCR items exposing ``.txt`` and ``.conf``.
    Items whose confidence is not above ``min_item_conf`` are discarded, and
    rows left empty by that filter are dropped. The remaining rows are
    scanned in order: the item texts (with spaces removed) are concatenated
    and searched for a run of 15 to 20 digits.

    Args:
        line_results: Iterable of rows of OCR items.
        min_item_conf: An item is kept only if its confidence is strictly
            greater than this value.
        min_row_conf: A row is accepted only if the mean confidence of its
            kept items is strictly greater than this value.

    Returns:
        A tuple ``(row, number, conf)`` where ``row`` is the list of kept
        items, ``number`` is the first 15-20 digit match in that row and
        ``conf`` is the mean confidence of the kept items.

    Raises:
        ValueError: If no row yields a digit run with sufficient confidence.
            ``ValueError`` is a subclass of ``Exception``, so callers that
            catch ``Exception`` keep working.

    Note:
        The function writes debug traces of its input and intermediate
        results to stdout.
    """
    print('~~~~line results~~~~~')
    for row in line_results:
        print('++++')
        print(row)
    print('~~~~line results~~~~~')

    # Keep only confidently recognised items; skip rows that end up empty so
    # the mean below is never taken over an empty list.
    confident_rows = []
    for row in line_results:
        kept = [r for r in row if r.conf > min_item_conf]
        if kept:
            confident_rows.append(kept)

    print('~~~~new line results~~~~~')
    for row in confident_rows:
        print('++++')
        print(row)
    print('~~~~new line results~~~~~')

    # Raw string: '\d' in a normal string literal is an invalid escape.
    card_number_re = re.compile(r'\d{15,20}')
    for row in confident_rows:
        txt = ''.join(r.txt.replace(' ', '') for r in row)
        conf = np.mean([r.conf for r in row])
        res = card_number_re.findall(txt)
        print(f'res: {res}, conf: {conf}')
        if res and conf > min_row_conf:
            return row, res[0], conf
    raise ValueError('无法识别')
def handle_wrong_digits(s):
    """Normalise a recognised number string and patch its leading digits.

    Spaces, ``-`` and ``/`` are removed and the letters ``L``/``l`` are
    rewritten as the digit ``1``. Then the prefix is adjusted:

    * a leading ``11`` is replaced by ``6``;
    * otherwise a single leading ``1`` is dropped;
    * any other string is returned as cleaned.
    """
    # One pass over the string: delete separators, map L/l to '1'.
    cleanup_table = str.maketrans({' ': None, '-': None, '/': None,
                                   'L': '1', 'l': '1'})
    cleaned = s.translate(cleanup_table)

    if cleaned.startswith('11'):
        return '6' + cleaned[2:]
    if cleaned.startswith('1'):
        return cleaned[1:]
    return cleaned
class Parser:
    """Extract the card number from grouped OCR line results."""

    def __init__(self, line_results: List[List[OcrResult]]):
        """Store the OCR rows and prepare the result mapping.

        ``res`` is a ``defaultdict`` of ``RecItem`` with the ``"number"``
        entry pre-populated with an empty item.
        """
        self.line_results = line_results
        self.res = defaultdict(RecItem)
        self.res["number"] = RecItem()

    def bank_no(self):
        """Locate the card number row and store the corrected number.

        Whatever ``find_card_row`` raises propagates to the caller.
        """
        # card number
        row, txt, conf = find_card_row(self.line_results)
        print(f'=== txt: {txt}, res: {row}======')
        corrected = handle_wrong_digits(txt)
        self.res['number'] = RecItem(corrected, conf)

    def parse(self):
        """Run the extraction and return the result mapping."""
        self.bank_no()
        return self.res
|