parser.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. import re
  2. import string
  3. from dataclasses import dataclass
  4. from collections import defaultdict
  5. import numpy as np
  6. import cpca
  7. from typing import List
  8. from core.line_parser import OcrResult
  9. @dataclass
  10. class RecItem:
  11. text: str = ''
  12. confidence: float = 0.
  13. def to_dict(self):
  14. return {"text": self.text, "confidence": np.nan_to_num(self.confidence)}
  15. def find_card_row(line_results):
  16. print('~~~~line results~~~~~')
  17. for row in line_results:
  18. print('++++')
  19. print(row)
  20. print('~~~~line results~~~~~')
  21. new_lines = []
  22. for row in line_results:
  23. new_line = []
  24. for r in row:
  25. if r.conf > 0.93:
  26. new_line.append(r)
  27. if new_line:
  28. new_lines.append(new_line)
  29. print('~~~~new line results~~~~~')
  30. for row in new_lines:
  31. print('++++')
  32. print(row)
  33. print('~~~~new line results~~~~~')
  34. line_results = new_lines
  35. for row in line_results:
  36. txt = [r.txt.replace(' ', '') for r in row]
  37. conf = np.mean([r.conf for r in row])
  38. txt = ''.join(txt)
  39. res = re.findall('\d{15,20}', txt)
  40. print(f'res: {res}, conf: {conf}')
  41. if res and conf > 0.95:
  42. return row, res[0], conf
  43. raise Exception('无法识别')
  44. def handle_wrong_digits(s):
  45. s = s.replace(' ', '')
  46. s = s.replace('-', '')
  47. s = s.replace('/', '')
  48. s = s.replace('L', '1')
  49. s = s.replace('l', '1')
  50. if s.startswith('1'):
  51. s = s[1:]
  52. if s.startswith('1'):
  53. s = '6' + s[1:]
  54. return s
  55. class Parser(object):
  56. def __init__(self, line_results: List[List[OcrResult]]):
  57. self.line_results = line_results
  58. self.res = defaultdict(RecItem)
  59. self.res["number"] = RecItem()
  60. def bank_no(self):
  61. # 号码
  62. row, txt, conf = find_card_row(self.line_results)
  63. print(f'=== txt: {txt}, res: {row}======')
  64. txt = handle_wrong_digits(txt)
  65. self.res['number'] = RecItem(txt, conf)
  66. def parse(self):
  67. self.bank_no()
  68. return self.res