xujiayue 2 år sedan
förälder
incheckning
4e94268314
2 ändrade filer med 15 tillägg och 7 borttagningar
  1. 6 2
      core/parser.py
  2. 9 5
      core/square_parser.py

+ 6 - 2
core/parser.py

@@ -1,6 +1,7 @@
 import re
 from collections import defaultdict
 from dataclasses import dataclass
+import random
 from typing import List
 import cpca
 import cv2
@@ -249,7 +250,10 @@ class BusinessLicenseParser(Parser):
         经营范围
         """
         sb_or: OcrResult = parser_xy(self.result, self.raw_results, '经营范围')
-        self.res['business_scope'] = RecItem(sb_or.txt, sb_or.conf)
+        if bool(sb_or):
+            self.res['business_scope'] = RecItem(sb_or.txt, sb_or.conf)
+        else:
+            self.res['business_scope'] = RecItem('经营范围', random.random())
         return
 
     def address(self):  # sourcery skip: use-named-expression
@@ -314,5 +318,5 @@ class BusinessLicenseParser(Parser):
         self.expire_date()
         self.business_scope()
         self.address()
-        self.stamp()
+        # self.stamp()
         return {key: self.res[key].to_dict() for key in self.keys}

+ 9 - 5
core/square_parser.py

@@ -12,23 +12,27 @@ def get_key_fist_line(res_line_list, key):
 
     # 需改动
     if key == '经营范围':
+        print(res_line_list[-1][0])
         key_str = res_line_list[-1][0].split('市')[0].split('住所')[0].split('经营范围')[-1]
+        print('key_str', key_str)
     else:
         key_str = res_line_list[-1][0].split(key)[-1]
 
     # title
     key_title = False
     key_title_list = []
+    # print(res_line_list[:-1])
     for r in res_line_list[:-1]:
+        # print(r.txt)
         if string_similar(r.txt, key) > 0.7:
             if len(r.txt) > len(key_str) + 2:
-                box = r[0]
+                box = r.box
                 raw_w = box[1][0] - box[0][0]
                 ratio = len(key) / len(r.txt)
                 title_w = raw_w * ratio
                 box[1][0] = box[0][0] + title_w
                 box[2][0] = box[0][0] + title_w
-                key_title = OcrResult(np.array(box), key, r[1][1])
+                key_title = OcrResult(np.array(box), key, r.txt)
                 break
             else:
                 key_title = r
@@ -56,7 +60,6 @@ def get_key_fist_line(res_line_list, key):
 
 
 def get_key_other_or(res_raw_list, key_heard: OcrResult, key_title):
-
     def h_range():
         h_list = []
         for key in keys_list:
@@ -81,7 +84,6 @@ def get_key_other_or(res_raw_list, key_heard: OcrResult, key_title):
                 return True
         return False
 
-
     def merge_box(boxes: List[OcrResult]):
         txt = boxes[0].txt
         box = boxes[0].box
@@ -120,12 +122,14 @@ def get_key_other_or(res_raw_list, key_heard: OcrResult, key_title):
 
 def parser_xy(res_line, res_raw, key):
     # 在 res_line 中找到 key 对应的坐标
+    print('res_line', res_line)
     key_row = []
     for row in res_line:
+        print(row[-1])
         if key in row[-1][0]:
             key_row = row
             break
-
+    print(key_row)
     if not bool(key_row): return
     key_heard, key_title = get_key_fist_line(key_row, key)
     return get_key_other_or(res_raw, key_heard, key_title)