|
@@ -36,6 +36,7 @@ class Parser(object):
|
|
|
def confidence(self):
|
|
|
return 0.
|
|
|
|
|
|
+
|
|
|
class FrontParser(Parser):
|
|
|
"""
|
|
|
"""
|
|
@@ -56,7 +57,7 @@ class FrontParser(Parser):
|
|
|
# txt = txt.replace("出生", "")
|
|
|
txt = txt.split('生')[-1]
|
|
|
addString.append(txt.strip())
|
|
|
- self.res["Birth"] = RecItem("".join(addString), self.confs[i])
|
|
|
+ self.res["Birth"] = RecItem("".join(addString), self.confs[i])
|
|
|
break
|
|
|
|
|
|
def card_no(self):
|
|
@@ -86,10 +87,20 @@ class FrontParser(Parser):
|
|
|
"""
|
|
|
for i in range(len(self.result)):
|
|
|
txt = self.result[i]
|
|
|
- if ("姓名" in txt) and len(txt) > 2:
|
|
|
- res = re.findall("姓名[\u4e00-\u9fa5]{1,4}", txt)
|
|
|
- if len(res) > 0:
|
|
|
- self.res["Name"].text = res[0].split("姓名")[-1]
|
|
|
+ length = len(txt)
|
|
|
+ print(length)
|
|
|
+ if "姓名" in txt:
|
|
|
+ if len(txt) < 6:
|
|
|
+ res = re.findall("姓名[\u4e00-\u9fa5]{1,4}", txt)
|
|
|
+ # 三个字名字
|
|
|
+ if len(res) > 0:
|
|
|
+ self.res["Name"].text = res[0].split("姓名")[-1]
|
|
|
+ self.res["Name"].confidence = self.confs[i]
|
|
|
+ self.result[i] = "temp" # 避免身份证姓名对地址造成干扰
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ res = txt[2:]
|
|
|
+ self.res["Name"].text = res
|
|
|
self.res["Name"].confidence = self.confs[i]
|
|
|
self.result[i] = "temp" # 避免身份证姓名对地址造成干扰
|
|
|
break
|
|
@@ -187,7 +198,6 @@ class FrontParser(Parser):
|
|
|
self.res["expire_date"] = RecItem(res[0], conf)
|
|
|
break
|
|
|
|
|
|
-
|
|
|
def predict_name(self):
|
|
|
"""
|
|
|
如果PaddleOCR返回的不是姓名xx连着的,则需要去猜测这个姓名,此处需要改进
|
|
@@ -229,12 +239,10 @@ class FrontParser(Parser):
|
|
|
return self.res
|
|
|
|
|
|
|
|
|
-
|
|
|
class BackParser(Parser):
|
|
|
def __init__(self, txts, confs):
|
|
|
Parser.__init__(self, txts, confs)
|
|
|
|
|
|
-
|
|
|
def expire_date(self):
|
|
|
for txt, conf in zip(self.result, self.confs):
|
|
|
txt = txt.replace('.', '')
|