|
@@ -21,13 +21,11 @@ class Parser(object):
|
|
|
self.confs = confs
|
|
|
assert len(self.result) == len(self.confs), 'result and confs do not match'
|
|
|
self.res = defaultdict(RecItem)
|
|
|
- # self.res["Name"] = RecItem()
|
|
|
- # self.res["IDNumber"] = RecItem()
|
|
|
- # self.res["Address"] = RecItem()
|
|
|
- # self.res["Gender"] = RecItem()
|
|
|
- # self.res["Nationality"] = RecItem()
|
|
|
- # self.res["Birth"] = RecItem()
|
|
|
- # self.res["expire_date"] = RecItem()
|
|
|
+ self.keys = ["name", "id", "ethnicity", "gender", "birthday",
|
|
|
+ "address", "address_province", "address_city", "address_region", "address_detail", "expire_date"]
|
|
|
+ for key in self.keys:
|
|
|
+ self.res[key] = RecItem()
|
|
|
+
|
|
|
|
|
|
def parse(self):
|
|
|
return self.res
|
|
@@ -48,21 +46,18 @@ class FrontParser(Parser):
|
|
|
for i in txts
|
|
|
]
|
|
|
assert len(self.result) == len(self.confs), 'result and confs do not match'
|
|
|
- self.keys = ["name", "id", "ethnicity", "gender", "birthday",
|
|
|
- "address", "address_province", "address_city", "address_region", "address_detail", "expire_date"]
|
|
|
- for key in self.keys:
|
|
|
- self.res[key] = RecItem()
|
|
|
|
|
|
def birth(self):
|
|
|
- addString = []
|
|
|
- for i in range(len(self.result)):
|
|
|
- txt = self.result[i]
|
|
|
- if "出生" in txt or "生" in txt:
|
|
|
- # txt = txt.replace("出生", "")
|
|
|
- txt = txt.split('生')[-1]
|
|
|
- addString.append(txt.strip())
|
|
|
- self.res["birthday"] = RecItem("".join(addString), self.confs[i])
|
|
|
- break
|
|
|
+ if len(self.res["id"].text) == 18:
|
|
|
+ # 342423 2001 0 2 1 5 6552
|
|
|
+ # 012345 6789 10 11 12 13 14
|
|
|
+ str_num = self.res["id"].text
|
|
|
+ date = list(str_num[6:10] + "年" + str_num[10:12] + "月" + str_num[12:14] + "日")
|
|
|
+ if date[date.index("月") - 2] == "0":
|
|
|
+ del date[date.index("月") - 2]
|
|
|
+ if date[date.index("日") - 2] == "0":
|
|
|
+ del date[date.index("日") - 2]
|
|
|
+ self.res["birthday"].text = "".join(date)
|
|
|
|
|
|
def card_no(self):
|
|
|
"""
|
|
@@ -93,7 +88,7 @@ class FrontParser(Parser):
|
|
|
txt = self.result[i]
|
|
|
length = len(txt)
|
|
|
if "姓名" in txt:
|
|
|
- if len(txt) < 6:
|
|
|
+ if len(txt) < 7:
|
|
|
res = re.findall("姓名[\u4e00-\u9fa5]{1,4}", txt)
|
|
|
# 三个字名字
|
|
|
if len(res) > 0:
|
|
@@ -170,10 +165,12 @@ class FrontParser(Parser):
|
|
|
or "区" in txt
|
|
|
or "城" in txt
|
|
|
or "组" in txt
|
|
|
+ or "旗" in txt
|
|
|
or "号" in txt
|
|
|
):
|
|
|
-
|
|
|
- if "住址" in txt or "省" in txt or "址" in txt:
|
|
|
+ # if "住址" in txt or "省" in txt or "址" in txt:
|
|
|
+ if "住址" in txt or "省" in txt or "址" in txt or \
|
|
|
+ ('市' in txt and len(addString) > 0 and '市' not in addString[0]):
|
|
|
addString.insert(0, txt.split("址")[-1])
|
|
|
else:
|
|
|
addString.append(txt)
|
|
@@ -187,18 +184,24 @@ class FrontParser(Parser):
|
|
|
def split_addr(self):
|
|
|
if self.res["address"].text:
|
|
|
conf = self.res["address"].confidence
|
|
|
- df = cpca.transform([self.res["Address"].text])
|
|
|
+ df = cpca.transform([self.res["address"].text])
|
|
|
# print(df)
|
|
|
|
|
|
province = df.iloc[0, 0]
|
|
|
city = df.iloc[0, 1]
|
|
|
region = df.iloc[0, 2]
|
|
|
detail = df.iloc[0, 3]
|
|
|
- # print(f'pronvince: {province}, city: {city}, region: {region}, detail: {detail}')
|
|
|
+ print(f'pronvince: {province}, city: {city}, region: {region}, detail: {detail}')
|
|
|
self.res["address_province"] = RecItem(province, conf)
|
|
|
self.res["address_city"] = RecItem(city, conf)
|
|
|
- self.res["address_region"] = RecItem(region, conf)
|
|
|
- self.res["address_detail"] = RecItem(detail, conf)
|
|
|
+ if detail and "旗" in detail:
|
|
|
+ temp_region = []
|
|
|
+ temp_region.insert(0, detail.split("旗")[0] + "旗")
|
|
|
+ self.res["address_region"] = RecItem(temp_region[0], conf)
|
|
|
+ self.res["address_detail"] = RecItem(detail.split("旗")[-1], conf)
|
|
|
+ else:
|
|
|
+ self.res["address_region"] = RecItem(region, conf)
|
|
|
+ self.res["address_detail"] = RecItem(detail, conf)
|
|
|
|
|
|
def expire_date(self):
|
|
|
for txt, conf in zip(self.result, self.confs):
|
|
@@ -277,4 +280,4 @@ class BackParser(Parser):
|
|
|
self.expire_date()
|
|
|
if not self.res["expire_date"].text:
|
|
|
raise Exception("无法识别")
|
|
|
- return self.res
|
|
|
+ return {key: self.res[key].to_dict() for key in self.keys}
|