|
@@ -12,7 +12,7 @@ class RecItem:
|
|
|
confidence: float = 0.
|
|
|
|
|
|
def to_dict(self):
|
|
|
- return {"text": self.text, "confidence": self.confidence}
|
|
|
+ return {"text": self.text, "confidence": np.nan_to_num(self.confidence)}
|
|
|
|
|
|
|
|
|
class Parser(object):
|
|
@@ -86,10 +86,10 @@ class FrontParser(Parser):
|
|
|
"""
|
|
|
for i in range(len(self.result)):
|
|
|
txt = self.result[i]
|
|
|
- if ("姓名" or "名" in txt) and len(txt) > 2:
|
|
|
- res = re.findall("名[\u4e00-\u9fa5]{1,4}", txt)
|
|
|
+ if ("姓名" in txt) and len(txt) > 2:
|
|
|
+ res = re.findall("姓名[\u4e00-\u9fa5]{1,4}", txt)
|
|
|
if len(res) > 0:
|
|
|
- self.res["Name"].text = res[0].split("名")[-1]
|
|
|
+ self.res["Name"].text = res[0].split("姓名")[-1]
|
|
|
self.res["Name"].confidence = self.confs[i]
|
|
|
self.result[i] = "temp" # 避免身份证姓名对地址造成干扰
|
|
|
break
|
|
@@ -157,19 +157,19 @@ class FrontParser(Parser):
|
|
|
if len(addString) > 0:
|
|
|
self.res["Address"].text = "".join(addString)
|
|
|
self.res["Address"].confidence = np.mean(conf)
|
|
|
- print(f'addr: {self.res["Address"]}')
|
|
|
+ # print(f'addr: {self.res["Address"]}')
|
|
|
|
|
|
def split_addr(self):
|
|
|
if self.res["Address"].text:
|
|
|
conf = self.res["Address"].confidence
|
|
|
df = cpca.transform([self.res["Address"].text])
|
|
|
- print(df)
|
|
|
+ # print(df)
|
|
|
|
|
|
province = df.iloc[0, 0]
|
|
|
city = df.iloc[0, 1]
|
|
|
region = df.iloc[0, 2]
|
|
|
detail = df.iloc[0, 3]
|
|
|
- print(f'pronvince: {province}, city: {city}, region: {region}, detail: {detail}')
|
|
|
+ # print(f'province: {province}, city: {city}, region: {region}, detail: {detail}')
|
|
|
self.res["address_province"] = RecItem(province, conf)
|
|
|
self.res["address_city"] = RecItem(city, conf)
|
|
|
self.res["address_region"] = RecItem(region, conf)
|