123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- import re
- import json
- import string
- class IdCardStraight:
- """
- """
- def __init__(self, result):
- self.result = [
- i.replace(" ", "").translate(str.maketrans("", "", string.punctuation))
- for i in result
- ]
- self.out = {"Data": {"FrontResult": {}}}
- self.res = self.out["Data"]["FrontResult"]
- self.res["Name"] = ""
- self.res["IDNumber"] = ""
- self.res["Address"] = ""
- self.res["Gender"] = ""
- self.res["Nationality"] = ""
- self.res["year"] = ""
- # self.res["Isauthority"]=""
- # self.res["Effdata"]=""
- # def IS_author(self):
- # """
- # 签发机关
- # """
- def year(self):
- addString = []
- for i in range(len(self.result)):
- txt = self.result[i]
- if "出生" in txt:
- txt = txt.replace("出生", "")
- addString.append(txt)
- self.res["year"] = "".join(addString)
- # break
- # print(',,,,')
- # print(self.result)
- # txt = txt.replace("出生", "")
- # addString.append(txt)
- # print(txt)
- # self.res["year"] = "".join(addString)
- def birth_no(self):
- """
- 身份证号码
- """
- for i in range(len(self.result)):
- txt = self.result[i]
- # 身份证号码
- if "X" in txt or "x" in txt:
- res = re.findall("\d*[X|x]", txt)
- else:
- res = re.findall("\d{16,18}", txt)
- if len(res) > 0:
- if len(res[0]) == 18:
- self.res["IDNumber"] = res[0].replace("号码", "")
- self.res["Gender"] = "男" if int(res[0][16]) % 2 else "女"
- break
- def full_name(self):
- """
- 身份证姓名
- """
- # print(self)
- for i in range(len(self.result)):
- txt = self.result[i]
- if ("姓名" or "名" in txt) and len(txt) > 2:
- res = re.findall("名[\u4e00-\u9fa5]{1,4}", txt)
- if len(res) > 0:
- self.res["Name"] = res[0].split("名")[-1]
- self.result[i] = "temp" # 避免身份证姓名对地址造成干扰
- break
- def sex(self):
- """
- 性别女民族汉
- """
- for i in range(len(self.result)):
- txt = self.result[i]
- if "男" in txt:
- self.res["Gender"] = "男"
- elif "女" in txt:
- self.res["Gender"] = "女"
- def national(self):
- # 性别女民族汉
- for i in range(len(self.result)):
- txt = self.result[i]
- res = re.findall(".*民族[\u4e00-\u9fa5]+", txt)
- if len(res) > 0:
- self.res["Nationality"] = res[0].split("族")[-1]
- break
- def address(self):
- """
- 身份证地址
- """
- addString = []
- for i in range(len(self.result)):
- txt = self.result[i]
- txt = txt.replace("号码", "")
- if "公民" in txt:
- txt = "temp"
- # 身份证地址
- if (
- "住址" in txt
- or "址" in txt
- or "省" in txt
- or "市" in txt
- or "县" in txt
- or "街" in txt
- or "乡" in txt
- or "村" in txt
- or "镇" in txt
- or "区" in txt
- or "城" in txt
- or "组" in txt
- or "号" in txt
- ):
- if "住址" in txt or "省" in txt or "址" in txt:
- addString.insert(0, txt.split("址")[-1])
- else:
- addString.append(txt)
- self.result[i] = "temp"
- # print(addString)
- if len(addString) > 0:
- self.res["Address"] = "".join(addString)
- else:
- self.res["Address"] = ""
- def predict_name(self):
- """
- 如果PaddleOCR返回的不是姓名xx连着的,则需要去猜测这个姓名,此处需要改进
- """
- for i in range(len(self.result)):
- txt = self.result[i]
- if self.res["Name"] == "":
- if len(txt) > 1 and len(txt) < 5:
- if (
- "性别" not in txt
- and "姓名" not in txt
- and "民族" not in txt
- and "住址" not in txt
- and "出生" not in txt
- and "号码" not in txt
- and "身份" not in txt
- ):
- result = re.findall("[\u4e00-\u9fa5]{2,4}", txt)
- if len(result) > 0:
- self.res["Name"] = result[0]
- break
- def run(self):
- print(self)
- self.full_name()
- self.national()
- self.birth_no()
- self.address()
- self.predict_name()
- self.year()
- print(self.out)
- return self.out
|