Zhang Li před 2 roky
rodič
revize
86aa0af158
Změněny 2 soubory: 31 přidání a 28 odebrání
  1. 31 28
      core/parser.py
  2. binární
      images/birth.png

+ 31 - 28
core/parser.py

@@ -21,13 +21,11 @@ class Parser(object):
         self.confs = confs
         assert len(self.result) == len(self.confs), 'result and confs do not match'
         self.res = defaultdict(RecItem)
-        # self.res["Name"] = RecItem()
-        # self.res["IDNumber"] = RecItem()
-        # self.res["Address"] = RecItem()
-        # self.res["Gender"] = RecItem()
-        # self.res["Nationality"] = RecItem()
-        # self.res["Birth"] = RecItem()
-        # self.res["expire_date"] = RecItem()
+        self.keys = ["name", "id", "ethnicity", "gender", "birthday",
+                     "address", "address_province", "address_city", "address_region", "address_detail", "expire_date"]
+        for key in self.keys:
+            self.res[key] = RecItem()
+
 
     def parse(self):
         return self.res
@@ -48,21 +46,18 @@ class FrontParser(Parser):
             for i in txts
         ]
         assert len(self.result) == len(self.confs), 'result and confs do not match'
-        self.keys = ["name", "id", "ethnicity", "gender", "birthday",
-                     "address", "address_province", "address_city", "address_region", "address_detail", "expire_date"]
-        for key in self.keys:
-            self.res[key] = RecItem()
 
     def birth(self):
-        addString = []
-        for i in range(len(self.result)):
-            txt = self.result[i]
-            if "出生" in txt or "生" in txt:
-                # txt = txt.replace("出生", "")
-                txt = txt.split('生')[-1]
-                addString.append(txt.strip())
-                self.res["birthday"] = RecItem("".join(addString), self.confs[i])
-                break
+        if len(self.res["id"].text) == 18:
+            # 342423 2001  0  2    1  5    6552
+            # 012345 6789  10 11   12 13   14
+            str_num = self.res["id"].text
+            date = list(str_num[6:10] + "年" + str_num[10:12] + "月" + str_num[12:14] + "日")
+            if date[date.index("月") - 2] == "0":
+                del date[date.index("月") - 2]
+            if date[date.index("日") - 2] == "0":
+                del date[date.index("日") - 2]
+            self.res["birthday"].text = "".join(date)
 
     def card_no(self):
         """
@@ -93,7 +88,7 @@ class FrontParser(Parser):
             txt = self.result[i]
             length = len(txt)
             if "姓名" in txt:
-                if len(txt) < 6:
+                if len(txt) < 7:
                     res = re.findall("姓名[\u4e00-\u9fa5]{1,4}", txt)
                     # 三个字名字
                     if len(res) > 0:
@@ -170,10 +165,12 @@ class FrontParser(Parser):
                     or "区" in txt
                     or "城" in txt
                     or "组" in txt
+                    or "旗" in txt
                     or "号" in txt
             ):
-
-                if "住址" in txt or "省" in txt or "址" in txt:
+                # if "住址" in txt or "省" in txt or "址" in txt:
+                if "住址" in txt or "省" in txt or "址" in txt or \
+                        ('市' in txt and len(addString) > 0 and '市' not in addString[0]):
                     addString.insert(0, txt.split("址")[-1])
                 else:
                     addString.append(txt)
@@ -187,18 +184,24 @@ class FrontParser(Parser):
     def split_addr(self):
         if self.res["address"].text:
             conf = self.res["address"].confidence
-            df = cpca.transform([self.res["Address"].text])
+            df = cpca.transform([self.res["address"].text])
             # print(df)
 
             province = df.iloc[0, 0]
             city = df.iloc[0, 1]
             region = df.iloc[0, 2]
             detail = df.iloc[0, 3]
-            # print(f'pronvince: {province}, city: {city}, region: {region}, detail: {detail}')
+            print(f'pronvince: {province}, city: {city}, region: {region}, detail: {detail}')
             self.res["address_province"] = RecItem(province, conf)
             self.res["address_city"] = RecItem(city, conf)
-            self.res["address_region"] = RecItem(region, conf)
-            self.res["address_detail"] = RecItem(detail, conf)
+            if detail and "旗" in detail:
+                temp_region = []
+                temp_region.insert(0, detail.split("旗")[0] + "旗")
+                self.res["address_region"] = RecItem(temp_region[0], conf)
+                self.res["address_detail"] = RecItem(detail.split("旗")[-1], conf)
+            else:
+                self.res["address_region"] = RecItem(region, conf)
+                self.res["address_detail"] = RecItem(detail, conf)
 
     def expire_date(self):
         for txt, conf in zip(self.result, self.confs):
@@ -277,4 +280,4 @@ class BackParser(Parser):
         self.expire_date()
         if not self.res["expire_date"].text:
             raise Exception("无法识别")
-        return self.res
+        return {key: self.res[key].to_dict() for key in self.keys}

binární
images/birth.png