|
@@ -1,122 +0,0 @@
|
|
|
-# 重构代码
|
|
|
-import json
|
|
|
-import cpca
|
|
|
-
|
|
|
# Sample address used to exercise the calibration script below.
addr = "江苏省京市永扬大道"

# Load the administrative-division tree. The rest of the script treats it as
# a list of province dicts, each with a 'name' and a 'children' list.
# The context manager guarantees the handle is closed even if reading fails.
# NOTE(review): this still relies on the platform default text encoding;
# confirm the encoding of areas.json and pass it explicitly if it is UTF-8.
with open('areas.json', 'r') as f:
    content = f.read()
areas = json.loads(content)
|
|
|
-
|
|
|
-
|
|
|
def get_cpca(addr):
    """Parse one address with cpca and return its first four fields.

    Args:
        addr: The address string to parse.

    Returns:
        A ``(province, city, region, street)`` tuple taken from the first
        four columns of the first result row. ``city`` and ``region`` are
        normalised to ``""`` when falsy; ``province`` and ``street`` are
        returned exactly as cpca produced them.

    NOTE(review): the mapping of columns 0-3 to province/city/district/street
    is taken from the variable names used here; confirm against the installed
    cpca version.
    """
    df = cpca.transform([addr])
    print(df)
    # Use purely positional (row, column) indexing. The previous
    # ``df.iloc[0][k]`` form indexed a label-indexed row Series with an
    # integer key, which depends on a positional fallback that pandas has
    # deprecated.
    province = df.iloc[0, 0]
    city = df.iloc[0, 1] or ""
    region = df.iloc[0, 2] or ""
    street = df.iloc[0, 3]
    print(province, city, region, street)
    return province, city, region, street
|
|
|
-
|
|
|
-
|
|
|
# Initial cpca pass over the sample address; its four fields are module-level
# state for the rest of the script.
(province, city, region, street) = get_cpca(addr)
|
|
|
-
|
|
|
-
|
|
|
def get_region(city_or_province, temp_addr):
    """Return the child name that most resembles ``temp_addr``.

    Args:
        city_or_province: A dict with a ``'children'`` list; every child is
            a dict with a ``'name'`` string.
        temp_addr: The (possibly misspelled) name fragment to match.

    Returns:
        The ``'name'`` of the best-scoring child, or ``""`` when there are no
        children at all (previously this raised ``UnboundLocalError``).

    Scoring:
        * same length as the candidate: number of positions holding the same
          character;
        * shorter than the candidate: number of characters of ``temp_addr``
          that occur anywhere in the candidate;
        * longer than the candidate: 0.

    Ties are resolved in favour of the later child, as before.
    """
    best_name = ""
    best_score = 0
    for child in city_or_province['children']:
        name = child['name']
        if len(temp_addr) == len(name):
            score = sum(a == b for a, b in zip(temp_addr, name))
        elif len(temp_addr) < len(name):
            score = sum(ch in name for ch in temp_addr)
        else:
            score = 0
        # ``>=`` (not ``>``) keeps the original "last candidate wins" rule.
        if score >= best_score:
            best_score = score
            best_name = name
    return best_name
|
|
|
-
|
|
|
-
|
|
|
def cal_regions(json_province, city, temp_addr, flag):
    """Calibrate a city or district name inside one province.

    flag=0: cpca found neither the district nor the city; only the city has
        to be found, so ``temp_addr`` is matched against the province's
        direct children.
    flag=1: cpca did not find the district; ``temp_addr`` is matched against
        the children of the first city whose name contains ``city`` (or of
        the first city at all when ``city`` is empty).

    Returns the result of ``get_region``, or ``None`` when ``flag`` is
    neither 0 nor 1 or no city matched.
    """
    if flag == 0:
        return get_region(json_province, temp_addr)
    if flag != 1:
        return None
    for json_city in json_province['children']:
        if not city or city in json_city['name']:
            return get_region(json_city, temp_addr)
    return None
|
|
|
-
|
|
|
-
|
|
|
def cal_addrs(province, city, region, temp_addr, areas, flag):
    """Calibrate ``temp_addr`` against the division tree of ``province``.

    Args:
        province: Province name (or a substring of it) reported by cpca.
        city: City name reported by cpca; may be empty.
        region: Value returned unchanged when nothing can be calibrated.
        temp_addr: Name fragment cut out of the street text.
        areas: Iterable of province dicts, each with ``'name'`` and
            ``'children'``.
        flag: Forwarded to ``cal_regions`` (0 = find the city, 1 = find the
            district).

    Returns:
        The calibrated name from the first province whose name contains
        ``province``; otherwise the ``region`` argument.
    """
    # An empty province would match every entry via ``"" in name``; there is
    # nothing meaningful to calibrate against in that case.
    if not province:
        return region
    for json_province in areas:
        if province in json_province['name']:
            calibrated = cal_regions(json_province, city, temp_addr, flag)
            # Never hand None back to callers that concatenate the result.
            return region if calibrated is None else calibrated
    return region
|
|
|
-
|
|
|
-
|
|
|
# Step 1: cpca resolved province, city and district, so the only work left is
# to drop the administrative prefix that is still embedded in `street`.
if province and city and region:
    print("省市区都存在,只需要切割street中‘区’后面的内容")
    if "区" in street:
        street = street.split("市")[-1].split("区")[-1]
    if "县" in street:
        street = street.split("市")[-1].split("县")[-1]

# Step 2: cpca did not resolve the district.
elif not region:
    if city:
        # Province and city are known; try to recover the district from the
        # street text.
        if '区' in street or '县' in street:
            print("cpca没有检测到‘区’,但是检测到省,市,并且street中含有‘区’,需要校准区")
            flag = 1
            # `elif`, not a second `if`: the first branch already truncates
            # `street`, and re-testing the remainder could overwrite
            # `temp_region` with a bogus fragment (e.g. "鼓楼区县前街").
            if "区" in street:
                temp_region = street.split("区")[0] + "区"
                street = street.split("区")[-1]
            elif "县" in street:
                temp_region = street.split("县")[0] + "县"
                street = street.split("县")[-1]
            region = cal_addrs(province, city, region, temp_region, areas, flag)
        else:
            # (For now) the street needs no further cutting.
            print("cpca没有检测到‘区’,但是检测到省,市,并且street中没有有‘区’,就不需要校准区")

    # Only the province is known; this is the more involved case.
    else:
        # Calibrate the district first: if the street text contains one, cut
        # it out and let the final cpca pass do the rest. If there is no
        # district, calibrate the city instead and cut the street directly.
        if '区' in street or '县' in street:
            print("cpca只检测到了省,但是street中有区,直接分割出区,再做cpca即可")
            flag = 1
            # Same `elif` reasoning as above.
            if "区" in street:
                temp_region = street.split('市')[-1].split('区')[0] + "区"
                street = street.split('区')[-1]
            elif "县" in street:
                temp_region = street.split('市')[-1].split('县')[0] + "县"
                street = street.split('县')[-1]
            region = cal_addrs(province, city, region, temp_region, areas, flag)
        elif '市' not in street:
            # Neither a city nor a district in the text: nothing to calibrate.
            print("cpca只检测到了省,并且street中没有市,也没有区,直接跳过")
        else:
            print("cpca只检测到了省,并且street中有市,没有区,就只要校准city")
            flag = 0
            temp_city = street.split('市')[0] + '市'
            street = street.split('市')[-1]
            city = cal_addrs(province, city, region, temp_city, areas, flag)

# Rebuild the address from the calibrated parts and parse it once more.
# A part may be None when calibration found nothing; treat that as empty
# instead of crashing on str + None.
addr = "".join(part or "" for part in (province, city, region, street))
province, city, region, street = get_cpca(addr)
|