# Refactored code
- import json
- import cpca
# Address string that the rest of the script parses and calibrates.
addr = "江苏省京市永扬大道"

# Load the area tree from disk. The context manager guarantees the file
# handle is closed even if reading fails.
# NOTE(review): the file is opened with the platform default encoding, as in
# the original; consider passing encoding="utf-8" if areas.json is UTF-8.
with open('areas.json', 'r') as f:
    content = f.read()
areas = json.loads(content)
def get_cpca(addr):
    """Parse one address with cpca and return its four leading components.

    Args:
        addr: The address string to parse.

    Returns:
        A ``(province, city, region, street)`` tuple read from the first four
        columns of the first row of ``cpca.transform([addr])``. Any falsy
        component (for example ``None``) is returned as ``""`` so callers can
        run ``in`` checks and concatenate the parts safely.
    """
    df = cpca.transform([addr])
    print(df)
    # Use a single positional lookup ``iloc[row, col]``. The chained form
    # ``iloc[0][k]`` indexes a label-indexed Series with an integer key, which
    # pandas has deprecated.
    province, city, region, street = (
        df.iloc[0, col] or "" for col in range(4)
    )
    print(province, city, region, street)
    return province, city, region, street
# First pass: split the raw address into its components with cpca.
province, city, region, street = get_cpca(addr)
def get_region(city_or_province, temp_addr):
    """Return the child area name that best matches a partial name.

    Every entry of ``city_or_province['children']`` is scored against
    ``temp_addr``:

    * same length: the number of positions holding the same character;
    * ``temp_addr`` shorter than the name: the number of characters of
      ``temp_addr`` that occur anywhere in the name;
    * ``temp_addr`` longer than the name: the score is 0.

    The highest score wins and ties go to the later child, so when nothing
    overlaps at all the last child's name is returned.

    Args:
        city_or_province: Mapping with an optional ``'children'`` list whose
            items are mappings carrying a ``'name'`` string.
        temp_addr: The (possibly incomplete) area name to match.

    Returns:
        The best-matching child name, or ``""`` when there are no children.
    """
    best_name = ""
    best_score = 0
    for child in city_or_province.get('children') or []:
        name = child['name']
        if len(temp_addr) == len(name):
            # Position-by-position comparison.
            score = sum(a == b for a, b in zip(temp_addr, name))
        elif len(temp_addr) < len(name):
            # Count characters of temp_addr found anywhere in the name.
            score = sum(ch in name for ch in temp_addr)
        else:
            score = 0
        # ">=" keeps the original tie-break: a later child replaces an
        # earlier one with the same score.
        if score >= best_score:
            best_score = score
            best_name = name
    return best_name
def cal_regions(json_province, city, temp_addr, flag):
    """Pick the best-matching area name inside one province node.

    Args:
        json_province: Province mapping with an optional ``'children'`` list
            of city mappings (each with ``'name'`` and ``'children'``).
        city: City name detected so far; may be empty.
        temp_addr: Partial area name to calibrate.
        flag: Selects what is being calibrated.
            ``0``: cpca found neither the region nor the city; ``temp_addr``
            is matched against the province's direct children.
            ``1``: cpca did not find the region; ``temp_addr`` is matched
            against the children of the first city whose name contains
            ``city``.

    Returns:
        The name chosen by ``get_region``, or ``""`` when ``flag`` is not
        0/1 or no city matches. An empty string keeps the caller's string
        concatenation from failing on ``None``.
    """
    if flag == 0:
        return get_region(json_province, temp_addr)
    if flag == 1:
        for json_city in json_province.get('children') or []:
            # NOTE(review): with an empty ``city`` this matches the first
            # child city only, as in the original — confirm that searching
            # just the first city is intended.
            if not city or city in json_city['name']:
                return get_region(json_city, temp_addr)
    return ""
def cal_addrs(province, city, region, temp_addr, areas, flag):
    """Calibrate an area name against the entries of ``areas``.

    Args:
        province: Province name; an entry matches when this string is
            contained in the entry's ``'name'``.
        city: City name forwarded to ``cal_regions``.
        region: Value returned unchanged when no province entry matches.
        temp_addr: Partial area name forwarded to ``cal_regions``.
        areas: Iterable of province mappings, each with a ``'name'``.
        flag: Mode forwarded to ``cal_regions``.

    Returns:
        The result of ``cal_regions`` for the last matching province entry,
        or ``region`` unchanged when nothing matches or ``province`` is empty.
    """
    # An empty province would be "contained" in every name, and None would
    # raise TypeError; in both cases there is nothing meaningful to match.
    if not province:
        return region
    for json_province in areas:
        if province in json_province['name']:
            region = cal_regions(json_province, city, temp_addr, flag)
    return region
# Step 1: if cpca found province, city and region, only the street needs
# trimming: drop everything up to and including the region/county marker.
if province and city and region:
    print("省市区都存在,只需要切割street中‘区’后面的内容")
    if "区" in street:
        street = street.split("市")[-1].split("区")[-1]
    if "县" in street:
        street = street.split("市")[-1].split("县")[-1]
# Step 2: cpca did not find a region.
elif not region:
    if city:
        # Province and city are known; calibrate the region if the street
        # still contains a region/county marker.
        if '区' in street or '县' in street:
            print("cpca没有检测到‘区’,但是检测到省,市,并且street中含有‘区’,需要校准区")
            flag = 1
            if "区" in street:
                temp_region = street.split("区")[0] + "区"
                street = street.split("区")[-1]
            # Checked against the possibly already-trimmed street, as in the
            # original (two sequential ifs, not if/elif).
            if "县" in street:
                temp_region = street.split("县")[0] + "县"
                street = street.split("县")[-1]
            region = cal_addrs(province, city, region, temp_region, areas, flag)
        else:
            # (For now) the street does not need to be split.
            print("cpca没有检测到‘区’,但是检测到省,市,并且street中没有有‘区’,就不需要校准区")
    else:
        # Only the province is known — the more complicated case.
        # Calibrate the region first when the street contains one; otherwise
        # calibrate the city and split it off the street.
        if '区' in street or '县' in street:
            print("cpca只检测到了省,但是street中有区,直接分割出区,再做cpca即可")
            flag = 1
            if "区" in street:
                temp_region = street.split('市')[-1].split('区')[0] + "区"
                street = street.split('区')[-1]
            if "县" in street:
                temp_region = street.split('市')[-1].split('县')[0] + "县"
                street = street.split('县')[-1]
            region = cal_addrs(province, city, region, temp_region, areas, flag)
        elif '市' not in street:
            # Neither city nor region in the street: nothing to calibrate.
            print("cpca只检测到了省,并且street中没有市,也没有区,直接跳过")
        else:
            print("cpca只检测到了省,并且street中有市,没有区,就只要校准city")
            flag = 0
            temp_city = street.split('市')[0] + '市'
            street = street.split('市')[-1]
            city = cal_addrs(province, city, region, temp_city, areas, flag)

# NOTE(review): indentation was lost in the source; these two statements are
# assumed to run at top level after the branches above — confirm.
# Rebuild the address from the calibrated parts and parse it once more.
# Missing parts are treated as "" so a None never breaks the concatenation.
addr = "".join(part or "" for part in (province, city, region, street))
province, city, region, street = get_cpca(addr)
|