# Refactored code: calibrate the province / city / district that cpca extracts
# from a free-form Chinese address against the area tree stored in areas.json.
import json

# Sample address processed when the file is run as a script.
addr = "江苏省京市永扬大道"

# Area tree file: a list of provinces, each a dict with a 'name' and a
# 'children' list of cities, each of which has its own 'children' districts.
AREAS_FILE = 'areas.json'


def load_areas(path=AREAS_FILE):
    """Load and return the area tree stored as JSON at *path*.

    The file is opened in a ``with`` block so the handle is always closed,
    and decoded as UTF-8 explicitly so the Chinese names do not depend on the
    platform's default encoding.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def get_cpca(addr):
    """Parse *addr* with cpca and return ``(province, city, region, street)``.

    The parsed DataFrame and the four extracted fields are printed. A field
    that cpca leaves empty (``None``) is returned as ``""`` so the parts can
    always be concatenated back into an address string.
    """
    # Imported lazily so the pure matching helpers below stay importable
    # (and testable) on machines where the third-party package is missing.
    import cpca

    df = cpca.transform([addr])
    print(df)
    # Use 2-D positional indexing; integer lookups on the string-labelled row
    # Series (``df.iloc[0][0]``) are deprecated in recent pandas versions.
    province, city, region, street = (df.iloc[0, col] or "" for col in range(4))
    print(province, city, region, street)
    return province, city, region, street


def _match_score(temp_addr, name):
    """Return how well the fragment *temp_addr* matches the area *name*.

    * Same length: number of positions holding the same character.
    * Fragment shorter: number of fragment characters occurring anywhere in
      *name*.
    * Fragment longer: 0.
    """
    if len(temp_addr) == len(name):
        return sum(a == b for a, b in zip(temp_addr, name))
    if len(temp_addr) < len(name):
        return sum(ch in name for ch in temp_addr)
    return 0


def get_region(city_or_province, temp_addr):
    """Return the child name of *city_or_province* that best matches *temp_addr*.

    *city_or_province* is a node of the area tree: passing a province matches
    against its cities, passing a city matches against its districts. On
    equal scores the later child wins. Returns ``""`` when the node has no
    children (the original raised ``UnboundLocalError`` in that case).
    """
    best_score = 0
    region = ""
    for child in city_or_province.get('children') or []:
        name = child['name']
        score = _match_score(temp_addr, name)
        # ``>=`` keeps the original tie-breaking: the last best child wins.
        if score >= best_score:
            best_score = score
            region = name
    return region


def cal_regions(json_province, city, temp_addr, flag):
    """Calibrate a city or district name inside one province.

    flag=0: cpca found neither a district nor a city; match *temp_addr*
            against the province's cities and return the city name.
    flag=1: cpca found no district; match *temp_addr* against districts and
            return the district name. If *city* is known, only the first
            city whose name contains it is searched. If *city* is empty, the
            districts of every city in the province are searched (the
            original only looked at the first city).

    Returns a falsy value (``None`` or ``""``) when nothing can be matched.
    """
    if flag == 0:
        return get_region(json_province, temp_addr)
    if flag == 1:
        cities = json_province.get('children') or []
        if not city:
            districts = [
                district
                for json_city in cities
                for district in json_city.get('children') or []
            ]
            return get_region({'children': districts}, temp_addr)
        for json_city in cities:
            if city in json_city['name']:
                return get_region(json_city, temp_addr)
    return None


def cal_addrs(province, city, region, temp_addr, areas, flag):
    """Find *province* in *areas* and calibrate *temp_addr* inside it.

    *region* is accepted for backward compatibility and is not read.
    See ``cal_regions`` for the meaning of *flag*. Returns ``None`` when
    *province* is empty or is not contained in any province name.
    """
    if not province:
        # An empty string is "contained" in every name; never match on it.
        return None
    for json_province in areas:
        if province in json_province['name']:
            return cal_regions(json_province, city, temp_addr, flag)
    return None


def _split_district(street, drop_city_prefix):
    """Cut the leading district ("区") / county ("县") fragment off *street*.

    Returns ``(temp_region, remaining_street)``. "区" is handled before "县",
    and the "县" check runs on whatever is left after the "区" split, exactly
    like the two consecutive ``if`` statements of the original script. With
    *drop_city_prefix* the fragment is taken from the text after the last
    "市" so a leading city name is not included in it.
    """
    temp_region = ""
    for suffix in ("区", "县"):
        if suffix in street:
            source = street.split("市")[-1] if drop_city_prefix else street
            temp_region = source.split(suffix)[0] + suffix
            street = street.split(suffix)[-1]
    return temp_region, street


def calibrate_address(province, city, region, street, areas):
    """Calibrate the fields cpca missed and strip them from *street*.

    Returns the updated ``(province, city, region, street)``. A failed
    calibration leaves the field as ``""`` instead of ``None`` so the caller
    can safely concatenate the parts.
    """
    # Step 1: province, city and district are all known, so the street only
    # needs the leading province/city/district text removed.
    if province and city and region:
        print("省市区都存在,只需要切割street中‘区’后面的内容")
        if "区" in street:
            street = street.split("市")[-1].split("区")[-1]
        if "县" in street:
            street = street.split("市")[-1].split("县")[-1]
    # Step 2: the district is missing.
    elif not region:
        has_district = '区' in street or '县' in street
        if city:
            # Province and city are known.
            if has_district:
                print("cpca没有检测到‘区’,但是检测到省,市,并且street中含有‘区’,需要校准区")
                temp_region, street = _split_district(street, False)
                region = cal_addrs(province, city, region, temp_region, areas, 1) or ""
            else:
                # Nothing to cut from the street (for now).
                print("cpca没有检测到‘区’,但是检测到省,市,并且street中没有有‘区’,就不需要校准区")
        # Only the province is known. Calibrate the district first if the
        # street has one; otherwise calibrate the city and cut the street.
        elif has_district:
            print("cpca只检测到了省,但是street中有区,直接分割出区,再做cpca即可")
            temp_region, street = _split_district(street, True)
            region = cal_addrs(province, city, region, temp_region, areas, 1) or ""
        elif '市' in street:
            print("cpca只检测到了省,并且street中有市,没有区,就只要校准city")
            temp_city = street.split('市')[0] + '市'
            street = street.split('市')[-1]
            city = cal_addrs(province, city, region, temp_city, areas, 0) or ""
        else:
            # Neither a city nor a district in the street: nothing to do.
            print("cpca只检测到了省,并且street中没有市,也没有区,直接跳过")
    return province, city, region, street


def main():
    """Parse the sample address, calibrate it, and parse the result again."""
    areas = load_areas()
    province, city, region, street = get_cpca(addr)
    province, city, region, street = calibrate_address(
        province, city, region, street, areas
    )
    # Re-run cpca on the rebuilt address so it can fill in the other fields.
    return get_cpca(province + city + region + street)


if __name__ == "__main__":
    main()