test_addr.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. # 重构代码
  2. import json
  3. import cpca
  # Sample address that the script below parses and calibrates.
  addr = "江苏省京市永扬大道"
  # Load the administrative-area tree used for calibration. The context manager
  # closes the handle as soon as the text is read; `f` and `content` stay bound
  # at module level as before.
  # NOTE(review): no encoding is passed, so the platform default is used --
  # confirm it matches how areas.json is stored.
  with open('areas.json', 'r') as f:
      content = f.read()
  areas = json.loads(content)
  def get_cpca(addr):
      """Split one address into (province, city, region, street) with cpca.

      Args:
          addr: Address text; it is passed to ``cpca.transform`` as a one-item
              list.

      Returns:
          A 4-tuple read from the first four columns of the first result row.
          ``city`` and ``region`` are replaced by ``""`` when falsy;
          ``province`` and ``street`` are returned exactly as cpca produced
          them.

      Side effects:
          Prints the raw result table and the extracted tuple.
      """
      # Presumably a pandas DataFrame (it is indexed through .iloc below).
      df = cpca.transform([addr])
      print(df)
      # Address each cell by (row, column) position in a single .iloc call.
      # The former `df.iloc[0][n]` looked up an integer key on a Series whose
      # labels are column names, relying on a positional fallback that pandas
      # has deprecated.
      province = df.iloc[0, 0]
      city = df.iloc[0, 1] or ""
      region = df.iloc[0, 2] or ""
      street = df.iloc[0, 3]
      print(province, city, region, street)
      return province, city, region, street
  # First pass: parse the sample address; the calibration steps further down
  # work on these four module-level values.
  (province, city, region, street) = get_cpca(addr)
  def get_region(city_or_province, temp_addr):
      """Return the child name that best matches ``temp_addr``.

      Args:
          city_or_province: Node of the area tree; its ``'children'`` entries
              are scanned and each must provide a ``'name'``.
          temp_addr: Possibly misspelled area name cut out of the address.

      Returns:
          The ``'name'`` of the best-scoring child, or ``""`` when there are no
          children. Scoring per child:

          * same length as ``temp_addr``: number of positions holding the same
            character;
          * ``temp_addr`` shorter than the name: number of characters of
            ``temp_addr`` that occur anywhere in the name;
          * ``temp_addr`` longer than the name: 0.

          On equal scores the later child wins, so a child is returned even
          when nothing matches at all.
      """
      best_name = ""  # stays "" only when there are no children to compare
      best_score = 0
      for child in city_or_province['children']:
          name = child['name']
          if len(temp_addr) == len(name):
              score = sum(a == b for a, b in zip(temp_addr, name))
          elif len(temp_addr) < len(name):
              score = sum(ch in name for ch in temp_addr)
          else:
              score = 0
          # ">=" keeps the original tie-breaking: the last best candidate wins.
          if score >= best_score:
              best_score = score
              best_name = name
      return best_name
  def cal_regions(json_province, city, temp_addr, flag):
      """Pick the area name to calibrate against inside one province node.

      flag=0: cpca found neither the district nor the city; only the city has
          to be found, so ``temp_addr`` is matched against the province's own
          children.
      flag=1: cpca did not find the district; ``temp_addr`` is matched against
          the children of the first city whose name contains ``city``.

      Returns the name chosen by ``get_region``, or None when ``flag`` is
      neither 0 nor 1, or when no city matches.

      NOTE(review): with flag=1 and an empty ``city`` the first city of the
      province is always the one searched -- confirm that is intended.
      """
      if flag == 0:
          return get_region(json_province, temp_addr)
      if flag != 1:
          return None
      for candidate in json_province['children']:
          if not city or city in candidate['name']:
              return get_region(candidate, temp_addr)
      return None
  def cal_addrs(province, city, region, temp_addr, areas, flag):
      """Calibrate ``temp_addr`` against the area tree for ``province``.

      Every entry of ``areas`` whose ``'name'`` contains ``province`` is handed
      to ``cal_regions``; the result for the last such entry is returned. When
      no entry matches, the ``region`` argument is returned unchanged.
      """
      result = region
      for province_node in areas:
          if province in province_node['name']:
              result = cal_regions(province_node, city, temp_addr, flag)
      return result
  # Step 1: cpca already returned province, city and district, so `street` only
  # needs its leading part, up to the district/county marker, dropped.
  if province and city and region:
      print("省市区都存在,只需要切割street中‘区’后面的内容")
      if "区" in street:
          street = street.split("市")[-1].split("区")[-1]
      if "县" in street:
          street = street.split("市")[-1].split("县")[-1]
  # Step 2: the district is missing from the cpca result.
  elif not region:
      if city:
          # Province and city are known; only the district may need calibrating.
          if '区' in street or '县' in street:
              print("cpca没有检测到‘区’,但是检测到省,市,并且street中含有‘区’,需要校准区")
              flag = 1
              if "区" in street:
                  temp_region = street.split("区")[0] + "区"
                  street = street.split("区")[-1]
              if "县" in street:
                  temp_region = street.split("县")[0] + "县"
                  street = street.split("县")[-1]
              # cal_addrs returns None when cal_regions finds no matching city;
              # fall back to "" so the concatenation below cannot raise
              # TypeError.
              region = cal_addrs(province, city, region, temp_region, areas, flag) or ""
          else:
              # (For now) street needs no further splitting.
              print("cpca没有检测到‘区’,但是检测到省,市,并且street中没有有‘区’,就不需要校准区")
      else:
          # Only the province is known -- the more involved case. Calibrate the
          # district first when street contains one (cpca is run again at the
          # end); otherwise calibrate the city and cut it off street.
          if '区' in street or '县' in street:
              print("cpca只检测到了省,但是street中有区,直接分割出区,再做cpca即可")
              flag = 1
              if "区" in street:
                  temp_region = street.split('市')[-1].split('区')[0] + "区"
                  street = street.split('区')[-1]
              if "县" in street:
                  temp_region = street.split('市')[-1].split('县')[0] + "县"
                  street = street.split('县')[-1]
              # Same None guard as in the branch above.
              region = cal_addrs(province, city, region, temp_region, areas, flag) or ""
          elif '市' in street:
              print("cpca只检测到了省,并且street中有市,没有区,就只要校准city")
              flag = 0
              temp_city = street.split('市')[0] + '市'
              street = street.split('市')[-1]
              # Same None guard; here the calibrated value is the city.
              city = cal_addrs(province, city, region, temp_city, areas, flag) or ""
          else:
              # Neither a city nor a district marker: nothing to calibrate.
              print("cpca只检测到了省,并且street中没有市,也没有区,直接跳过")
  # Rebuild the address from the calibrated parts and parse it once more.
  addr = province + city + region + street
  province, city, region, street = get_cpca(addr)