# Workbook that is read below and then overwritten with the extra column.
ori_filename = "data/geo_code_repl_cut_9.xlsx"

# Column groups shared by both reads below. A fresh list is built for every
# call so the list handed to fr.readAddressFile is never shared.
base_columns = [
    '门店名称', '地址', '合并地址', 'id', 'name', 'type', 'typecode',
    'address', 'location', 'pcode', 'pname', 'citycode', 'cityname',
    'adcode', 'adname', 'business_area', 'timestamp', 'rating', 'cost',
]
category_columns = [
    '车辆服务', '餐饮服务', '生活服务', '体育服务', '娱乐服务', '医疗服务',
    '住宿服务', '商住区', '风景区', '教育院校', '交通枢纽', '公共交通',
    '购物专卖', '购物综合',
]

############# Government agencies and social organizations ##############
colnames = base_columns + category_columns
loc_df = fr.readAddressFile(ori_filename, colnames)

# One city entry per value of the 'location' column.
poly_list = list(loc_df['location'])
ct_list = ["洛阳市" for _ in poly_list]

field = '政府机构社会团体'
types = '130000'
extractor.typefilter = []
top = -1
extractor.infoExtract(poly_list, ct_list, types, top, simplify=True)

# Store the extractor output under the new column and write the frame back to
# the same path it was read from.
loc_df[field] = extractor.result
new_filename = "data/geo_code_repl_cut_9.xlsx"
fr.writeAddressFile(loc_df, new_filename)

#################################
############# Science, education and culture ##############
# Same base and category columns, plus the column written by the pass above.
colnames = base_columns + category_columns + ['政府机构社会团体']
loc_df = fr.readAddressFile(ori_filename, colnames)

poly_list = list(loc_df['location'])
ct_list = ["洛阳市" for _ in poly_list]

field = '科教文化'
types = '140000'
extractor.typefilter = ['141201', '141206']
top = -1
'061101',\ '061102',\ '061103',\ '061104',\ '061201',\ '061202',\ '061203',\ '061204',\ '061205',\ '061206',\ '061207',\ '061208',\ '061209',\ '061211',\ '061212',\ '061213',\ '061214',\ '061300',\ '061301',\ '061302' ] extractor.infoExtract(poly_list, ct_list, types, top) import file_reader as fr field=['id', 'name', 'type', 'typecode', 'address', 'location', 'pcode', 'pname', 'citycode', \ 'cityname', 'adcode', 'adname', 'business_area', 'timestamp', 'rating', 'cost'] structured_data = fr.genStructuredData(extractor.result, field) new_filename = 'data/geo_code_replenish_basic.xlsx' fr.writeAddressFile(structured_data, new_filename)
# Endpoints of the two AMap web-service APIs used by main().
_GEOCODE_API_URL = "https://restapi.amap.com/v3/geocode/geo"
_POI_SEARCH_API_URL = "https://restapi.amap.com/v3/place/text"

# Fields requested from fr.genStructuredData and copied into the output sheet.
_POI_FIELDS = (
    'id', 'name', 'type', 'typecode', 'address', 'location', 'pcode',
    'pname', 'citycode', 'cityname', 'adcode', 'adname', 'business_area',
    'timestamp', 'rating', 'cost',
)

# Codes assigned to POISearcher.typefilter. They all fall under the "06"
# prefix that matches the `types` value used in main(); how the filter is
# applied is decided inside POISearcher.
# NOTE(review): presumably AMap POI type codes -- confirm against the AMap
# POI classification table.
_POI_TYPE_FILTER = (
    '060301', '060302', '060303', '060304', '060305', '060306', '060307',
    '060308',
    '060500', '060501', '060502',
    '060600', '060601', '060602', '060603', '060604', '060605', '060606',
    '060701', '060702', '060705', '060706',
    '060800',
    '060900', '060901', '060902', '060903', '060904', '060905', '060906',
    '060907',
    '061000', '061001',
    '061100', '061101', '061102', '061103', '061104',
    '061201', '061202', '061203', '061204', '061205', '061206', '061207',
    '061208', '061209', '061211', '061212', '061213', '061214',
    '061300', '061301', '061302',
)


def main():
    """Enrich the store list with POI search results and save it.

    Steps:
      1. Read the "门店名称" and "地址" columns of ``data/combined.xlsx``.
      2. Add a "合并地址" column holding ``地址 + 门店名称`` for every row.
      3. Build a geocoder and a POI searcher, then run the POI search with
         the store names as keywords, passing the geocoder and the merged
         addresses along to ``infoExtract``.
      4. Convert ``extractor.result`` with ``fr.genStructuredData`` and copy
         every field in ``_POI_FIELDS`` into the frame as a new column.
      5. Write the frame to ``data/geo_code_basic.xlsx``.

    Relies on the module-level names ``fr``, ``ge`` and ``ps``.
    """
    ori_filename = "data/combined.xlsx"
    addr_df = fr.readAddressFile(ori_filename, ["门店名称", "地址"])
    # Row-wise concatenation is kept on purpose: a non-string cell raises
    # here instead of silently becoming a missing merged address.
    addr_df["合并地址"] = addr_df.apply(
        lambda row: row["地址"] + row["门店名称"], axis=1
    )

    # Placeholder: replace with a real AMap key before running.
    api_key = "Classified. You can obtain one on AMap Website as an Enterprise."

    # The geocoder is not called here directly; it is handed to infoExtract.
    # NOTE(review): presumably used there as a fallback / cross-check for the
    # keyword search -- confirm in POISearcher.infoExtract.
    geocoder = ge.GeoInfoExtractor(_GEOCODE_API_URL, api_key)

    extractor = ps.POISearcher(_POI_SEARCH_API_URL, api_key)
    extractor.typefilter = list(_POI_TYPE_FILTER)

    kw_list = list(addr_df["门店名称"])
    ct_list = ["洛阳"] * len(kw_list)  # one city entry per keyword
    types = "060000"
    top = 1
    extractor.infoExtract(
        kw_list, ct_list, types, top, geocoder, list(addr_df["合并地址"])
    )

    fields = list(_POI_FIELDS)
    structured_data = fr.genStructuredData(extractor.result, fields)
    for name in fields:
        addr_df[name] = list(structured_data[name])

    new_filename = 'data/geo_code_basic.xlsx'
    fr.writeAddressFile(addr_df, new_filename)