def save(text, time_now, file_name):
    """Parse a crawled JSON payload and append its points to a CSV file.

    The file is created with a ``count,wgs_lng,wgs_lat,time`` header row when
    it does not exist yet.  Every node in ``json.loads(text)["data"]`` then
    becomes one row: its count divided by the smallest count in the payload,
    its grid cell converted from GCJ-02 to WGS-84, and ``time_now``.

    Args:
        text: JSON string whose ``"data"`` entry holds nodes with ``count``,
            ``grid_x`` and ``grid_y`` keys.
        time_now: Timestamp text written in the last column of each row.
        file_name: Path of the CSV file to append to.

    Raises:
        CookieException: When processing the payload raises ``TypeError``
            (for example when ``"data"`` is not a list of nodes).
    """
    # Exclusive-create mode writes the header only when the file is new, so no
    # separate "does it exist?" read is needed.
    try:
        with open(file_name, 'x', encoding='utf-8') as f:
            f.write('count,wgs_lng,wgs_lat,time\n')
    except FileExistsError:
        pass

    # Parse before opening the output so a malformed payload never holds an
    # open append handle.
    node_list = json.loads(text)["data"]

    with open(file_name, "a", encoding="utf-8") as f:
        try:
            min_count = node_list[0]["count"]
            for node in node_list:
                min_count = min(node['count'], min_count)
            for node in node_list:
                count = node['count'] / min_count
                # The grid-to-coordinate formula can be found in the Easygo web
                # page's JS bundle:
                # http://c.easygo.qq.com/eg_toc/js/map-55f0ea7694.bundle.js
                gcj_lng = 1e-6 * (250.0 * node['grid_x'] + 125.0)
                gcj_lat = 1e-6 * (250.0 * node['grid_y'] + 125.0)
                lng, lat = transCoordinateSystem.gcj02_to_wgs84(
                    gcj_lng, gcj_lat)
                f.write(f"{count},{lng},{lat},{time_now}\n")
        except IndexError:
            # Empty "data": this area has no points.
            pass
        except TypeError as e:
            print(node_list)
            raise CookieException from e
def save(self, text, time_now, file_name):
    """Append the points of a crawled JSON payload to a CSV file.

    The file is created with a ``count,wgs_lng,wgs_lat,time`` header row when
    it does not exist yet.  When ``text`` is ``None`` nothing else is written.
    Otherwise each node in the payload's ``"data"`` becomes one row: its count
    divided by ``max_data / 40``, its grid cell converted from GCJ-02 to
    WGS-84, and ``time_now``.

    Args:
        text: JSON string with ``"data"`` and ``"max_data"`` entries, or
            ``None``.
        time_now: Timestamp text written in the last column of each row.
        file_name: Path of the CSV file to append to.

    Raises:
        CookieException: When processing the payload raises ``TypeError``.
    """
    # Exclusive-create mode writes the header only when the file is new.
    try:
        with open(file_name, 'x', encoding='utf-8') as f:
            f.write('count,wgs_lng,wgs_lat,time\n')
    except FileExistsError:
        pass

    if text is None:
        return

    # Decode the payload once and reuse it for both fields.
    payload = json.loads(text)
    node_list = payload["data"]

    with open(file_name, "a", encoding="utf-8") as f:
        try:
            scale = payload['max_data'] / 40
            for node in node_list:
                # The grid-to-coordinate formula can be found in the Easygo web
                # page's JS bundle:
                # http://c.easygo.qq.com/eg_toc/js/map-55f0ea7694.bundle.js
                count = node['count'] / scale
                gcj_lng = 1e-6 * (250.0 * node['grid_x'] + 125.0)
                gcj_lat = 1e-6 * (250.0 * node['grid_y'] + 125.0)
                lng, lat = transCoordinateSystem.gcj02_to_wgs84(
                    gcj_lng, gcj_lat)
                f.write(f"{count},{lng},{lat},{time_now}\n")
        except IndexError:
            # This area has no point information.
            pass
        except TypeError as e:
            print(node_list)
            raise CookieException from e
def parse_data(self, response):
    """Yield one ``EasygoItem`` per grid node contained in ``response``.

    The response body is decoded as JSON.  When its ``"data"`` is the empty
    string or its ``"code"`` is not ``0`` the body is printed and nothing is
    yielded.  Otherwise every node produces an item holding the WGS-84
    longitude/latitude of its grid cell, its count divided by the smallest
    count in the response, and ``self.time_now_str``.

    Args:
        response: Object providing ``body_as_unicode()`` and
            ``request.cookies['uin']``.

    Yields:
        EasygoItem: One item per node in the response.
    """
    text = response.body_as_unicode()
    # Decode the body once; both fields come from the same document.
    payload = json.loads(text)
    node_list = payload["data"]
    code = payload["code"]
    uin = response.request.cookies['uin']

    if node_list == "" or code != 0:
        print(uin + ' No Data')
        print(text)
        return

    print(uin + ' OK')
    try:
        min_count = node_list[0]["count"]
        for node in node_list:
            min_count = min(node['count'], min_count)
        for node in node_list:
            count = node['count'] / min_count
            # The grid-to-coordinate formula can be found in the Easygo web
            # page's JS bundle:
            # http://c.easygo.qq.com/eg_toc/js/map-55f0ea7694.bundle.js
            gcj_lng = 1e-6 * (250.0 * node['grid_x'] + 125.0)
            gcj_lat = 1e-6 * (250.0 * node['grid_y'] + 125.0)
            lng, lat = transCoordinateSystem.gcj02_to_wgs84(gcj_lng, gcj_lat)
            yield EasygoItem(lng=lng,
                             lat=lat,
                             count=count,
                             time=self.time_now_str)
    except IndexError:
        # Empty "data" list: nothing to emit.
        pass
def get_road_info(traffic_json):
    """Turn a traffic response into a list of GeoJSON-style road features.

    Args:
        traffic_json: Parsed response; roads are read from
            ``traffic_json['trafficinfo']['roads']``.  An empty container
            yields an empty list.

    Returns:
        list: One ``{"type": "Feature", ...}`` dict per road, whose geometry is
        a ``geojson.LineString`` of points converted with ``gcj02_to_wgs84``.
    """
    if len(traffic_json) == 0:
        return []

    features = []
    for road in traffic_json['trafficinfo']['roads']:
        # 'polyline' is a ';'-separated list of "lon,lat" pairs.
        wgs_points = []
        for pair in road['polyline'].split(';'):
            lon_text, lat_text = pair.split(',')
            wgs_points.append(
                gcj02_to_wgs84(float(lon_text), float(lat_text)))
        line = geojson.LineString(wgs_points)

        # Some road segments carry no 'speed' attribute.
        speed = road['speed'] if 'speed' in road else ''

        features.append({
            "type": "Feature",
            "properties": {
                'name': road['name'],
                'status': road['status'],
                'direction': road['direction'],
                'angle': road['angle'],
                'speed': speed
            },
            "geometry": line
        })
    return features
def write_to_xlsx(poilist, cityname, classfield):
    """Write a list of POI dicts to ``data/poi-<cityname>-<classfield>.xlsx``.

    ``coord`` is not a parameter; it is resolved from the enclosing/module
    scope.  When it equals ``2`` coordinates go through ``gcj02_to_wgs84``,
    when it equals ``3`` through ``gcj02_to_bd09``; otherwise the text values
    from ``location`` are written unchanged.

    Args:
        poilist: Iterable of POI dicts with ``name``, ``address``, ``pname``,
            ``cityname``, ``business_area``, ``type`` and ``location`` keys.
        cityname: City name used in the file name when ``poilist`` is empty.
        classfield: Category label used in the file name.
    """
    # A Workbook object is the in-memory equivalent of an Excel file.
    wb = Workbook()
    ws = wb.active

    column_letters = 'ABCDEFGH'
    titles = ('lon', 'lat', 'name', 'address', 'pname', 'cityname',
              'business_area', 'type')
    # First row: column titles.
    for letter, title in zip(column_letters, titles):
        ws['%s1' % letter] = title

    for row, poi in enumerate(poilist, start=2):
        name = poi['name']
        address = poi['address'] if poi['address'] else ''
        pname = poi['pname']
        # NOTE: this rebinds the ``cityname`` parameter, so the file name
        # below uses the city of the last POI written.
        cityname = poi['cityname']
        if poi['business_area']:
            business_area = json.dumps(poi['business_area'],
                                       ensure_ascii=False)
        else:
            business_area = ''
        poi_type = poi['type']
        lng = poi['location'].split(",")[0]
        lat = poi['location'].split(",")[1]

        if coord == 2:
            converted = gcj02_to_wgs84(float(lng), float(lat))
            lng, lat = converted[0], converted[1]
        elif coord == 3:
            converted = gcj02_to_bd09(float(lng), float(lat))
            lng, lat = converted[0], converted[1]

        # Write one worksheet row per POI.
        values = (lng, lat, name, address, pname, cityname, business_area,
                  poi_type)
        for letter, value in zip(column_letters, values):
            ws['%s%s' % (letter, row)] = value

    # Finally, save the workbook under <cwd>/data.
    folder = os.path.join(os.getcwd(), 'data')
    if not os.path.exists(folder):
        os.mkdir(folder)
    wb.save(os.path.join(folder, 'poi-%s-%s.xlsx' % (cityname, classfield)))
def write_to_excel(poilist, cityname, classfield, coord):
    """Write POI dicts to an ``.xls`` workbook and return its relative path.

    Args:
        poilist: Sequence of POI dicts with ``location``, ``name``,
            ``address``, ``pname``, ``cityname``, ``business_area`` and
            ``type`` keys.
        cityname: City name used for the file name when ``poilist`` is empty.
        classfield: Category label; used as the sheet name and in the file
            name.
        coord: ``"2"`` converts coordinates with ``gcj02_to_wgs84``, ``"3"``
            with ``gcj02_to_bd09``; any other value leaves them as text.

    Returns:
        str: ``"data/poi/<city pinyin>-<classfield pinyin>.xls"``, relative to
        the current working directory.
    """
    # A Workbook object is the in-memory equivalent of an Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(classfield, cell_overwrite_ok=True)

    # First row: column titles.
    titles = ('lon', 'lat', 'name', 'address', 'pname', 'cityname',
              'business_area', 'type')
    for col, title in enumerate(titles):
        sheet.write(0, col, title)

    for row, poi in enumerate(poilist, start=1):
        location = poi['location']
        name = poi['name']
        address = poi['address']
        pname = poi['pname']
        # NOTE: this rebinds the ``cityname`` parameter, so the file name
        # below uses the city of the last POI written.
        cityname = poi['cityname']
        business_area = poi['business_area']
        poi_type = poi['type']

        pieces = str(location).split(",")
        lng, lat = pieces[0], pieces[1]
        if coord == "2":
            converted = gcj02_to_wgs84(float(lng), float(lat))
            lng, lat = converted[0], converted[1]
        elif coord == "3":
            converted = gcj02_to_bd09(float(lng), float(lat))
            lng, lat = converted[0], converted[1]

        # Write one worksheet row per POI.
        values = (lng, lat, name, address, pname, cityname, business_area,
                  poi_type)
        for col, value in enumerate(values):
            sheet.write(row, col, value)

    # Finally, save the workbook; the file name is built from pinyin.
    pinyin = Pinyin()
    city_part = pinyin.get_pinyin(cityname)
    class_part = pinyin.get_pinyin(classfield)
    path = "data/poi/" + city_part + "-" + class_part + '.xls'
    book.save(r'' + os.getcwd() + "/" + path)
    return path
def write_to_csv(poilist, cityname, classfield):
    """Write POI dicts to ``data/poi-<city>-<class>/poi-<city>-<class>.csv``.

    ``coord`` is not a parameter; it is resolved from the enclosing/module
    scope.  When it equals ``2`` coordinates go through ``gcj02_to_wgs84``,
    when it equals ``3`` through ``gcj02_to_bd09``.

    Args:
        poilist: Sequence of POI dicts; fields are read with ``dict.get``.
        cityname: City name used in the output names when ``poilist`` is
            empty.
        classfield: Category label used in the output names.

    Returns:
        tuple: ``(folder_name_full, file_name)`` of the written CSV.
    """
    columns = {
        'lon': [],
        'lat': [],
        'name': [],
        'address': [],
        'pname': [],
        'cityname': [],
        'business_area': [],
        'type': [],
    }

    for poi in poilist:
        print('===================')
        print(poi)
        location = poi.get('location')
        name = poi.get('name')
        address = poi.get('address')
        pname = poi.get('pname')
        # NOTE: this rebinds the ``cityname`` parameter, so the output names
        # below use the city of the last POI processed.
        cityname = poi.get('cityname')
        business_area = poi.get('business_area')
        poi_type = poi.get('type')

        pieces = str(location).split(",")
        lng, lat = pieces[0], pieces[1]
        if coord == 2:
            converted = gcj02_to_wgs84(float(lng), float(lat))
            lng, lat = converted[0], converted[1]
        if coord == 3:
            converted = gcj02_to_bd09(float(lng), float(lat))
            lng, lat = converted[0], converted[1]

        # An empty list is stored as an empty string.
        if business_area == []:
            business_area = ''

        columns['lon'].append(lng)
        columns['lat'].append(lat)
        columns['name'].append(name)
        columns['address'].append(address)
        columns['pname'].append(pname)
        columns['cityname'].append(cityname)
        columns['business_area'].append(business_area)
        columns['type'].append(poi_type)

    df = pd.DataFrame(columns)

    base_name = 'poi-' + cityname + "-" + classfield
    folder_name_full = 'data' + os.sep + base_name + os.sep
    if os.path.exists(folder_name_full) is False:
        os.makedirs(folder_name_full)
    file_name = base_name + ".csv"
    df.to_csv(folder_name_full + file_name,
              index=False,
              encoding='utf_8_sig')
    return folder_name_full, file_name
def save(text, time_now, file_name):
    """Parse a crawled JSON payload and append its points to a text file.

    The file is created with a ``count,wgs_lng,wgs_lat,time`` header row when
    it does not exist yet.  Every node in ``json.loads(text)["data"]`` becomes
    one row: its count divided by the smallest count in the payload, its grid
    cell converted from GCJ-02 to WGS-84, and ``time_now``.  Each normalised
    count is also added to the module-level ``point_total``.

    Args:
        text: JSON string whose ``"data"`` entry holds nodes with ``count``,
            ``grid_x`` and ``grid_y`` keys.
        time_now: Timestamp string written in the last column of each row.
        file_name: Path of the file to append to.

    Raises:
        CookieException: When processing the payload raises ``TypeError``; a
            diagnostic line is printed and appended to the file first.
    """
    global point_total

    # Exclusive-create mode writes the header only when the file is new.  The
    # ``with`` block closes the handle, so no explicit close() is needed (and
    # none can hit an unbound name when open() itself fails).
    try:
        with open(file_name, mode='x') as f:
            f.write('count,wgs_lng,wgs_lat,time\n')
    except FileExistsError:
        pass

    # Append the data rows.
    with open(file_name, mode="a") as f:
        node_list = json.loads(text)["data"]
        try:
            min_count = node_list[0]["count"]
            for node in node_list:
                min_count = min(node['count'], min_count)
            for node in node_list:
                node['count'] = node['count'] / min_count
                # The grid-to-coordinate formula can be found in the Easygo web
                # page's JS bundle:
                # http://c.easygo.qq.com/eg_toc/js/map-55f0ea7694.bundle.js
                gcj_lng = 1e-6 * (250.0 * node['grid_x'] + 125.0)
                gcj_lat = 1e-6 * (250.0 * node['grid_y'] + 125.0)
                lng, lat = transCoordinateSystem.gcj02_to_wgs84(
                    gcj_lng, gcj_lat)
                point_total += node['count']
                f.write(
                    str(node['count']) + "," + str(lng) + "," + str(lat) +
                    "," + time_now + "\n")
        except IndexError:
            # Empty "data": this area has no point information.
            pass
        except TypeError as e:
            message = "saveProc TypeError:(text=%s, node=%s) " % (str(text),
                                                                  node_list)
            print(message)
            f.write(message)
            # Login page referenced by the original author:
            # http://ui.ptlogin2.qq.com/cgi-bin/login?appid=1600000601&style=9&s_url=http%3A%2F%2Fc.easygo.qq.com%2Feg_toc%2Fmap.html
            # Original author's note: when the same QQ account logs in too
            # often within one day the service reports that the user has made
            # too many requests and should retry tomorrow.
            raise CookieException from e
def write_to_excel(poilist, cityname, classfield):
    """Write POI dicts to ``data/poi-<cityname>-<classfield>.xls``.

    ``coord`` is not a parameter; it is resolved from the enclosing/module
    scope.  When it equals ``2`` coordinates go through ``gcj02_to_wgs84``,
    when it equals ``3`` through ``gcj02_to_bd09``.

    Args:
        poilist: Sequence of POI dicts; fields are read with ``dict.get``.
        cityname: City name used in the file name when ``poilist`` is empty.
        classfield: Category label; used as the sheet name and in the file
            name.
    """
    # A Workbook object is the in-memory equivalent of an Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(classfield, cell_overwrite_ok=True)

    # First row: column titles.
    titles = ('lon', 'lat', 'name', 'address', 'pname', 'cityname',
              'business_area', 'type')
    for col, title in enumerate(titles):
        sheet.write(0, col, title)

    for row, poi in enumerate(poilist, start=1):
        location = poi.get('location')
        name = poi.get('name')
        address = poi.get('address')
        pname = poi.get('pname')
        # NOTE: this rebinds the ``cityname`` parameter, so the file name
        # below uses the city of the last POI written.
        cityname = poi.get('cityname')
        business_area = poi.get('business_area')
        poi_type = poi.get('type')

        pieces = str(location).split(",")
        lng, lat = pieces[0], pieces[1]
        if coord == 2:
            converted = gcj02_to_wgs84(float(lng), float(lat))
            lng, lat = converted[0], converted[1]
        if coord == 3:
            converted = gcj02_to_bd09(float(lng), float(lat))
            lng, lat = converted[0], converted[1]

        # Write one worksheet row per POI.
        values = (lng, lat, name, address, pname, cityname, business_area,
                  poi_type)
        for col, value in enumerate(values):
            sheet.write(row, col, value)

    # Finally, save the workbook to the target Excel file.
    target = r'data' + os.sep + 'poi-' + cityname + "-" + classfield + ".xls"
    book.save(target)
def call_back_fun(data_object):
    """Flatten a POI search response into a list of rows.

    Args:
        data_object: Parsed response dict with ``'count'`` and ``'pois'``
            entries; each POI is a dict whose fields are all optional.

    Returns:
        list | bool: One row per POI, laid out as ``[pname, cityname, adname,
        name, type, typecode, longitude, latitude, address]``.  Missing text
        fields become ``""`` and a missing location becomes ``0, 0``.
        ``False`` is returned when anything in the response cannot be
        processed.
    """
    data_list = []
    try:
        # Lookup kept on purpose: a response without 'count' is treated as
        # malformed and reported below.
        data_object['count']
        for one_poi in data_object['pois']:
            longitude = 0
            latitude = 0
            if "location" in one_poi:
                longitude_t, latitude_t = one_poi['location'].split(',')
                # SECURITY: the coordinate text comes from an external
                # response, so it is parsed with float() instead of eval(),
                # which would execute arbitrary expressions.
                longitude, latitude = gcj02_to_wgs84(float(longitude_t),
                                                     float(latitude_t))
            data_list.append([
                one_poi.get('pname', ""),
                one_poi.get('cityname', ""),
                one_poi.get('adname', ""),
                one_poi.get('name', ""),
                one_poi.get('type', ""),
                one_poi.get('typecode', ""),
                longitude,
                latitude,
                one_poi.get('address', ""),
            ])
    except Exception:
        # Deliberate best-effort: any failure invalidates the whole batch.
        print('json文本解析出现异常 --->>> {0}'.format(data_object))
        return False
    return data_list
def get_border(key, keywords):
    """Fetch a district outline and return it as a GeoJSON-style feature.

    Args:
        key: Forwarded as the first argument of ``get_resjson``.
        keywords: Forwarded as the second argument of ``get_resjson``.

    Returns:
        dict: ``{}`` when the first district of the response has no
        ``'polyline'``; otherwise a ``Feature`` dict whose geometry is a
        ``geojson.MultiLineString`` of points converted with
        ``gcj02_to_wgs84``.
    """
    response = get_resjson(key, keywords, 0, 'all')
    district = response["districts"][0]
    if 'polyline' not in district:
        return {}

    # 'polyline' holds '|'-separated rings, each a ';'-separated list of
    # "lon,lat" pairs.
    rings = [
        [
            gcj02_to_wgs84(float(lon), float(lat))
            for lon, lat in (pair.split(',') for pair in ring.split(';'))
        ]
        for ring in district['polyline'].split('|')
    ]

    # The polygon topology is unknown, so plain lines are used.
    geometry = geojson.MultiLineString(rings)
    properties = {
        field: district[field]
        for field in ('citycode', 'adcode', 'name', 'level')
    }
    return {"type": "Feature", "properties": properties, "geometry": geometry}
def write_to_geojson(poilist, filename='poi_geo'):
    """Write POIs as a GeoJSON ``FeatureCollection`` to ``<filename>.geojson``.

    POIs without a ``'location'`` key are skipped.  Coordinates are converted
    with ``gcj02_to_wgs84`` before being stored as ``geojson.Point``.

    Args:
        poilist: Sequence of POI dicts with ``id``, ``name``, ``address``,
            ``adname``, ``typecode`` and ``type`` keys and an optional
            ``location`` of the form ``"lon,lat"``.
        filename: Output path without the ``.geojson`` extension.
    """
    features = []
    for poi in poilist:
        if 'location' not in poi:
            continue
        lon, lat = str(poi['location']).split(",")
        corrected = gcj02_to_wgs84(float(lon), float(lat))  # coordinate fix
        geometry = geojson.Point((corrected[0], corrected[1]))
        properties = {
            'id': poi['id'],
            'name': poi['name'],
            'address': poi['address'],
            'adname': poi['adname'],
            'typecode': poi['typecode'],
            'type': poi['type']
        }
        features.append({
            "type": "Feature",
            "properties": properties,
            "geometry": geometry
        })

    collection = {"type": "FeatureCollection", "features": features}
    # The context manager closes the file even if serialization or the write
    # raises.
    with open(filename + ".geojson", 'w') as res_file:
        res_file.write(geojson.dumps(collection) + '\n')
def write_to_excel(poilist, citycode, classfield, coord):
    """Write the POIs belonging to ``citycode`` to ``data/poi/<classfield>.xls``.

    A POI is kept only when the first three characters of its ``adcode``
    equal the first three characters of ``citycode``.

    Args:
        poilist: Sequence of POI dicts with ``location``, ``name``,
            ``address``, ``pname``, ``cityname``, ``business_area``, ``type``,
            ``id``, ``adcode`` and ``adname`` keys.
        citycode: Code of the wanted city, compared by prefix with each POI's
            ``adcode``.
        classfield: Category label used as the file name.
        coord: ``"2"`` converts coordinates with ``gcj02_to_wgs84``, ``"3"``
            with ``gcj02_to_bd09``; any other value leaves them as text.

    Returns:
        str | None: Absolute path of the saved workbook, or ``None`` when
        ``poilist`` is empty (nothing is written in that case).
    """
    if len(poilist) == 0:
        return

    # A Workbook object is the in-memory equivalent of an Excel file.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet("sheet1", cell_overwrite_ok=True)

    # First row: column titles.
    titles = ('lon', 'lat', 'name', 'address', 'pname', 'cityname', 'adcode',
              'adname', 'business_area', 'type', 'id')
    for col, title in enumerate(titles):
        sheet.write(0, col, title)

    row = 1
    for poi in poilist:
        location = poi['location']
        name = poi['name']
        address = poi['address']
        pname = poi['pname']
        cityname = poi['cityname']
        business_area = poi['business_area']
        poi_type = poi['type']
        poi_id = poi['id']
        adcode = poi['adcode']
        adname = poi['adname']

        # Keep only POIs of the requested city, judged by the first three
        # characters of the administrative code.
        if adcode[:3] != citycode[:3]:
            continue

        pieces = str(location).split(",")
        lng, lat = pieces[0], pieces[1]
        if coord == "2":
            converted = gcj02_to_wgs84(float(lng), float(lat))
            lng, lat = converted[0], converted[1]
        elif coord == "3":
            converted = gcj02_to_bd09(float(lng), float(lat))
            lng, lat = converted[0], converted[1]

        # Write one worksheet row per kept POI.
        values = (lng, lat, name, address, pname, cityname, adcode, adname,
                  business_area, poi_type, poi_id)
        for col, value in enumerate(values):
            sheet.write(row, col, value)
        row += 1

    # Finally, save the workbook.  makedirs creates the missing parent
    # ("data") as well, which a plain mkdir on "data/poi" cannot do.
    data_path = os.getcwd() + os.sep + "data" + os.sep + "poi" + os.sep
    os.makedirs(data_path, exist_ok=True)
    path = data_path + str(classfield) + '.xls'
    book.save(r'' + path)
    print('写入成功')
    return path
def transfer(orgcoord, targetcoord, filename):
    """Convert the coordinates of a spreadsheet and save an annotated copy.

    The first two columns of the first sheet are taken as longitude and
    latitude, and row 0 as the header.  The converted values are written
    beside the existing data in two new columns titled ``lon-new`` and
    ``lat-new``, each on the same row as the coordinates it was computed
    from.

    Coordinate-system codes, as implied by the converters called:
    ``"1"`` GCJ-02, ``"2"`` WGS-84, ``"3"`` BD-09.  Equal or unrecognised
    codes copy the values unchanged.

    Args:
        orgcoord: Code of the coordinate system the file is in.
        targetcoord: Code of the coordinate system to convert to.
        filename: Path of the source ``.xls`` workbook.

    Returns:
        str: ``"upload/<stem>-new.xls"``, relative to the current working
        directory, where the copy was saved.
    """
    workbook = xlrd.open_workbook(filename)
    new_workbook = copy(workbook)
    new_worksheet = new_workbook.get_sheet(0)
    sheet = workbook.sheets()[0]

    # Pick the converter once; only the selected name is resolved.
    convert = None
    if orgcoord == "1":
        if targetcoord == "2":
            convert = gcj02_to_wgs84
        elif targetcoord == "3":
            convert = gcj02_to_bd09
    elif orgcoord == "2":
        if targetcoord == "1":
            convert = wgs84_to_gcj02
        elif targetcoord == "3":
            convert = wgs84_to_bd09
    elif orgcoord == "3":
        if targetcoord == "1":
            convert = bd09_to_gcj02
        elif targetcoord == "2":
            convert = bd09_to_wgs84

    lon_col = sheet.ncols + 1
    lat_col = sheet.ncols + 2

    # Header cells go on row 0, next to the source header row.
    new_worksheet.write(0, lon_col, 'lon-new')
    new_worksheet.write(0, lat_col, 'lat-new')

    for i in range(1, sheet.nrows):
        lon, lat = sheet.cell_value(i, 0), sheet.cell_value(i, 1)
        if convert is not None:
            result = convert(float(lon), float(lat))
            lon, lat = result[0], result[1]
        # Write on row i so each converted pair stays aligned with its source.
        new_worksheet.write(i, lon_col, lon)
        new_worksheet.write(i, lat_col, lat)

    new_file_name = "upload/" + str(filename).split("/")[-1].split(
        ".")[0] + "-new" + ".xls"
    new_file_path = os.path.abspath(os.getcwd()) + "/" + new_file_name
    new_workbook.save(new_file_path)  # save the workbook
    return new_file_name
def write_to_csv(poilist, provincename, classfield):
    """Write POI dicts to ``data/<province pinyin>/poi-<pinyin>-<class>.csv``.

    ``coord`` is not a parameter; it is resolved from the enclosing/module
    scope.  When it equals ``2`` coordinates go through ``gcj02_to_wgs84``,
    when it equals ``3`` through ``gcj02_to_bd09``.  The ``type`` text is
    split on ``';'`` and its first four parts are stored in ``type1`` ..
    ``type4``.

    Args:
        poilist: Sequence of POI dicts with ``location``, ``name``,
            ``address``, ``pname``, ``business_area``, ``cityname``,
            ``adname``, ``type``, ``typecode`` and ``id`` keys.
        provincename: Province name; its pinyin is used in the output names.
        classfield: Category label used in the file name.

    Returns:
        tuple: ``(folder_name_full, file_name)`` of the written CSV.
    """
    columns = {
        'lon': [],
        'lat': [],
        'name': [],
        'address': [],
        'pname': [],
        'business_area': [],
        'cityname': [],
        'adname': [],
        'type': [],
        'typecode': [],
        'type1': [],
        'type2': [],
        'type3': [],
        'type4': [],
        'id': [],
    }

    for poi in poilist:
        location = poi['location']
        name = poi['name']
        address = poi['address']
        pname = poi['pname']
        business_area = poi['business_area']
        cityname = poi['cityname']
        adname = poi['adname']
        poi_type = poi['type']
        typecode = poi['typecode']
        pieces = str(location).split(",")
        lng, lat = pieces[0], pieces[1]
        poi_id = poi['id']

        # The type is stored as text; up to four ';'-separated levels are
        # spread over type1..type4, missing levels stay ''.
        poi_type = str(poi_type)
        type_1, type_2, type_3, type_4 = (poi_type.split(';') + [''] * 4)[:4]

        if coord == 2:
            converted = gcj02_to_wgs84(float(lng), float(lat))
            lng, lat = converted[0], converted[1]
        if coord == 3:
            converted = gcj02_to_bd09(float(lng), float(lat))
            lng, lat = converted[0], converted[1]

        # An empty list is stored as an empty string.
        if business_area == []:
            business_area = ''

        row = {
            'lon': lng,
            'lat': lat,
            'name': name,
            'address': address,
            'pname': pname,
            'business_area': business_area,
            'cityname': cityname,
            'adname': adname,
            'type': poi_type,
            'typecode': typecode,
            'type1': type_1,
            'type2': type_2,
            'type3': type_3,
            'type4': type_4,
            'id': poi_id,
        }
        for column, value in row.items():
            columns[column].append(value)

    pin = Pinyin()
    provincename_pinyin = pin.get_pinyin(provincename)  # default separator is '-'
    df = pd.DataFrame(columns)

    folder_name_full = 'data' + os.sep + provincename_pinyin + os.sep
    if os.path.exists(folder_name_full) is False:
        os.makedirs(folder_name_full)
    file_name = 'poi-' + provincename_pinyin + "-" + classfield + ".csv"
    df.to_csv(folder_name_full + file_name,
              index=False,
              encoding='utf_8_sig')
    print('写入地址:', folder_name_full, file_name)
    return folder_name_full, file_name
def write_to_csv(poilist, citycode, classfield, coord):
    """Write POI dicts to ``data/poi-<citycode>-<class>/poi-<citycode>-<class>.csv``.

    Fields are read with ``dict.get``, so missing keys become empty cells.
    The ``type`` text is split on ``';'`` and its first four parts are stored
    in ``type1`` .. ``type4``; a POI whose ``type`` is missing or not a string
    gets four empty parts instead of raising.

    Args:
        poilist: Sequence of POI dicts.
        citycode: City code (string) used in the output names.
        classfield: Category label used in the output names.
        coord: ``2`` converts coordinates with ``gcj02_to_wgs84``, ``3`` with
            ``gcj02_to_bd09``; any other value leaves them as text.

    Returns:
        tuple: ``(folder_name_full, file_name)`` of the written CSV, or
        ``(None, None)`` when ``poilist`` is empty.
    """
    if len(poilist) == 0:
        print("处理完成,当前citycode:" + str(citycode), ", classfield为:",
              str(classfield) + ",数据为空,,,结束.......")
        return None, None

    columns = {
        'lon': [],
        'lat': [],
        'name': [],
        'address': [],
        'pname': [],
        'cityname': [],
        'business_area': [],
        'type': [],
        'typecode': [],
        'id': [],
        'type1': [],
        'type2': [],
        'type3': [],
        'type4': [],
    }

    for poi in poilist:
        pieces = str(poi.get('location')).split(",")
        lng, lat = pieces[0], pieces[1]
        if coord == 2:
            converted = gcj02_to_wgs84(float(lng), float(lat))
            lng, lat = converted[0], converted[1]
        elif coord == 3:
            converted = gcj02_to_bd09(float(lng), float(lat))
            lng, lat = converted[0], converted[1]

        # Only real strings can be split; a missing type leaves the four
        # level columns empty.
        poi_type = poi.get('type')
        levels = poi_type.split(';') if isinstance(poi_type, str) else []
        type_1, type_2, type_3, type_4 = (levels + [''] * 4)[:4]

        # An empty list is stored as an empty string.
        business_area = poi.get('business_area')
        if business_area == []:
            business_area = ''

        row = {
            'lon': lng,
            'lat': lat,
            'name': poi.get('name'),
            'address': poi.get('address'),
            'pname': poi.get('pname'),
            'cityname': poi.get('cityname'),
            'business_area': business_area,
            'type': poi_type,
            'typecode': poi.get('typecode'),
            'id': poi.get('id'),
            'type1': type_1,
            'type2': type_2,
            'type3': type_3,
            'type4': type_4,
        }
        for column, value in row.items():
            columns[column].append(value)

    df = pd.DataFrame(columns)

    base_name = 'poi-' + citycode + "-" + classfield
    folder_name_full = 'data' + os.sep + base_name + os.sep
    os.makedirs(folder_name_full, exist_ok=True)
    file_name = base_name + ".csv"
    df.to_csv(folder_name_full + file_name,
              index=False,
              encoding='utf_8_sig')
    print('写入成功')
    return folder_name_full, file_name