def spider(startIndex, endIndex): sqlparam = " and ptype = '" + poi_type + "'" sqlstart = " and tt.rowno >=" + startIndex sqlend = " and rownum <=" + endIndex print("查询" + city + "的" + poi_type + "数据") #查询对应city的grid1600数据 ora = cxOracle(dbname, dbpass, dbaddr) # 查询栅格表 rs = ora.Query( "select tt.* from (" "select t.*,rownum as rowno from(" "SELECT city, gridid, ptype, id, name FROM PY_POI WHERE CITY ='" + city_id + "' " + sqlparam + " " "AND ID NOT IN (SELECT ID FROM PY_POI_POINT WHERE CITY ='" + city_id + "' " + sqlparam + ") AND (ISSEL IS NULL or ISBONS ='1')" "ORDER BY GRIDID ASC)t where 1=1 " + sqlend + ")tt where 1=1 " + sqlstart) proxy_ip = get_proxy() if proxy_ip == None: raise NameError('代理池返回ip为空') for f in rs: print("GRID=" + str(f[1]) + ",ID=" + str(f[3])) OBJECTID = str(f[1]) id = str(f[3]) while True: # 使用while循环不断获取数据 datas = "" area = 0 try: bouns = getBounById2(id, proxy_ip) #bouns = getBounById(id) print("bouns=" + str(bouns)) for bound in bouns: if bound != None: lon = bound[0] lat = bound[1] area = bound[2] datas += str(lon) + "," + str(lat) + ";" print("area=" + str(area) + ",datas=" + str(datas)) print("ComputeArea=" + str(ComputeArea(datas[:-1]))) if abs(area - ComputeArea(datas[:-1])) < 2000: for bound in bouns: if bound != None: lon = bound[0] lat = bound[1] lon1, lat1 = gcj02towgs84(float(lon), float(lat)) sql2 = "insert into py_poi_point values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "'," + str( lon) + "," + str(lat) + "," + str( lon1) + "," + str(lat1) + ")" ora.Exec(sql2) break time.sleep(random.randint(1, 3)) except Exception as e: e = str(e).replace('\'', '`') print(e) sql3 = "insert into PY_POI_EXCEPTION values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "','" + e + "',sysdate)" ora.Exec(sql3) # 换ip proxy_ip = get_proxy() if proxy_ip == None: raise NameError('代理池返回ip为空') # 更新poi字段为已查询 sql4 = "" if area == 0: sql4 = "UPDATE PY_POI SET ISSEL = '1',ISBONS = '0' WHERE ID = '" + id + "' " else: sql4 = "UPDATE PY_POI SET ISSEL = '1',ISBONS = '1' WHERE ID = '" + id + "' " ora.Exec(sql4) time.sleep(random.randint(1, 3)) print(city + ":" + poi_type + "的" + startIndex + "-" + endIndex + "的数据爬取完成")
datas = "" area = 0 try: bouns = getBounById2(id, proxy_ip) for bound in bouns: if bound != None: lon = bound[0] lat = bound[1] area = bound[2] datas += str(lon) + "," + str(lat) + ";" if abs(area - ComputeArea(datas[:-1])) < 1: for bound in bouns: if bound != None: lon = bound[0] lat = bound[1] lon, lat = gcj02towgs84(float(lon), float(lat)) sql2 = "insert into py_poi_point values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "'," + str( lon) + "," + str(lat) + ")" ora.Exec(sql2) break except Exception as e: e = str(e).replace('\'', '`') print(e) sql3 = "insert into PY_POI_EXCEPTION values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "','" + e + "',sysdate)" ora.Exec(sql3) # 换ip proxy_ip = get_proxy() if proxy_ip == None: raise NameError('代理池返回ip为空') # 更新poi字段为已查询 sql4 = "UPDATE PY_POI SET ISSEL = '1' WHERE ID = '" + id + "' "
if pois != None: id = pois[i]['id'] name = pois[i]['name'] center_x, center_y = pois[i]['location'].split(",") pname = pois[i]['pname'] pcode = pois[i]['pcode'] cityname = pois[i]['cityname'] citycode = pois[i]['citycode'] adname = pois[i]['adname'] adcode = pois[i]['adcode'] address = pois[i]['address'] if len(address) == 0: address = "" type = pois[i]['type'] # 插入数据 center_x, center_y = gcj02towgs84(float(center_x), float(center_y)) sql = "insert into py_poi values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "','" + name + "'," + str( center_x ) + "," + str( center_y ) + ",'" + pname + "','" + pcode + "','" + cityname + "','" + citycode + "','" + adname + "','" + adcode + "','" + address + "','" + type + "') " ora.Exec(sql) # 查找边界并插入 try: bouns = getBounById2(id) for bound in bouns: if bound != None: lon = bound[0] lat = bound[1] lon, lat = gcj02towgs84(float(lon), float(lat)) sql2 = "insert into py_poi_point values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "'," + str(