def test_geo_getitem(self):
    """Check which column look-ups on a GeoDataFrame come back as GeoSeries."""
    data = {
        "A": range(5),
        "B": range(-5, 0),
        "location": [Point(i, i) for i in range(5)],
    }
    df = GeoDataFrame(data, crs=self.crs, geometry="location")
    assert isinstance(df.geometry, GeoSeries)

    # Overwriting a column that merely happens to be called "geometry" must
    # not disturb the active geometry column ("location").
    df["geometry"] = df["A"]
    assert isinstance(df.geometry, GeoSeries)
    first_location = data["location"][0]
    assert df.geometry[0] == first_location
    # good if this changed in the future
    assert not isinstance(df["geometry"], GeoSeries)
    assert isinstance(df["location"], GeoSeries)

    # Geometry-valued columns added after construction.
    df["buff"] = df.buffer(1)
    assert isinstance(df["buff"], GeoSeries)

    df["array"] = from_shapely([Point(i, i) for i in range(5)])
    assert isinstance(df["array"], GeoSeries)

    # With a "geometry" key in the data, that column becomes the active
    # geometry and "location" is treated as a plain column.
    data["geometry"] = [Point(i + 1, i - 1) for i in range(5)]
    df = GeoDataFrame(data, crs=self.crs)
    assert isinstance(df.geometry, GeoSeries)
    assert isinstance(df["geometry"], GeoSeries)
    # good if this changed in the future
    assert not isinstance(df["location"], GeoSeries)
def calc_surrounding_area(zone_gdf, buffer_m):
    """Build the ring between a zone's overall convex hull and its buffer.

    :param zone_gdf: geo dataframe whose geometries describe the zone.
    :param buffer_m: distance passed to ``buffer`` when growing the hull.
    :return: tuple ``(area, geometry_merged_final)`` where ``area`` is the
        symmetric difference between the merged hull and its buffered
        version, and ``geometry_merged_final`` is the merged hull itself.
    """
    # Take the convex hull of every geometry, then dissolve all hulls into
    # one feature by tagging them with a single shared class.
    hulls_gdf = Gdf(geometry=zone_gdf.convex_hull.values)
    hulls_gdf["one_class"] = "buildings"
    dissolved = hulls_gdf.dissolve(by='one_class', aggfunc='sum')

    # Hull of the dissolved feature, and the same hull grown by buffer_m.
    geometry_merged_final = Gdf(geometry=dissolved.convex_hull)
    grown = Gdf(geometry=geometry_merged_final.buffer(buffer_m))

    # NOTE: an alternative based on a concave hull (alpha_shape with
    # alpha=0.1 over the exterior nodes of every polygon) was tried here and
    # was marked as not functional, so the convex hull is used instead.
    area = overlay(geometry_merged_final, grown, how='symmetric_difference')
    return area, geometry_merged_final
def by_buffer(
        ref: gpd.GeoDataFrame,
        add: gpd.GeoDataFrame,
        name: str,
        buffer: float,
        sum_cols: list = None) -> gpd.GeoDataFrame:
    """
    Merge a reference and additional geo dataframe by summarising features
    of additional within a buffer of points in reference

    Parameters
    ----------
    ref : gpd.GeoDataFrame
        Reference spatial dataset - usually address base or gazetteer.
        Must contain a ``uprn`` column, which is used as the join key.
    add : gpd.GeoDataFrame
        New spatial dataset to add. Every column except ``geometry`` is
        aggregated.
    name : str
        Name to append to add columns.
    buffer : float
        Buffer around points in reference to summarise add in.
    sum_cols : list, optional
        What summary variables to produce.
        The default (``None``) means
        ``['median', 'mean', 'max', 'min', 'sum']``.

    Returns
    -------
    df : gpd.GeoDataFrame
        ref with a summary of add left joined. Summary columns are named
        ``<column>_<statistic>_<name>``.

    """
    # A list default in the signature would be shared between calls, so the
    # default is materialised here instead.
    if sum_cols is None:
        sum_cols = ['median', 'mean', 'max', 'min', 'sum']

    add_agg = {col: sum_cols for col in add.columns if col != 'geometry'}

    # Replace each reference geometry by its buffer, then attach every
    # feature of `add` that intersects it.
    ref_buff = ref[['uprn', 'geometry']].assign(geometry=ref.buffer(buffer))
    ref_join = gpd.sjoin(ref_buff, add, how='left').drop(
        columns=['index_right', 'geometry'])

    ref_sum = ref_join.groupby("uprn").agg(add_agg).reset_index()

    # After reset_index the columns are (column, statistic) pairs, with the
    # key appearing as ("uprn", ""). Build the flat names directly instead
    # of patching strings afterwards: the previous regex-based clean-up
    # depended on the pandas default for `regex`, and its substring
    # replacement could also rewrite unrelated column names.
    ref_sum.columns = [
        "uprn" if (col, stat) == ("uprn", "") else f"{col}_{stat}_{name}"
        for col, stat in ref_sum.columns
    ]

    # Explicit key and join type, matching the documented left join.
    return ref.merge(ref_sum, on="uprn", how="left")
def _transform_and_buffer(self, aoi: GeoDataFrame) -> GeoDataFrame:
    """Reproject the aoi to ``self.crs`` and optionally buffer it.

    In "train" mode the buffer is negative: minus half the chip size times
    the resolution, rounded down with ``floor``, so chips always fall
    completely in the aoi. In any other mode it is positive and equal to
    half the chip size times the resolution times sqrt(2), i.e. the distance
    from the center of a chip to any corner, ensuring that the full aoi is
    covered by all chips in a grid.

    Returns ``None`` when ``aoi`` is ``None``, the reprojected aoi when
    ``self.buffer_aoi`` is falsy, and otherwise a new GeoDataFrame holding
    only the buffered geometries.
    """
    if aoi is None:
        return None

    aoi = aoi.to_crs(self.crs)
    if not self.buffer_aoi:
        return aoi

    half_chip = (self.feature_chip_size / 2) * self.res
    if self.mode == "train":
        distance = floor(-1 * half_chip)
    else:
        distance = half_chip * sqrt(2)

    return gpd.GeoDataFrame(geometry=aoi.buffer(distance))
def clip_to_polygon(in_path: str,
                    bounding_gdf: gpd.GeoDataFrame,
                    buffer=None,
                    out_path=None):
    """Clip a raster to a polygon, write the result, and return it opened.

    Arguments:
        in_path {str} -- Path of original raster
        bounding_gdf {gpd.GeoDataFrame} -- gdf where the first line is the
            bounding geometry

    Keyword Arguments:
        buffer -- if truthy, distance to buffer the bounding geometry by
            (mitred joins) before clipping (default: {None})
        out_path -- where to write the clipped raster; when None the part of
            in_path before ".tif" plus "_clipped.tif" is used
            (default: {None})

    Returns:
        An open rasterio dataset for the clipped file. The caller is
        responsible for closing it.
    """
    if buffer:
        bounding_gdf = bounding_gdf.buffer(buffer, join_style=2)

    # rasterio wants GeoJSON-like geometry dicts; only the first feature of
    # the (possibly buffered) bounding data is used.
    first_feature = json.loads(bounding_gdf.to_json())['features'][0]
    shapes = [first_feature['geometry']]

    with rasterio.open(in_path) as src:
        out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True)
        out_meta = src.meta

    # out_image is indexed (band, row, column).
    clipped_profile = {
        "driver": "GTiff",
        "height": out_image.shape[1],
        "width": out_image.shape[2],
        "transform": out_transform,
    }
    out_meta.update(clipped_profile)

    if out_path is None:
        stem = in_path.split(".tif")[0]
        out_path = stem + "_clipped.tif"

    with rasterio.open(out_path, "w", **out_meta) as dest:
        dest.write(out_image)

    return rasterio.open(out_path)
fill_color="black", fill_opacity=1).add_to(my_map) my_map.save("matched_route.html") ###################################################### # build a geodataframe with VIASAT data geometry = [Point(xy) for xy in zip(viasat.longitude, viasat.latitude)] # viasat = viasat.drop(['longitude', 'latitude'], axis=1) crs = {'init': 'epsg:4326'} viasat_gdf = GeoDataFrame(viasat, crs=crs, geometry=geometry) # viasat_gdf.plot() # Buffer the points by some units (unit is kilometer) buffer = viasat_gdf.buffer(0.00025) #50 meters # this is a geoseries # buffer.plot() # make a dataframe buffer_viasat = pd.DataFrame(buffer) buffer_viasat.columns = ['geometry'] type(buffer_viasat) # transform a geoseries into a geodataframe # https://gis.stackexchange.com/questions/266098/how-to-convert-a-geoserie-to-a-geodataframe-with-geopandas ## circumscript the area of the track (buffer zone) # union = buffer.unary_union # envelope = union.envelope # rectangle_viasat = gpd.GeoDataFrame(geometry=gpd.GeoSeries(envelope)) # rectangle_viasat.plot() # geodataframe with edges
def transform_islands(islands: GeoDataFrame) -> GeoDataFrame:
    """Return a cleaned, FID-indexed copy of the islands layer.

    The geometries are passed through ``buffer(0)``, only the ``Id`` and
    ``geometry`` columns are kept, and a ``FID`` column equal to ``Id - 1``
    is added and used as the index (it also stays available as a column).

    The input frame is left untouched: all work happens on a copy, which
    also avoids assigning a column on a slice of the caller's data.

    :param islands: frame with an ``Id`` column and a ``geometry`` column.
    :return: new GeoDataFrame with columns ``Id``, ``geometry`` and ``FID``,
        indexed by ``FID``.
    :raises ValueError: if the resulting ``FID`` values are not unique
        (``verify_integrity=True``).
    """
    cleaned = islands.copy()
    cleaned.geometry = cleaned.buffer(0)

    # Explicit copy so that adding FID below works on an independent frame.
    cleaned = cleaned[['Id', 'geometry']].copy()
    cleaned['FID'] = cleaned['Id'] - 1
    return cleaned.set_index('FID', drop=False, verify_integrity=True)
def request_netatmo(client_ID: str,
                    client_secret: str,
                    pw: str,
                    username: str,
                    bounding_gdf: gpd.GeoDataFrame,
                    areal_buffer: float,
                    output_crs: str = "EPSG:32632",
                    verbose=False):
    """Gets precipitation stations through netatmos api

    Arguments:
        client_ID {str} -- client id sent with the token request
        client_secret {str} -- client secret sent with the token request
        pw {str} -- password
        username {str} -- username
        bounding_gdf {gpd.GeoDataFrame} -- gdf where first line has bounding
            geometry; it must have a CRS because it is reprojected
        areal_buffer {float} -- distance the bounding geometry is buffered
            by before its bounding box is requested, expressed in the units
            of bounding_gdf's CRS (the verbose message assumes metres)

    Keyword Arguments:
        output_crs {str} -- Output coordinate reference system
            (default: {'EPSG:32632'})
        verbose {bool} -- print progress messages (default: {False})

    Raises:
        Exception: the token request did not return status 200
        Exception: the data request did not return status 200

    Returns:
        gpd.GeoDataFrame -- Georeferenced dataframe of the stations located
            in Norway ("NO") and within the first bounding geometry,
            expressed in output_crs
    """
    # Authentication
    auth_params = {
        "client_id": client_ID,
        "client_secret": client_secret,
        "grant_type": "password",
        "username": username,
        "password": pw,
        "scope": "read_station",
    }
    auth_endpoint = "https://api.netatmo.com/oauth2/token"
    auth = requests.post(auth_endpoint, auth_params)
    # Check the status before decoding: a failed request is not guaranteed
    # to carry a JSON body, and decoding first would hide the real error.
    if auth.status_code != 200:
        raise Exception(f"token request failed, response: {auth.text}")
    if verbose:
        print("token request succeded.")
    token = auth.json()["access_token"]

    # Bounding box (in lon/lat) of the buffered study area.
    names = ("lon_sw", "lat_sw", "lon_ne", "lat_ne")
    coords = (bounding_gdf.buffer(areal_buffer).to_crs(
        "epsg:4326").geometry.iloc[0].bounds)
    bbox = dict(zip(names, coords))
    if verbose:
        print(
            f"after {areal_buffer}m buffer, requested bounding box was: {bbox}"
        )

    # Get data
    endpoint = "https://api.netatmo.com/api/getpublicdata"
    parameters = {
        **bbox,
        "required_data": "rain",
    }
    r = requests.get(endpoint,
                     parameters,
                     headers={"Authorization": "Bearer " + token})
    if r.status_code != 200:
        # Prefer the API's own error message, fall back to the raw body.
        try:
            detail = r.json()['error']['message']
        except (ValueError, KeyError, TypeError):
            detail = r.text
        raise Exception(
            f"data request returned error code {r.status_code}. {detail}")
    if verbose:
        print("data resquest succeded")
    payload = r.json()

    # Initial processing: one record per station, flattening the nested
    # "place" entry into plain columns.
    df = pd.DataFrame.from_records(payload["body"])
    df = df.drop(["measures", "modules", "module_types"], axis=1)
    df["lon"] = df.place.apply(lambda x: x["location"][0])
    df["lat"] = df.place.apply(lambda x: x["location"][1])
    df["masl"] = df.place.apply(lambda x: x["altitude"])
    df["country"] = df.place.apply(lambda x: x["country"])
    df = df.drop(["place"], axis=1)
    df = df[df.country == "NO"]  # filters by stations in norway
    df = df.rename(columns={"_id": "id"})

    # Generate GeoDataFrame. points_from_xy is vectorised and, unlike a
    # row-wise apply, also works when no station is left after filtering.
    gdf = gpd.GeoDataFrame(
        df.drop(["lon", "lat"], axis=1),
        geometry=gpd.points_from_xy(df["lon"], df["lat"]),
        crs="EPSG:4326",
    )
    gdf = gdf.to_crs(output_crs)

    gdf["source"] = "NETATMO"
    gdf['owner'] = "PRIVATE"
    gdf["resolution"] = 60

    # Clip to the study area. The polygon is reprojected first so both sides
    # of the `within` test use the same CRS.
    # NOTE(review): the clip uses the unbuffered geometry, so areal_buffer
    # only widens the request - confirm this is intended.
    bound_polygon = bounding_gdf.to_crs(output_crs).geometry.iloc[0]
    clipped_gdf = gdf[gdf.geometry.within(bound_polygon)]
    removed_stations = len(gdf) - len(clipped_gdf)
    if verbose:
        print(
            f"{removed_stations} statinos exceeded study area and were removed."
        )

    return clipped_gdf
def populate_scan_environment():
    """Allocate proxies and accounts to scan regions and write them to CSV.

    Loads the available accounts and proxies plus the city and water
    shapefiles, crops the cities to a bounding rectangle, removes water,
    generates the scan coverings and finally writes one row per scan region
    to ``scanallocation.csv`` (proxy url, username, password, centroid x,
    centroid y).

    One proxy is used for every 40 * 45 regions and one account for every
    45 regions. The function stops early, after printing a summary, as soon
    as it runs out of proxies or accounts.

    The print calls use a single parenthesised argument so they behave the
    same under the Python 2 print statement and the Python 3 function.
    """
    print("[-] Loading accounts")
    accounts = get_available_accounts_queue()
    print("[-] Loading accounts - Done")

    print("[-] Loading proxies")
    proxies = get_available_proxies_queue()
    print("[-] Loading proxies - Done")

    print("[-] Loading shapefiles")
    cities = GeoDataFrame.from_file('./geodata/ca_cities/Cities2015.shp')
    water = GeoDataFrame.from_file('./geodata/baywater/bayarea_allwater.shp')
    print("[-] Loading shapefiles - Done")

    print("[-] Reduce + crop geometry")
    # A wider alternative that was used before:
    # lng -122.6 .. -121.5, lat 37.1 .. 38
    lng1 = -122.457
    lat1 = 37.76
    lng2 = -122.402
    lat2 = 37.80

    # This is the cropped area - full geojson is all cities in CA.
    # The last corner is (lng1, lat2); repeating (lng1, lat1) there would
    # collapse the rectangle into a triangle.
    bayarea = Polygon([(lng1, lat1), (lng2, lat1), (lng2, lat2), (lng1, lat2)])
    bayarea_crop_frame = GeoDataFrame(geometry=GeoSeries([bayarea]),
                                      crs={'init': 'epsg:4269'})
    ca_cities = cities.intersection(bayarea_crop_frame.geometry.unary_union)
    ca_water = water.to_crs(ca_cities.crs)
    ca_cities_clean = ca_cities.difference(ca_water.geometry.unary_union)

    simple_ca = GeoDataFrame(
        geometry=GeoSeries(ca_cities_clean.buffer(0).geometry.unary_union),
        crs={'init': 'epsg:4269'})
    # Buffer and reduce complexity of geometry - covers costal regions +
    # improves performance
    simple_ca = simple_ca.buffer(0.003).simplify(0.01, preserve_topology=True)
    print("[-] Reduce + crop geometry - Done")

    print("[-] Generate coverings")
    circle_covering = create_circle_covering(70, 50, 65)
    norm_covering = transform_and_normalize_circle_covering(
        circle_covering, ca_cities_clean.crs)
    search_rects = chunked_rect_covering(lng1 - 0.05, lat1 - 0.1, 0.038,
                                         30, 30, (simple_ca))
    print("[-] Generate coverings - Done")

    with open('scanallocation.csv', 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')

        allocated_proxy_count = 0
        allocated_region_count = 0
        allocated_account_count = 0

        for rect_index, rect in enumerate(tqdm(search_rects.geometry,
                                               position=0)):
            regions = get_scan_region_covering(
                rect_index, rect, ca_cities_clean, norm_covering).geometry

            # one ip for every 40*45 scans (40 workers per ip)
            for region_group_ips in chunks(regions, 40 * 45):
                try:
                    proxy = proxies.pop()
                except Exception:
                    print("\nRan out of --- proxies --- {} regions, using {} accounts, using {} proxies".
                          format(allocated_region_count,
                                 allocated_account_count,
                                 allocated_proxy_count))
                    return
                # Count only proxies that were actually obtained.
                allocated_proxy_count += 1

                # one worker for every 45 scans
                for region_group in chunks(region_group_ips, 45):
                    try:
                        allocate_account = accounts.pop()
                    except Exception:
                        print("\nRan out of --- accounts --- {} regions, using {} accounts, using {} proxies".
                              format(allocated_region_count,
                                     allocated_account_count,
                                     allocated_proxy_count))
                        return
                    # Count only accounts that were actually obtained.
                    allocated_account_count += 1

                    # NOTE(review): this counts groups of up to 45 regions,
                    # not individual regions - confirm that is intended.
                    allocated_region_count += 1
                    for region in region_group:
                        # proxy, username, password, centroid x, centroid y
                        spamwriter.writerow(["https://" + proxy,
                                             allocate_account["username"],
                                             allocate_account["password"],
                                             region.centroid.x,
                                             region.centroid.y])

    print("\nDone!: {} regions, using {} accounts, using {} proxies".
          format(allocated_region_count,
                 allocated_account_count,
                 allocated_proxy_count))
fill=True, fill_color="black", fill_opacity=1).add_to(my_map) my_map.save("matched_route.html") ###################################################### # build a geodataframe with VIASAT data geometry = [Point(xy) for xy in zip(viasat.longitude, viasat.latitude)] # viasat = viasat.drop(['longitude', 'latitude'], axis=1) crs = {'init': 'epsg:4326'} viasat_gdf = GeoDataFrame(viasat, crs=crs, geometry=geometry) # viasat_gdf.plot() # Buffer the points by some units (unit is kilometer) buffer = viasat_gdf.buffer( buffer_diam) # 25 meters # this is a geoseries (0.00025) # buffer.plot() # make a dataframe buffer_viasat = pd.DataFrame(buffer) buffer_viasat.columns = ['geometry'] # geodataframe with edges type(gdf_edges) # gdf_edges.plot() ''' ## circumscript the area of the track (buffer zone) union = buffer.unary_union envelope = union.envelope rectangle_viasat = gpd.GeoDataFrame(geometry=gpd.GeoSeries(envelope)) rectangle_viasat.plot()