def subset_cophub_from_esa(
    esa_df: gpd.GeoDataFrame,
    cop_df: gpd.GeoDataFrame,
    outdir: Optional[Union[Path, str]] = Path(os.getcwd()),
    save_file: Optional[bool] = True,
) -> gpd.GeoDataFrame:
    """Subsets Cophub list based on the ESA list if their Geometry(Footprint)
    intersects for a given day.

    Note that both input frames are updated in place: ``start_date`` is parsed
    to timestamps, a ``date`` column is added and the CRS is set to EPSG:4326.

    :param esa_df: The GeoDataFrame with attributes for ESA S3 listing.
    :param cop_df: The GeoDataFrame with attributes for Cophub(NCI) file listing.
    :param outdir: Directory that receives ``cophub_download_list.csv``; may be
        a ``Path`` or a ``str``. ``None`` falls back to the current directory.
    :param save_file: Flag to save the subset download list or not.

    :return:
        GeoDataFrame with Cophub FRP product footprint that intersects
        with ESA FRP footprint for a given day (one row per Cophub title).
    """
    # convert datetime string to datetime stamp
    esa_df['start_date'] = pd.to_datetime(esa_df["start_date"], format="%Y%m%dT%H%M%S")
    cop_df['start_date'] = pd.to_datetime(cop_df["start_date"], format="%Y%m%dT%H%M%S")

    # assign crs to GeoDataFrame
    esa_df.crs = 'EPSG:4326'
    cop_df.crs = 'EPSG:4326'

    esa_df['date'] = esa_df['start_date'].dt.date
    cop_df['date'] = cop_df['start_date'].dt.date

    column_names = ["title", "start_date", "sensor", 'relative_orbit', 'geometry']

    # Collect matches in a plain list and build the frame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per row.
    matches = []
    for _, esa_row in esa_df.iterrows():
        same_day = cop_df[cop_df['date'] == esa_row['date']]
        # buffer(0) repairs invalid footprints before the intersection test
        esa_geom = esa_row['geometry'].buffer(0)
        for _, cop_row in same_day.iterrows():
            if esa_geom.intersects(cop_row['geometry'].buffer(0)):
                matches.append({
                    'title': cop_row["title"],
                    'start_date': cop_row['start_date'],
                    'sensor': cop_row['sensor'],
                    'relative_orbit': cop_row['relative_orbit'],
                    'geometry': cop_row['geometry'],
                    'esa_geometry': esa_row['geometry'],
                })

    if matches:
        cop_download_df = gpd.GeoDataFrame(
            matches, columns=column_names + ['esa_geometry'])
    else:
        cop_download_df = gpd.GeoDataFrame(columns=column_names)

    subset_cop_download_df = cop_download_df.drop_duplicates(['title'])

    if save_file:
        # The signature allows str (and None); normalise before joining paths.
        out_path = Path(outdir) if outdir is not None else Path(os.getcwd())
        subset_cop_download_df.to_csv(
            out_path.joinpath('cophub_download_list.csv'))

    return subset_cop_download_df
def test_SpatialTree_closest_links_in_north_canada_doesnt_find_link_within_10_metres():
    """No link may be returned for a 10 (metre) search around the stop.

    Location: northern Canada (out in the boonies). The stop is labelled as
    being 15m away from the single car link in the network.
    """
    node_coords = {
        '1': {'x': -93.25129666354827, 'y': 73.66401680598872},
        '2': {'x': -93.25140295754169, 'y': 73.66417415921647},
    }
    network = Network('epsg:4326')
    network.add_nodes(node_coords)
    network.add_link(link_id='link_1', u='1', v='2', attribs={'modes': ['car']})
    tree = spatial.SpatialTree(network)

    stop_points = {'stop_15m_to_link_1': Point(-93.250971, 73.664114)}
    stops = GeoDataFrame({'geometry': stop_points})
    stops.crs = {'init': 'epsg:4326'}

    # rows without a matched link are dropped; nothing must remain
    matched = tree.closest_links(stops, 10, modes='car').dropna()
    assert matched.empty
def test_SpatialTree_closest_links_in_indonesia_doesnt_find_link_within_10_metres():
    """No link may be returned for a 10 (metre) search around the stop.

    Location: Indonesia (close to equator). The stop is labelled as being
    15m away from the single car link in the network.
    """
    node_coords = {
        '1': {'x': 109.380477773586, 'y': 0.3203433505415778},
        '2': {'x': 109.38042852136014, 'y': 0.32031507655538294},
    }
    network = Network('epsg:4326')
    network.add_nodes(node_coords)
    network.add_link(link_id='link_1', u='1', v='2', attribs={'modes': ['car']})
    tree = spatial.SpatialTree(network)

    stop_points = {'stop_15m_to_link_1': Point(109.380607, 0.320333)}
    stops = GeoDataFrame({'geometry': stop_points})
    stops.crs = {'init': 'epsg:4326'}

    # rows without a matched link are dropped; nothing must remain
    matched = tree.closest_links(stops, 10, modes='car').dropna()
    assert matched.empty
def test_SpatialTree_closest_links_in_north_canada_finds_link_within_30_metres():
    """A 30 (metre) search around the stop must return exactly ``link_1``.

    Location: northern Canada (out in the boonies). The stop is labelled as
    being 15m away from the single car link in the network.
    """
    node_coords = {
        '1': {'x': -93.25129666354827, 'y': 73.66401680598872},
        '2': {'x': -93.25140295754169, 'y': 73.66417415921647},
    }
    network = Network('epsg:4326')
    network.add_nodes(node_coords)
    network.add_link(link_id='link_1', u='1', v='2', attribs={'modes': ['car']})
    tree = spatial.SpatialTree(network)

    stop_points = {'stop_15m_to_link_1': Point(-93.250971, 73.664114)}
    stops = GeoDataFrame({'geometry': stop_points})
    stops.crs = {'init': 'epsg:4326'}

    matched = tree.closest_links(stops, 30, modes='car')

    # collapse the result into {stop id: [link ids]} for comparison
    links_per_stop = (
        matched.reset_index().groupby('index')['link_id'].apply(list).to_dict()
    )
    assert_semantically_equal(links_per_stop, {'stop_15m_to_link_1': ['link_1']})
def test_SpatialTree_closest_links_in_indonesia_finds_link_within_20_metres():
    """A 20 (metre) search around the stop must return exactly ``link_1``.

    Location: Indonesia (close to equator). The stop is labelled as being
    15m away from the single car link in the network.
    """
    node_coords = {
        '1': {'x': 109.380477773586, 'y': 0.3203433505415778},
        '2': {'x': 109.38042852136014, 'y': 0.32031507655538294},
    }
    network = Network('epsg:4326')
    network.add_nodes(node_coords)
    network.add_link(link_id='link_1', u='1', v='2', attribs={'modes': ['car']})
    tree = spatial.SpatialTree(network)

    stop_points = {'stop_15m_to_link_1': Point(109.380607, 0.320333)}
    stops = GeoDataFrame({'geometry': stop_points})
    stops.crs = {'init': 'epsg:4326'}

    matched = tree.closest_links(stops, 20, modes='car').dropna()

    # collapse the result into {stop id: [link ids]} for comparison
    links_per_stop = (
        matched.reset_index().groupby('index')['link_id'].apply(list).to_dict()
    )
    assert_semantically_equal(links_per_stop, {'stop_15m_to_link_1': ['link_1']})
def get_accessibility_gdf(travel_time_df, threshold=0.5):
    """Return mean travel time per well-connected station as point geometries.

    A station is kept when its number of non-null ``travel_time`` rows is
    greater than ``threshold`` times the number of distinct stations. For the
    kept stations the mean ``travel_time``, ``x`` and ``y`` are computed and a
    point geometry is built from the mean ``x``/``y``. The station labelled
    ``'NoID'`` is removed from the result.

    :param travel_time_df: frame with at least the columns ``station_id``,
        ``travel_time``, ``x`` and ``y``.
    :param threshold: fraction of the station count a station's row count has
        to exceed to be considered connected.
    :return: GeoDataFrame with columns ``station_id``, ``travel_time``, ``x``,
        ``y`` and ``geometry``; CRS set to EPSG:4326.
    """
    station_by_count = travel_time_df.groupby('station_id').count()
    min_count = len(station_by_count) * threshold
    connected_stations = station_by_count[
        station_by_count['travel_time'] > min_count].index

    # numeric_only=True spells out the historical default; recent pandas
    # raises on non-numeric columns instead of silently dropping them.
    mean_accessibility = travel_time_df.groupby('station_id').mean(
        numeric_only=True)
    mean_accessibility_filtered = mean_accessibility.loc[connected_stations]

    # .copy(): the column selection may be a view of the parent frame, and
    # adding a column to it triggers pandas' chained-assignment warning.
    mean_accessibility_gpd = mean_accessibility_filtered[
        ['travel_time', 'x', 'y']].copy()
    mean_accessibility_gpd['geometry'] = [
        Point(xy)
        for xy in zip(mean_accessibility_gpd.x, mean_accessibility_gpd.y)
    ]

    mean_accessibility_gpd = GeoDataFrame(mean_accessibility_gpd,
                                          geometry='geometry')
    mean_accessibility_gpd.crs = {'init': 'epsg:4326'}

    # station_id is the group index so far; turn it back into a column
    mean_accessibility_gpd.reset_index(level=0, inplace=True)
    return mean_accessibility_gpd[
        mean_accessibility_gpd['station_id'] != 'NoID']
def find_reached_stations(reached_stations_df: pd.DataFrame,
                          target_stations: GeoDataFrame,
                          buffer: int,
                          counter: int = 1) -> list:
    """List the target stations whose buffer touches an already reached station.

    Reached stations are averaged per ``station`` (mean ``x``/``y`` and
    ``travel_time``), turned into points in EPSG:4326, reprojected to
    EPSG:25832 and buffered by ``buffer``. Target stations are reprojected and
    buffered the same way, then left-joined spatially against the reached
    buffers.

    :param reached_stations_df: frame with columns ``station``, ``x``, ``y``
        and ``travel_time``.
    :param target_stations: stations to test; must carry ``station_id``.
    :param buffer: buffer distance, applied in EPSG:25832 units.
    :param counter: not used by the active code (only referenced by debugging
        output that is disabled).
    :return: ``station_id`` values of the joined rows that found a match.
    """
    metric_crs = {'init': 'epsg:25832'}

    # one averaged point per reached station
    station_means = reached_stations_df.groupby('station').mean()
    station_means['geometry'] = [
        Point(xy) for xy in zip(station_means.x, station_means.y)
    ]
    reached_gdf = GeoDataFrame(station_means, geometry='geometry')
    reached_gdf.crs = {'init': 'epsg:4326'}

    # buffers around the reached stations
    reached_buffered = reached_gdf.to_crs(metric_crs)
    reached_buffered['geometry'] = reached_buffered.buffer(buffer)

    # buffers around the target stations
    targets_buffered = target_stations.to_crs(metric_crs)
    targets_buffered['geometry'] = targets_buffered.buffer(buffer)

    # left join: unmatched targets end up with a NaN travel_time
    joined = sjoin(targets_buffered,
                   reached_buffered[['travel_time', 'geometry']],
                   how='left')
    matched = joined[joined['travel_time'].notna()]
    return matched['station_id'].tolist()
def test_deprecation(self):
    """Both ways of giving a CRS to a frame built from ``[]`` must warn."""
    # CRS passed to the constructor
    with pytest.warns(FutureWarning):
        GeoDataFrame([], crs=27700)

    # CRS assigned after construction
    with pytest.warns(FutureWarning):
        frame = GeoDataFrame([])
        frame.crs = 27700
def to_file(self, filename, driver="ESRI Shapefile", **kwargs):
    """Write this object to ``filename`` by way of a two-column GeoDataFrame.

    The frame holds ``self`` as ``geometry`` and the index values as ``id``,
    shares the index and CRS of ``self``, and is written with
    ``GeoDataFrame.to_file``. Extra keyword arguments are forwarded to it.
    """
    from geopandas import GeoDataFrame

    columns = {"geometry": self, "id": self.index.values}
    frame = GeoDataFrame(columns, index=self.index)
    frame.crs = self.crs
    frame.to_file(filename, driver, **kwargs)
def set_crs(df: GDF, epsg_code: Union[int, str]) -> GDF:
    """Assign an EPSG-based CRS to ``df`` and hand the same object back.

    Intended for use inside a geopandas pipeline: the frame is modified in
    place (its ``crs`` attribute becomes ``{'init': 'epsg:<code>'}``) and then
    returned so calls can be chained.

    TODO: Deprecate with next rasterio version that will integrate set_crs
    method.
    """
    crs_definition = {'init': 'epsg:' + str(epsg_code)}
    df.crs = crs_definition
    return df
def read_file(filename, **kwargs):
    """
    Returns a GeoDataFrame from a file.

    *filename* is either the absolute or relative path to the file to be
    opened and *kwargs* are keyword args to be passed to the method when
    opening the file. The optional keyword ``bbox`` (a sequence of four
    values) is not forwarded to ``fiona.open``; it is used to filter the
    features that are read.

    Note: This method does not attempt to align rows.
    Properties that are not present in all features of the source
    file will not be properly aligned.  This should be fixed.
    """
    bbox = kwargs.pop('bbox', None)
    geoms = []
    columns = defaultdict(list)
    with fiona.open(filename, **kwargs) as f:
        crs = f.crs
        # identity test: ``!= None`` is evaluated element-wise for arrays
        if bbox is not None:
            assert len(bbox) == 4
            f_filt = f.filter(bbox=bbox)
        else:
            f_filt = f
        for rec in f_filt:
            geoms.append(shape(rec['geometry']))
            # .items() works on Python 2 and 3; .iteritems() is Python 2 only
            for key, value in rec['properties'].items():
                columns[key].append(value)
    df = GeoDataFrame(columns)
    df['geometry'] = GeoSeries(geoms)
    df.crs = crs
    return df
def test_SpatialTree_closest_links_in_london_finds_links_within_30_metres(
        network):
    """Each London stop must be matched to the expected car links within 30.

    Uses the car-only subtree of the ``network`` fixture; results are grouped
    by the stop ``id`` column.
    """
    stop_ids = {
        0: 'stop_10m_to_link_1',
        1: 'stop_15m_to_link_2',
        2: 'stop_20m_to_link_1',
    }
    stop_points = {
        0: Point(-0.15186089346604492, 51.51950409732838),
        1: Point(-0.15164747576623197, 51.520660715220636),
        2: Point(-0.1520233977548685, 51.51952913606585),
    }
    stops = GeoDataFrame({'id': stop_ids, 'geometry': stop_points})
    stops.crs = {'init': 'epsg:4326'}

    car_tree = spatial.SpatialTree(network).modal_subtree(modes='car')
    matched = car_tree.closest_links(stops, 30)

    # collapse the result into {stop id: [link ids]} for comparison
    links_per_stop = (
        matched.reset_index().groupby('id')['link_id'].apply(list).to_dict()
    )
    expected = {
        'stop_10m_to_link_1': ['link_1'],
        'stop_20m_to_link_1': ['link_1'],
        'stop_15m_to_link_2': ['link_2', 'link_4'],
    }
    assert_semantically_equal(links_per_stop, expected)
def to_geometry(self, to_crs=None):
    """Makes a geometrical representation of the grid (e.g. for drawing).

    This can come also handy when doing shape-to-raster operations.

    One rectangular polygon is built per grid cell from the corner grid
    origin (``corner_grid.x0``/``y0``) and the spacing ``dx``/``dy``. Rows are
    ordered with ``j`` (y direction) as the outer and ``i`` (x direction) as
    the inner index.

    TODO: currently returns one polygon of each grid points, but this
    could do more.

    Parameters
    ----------
    to_crs : optional
        if it passes ``check_crs``, the result is transformed to this CRS

    Returns
    -------
    a geopandas.GeoDataFrame with columns ``j``, ``i`` and ``geometry``
    """
    from geopandas import GeoDataFrame
    from shapely.geometry import Polygon

    # cell edges: one more edge than cells along each axis
    x_edges = self.corner_grid.x0 + np.arange(self.nx+1) * self.dx
    y_edges = self.corner_grid.y0 + np.arange(self.ny+1) * self.dy

    row_ids = []
    col_ids = []
    polygons = []
    for j, y0 in enumerate(y_edges[:-1]):
        y1 = y_edges[j + 1]
        for i, x0 in enumerate(x_edges[:-1]):
            x1 = x_edges[i + 1]
            # closed ring: first corner repeated at the end
            ring = [(x0, y0), (x1, y0), (x1, y1), (x0, y1), (x0, y0)]
            polygons.append(Polygon(ring))
            row_ids.append(j)
            col_ids.append(i)

    out = GeoDataFrame()
    out['j'] = row_ids
    out['i'] = col_ids
    out['geometry'] = polygons
    out.crs = self.proj.srs

    if check_crs(to_crs):
        transform_geopandas(out, to_crs=to_crs, inplace=True)
    return out
def main(input_df: gpd.GeoDataFrame,
         distance: float,
         fix_geoms: bool = True,
         unite: bool = False):
    """
    Makes buffers around geometries of the input_dataframe.

    The frame is reprojected to ``crs.SIB``, buffered by ``distance`` there
    and reprojected back to its initial CRS.

    :param input_df: frame to buffer. If it has no CRS, ``crs.WGS`` is
        assigned to it in place and a warning is printed to stderr.
    :param distance: buffer distance, applied in ``crs.SIB`` units.
    :param fix_geoms: run ``buffer(0)`` on the result to repair polygons that
        became self-intersecting during the CRS transformation.
    :param unite: return the union of all buffers, one row per part of the
        union, instead of one row per input feature.
    :return: GeoDataFrame in the initial CRS of ``input_df``.
    """
    if not input_df.crs:
        print('Warning! Input_file has no CRS. Assuming 4326.',
              file=sys.stderr)
        input_df.crs = crs.WGS

    initial_crs = input_df.crs
    distance = float(distance)

    # [list...] to make it a dataframe. Otherwise changing geometry type on
    # the fly causes exceptions: if the initial file had points or
    # linestrings, geopandas still expects this geometry type.
    new_df = input_df.copy().to_crs(crs.SIB)[list(input_df)]
    new_df['geometry'] = new_df['geometry'].buffer(distance)
    result = gpd.GeoDataFrame(new_df, crs=crs.SIB).to_crs(initial_crs)

    if fix_geoms:
        # CRS transformation may slightly move points and make polygons self
        # intersect. Hence buffer(0) to fix.
        result['geometry'] = result['geometry'].buffer(0)

    if unite:
        buf_union = result.geometry.unary_union
        # Multi-part geometries expose their members via ``.geoms``; iterating
        # the geometry itself no longer works in Shapely 2.0. A single polygon
        # has no ``.geoms`` and is wrapped as-is.
        parts = list(getattr(buf_union, 'geoms', [buf_union]))
        return gpd.GeoDataFrame({'geometry': parts}, crs=input_df.crs)

    return result
def test_scalar(self, scalar):
    """CRS assigned after a scalar geometry column is visible at every level."""
    frame = GeoDataFrame()
    frame["geometry"] = scalar
    frame.crs = 4326

    expected = self.wgs
    # frame, geometry column and underlying array must all agree
    assert frame.crs == expected
    assert frame.geometry.crs == expected
    assert frame.geometry.values.crs == expected
def build_person_travel_geodataframe(person, from_epsg=None, to_epsg=None):
    """
    Loop through a persons legs, creating a geopandas GeoDataFrame defining travel for plotting.

    Each row holds the attributes of one leg, a ``geometry`` linestring from
    the leg's start to its end location, ``start_location``/``end_location``
    replaced by the first/last coordinate of that line, and the person's
    ``pid``.

    :param person: pam.core.Person object
    :param from_epsg: coordinate system the plans are currently in, optional
    :param to_epsg: coordinate system you want the geo dataframe to be projected to, optional, you need to specify
        from_epsg as well to use this.
    :return: geopandas.GeoDataFrame with one row per leg
    :raises AttributeError: if a leg has no ``loc`` on its start or end location
    """
    # Rows are gathered first and the frame is built once: DataFrame.append
    # was removed in pandas 2.0 and re-copied the frame for every leg.
    leg_records = []
    for leg in person.legs:
        if (leg.start_location.loc is None) or (leg.end_location.loc is None):
            raise AttributeError(
                'To create a geopandas.DataFrame you need specific locations. Make sure Legs have '
                'loc attribute defined with a shapely.Point or s2sphere.CellId.')

        record = leg.__dict__.copy()
        line = utils.get_linestring(leg.start_location.loc, leg.end_location.loc)
        record['geometry'] = line
        coords = list(line.coords)
        record['start_location'] = coords[0]
        record['end_location'] = coords[-1]
        leg_records.append(record)

    df = pd.DataFrame(leg_records)
    df['pid'] = person.pid
    df = GeoDataFrame(df, geometry='geometry')

    if from_epsg:
        df.crs = from_epsg
        # reprojection is only possible once a source CRS is known
        if to_epsg:
            df = df.to_crs(to_epsg)

    return df
def read_file(filename, **kwargs):
    """
    Returns a GeoDataFrame from a file.

    *filename* is either the absolute or relative path to the file to be
    opened and *kwargs* are keyword args to be passed to the method when
    opening the file. The optional keyword ``bbox`` (a sequence of four
    values) is not forwarded to ``fiona.open``; it is used to filter the
    features that are read.

    Note: This method does not attempt to align rows.
    Properties that are not present in all features of the source
    file will not be properly aligned.  This should be fixed.
    """
    bbox = kwargs.pop('bbox', None)
    geoms = []
    columns = defaultdict(list)
    with fiona.open(filename, **kwargs) as f:
        crs = f.crs
        # identity test: ``!= None`` is evaluated element-wise for arrays
        if bbox is not None:
            assert len(bbox) == 4
            f_filt = f.filter(bbox=bbox)
        else:
            f_filt = f
        for rec in f_filt:
            geoms.append(shape(rec['geometry']))
            # .items() works on Python 2 and 3; .iteritems() is Python 2 only
            for key, value in rec['properties'].items():
                columns[key].append(value)
    df = GeoDataFrame(columns)
    df['geometry'] = GeoSeries(geoms)
    df.crs = crs
    return df
def run_calculation_intersection(parking_geojson, road_geosjon, output_file):
    '''Finding the entry and exit points by finding the intersection-points of roads and polygons

    For every parking polygon the roads whose geometry intersects it are
    looked up through the roads' spatial index, and each road is intersected
    with the polygon's exterior boundary. Non-empty intersections are
    collected, de-duplicated by WKB and written out with CRS EPSG:32632.

    Args:
        parking_geojson: path of the parking polygons file.
        road_geosjon: path of the road geometries file.
        output_file: path the resulting GeoJSON is written to.

    Returns:
        None. The result is written to ``output_file``.
    '''
    # load road and parking geometries
    osm_roads = gpd.read_file(road_geosjon)
    parkings = gpd.read_file(parking_geojson)
    print('loading files are done !')

    # finding the entry and exit points
    save_geometry = {'geometry': []}
    spatial_index_roads = osm_roads.sindex
    print('creating spatial indexes are done !')

    for processed, (_, row) in enumerate(parkings.iterrows()):
        # coarse bounding-box filter first, exact test second
        possible_matches_index = list(
            spatial_index_roads.intersection(row.geometry.bounds))
        possible_matches = osm_roads.iloc[possible_matches_index]
        precise_matches = possible_matches[possible_matches.intersects(
            row.geometry)]

        if len(precise_matches) > 0:
            print('found parking intersects with roads:', len(precise_matches))
            parking_line_ring = polygon.LinearRing(
                list(row.geometry.exterior.coords))
            # the boundary line is the same for every road of this parking
            boundary_line = polygon.LineString(parking_line_ring)
            for road_geometry in precise_matches.geometry:
                point = road_geometry.intersection(boundary_line)
                # is_empty covers every empty geometry type; the WKT of an
                # empty result is not always 'GEOMETRYCOLLECTION EMPTY'
                if not point.is_empty:
                    save_geometry['geometry'].append(point)

        if processed % 100 == 0:
            print(processed)

    # save the exit points into a single geojson file
    df = GeoDataFrame(save_geometry)

    # remove duplications
    wkb_values = df["geometry"].apply(lambda geom: geom.wkb)
    df = df.loc[wkb_values.drop_duplicates().index]

    # set crs of the dataframe
    df.crs = {'init': 'epsg:32632', 'no_defs': True}
    write_gdf_json(df,
                   out_filename=output_file,
                   epsg_out=32632,
                   should_format=False)
def test_scalar(self, scalar):
    """CRS set on an empty frame (with a warning) carries over to its geometry."""
    # assigning a CRS before any geometry exists is expected to warn
    with pytest.warns(FutureWarning):
        frame = GeoDataFrame()
        frame.crs = 4326
    frame["geometry"] = scalar

    expected = self.wgs
    # frame, geometry column and underlying array must all agree
    assert frame.crs == expected
    assert frame.geometry.crs == expected
    assert frame.geometry.values.crs == expected
def filter_geo_data_by_swaths(self,
                              swaths_selected=None,
                              swaths_only=False,
                              source_boundary=False):
    ''' method to select geo_data depending on swaths selected

        Parameters:
        :self: instance of GeoData
        :swaths_selected: passed on to swath_selection
        :swaths_only: boolean - if True, self.geo_df is left unfiltered and
                      only the swath boundary is worked out, default False
        :source_boundary: boolean - if True (and swaths are selected) the
                      source boundary clipped to the swaths is united with
                      the swath boundary, default False

        Returns:
        :swaths: list of selected swaths
        :self.geo_df: pandas dataframe of receivers, restricted to the swath
                      selection unless swaths_only is set
        :swaths_pnt_polygon: union of selected swaths polygon in points (RL, RP)
        :swaths_bnd_gdf: geodataframe with the boundary of the selection in
                      (easting, northing)
    '''
    swaths, swaths_pnt_polygon, swaths_geo_polygon = swath_selection(
        swaths_selected=swaths_selected)

    bnd_gdf = read_file(geo_shapefile)
    bnd_gdf.crs = EPSG_31256_adapted
    # OBJECTID 1 is used as receiver boundary, larger ids as source boundary
    rcv_bnd_gdf = bnd_gdf[bnd_gdf['OBJECTID'] == 1]
    src_bnd_gdf = bnd_gdf[bnd_gdf['OBJECTID'] > 1]

    swaths_bnd_gdf = GeoDataFrame(geometry=GeoSeries(swaths_geo_polygon))
    swaths_bnd_gdf.crs = EPSG_31256_adapted

    if swaths_pnt_polygon:
        swaths_bnd_gdf = overlay(rcv_bnd_gdf, swaths_bnd_gdf, how='intersection')
    else:
        swaths_bnd_gdf = rcv_bnd_gdf

    if source_boundary and swaths != []:
        src_bnd_gdf = overlay(src_bnd_gdf, swaths_bnd_gdf, how='intersection')
        swaths_bnd_gdf = overlay(swaths_bnd_gdf, src_bnd_gdf, how='union')

    if not swaths_only and swaths_pnt_polygon:
        # Collect the rejected labels and drop them in one go; dropping row
        # by row copies the whole frame for every rejected receiver.
        outside = []
        for index, row in self.geo_df.iterrows():
            # STATIONVIX: characters 0-3 are the line, 4-7 the station
            station_code = str(row['STATIONVIX'])
            line = string_to_value_or_nan(station_code[0:4], 'int')
            station = string_to_value_or_nan(station_code[4:8], 'int')
            point = Point(line, station)

            # keep points that are in or on the polygon
            if not (swaths_pnt_polygon.contains(point)
                    or swaths_pnt_polygon.intersects(point)):
                outside.append(index)

        self.geo_df = self.geo_df.drop(outside).reset_index(drop=True)

    return swaths, self.geo_df, swaths_pnt_polygon, swaths_bnd_gdf
def update_cost(  # pylint: disable=too-many-arguments
    G: nx.Graph,
    gdf: gpd.GeoDataFrame,
    edge_df: Optional[gpd.GeoDataFrame] = None,
    cost_attr: Optional[str] = "cost",
    weight_attr: Optional[str] = "weight",
    key_attr: Optional[str] = "key",
) -> nx.Graph:
    """Update the cost of edges the graph from a geo dataframe.

    The edges are spatially left-joined with ``gdf``; for every
    (source, target, key) the mean of ``cost_attr`` over the joined rows is
    stored on the edge as ``gamma`` (clipped at 0) and, multiplied by the
    edge's ``weight_attr``, as ``cost_attr``.

    Args:
        G: Input graph. Must have a geometry attribute on the edges.
        gdf: Must contain geometry column and value column.
        edge_df: The edge geo dataframe of the graph. Built from ``G`` when
            omitted.

    Other Args:
        cost_attr: Name of the cost function.
        weight_attr: Name of the weight function.
        key_attr: Name of the key for multi graphs.

    Returns:
        Graph with updated cost attribute. ``G`` is modified in place.

    Raises:
        AssertionError: If an edge of ``G`` is missing from the join.
    """
    # convert G to geodataframe
    if edge_df is None:
        # for a multigraph, remember the keys
        for u, v, k in G.edges(keys=True):
            G[u][v][k][key_attr] = k
        # create a geodataframe
        edge_df = ox.graph_to_gdfs(G, nodes=False, fill_edge_geometry=True)
        edge_df = edge_df.rename(columns=dict(u="source", v="target"))

    # check the crs of geometries: if only one side has a CRS, share it
    if edge_df.crs is None and gdf.crs is not None:
        edge_df.crs = gdf.crs
    elif gdf.crs is None and edge_df.crs is not None:
        gdf.crs = edge_df.crs

    # get intersection of the geodataframes
    logging.info("%s rows in edge dataframe", len(edge_df))
    join = gpd.sjoin(edge_df, gdf, how="left")
    logging.info("%s rows in join dataframe", len(join))

    # A set, not a bare zip(): zip is a one-shot iterator, so membership
    # tests would consume it and later edges could never be found.
    edges_in_join = set(zip(join["source"], join["target"]))
    for u, v in G.edges():
        assert (u, v) in edges_in_join or (v, u) in edges_in_join

    # group the edges and take average pollution
    # NOTE(review): the grouping column is the literal "key", not key_attr.
    mean_cost = join.groupby(["source", "target", "key"])[cost_attr].mean()
    # Series.items() replaces iteritems(), which pandas 2.0 removed
    for (i, j, k), value in mean_cost.items():
        G[i][j][k]["gamma"] = value if value >= 0 else 0
        G[i][j][k][cost_attr] = value * G[i][j][k][weight_attr]
    return G
def calc_pandas(self):
    """Buffer the first input's features and return the union as one frame.

    If the unit of the input's EPSG code does not start with ``met``, the
    features are read in EPSG:3857, buffered there and the result is brought
    back to the original EPSG code afterwards; otherwise everything happens
    in the input's own projection.
    """
    featureio = self.inputs[0]
    source_epsg = featureio.get_epsg()

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(source_epsg))
    uses_metres = srs.GetAttrValue('UNIT').lower().startswith('met')

    if uses_metres:
        # already metric: buffer in place, nothing to restore afterwards
        working_epsg = source_epsg
        restore_epsg = None
    else:
        working_epsg = 3857
        restore_epsg = source_epsg

    feature_df = featureio.read(epsg=working_epsg)
    merged_buffer = GeoSeries(
        feature_df.buffer(self.buffer_size).unary_union)
    buffer_df = GeoDataFrame(geometry=merged_buffer)
    buffer_df.crs = feature_df.crs

    if restore_epsg:
        geometry_column = buffer_df.geometry.name
        buffer_df[geometry_column] = buffer_df.to_crs(epsg=restore_epsg)
        buffer_df.crs = fiona.crs.from_epsg(restore_epsg)
    return buffer_df
def get_area():
    """Return the area rectangle as a one-row GeoDataFrame.

    The polygon runs through the four corners given by the module-level
    easting/northing limits; the CRS is ``epsg:<EPSG_UTM_40N>``.
    """
    # corner order: (min E, max N) -> (max E, max N) -> (max E, min N) -> (min E, min N)
    corners = [
        (AREA_EASTING_MIN, AREA_NORHING_MAX),
        (AREA_EASTING_MAX, AREA_NORHING_MAX),
        (AREA_EASTING_MAX, AREA_NORTHING_MIN),
        (AREA_EASTING_MIN, AREA_NORTHING_MIN),
    ]
    area_gpd = GeoDataFrame(geometry=GeoSeries(Polygon(corners)))
    area_gpd.crs = 'epsg:{}'.format(EPSG_UTM_40N)
    return area_gpd
def vectorize_image(raster_1, raster_2, output_file, category, threshold):
    """
    Core function for converting two rasters into vector features

    Band 1 of both rasters is halved, cast to uint8 and summed; the sum is
    binarised against ``threshold`` and the cells equal to 1 are contoured
    into polygons. Transform and CRS are taken from ``raster_1``.

    Args:
        raster_1: path of the first raster.
        raster_2: path of the second raster.
        output_file: path of the shapefile to write.
        category: value stored in the ``category`` column of every feature.
        threshold: sums above this value become 1, all others 0.

    Returns:
        None. A shapefile is written to ``output_file`` if at least one
        feature was found.
    """
    # Read what is needed and close both datasets again; the original handles
    # were never closed.
    with rasterio.open(raster_1) as im1, rasterio.open(raster_2) as im2:
        data_1 = im1.read(1)
        data_2 = im2.read(1)
        transform = im1.transform
        crs = im1.crs  # get epsg from input file

    data_1 = (data_1 / 2).astype('uint8')
    data_2 = (data_2 / 2).astype('uint8')

    data = data_1 + data_2

    data[data <= threshold] = 0
    data[data > threshold] = 1

    print(data.max(), data.min())

    # plain bool: the np.bool alias has been removed from NumPy
    mask = np.array(data, dtype=bool)

    # contour raster image -> build polygons
    temp = shapes(data, mask, transform=transform)

    # compile results together as shapely geometry. Records are plain dicts:
    # a GeoSeries is meant to hold geometries only, not mixed attributes.
    out = []
    for t, v in temp:
        out.append({
            'geometry': shape(t),
            'num': int(v),
            'category': category
        })

    print(len(out))
    # only write out features if they exist!
    if len(out) > 0:
        out = GeoDataFrame(out).sort_values(by='num', ascending=True)
        out.crs = crs
        # write file to shapefile
        out.to_file(driver='ESRI Shapefile', filename=output_file)
def make_transit_stops(input_folder, output_name):
    """Convert ``stops.txt`` in ``input_folder`` into a point shapefile.

    The process working directory is changed to ``input_folder`` first, so a
    relative ``output_name`` is written inside that folder. Points are built
    from ``stop_lon``/``stop_lat``; the CRS is EPSG:4326
    (http://spatialreference.org/ref/epsg/wgs-84/).

    Returns the GeoDataFrame that was written.
    """
    os.chdir(input_folder)

    stops = pd.read_csv('stops.txt', header=0)
    stop_points = [
        Point(lon_lat) for lon_lat in zip(stops.stop_lon, stops.stop_lat)
    ]

    df_out = GeoDataFrame(stops, geometry=stop_points)
    df_out.crs = {'init': 'epsg:4326'}
    for column in ('stop_id', 'stop_name'):
        df_out[column] = stops[column]

    df_out.to_file(output_name, driver='ESRI Shapefile')
    return df_out
def transform_and_normalize_circle_covering(circles, crs):
    """Reproject circle geometries from EPSG:2768 to ``crs`` and shift them.

    :param circles: geometries given in EPSG:2768 (circa metres).
    :param crs: target coordinate reference system.
    :return: the reprojected geometries translated by a fixed offset
        (xoff=141.22, yoff=-30.02), as returned by ``translate``.
    """
    # The original also built an unused second frame from ``circles``; it is
    # gone because it was never read and would have exhausted a one-shot
    # iterable before the real frame was created.
    df = GeoDataFrame(geometry=GeoSeries(circles))
    df.crs = {'init': 'epsg:2768'}

    # Transform from the metre based EPSG:2768 coordinate system to ``crs``
    df = df.to_crs(crs)

    # Translation normalization from offset due to change in coordinate systems
    return df.translate(xoff=141.22, yoff=-30.02, zoff=0.0)
def test_deprecation(self):
    """Giving a CRS to a frame built from ``[]`` warns but still converts it."""
    # CRS passed to the constructor
    with pytest.warns(FutureWarning):
        frame = GeoDataFrame([], crs=27700)

    # https://github.com/geopandas/geopandas/issues/1548
    # ensure we still have converted the crs value to a CRS object
    assert isinstance(frame.crs, pyproj.CRS)

    # CRS assigned after construction
    with pytest.warns(FutureWarning):
        frame = GeoDataFrame([])
        frame.crs = 27700
    assert isinstance(frame.crs, pyproj.CRS)
def run(input_geojson, output_file):
    """Merge geometries that intersect each other and write the result.

    For each feature not merged yet, the features intersecting it are looked
    up through the spatial index. If there is more than the feature itself,
    the union of their intersections with the feature is stored and all of
    them are marked as merged; otherwise the feature's own geometry is
    stored. The collected geometries are de-duplicated by WKB and written
    with CRS EPSG:32632.

    Args:
        input_geojson: path of the input file.
        output_file: path the resulting GeoJSON is written to.
    """
    # read input geojson
    df = gpd.read_file(input_geojson)
    spatial_index = df.sindex

    # The accumulator was created as ``union_df`` but appended to as
    # ``union_points``, which raised NameError on the first feature.
    union_points = {'geometry': []}
    # set instead of list: membership is tested once per feature
    already_merged = set()

    for index, row in df.iterrows():
        if index % 1000 == 0:
            print(index)

        # features absorbed by an earlier union are skipped entirely
        if index in already_merged:
            continue

        # coarse bounding-box filter first, exact test second
        possible_matches_index = list(
            spatial_index.intersection(row.geometry.bounds))
        possible_matches = df.iloc[possible_matches_index]
        precise_matches = possible_matches[possible_matches.intersects(
            row.geometry)]

        if len(precise_matches) > 1:
            intersections = precise_matches.geometry.intersection(
                row.geometry)
            union_points['geometry'].append(cascaded_union(intersections))
            already_merged.update(intersections.index)
        else:
            union_points['geometry'].append(row.geometry)

    df = GeoDataFrame(union_points)

    # remove duplications
    wkb_values = df["geometry"].apply(lambda geom: geom.wkb)
    df = df.loc[wkb_values.drop_duplicates().index]

    # set crs of the dataframe
    df.crs = {'init': 'epsg:32632', 'no_defs': True}
    write_gdf_json(df,
                   out_filename=output_file,
                   epsg_out=32632,
                   should_format=False)
def create_geojson(yml_file, class_mapping, output_name, output_dir):
    """Turn the bounding boxes of the testing labels into a GeoJSON file.

    ``yml_file`` provides ``spec['prefix']`` and ``spec['testing']['labels']``;
    each label is a JSON file with ``img_name`` and ``bboxes``. Every bounding
    box becomes one polygon record; an image without boxes yields a single
    background (``'bg'``) record without geometry.

    Args:
        yml_file: path of the YAML specification.
        class_mapping: maps ``str(category)`` to a category number.
        output_name: forwarded to ``write_json``.
        output_dir: forwarded to ``write_json``.
    """
    with open(yml_file, 'r') as fp:
        # safe_load instead of yaml.load: loading without an explicit Loader
        # is unsafe and rejected by current PyYAML.
        # NOTE(review): assumes the spec uses plain YAML types only.
        spec = yaml.safe_load(fp)

    test_labels = spec['testing']['labels']
    # single pre-formatted argument: prints the same text on Python 2 and 3
    print('the number of evluation images is: %s' % len(test_labels))

    output = []
    next_id = 0  # running feature id (the original shadowed the builtin id)
    for label in test_labels:
        with open(spec['prefix'] + label, "r") as label_file:
            data = json.load(label_file)
        img_name = data['img_name']

        if len(data['bboxes']) == 0:
            # The original looked up ``category`` here although it is only
            # assigned in the other branch (NameError or a stale value).
            # Presumably class_mapping has a 'bg' entry; None otherwise.
            output.append({
                'id': next_id,
                'category': 'bg',
                'category_num': class_mapping.get('bg'),
                'geometry': None,
                'img_name': img_name,
            })
            next_id += 1
            continue

        for bbox in data['bboxes']:
            category = bbox['category']
            corners = [[bbox['x1'], bbox['y1']],
                       [bbox['x1'], bbox['y2']],
                       [bbox['x2'], bbox['y2']],
                       [bbox['x2'], bbox['y1']]]
            output.append({
                'id': next_id,
                'category': category,
                'category_num': class_mapping[str(category)],
                'geometry': Polygon(corners),
                'img_name': img_name,
            })
            next_id += 1

    df = GeoDataFrame(output)
    df.crs = {'init': 'epsg:32632'}
    write_json(df,
               output_name=output_name,
               output_dir=output_dir,
               geo_flag=True,
               indent=None)
def get_iso_distance(lat, lon, distance=1000, network_type='walk'):
    """Return the isochrone polygons around a point and their bounds.

    A network of type ``network_type`` is fetched around ``(lat, lon)``, the
    node nearest to that point is used as centre and ``make_iso_polys`` builds
    the polygons on the projected graph using the edge ``length``. The result
    is converted to EPSG:4326.

    Returns a tuple ``(gdf, bounds)``: the GeoDataFrame and its ``bounds``.
    """
    origin = (lat, lon)
    graph = ox.graph_from_point(origin,
                                distance=distance,
                                network_type=network_type)

    # find the centermost node (first item of the (node, distance) result)
    nearest = ox.get_nearest_node(graph,
                                  origin,
                                  method='euclidean',
                                  return_dist=True)
    center_node = nearest[0]

    # project the graph to UTM
    projected = ox.project_graph(graph)
    iso = make_iso_polys(projected,
                         center_node,
                         distance,
                         'length',
                         edge_buff=25,
                         node_buff=0,
                         infill=True)

    gdf = GeoDataFrame(geometry=GeoSeries(iso))
    gdf.crs = projected.graph['crs']
    gdf = gdf.to_crs("EPSG:4326")
    return gdf, gdf.bounds
def geocode_dataframe(address_dataframe, address_column):
    """Geocode one column of a frame and return it as point GeoDataFrame.

    Every value of ``address_column`` is passed to ``geolocator.geocode``; the
    resulting ``longitude``/``latitude`` become a point geometry. The input
    frame is not modified.

    :param address_dataframe: frame holding the addresses.
    :param address_column: name of the column to geocode.
    :return: GeoDataFrame (CRS EPSG:4326) with the original columns plus
        ``geometry``. Rows the geocoder returned ``None`` for get a missing
        geometry instead of aborting the whole run.
    """
    def to_point(location):
        # NOTE(review): assumes the geocoder signals "no result" with None.
        if location is None:
            return None
        return Point(location.longitude, location.latitude)

    df = address_dataframe.copy()
    df['geocode'] = df[address_column].apply(geolocator.geocode)
    # create a geometry column
    df['geometry'] = df['geocode'].apply(to_point)

    gdf = GeoDataFrame(df, geometry='geometry')
    gdf.crs = {'init': 'epsg:4326'}
    # the raw geocoder result is only an intermediate
    return gdf.drop(columns=['geocode'])
from geopandas.tools import sjoin
from shapely.geometry import Point

# Example script: spatially join random points with the borough polygons and
# plot the inputs next to the (buffered) join result.

# NOTE: '__file__' is a quoted string here, so ``here`` resolves to the
# current working directory rather than to the directory of this script.
here = os.path.dirname(os.path.abspath('__file__'))
data_dir = os.path.join(here, '..', 'data')  # not used further below
vector_file = os.path.join(here, 'nybb_15b', 'nybb.shp')
boros = read_file(vector_file)

# N random points drawn uniformly inside the bounding box of the boroughs
xmin, ymin, xmax, ymax = boros.total_bounds
N = 1000
X = np.random.uniform(low=xmin, high=xmax, size=N)
Y = np.random.uniform(low=ymin, high=ymax, size=N)
points = GeoDataFrame(geometry=GeoSeries([Point(x, y) for x, y in zip(X, Y)]))
points.crs = boros.crs

# inner join: only points that match a borough are kept
joined = sjoin(points, boros, how='inner')
# replace the points by buffers of 2000 (in units of the layer's CRS)
joined.geometry = joined.buffer(2000)

# left panel: boroughs coloured by BoroCode with all random points on top
ax = plt.subplot(121)
boros.plot(column='BoroCode', categorical=True, axes=ax)
points.plot(axes=ax)
ax.set_aspect('equal')
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)

# right panel: the joined buffers, coloured by the borough they matched
ax = plt.subplot(122)
joined.plot(column='BoroCode', categorical=True, axes=ax)
ax.set_aspect('equal')
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
def makePoints(experiment):
    """Merge the filtered CSVs of a campaign into one point layer.

    Every ``*.csv`` in ``<cwd>/diysco2-db/campaigns/<experiment>/diysco2-filtered/``
    is read with ``readdata``, turned into points from ``lon``/``lat``
    (declared as WGS84), reprojected to EPSG:3740 and merged. The merged
    layer is written to ``.../diysco2-filtered-points/`` as
    ``all_20150528.geojson`` and ``all_20150528.shp``.

    :param experiment: name of the campaign folder.
    :return: the merged GeoDataFrame.
    """
    path = os.getcwd() + '/diysco2-db/campaigns/' + experiment + '/diysco2-filtered/'
    ipaths = [os.path.join(path, i) for i in os.listdir(path) if i.endswith('.csv')]

    # experiment data
    data = [readdata(i) for i in ipaths]
    # work on copies of the data
    ldata = [i.copy() for i in data]

    # ------------- Spatial Operations -------------
    for frame in ldata:
        frame.lon = frame.lon.astype('float')
        frame.lat = frame.lat.astype('float')

    # need to keep datetime field (it is the index up to here)
    for frame in ldata:
        frame['datetime'] = frame.index

    # create geopoints
    for frame in ldata:
        frame.index = list(range(len(frame)))
        frame['geometry'] = GeoSeries(
            [Point(x, y) for x, y in zip(frame.lon, frame.lat)])
        # convert datetime to iso format string
        frame['datetime'] = frame.datetime.map(
            lambda x: datetime.strftime(x, '%Y-%m-%dT%H:%M:%SZ'))

    # print(...) with one argument behaves the same on Python 2 and 3
    print(ldata[0].head())

    # Projections
    gridproj = {'init': 'epsg:3740', 'no_defs': True}
    wgs84 = {'datum': 'WGS84', 'no_defs': True, 'proj': 'longlat'}

    # create geodataframe from data
    ldata = [GeoDataFrame(i) for i in ldata]
    # set projection as wgs84
    for frame in ldata:
        frame.crs = wgs84
    # reproject to EPSG:3740
    for frame in ldata:
        frame.geometry = frame.geometry.to_crs(epsg=3740)

    # Drop rows without geometry. The original loop only rebound its loop
    # variable, so the filtered frames were thrown away.
    ldata = [frame[pd.notnull(frame.geometry)] for frame in ldata]

    # --- Merge geodata together --- #
    # all frames are merged; the original hard-coded exactly five of them
    mergedgeo = pd.concat(ldata)
    mergedgeo = GeoDataFrame(mergedgeo)
    mergedgeo.crs = gridproj
    print(len(mergedgeo))
    mergedgeo = mergedgeo[pd.isnull(mergedgeo.lat) == False]
    print(len(mergedgeo))

    print(mergedgeo.head())

    opath = os.getcwd() + '/diysco2-db/campaigns/' + experiment + '/diysco2-filtered-points/'
    print(opath)
    if os.path.isdir(opath):
        print("already a folder!")
    else:
        os.mkdir(opath)

    # an existing GeoJSON is removed first so it can be written afresh
    if os.path.isfile(opath + 'all_20150528.geojson'):
        os.remove(opath + 'all_20150528.geojson')
    mergedgeo.to_file(opath + 'all_20150528.geojson', driver="GeoJSON")
    mergedgeo.to_file(opath + 'all_20150528.shp', driver='ESRI Shapefile')

    return mergedgeo
def makeGrid(ipoints, experiment, gridsize):
    """Build a grid over the points, keep the transect cells and join the points.

    A grid shapefile with cells of ``gridsize`` is generated over the total
    bounds of ``ipoints`` by the local ``grid`` script, reduced to the cells
    intersecting the first feature of the study-area transect, and left-joined
    with the campaign's point shapefile (re-read from disk).

    :param ipoints: point layer; only its total bounds are used.
    :param experiment: name of the campaign folder.
    :param gridsize: cell size handed to the grid script (x and y).
    :return: the joined frame (transect cells left-joined with the points).
    """
    # Projections
    gridproj = {'init': 'epsg:3740', 'no_defs': True}
    wgs84 = {'datum':'WGS84', 'no_defs':True, 'proj':'longlat'}

    # import grid script
    sys.path.insert(0, os.getcwd()+'/mapping/libs/')
    import grid as g

    opath = os.getcwd() + '/diysco2-db/campaigns/'+experiment+'/diysco2-grid'
    if not os.path.isdir(opath):
        os.mkdir(opath)
    else:
        print("already a folder!")

    ofile = opath + "/" + "grid_"+str(gridsize)+"m.shp"
    transect_file = ofile[:-4]+"_transect.shp"

    print("making grid")
    bounds = ipoints.total_bounds
    g.main(ofile, bounds[0], bounds[2], bounds[1], bounds[3],
           gridsize, gridsize)
    print("grid complete! ")

    # read in the grid that was just made
    grid = GeoDataFrame.from_file(ofile)
    grid.crs = gridproj
    # create grid id to groupby
    grid['id'] = list(range(len(grid)))

    # Read in transect to spatial subset grids in transect
    transect = GeoDataFrame.from_file(
        os.getcwd()+'/diysco2-db/_main_/study-area/'
        + 'transect_epicc2sp_woss.shp')
    transect.crs = gridproj

    # subset grid: keep cells intersecting the first transect feature
    cells = grid.geometry
    sagrid = [
        cells[n] for n in range(len(grid))
        if np.array(transect.intersects(cells[n]))[0] != False
    ]

    transectgrid = GeoDataFrame(sagrid)
    transectgrid.columns = ['geometry']
    transectgrid['id'] = list(range(len(transectgrid)))
    transectgrid.crs = gridproj
    transectgrid.to_file(transect_file)

    # Reading the subset back from disk is what makes the sjoin work
    transectgrid = GeoDataFrame.from_file(transect_file)
    transectgrid.crs = gridproj
    print(transectgrid.head())

    ipoints = GeoDataFrame.from_file(
        os.getcwd() + '/diysco2-db/campaigns/'+experiment
        + '/diysco2-filtered-points/all_20150528.shp')
    ipoints.crs = gridproj
    print(ipoints.head())

    # Spatial join points to grid
    oname = "gridjoin_"+str(gridsize)+"m.shp"
    join_inner_df = sjoin(transectgrid, ipoints, how="left", op='intersects')
    return join_inner_df
def query(self, **kwargs):
    """
    A query function to extract data out of MapServer layers. I've exposed
    every option here

    Parameters
    ==========
    where: str, required
        sql query string.
    out_fields: list or str, (default: '*')
        fields to pass from the header out
    return_geometry: bool, (default: True)
        bool describing whether to return geometry or just the dataframe
    geometry_precision: str, (default: None)
        a number of significant digits to which the output of the query
        should be truncated
    out_sr: int or str, (default: None)
        ESRI WKID spatial reference into which to reproject the geodata
    return_ids_only: bool, (default: False)
        bool stating to only return ObjectIDs
    return_z: bool, (default: False)
        whether to return z components of shp-z
    return_m: bool, (default: False)
        whether to return m components of shp-m
    pkg: str, (default: 'pysal')
        passing 'geopandas' (any case) switches on the GeoDataFrame
        conversion; the value is not otherwise used by this method.
    gpize: bool, (default: False)
        whether to attempt converting the result to a geopandas
        GeoDataFrame. Forced to True when pkg is 'geopandas'.

    Returns
    =======
    Dataframe or GeoDataFrame containing entries from the geodatabase

    Raises
    ======
    KeyError
        if the JSON response contains no 'features' entry (for example
        when the server returned an error payload instead of results).

    Notes
    =====
    Most of the time, this should be used leaning on the SQL "where"
    argument:

    cxn.query(where='GEOID LIKE "06*"')

    In most cases, you'll be querying against layers, not MapServices
    overall.

    Underscores are stripped from keyword names before they are placed in
    the query string (``out_fields`` is sent as ``outfields``). Values are
    interpolated as-is; no URL-escaping is done here.
    """
    # parse args
    pkg = kwargs.pop('pkg', 'pysal')
    gpize = kwargs.pop('gpize', False)
    if pkg.lower() == 'geopandas':
        pkg = 'shapely'
        gpize = True
    kwargs = {''.join(k.split('_')): v for k, v in diter(kwargs)}

    # construct query string, starting from a fresh copy of the defaults
    self._basequery = copy.deepcopy(_basequery)
    for k, v in diter(kwargs):
        try:
            self._basequery[k] = v
        except KeyError:
            # The option name is now actually interpolated in the message.
            raise KeyError("Option '{k}' not recognized, "
                           "check parameters".format(k=k))
    qstring = '&'.join(['{}={}'.format(k, v)
                        for k, v in diter(self._basequery)])
    self._last_query = self._baseurl + '/query?' + qstring

    # run query
    resp = r.get(self._last_query + '&f=json')
    resp.raise_for_status()
    datadict = resp.json()

    # convert to output format
    try:
        features = datadict['features']
    except KeyError:
        # Surface whatever error information the payload carries rather
        # than an opaque KeyError('features').
        error = datadict.get('error') or {}
        raise KeyError('Response from API contains no features. '
                       '(API ERROR {}:{}({}))'.format(
                           error.get('code'),
                           error.get('message'),
                           error.get('details')))

    todf = []
    for feature in features:
        locfeat = gpsr.__dict__[datadict['geometryType']](feature)
        record = locfeat['properties']
        record.update({'geometry': locfeat['geometry']})
        todf.append(record)

    df = pd.DataFrame(todf)
    outdf = gpsr.convert_geometries(df)

    if gpize:
        # Best-effort conversion: fall back to the plain dataframe when
        # geopandas is missing or rejects the data. ``except Exception``
        # keeps KeyboardInterrupt/SystemExit propagating.
        try:
            from geopandas import GeoDataFrame
            outdf = GeoDataFrame(outdf)
        except Exception:
            print('Geopandas dataframe conversion failed! Continuing...')
    outdf.crs = datadict.pop('spatialReference', {})
    return outdf
def query(self, **kwargs):
    """
    A query function to extract data out of MapServer layers. I've exposed
    every option here

    Parameters
    ==========
    where: str, required
        sql query string.
    out_fields: list or str, (default: '*')
        fields to pass from the header out
    return_geometry: bool, (default: True)
        bool describing whether to return geometry or just the dataframe
    geometry_precision: str, (default: None)
        a number of significant digits to which the output of the query
        should be truncated
    out_sr: int or str, (default: None)
        ESRI WKID spatial reference into which to reproject the geodata
    return_ids_only: bool, (default: False)
        bool stating to only return ObjectIDs
    return_z: bool, (default: False)
        whether to return z components of shp-z
    return_m: bool, (default: False)
        whether to return m components of shp-m
    pkg : str (default: 'geopandas')
        what geometry type to provide in the results of the query. Uses
        shapely shapes by default. Supports "pysal," which constructs a
        pandas dataframe with pysal shapes in a geometry column; 'shapely',
        which builds a pandas dataframe with shapely shapes in a geometry
        column, and "geopandas," which returns a geopandas GeoDataFrame.
    gpize : bool (default: False)
        whether to attempt converting the result to a geopandas
        GeoDataFrame. Forced to True when pkg is 'geopandas'.
    strict : bool (default: False)
        whether to throw an error if invalid polygons are provided from
        the API (True) or just warn that at least one polygon is invalid
        (False)

    Returns
    =======
    Dataframe or GeoDataFrame containing entries from the geodatabase

    Raises
    ======
    KeyError
        if the JSON response contains no 'features' entry; the message
        includes the error code, message and details reported by the
        server when they are present.

    Notes
    =====
    Most of the time, this should be used leaning on the SQL "where"
    argument:

    cxn.query(where='GEOID LIKE "06*"')

    In most cases, you'll be querying against layers, not MapServices
    overall.

    Underscores are stripped from keyword names before they are placed in
    the query string (``out_fields`` is sent as ``outfields``). Values are
    interpolated as-is; no URL-escaping is done here.
    """
    # parse args
    pkg = kwargs.pop('pkg', 'geopandas')
    gpize = kwargs.pop('gpize', False)
    strict = kwargs.pop('strict', False)
    if pkg.lower() == 'geopandas':
        pkg = 'shapely'
        gpize = True
    kwargs = {''.join(k.split('_')): v for k, v in diter(kwargs)}

    # construct query string, starting from a fresh copy of the defaults
    self._basequery = copy.deepcopy(_basequery)
    for k, v in diter(kwargs):
        try:
            self._basequery[k] = v
        except KeyError:
            # The option name is now actually interpolated in the message.
            raise KeyError("Option '{k}' not recognized, "
                           "check parameters".format(k=k))
    qstring = '&'.join(['{}={}'.format(k, v)
                        for k, v in diter(self._basequery)])
    self._last_query = self._baseurl + '/query?' + qstring

    # run query
    resp = r.get(self._last_query + '&f=json')
    resp.raise_for_status()
    datadict = resp.json()

    # convert to output format
    try:
        features = datadict['features']
    except KeyError:
        # Read the error payload defensively so this handler cannot fail
        # with a second, unrelated KeyError.
        error = datadict.get('error') or {}
        code = error.get('code')
        msg = error.get('message')
        details = error.get('details')
        # ``details is []`` could never be true; test for emptiness.
        if not details:
            details = 'Mapserver provided no detailed error'
        raise KeyError(('Response from API is malformed. You may have '
                        'submitted too many queries, or experienced '
                        'significant network connectivity issues.\n'
                        '(API ERROR {}:{}({}))'.format(code, msg, details)))

    todf = []
    for feature in features:
        locfeat = gpsr.__dict__[datadict['geometryType']](feature)
        record = locfeat['properties']
        record.update({'geometry': locfeat['geometry']})
        todf.append(record)

    df = pd.DataFrame(todf)
    outdf = gpsr.convert_geometries(df, pkg, strict=strict)

    if gpize:
        # Best-effort conversion: fall back to the plain dataframe when
        # geopandas is missing or rejects the data. ``except Exception``
        # keeps KeyboardInterrupt/SystemExit propagating.
        try:
            from geopandas import GeoDataFrame
            outdf = GeoDataFrame(outdf)
        except Exception:
            print('Geopandas dataframe conversion failed! Continuing...')

    # Prefer the latest WKID when the server reports one. Leave the CRS
    # unset rather than fabricating 'epsg:None' when no WKID is given.
    crs = datadict.pop('spatialReference', None)
    if crs is not None:
        wkid = crs.get('latestWkid', crs.get('wkid'))
        crs = dict(init='epsg:{}'.format(wkid)) if wkid is not None else None
    outdf.crs = crs
    return outdf