def _find_overlapping_points_in_network(points: gpd.GeoDataFrame) -> Dict[int, List[int]]:
    """
    Finds overlapping (near-duplicate) points in the network.

    Parameters
    ----------
    points : gpd.GeoDataFrame
        GeoDataFrame with all points of the network.

    Returns
    -------
    Dict[int, List[int]]
        Mapping from a point's index to the indices of the other points
        that lie within a 0.1 unit tolerance of it. Points with no
        overlaps are omitted.
    """
    unique_points = []
    same_points = {}
    # O(n^2) pairwise comparison over all points
    for index, point in points.iterrows():
        point_list = []
        for index2, point2 in points.iterrows():
            if index2 not in unique_points:
                if point.geometry.distance(point2.geometry) < 1e-1 and index != index2:
                    point_list.append(index2)
        if len(point_list) == 0:
            unique_points.append(index)
        else:
            same_points[index] = point_list
    return same_points
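# A minimal usage sketch for _find_overlapping_points_in_network, assuming
# geopandas and shapely are available; the sample coordinates are made up.
import geopandas as gpd
from shapely.geometry import Point

network_points = gpd.GeoDataFrame(
    geometry=[Point(0, 0), Point(0.05, 0), Point(10, 10)]
)
# Points 0 and 1 are within the 0.1 tolerance of each other, so each
# appears in the other's overlap list; point 2 stands alone.
print(_find_overlapping_points_in_network(network_points))
# expected: {0: [1], 1: [0]}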
def from_data_frame(self, trace_data: GeoDataFrame):
    """
    Populate the trace collection from a GeoDataFrame.

    :param trace_data: GeoDataFrame of 'Point' or 'LineString' geometries.
    :return: self, with the trace points added.
    """
    if geom_check(trace_data, "Point"):
        assert "trace_id" in trace_data.columns, (
            "Expected 'trace_id' column to be in data, got %s"
            % list(trace_data.columns.values)
        )
        for idx, feature in trace_data.iterrows():
            feature_geometry, feature_property = decompose_data_frame_row(feature)
            # pop to avoid passing trace_point_id twice via **feature_property
            trace_point_id = feature_property.pop("trace_point_id", uuid.uuid1().int)
            self.add(
                x=feature_geometry["coordinates"][0],
                y=feature_geometry["coordinates"][1],
                trace_point_id=trace_point_id,
                **feature_property,
            )
    elif geom_check(trace_data, "LineString"):
        for idx, feature in trace_data.iterrows():
            trace_id = uuid.uuid1().int
            feature_geometry, feature_property = decompose_data_frame_row(feature)
            line_string_coordinate = feature_geometry["coordinates"]
            for nodes in line_string_coordinate:
                trace_point_id = uuid.uuid1().int
                self.add(
                    # index 1 is the y coordinate; nodes[-1] would pick up
                    # z instead for 3D coordinates
                    x=nodes[0],
                    y=nodes[1],
                    trace_point_id=trace_point_id,
                    trace_id=trace_id,
                    **feature_property,
                )
    else:
        raise ValueError("Expected Geometry Type to be in ['Point', 'LineString']")
    return self
def test_cluster_points2(self):
    pts = [
        Point(0, 0), Point(6, 0), Point(6, 6),
        Point(0.2, 0.2), Point(6.2, 0.2), Point(6.2, 6.2),
    ]
    expected = GeoDataFrame(
        pd.DataFrame([
            {'geometry': Point(0.1, 0.1), 'n': 2},
            {'geometry': Point(6.1, 0.1), 'n': 2},
            {'geometry': Point(6.1, 6.1), 'n': 2},
        ]))
    actual = PointClusterer(pts, max_distance=5, is_latlon=False).get_clusters()
    actual = [(c.centroid, len(c.points)) for c in actual]
    expected = [(c.geometry, c.n) for key, c in expected.iterrows()]
    assert len(actual) == len(expected)
    for pt in expected:
        assert pt in actual
def __points2grids(gdf: geopandas.GeoDataFrame,
                   shift: float = 1 / 32,
                   crs: str = '4326') -> geopandas.GeoDataFrame:
    '''
    Converts Livneh centroid 'lat'/'lon' coordinates to polygon grid cells.

    Inputs:
        (1) a geopandas GeoDataFrame containing 'lat' and 'lon' series
        (2) a float representing the shortest distance between the centroid
            and the grid edge
        (3) the coordinate system identifier to be used, '4326' by default

    Output:
        (1) a geopandas GeoDataFrame containing the grid cells' polygon geometry

    Notes: The 'shift' parameter is used to identify the north, east, south
        and west most extents of the polygon, as so...

            nw---ne
            |     |
            sw---se
    '''
    grids_geometry: list = []
    for i, row in gdf.iterrows():
        w: float = float(row['lon']) - shift
        e: float = float(row['lon']) + shift
        n: float = float(row['lat']) + shift
        s: float = float(row['lat']) - shift
        lons, lats = [w, e, e, w], [n, n, s, s]
        grids_geometry.append(Polygon(zip(lons, lats)))
    gdf['geometry'] = grids_geometry
    return gdf.to_crs(epsg=crs)
def s_test_gdf_series(bin_gdf: GeoDataFrame, test_config: dict, N_norm: float = 1.0):
    return [
        s_test_bin(row.SpacemagBin, test_config, N_norm)
        for i, row in bin_gdf.iterrows()
    ]
def calc_onborder(geoms_gdf: gpd.GeoDataFrame,
                  border_bounds: Tuple[float, float, float, float],
                  onborder_column_name: str = "onborder") -> gpd.GeoDataFrame:
    """
    Add/update a column to the GeoDataFrame with:
        * 0 if the polygon isn't on the border and
        * 1 if it is.

    Args
        geoms_gdf: input GeoDataFrame
        border_bounds: the bounds (tuple with (xmin, ymin, xmax, ymax)) to
            check against to determine onborder
        onborder_column_name: the column name of the onborder column
    """
    # Split geoms that need unioning versus geoms that don't
    # -> They are on the edge of a tile
    if geoms_gdf is not None and len(geoms_gdf.index) > 0:
        # Check
        for i, geom_row in geoms_gdf.iterrows():
            # Check if the geom is on the border of the tile
            geom_bounds = geom_row['geometry'].bounds  # type: ignore
            onborder = 0
            if (geom_bounds[0] <= border_bounds[0]
                    or geom_bounds[1] <= border_bounds[1]
                    or geom_bounds[2] >= border_bounds[2]
                    or geom_bounds[3] >= border_bounds[3]):
                onborder = 1
            geoms_gdf.loc[i, onborder_column_name] = onborder  # type: ignore
        geoms_gdf.reset_index(drop=True, inplace=True)
    return geoms_gdf
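# A minimal sketch of calc_onborder on a toy tile, assuming geopandas and
# shapely; the tile bounds (0, 0, 10, 10) are made up for illustration.
import geopandas as gpd
from shapely.geometry import box

gdf = gpd.GeoDataFrame(geometry=[box(0, 0, 2, 2),    # touches xmin/ymin -> 1
                                 box(4, 4, 6, 6)])   # fully interior    -> 0
print(calc_onborder(gdf, border_bounds=(0.0, 0.0, 10.0, 10.0))["onborder"].tolist())
# expected: [1.0, 0.0] (the column is filled row by row, so it ends up float)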
def add_nodes_from_gdf(self, gdf: gpd.GeoDataFrame, node_index_attr=None):
    """Add nodes with the given `GeoDataFrame` and fill nodes attributes with the
    geodataframe columns.

    Parameters
    ----------
    gdf :
        GeoDataFrame representing nodes to add (one row for one node).
    node_index_attr :
        Node index attribute for labeling nodes. If ``None``, the dataframe index
        is used, else the given column is used. (Default value = None)

    See Also
    --------
    add_edges_from_gdf
    """
    if not (gnx.is_null_crs(self.crs) or gnx.is_null_crs(gdf.crs)
            or gnx.crs_equals(gdf.crs, self.crs)):
        gdf = gdf.to_crs(self.crs, inplace=False)
    if node_index_attr is not None:
        gdf = gdf.set_index(node_index_attr, drop=True, inplace=False)
    if gdf._geometry_column_name != self.nodes_geometry_key:
        gdf = gdf.rename(
            columns={gdf._geometry_column_name: self.nodes_geometry_key},
            inplace=False)
        gdf.set_geometry(self.nodes_geometry_key, inplace=True)
    self.add_nodes_from(gdf.iterrows())
    self.check_nodes_validity()
def add_edges_from_gdf(self, gdf: gpd.GeoDataFrame, edge_first_node_attr=None,
                       edge_second_node_attr=None):
    """Add edges with the given `GeoDataFrame`. If no dataframe columns are
    specified for first and second node, the dataframe index must be a
    multi-index `(u, v)`.

    Parameters
    ----------
    gdf :
        GeoDataFrame representing edges to add (one row for one edge).
    edge_first_node_attr :
        Edge first node attribute. If ``None``, the dataframe index is used, else
        the given column is used. Must be used with ``edge_second_node_attr``.
        (Default value = None)
    edge_second_node_attr :
        Edge second node attribute. If ``None``, the dataframe index is used, else
        the given column is used. Must be used with ``edge_first_node_attr``.
        (Default value = None)

    See Also
    --------
    add_nodes_from_gdf
    """
    if not (gnx.is_null_crs(self.crs) or gnx.is_null_crs(gdf.crs)
            or gnx.crs_equals(gdf.crs, self.crs)):
        gdf = gdf.to_crs(self.crs, inplace=False)
    if edge_first_node_attr is not None and edge_second_node_attr is not None:
        gdf = gdf.set_index([edge_first_node_attr, edge_second_node_attr],
                            drop=True, inplace=False)
    if gdf._geometry_column_name != self.edges_geometry_key:
        gdf = gdf.rename(
            columns={gdf._geometry_column_name: self.edges_geometry_key},
            inplace=False)
        gdf.set_geometry(self.edges_geometry_key, inplace=True)
    self.add_edges_from((*r[0], r[1]) for r in gdf.iterrows())
def disagg(vec: gpd.GeoDataFrame):
    """Disaggregate geometry collections and multi-part geometries"""
    # Split GeometryCollections
    no_coll = []
    for i, row in vec.iterrows():
        geom = row.geometry
        if geom.geom_type == 'GeometryCollection':
            # iterate over .geoms: direct iteration over multi-part
            # geometries was removed in shapely 2.0
            for part in geom.geoms:
                row2 = row.copy()
                row2.geometry = part
                no_coll.append(row2)
        else:
            no_coll.append(row)

    # Split multi-part geometries
    res = []
    for row in no_coll:
        geom = row.geometry
        if geom.geom_type.startswith('Multi'):
            for part in geom.geoms:
                row2 = row.copy()
                row2.geometry = part
                res.append(row2)
        else:
            res.append(row)

    return gpd.GeoDataFrame(res, crs=vec.crs).reset_index(drop=True)
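# A quick sketch of disagg, assuming geopandas/shapely; the two-part
# MultiPoint is made up to show one row being split in two.
import geopandas as gpd
from shapely.geometry import MultiPoint, Point

gdf = gpd.GeoDataFrame({"name": ["a", "b"]},
                       geometry=[MultiPoint([(0, 0), (1, 1)]), Point(2, 2)])
flat = disagg(gdf)
print(len(flat))  # 3 rows: the MultiPoint split into two Points, plus "b"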
def road_network_from_data_frame(road_data: GeoDataFrame) -> RoadNetwork:
    assert supported_crs(road_data), (
        "Supported CRS ['epsg:26910', 'epsg:32649'], got %s" % (road_data.crs,)
    )

    road_table = RoadTable()

    assert geom_check(
        road_data, "LineString"
    ), "Expected all geometries to be LineString"

    for idx, feature in road_data.iterrows():
        feature_geometry, feature_property = decompose_data_frame_row(feature)
        assert "u" in feature_property and "v" in feature_property, (
            "Expected 'u' and 'v' to be present in the property, "
            "indicating the start node and the end node of the provided "
            "geometry"
        )
        if "fid" in feature_property:
            idx = feature_property["fid"]
        road_table.add(
            idx, feature_property, feature_geometry, shape(feature_geometry).length
        )

    return RoadNetwork(
        road_table=road_table,
        maximum_distance=compute_diagonal_distance_of_extent(road_data),
    )
def test_cluster_vertical_points(self):
    pts = [
        Point(0, 2), Point(6, 2), Point(6, 2),
        Point(0.2, 2), Point(6.2, 2), Point(6.2, 2),
    ]
    expected = GeoDataFrame(
        pd.DataFrame([
            {"geometry": Point(0.1, 2), "n": 2},
            {"geometry": Point(6.1, 2), "n": 4},
        ]))
    actual = PointClusterer(pts, max_distance=5, is_latlon=False).get_clusters()
    actual = [(c.centroid, len(c.points)) for c in actual]
    expected = [(c.geometry, c.n) for key, c in expected.iterrows()]
    assert len(actual) == len(expected)
    print([str(c) for p in actual for c in p])
    for pt in expected:
        assert pt in actual
def fetch_ecodivision_name(latitude: str, longitude: str,
                           ecodivisions: geopandas.GeoDataFrame):
    """ Returns the ecodivision name for a given lat/long coordinate """
    station_coord = Point(float(longitude), float(latitude))
    for _, ecodivision_row in ecodivisions.iterrows():
        geom = ecodivision_row['geometry']
        if station_coord.within(geom):
            return ecodivision_row['CDVSNNM']
    return None
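# A hedged usage sketch for fetch_ecodivision_name; the single square
# "ecodivision" and its CDVSNNM value are fabricated for illustration.
import geopandas
from shapely.geometry import Point, box

ecodivisions = geopandas.GeoDataFrame(
    {"CDVSNNM": ["DEMO DIVISION"]}, geometry=[box(-130.0, 48.0, -114.0, 60.0)])
print(fetch_ecodivision_name("49.5", "-123.0", ecodivisions))  # DEMO DIVISION
print(fetch_ecodivision_name("10.0", "10.0", ecodivisions))    # None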
def get_model_annual_eq_rate(bin_gdf: gpd.GeoDataFrame) -> float:
    annual_rup_rate = 0.0
    for i, row in bin_gdf.iterrows():
        sb = row.SpacemagBin
        min_bin_center = np.min(sb.mag_bin_centers)
        bin_mfd = sb.get_rupture_mfd(cumulative=True)
        annual_rup_rate += bin_mfd[min_bin_center]
    return annual_rup_rate
def fetch_ecodivision_name(lat: str, long: str, ecodivisions: geopandas.GeoDataFrame):
    """ Returns the ecodivision name for a given lat/long coordinate """
    station_coord = Point(float(long), float(lat))
    for index, row in ecodivisions.iterrows():  # pylint: disable=redefined-outer-name, unused-variable
        geom = row['geometry']
        if station_coord.within(geom):
            return row['CDVSNNM']
    return None
def _check_has_overlaps_old(polygons: gpd.GeoDataFrame):
    for i, (inda, pola) in enumerate(polygons.iterrows()):
        for (indb, polb) in polygons.iloc[i + 1:].iterrows():
            if pola.geometry.intersects(polb.geometry):
                warnings.warn(
                    f"List of shapes contains overlap between {inda} and {indb}. "
                    f"Points will be assigned to {inda}.",
                    UserWarning,
                    stacklevel=5,
                )
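# A small sketch of _check_has_overlaps_old with two overlapping squares,
# assuming geopandas/shapely; warnings are recorded to show the message.
import warnings
import geopandas as gpd
from shapely.geometry import box

polys = gpd.GeoDataFrame(geometry=[box(0, 0, 2, 2), box(1, 1, 3, 3)])
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    _check_has_overlaps_old(polys)
print([str(w.message) for w in caught])
# one warning: overlap between 0 and 1, points will be assigned to 0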
def assign_images_to_tiles(
    df_tiles: gpd.GeoDataFrame, gdf: gpd.GeoDataFrame
) -> gpd.GeoDataFrame:
    """
    For each tile, specify which images it can be obtained from.
    Adds a "pre-event" and "post-event" column to `df_tiles`.

    Arguments:
        df_tiles: The output of `generate_tiles`.
        gdf: The output of `get_extents`.

    Returns the `df_tiles` input argument.
    """
    df_tiles['pre-event'] = [[] for x in range(len(df_tiles))]
    df_tiles['post-event'] = [[] for x in range(len(df_tiles))]
    for ixt, rowt in df_tiles.iterrows():
        pre_event_images, post_event_images = [], []
        for ix, row in gdf.iterrows():
            bounds_image = rasterio.coords.BoundingBox(*row['geometry'].bounds)
            bounds_tile = rasterio.coords.BoundingBox(*rowt['geometry'].bounds)
            if not rasterio.coords.disjoint_bounds(bounds_image, bounds_tile):
                if row['pre-post'] == 'pre-event':
                    pre_event_images.append(row['file'])
                else:
                    post_event_images.append(row['file'])
        if len(pre_event_images) > 0:
            df_tiles.at[ixt, 'pre-event'] = pre_event_images
        else:
            df_tiles.at[ixt, 'pre-event'] = np.nan
        if len(post_event_images) > 0:
            df_tiles.at[ixt, 'post-event'] = post_event_images
        else:
            df_tiles.at[ixt, 'post-event'] = np.nan
    # drop tiles that do not contain both pre- and post-event images
    df_tiles = df_tiles[(~pd.isna(df_tiles['pre-event'])) & (~pd.isna(df_tiles['post-event']))]
    # The pre-event and post-event columns contain lists of filenames, but geopandas
    # doesn't allow lists as GeoJSON properties to maintain compatibility with
    # shapefiles. We can work around this by converting them to dictionaries.
    df_tiles.loc[:, "pre-event"] = df_tiles["pre-event"].map(lambda l: dict(enumerate(l)))
    df_tiles.loc[:, "post-event"] = df_tiles["post-event"].map(lambda l: dict(enumerate(l)))
    return df_tiles
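# A hedged sketch of assign_images_to_tiles; the tile and image extents and
# file names are fabricated, and rasterio/geopandas are assumed installed.
import geopandas as gpd
from shapely.geometry import box

df_tiles = gpd.GeoDataFrame(geometry=[box(0, 0, 1, 1), box(5, 5, 6, 6)])
gdf = gpd.GeoDataFrame({
    "file": ["pre.tif", "post.tif"],
    "pre-post": ["pre-event", "post-event"],
}, geometry=[box(0, 0, 2, 2), box(0, 0, 2, 2)])
out = assign_images_to_tiles(df_tiles, gdf)
print(out[["pre-event", "post-event"]])
# only the first tile overlaps both images, so only it survives the filter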
def _calculate_row_uncertainties(self, grid: GeoDataFrame,
                                 period: DateRange) -> list[float]:
    """Combine daily grid uncertainties into a value for the full period for all cells."""
    # TODO It would be great to make this faster using clever iteration/numpy/CPython
    uncertainties = Series([TEMIS_CELL_UNCERTAINTY] * len(period))
    return [
        self._combine_uncertainties(row[-(len(period) + 3):-3], uncertainties)
        for _, row in grid.iterrows()
    ]
def subset_cophub_from_esa(
        esa_df: gpd.GeoDataFrame,
        cop_df: gpd.GeoDataFrame,
        outdir: Optional[Union[Path, str]] = Path(os.getcwd()),
        save_file: Optional[bool] = True) -> gpd.GeoDataFrame:
    """Subsets the Cophub list based on the ESA list if their
    geometry (footprint) intersects for a given day.

    :param esa_df: The GeoDataFrame with attributes for the ESA S3 listing.
    :param cop_df: The GeoDataFrame with attributes for the Cophub (NCI) file listing.
    :param outdir: The directory to write the subset download list to.
    :param save_file: Flag to save the subset download list or not.

    :return: GeoDataFrame with Cophub FRP product footprints that intersect
        with an ESA FRP footprint for a given day.
    """
    # convert datetime string to datetime stamp
    esa_df['start_date'] = pd.to_datetime(esa_df["start_date"], format="%Y%m%dT%H%M%S")
    cop_df['start_date'] = pd.to_datetime(cop_df["start_date"], format="%Y%m%dT%H%M%S")

    # assign crs to GeoDataFrame
    esa_df.crs = 'EPSG:4326'
    cop_df.crs = 'EPSG:4326'

    esa_df['date'] = pd.to_datetime(esa_df['start_date']).dt.date
    cop_df['date'] = pd.to_datetime(cop_df['start_date']).dt.date

    column_names = [
        "title", "start_date", "sensor", 'relative_orbit', 'geometry', 'esa_geometry'
    ]
    # collect matching rows in a plain list: DataFrame.append was removed in
    # pandas 2.0, so the frame is built once at the end instead
    download_rows = []
    for idx_esa, esa_row in esa_df.iterrows():
        cophub_df_subset = cop_df[cop_df['date'] == esa_row['date']].copy()
        cophub_df_subset['intersects'] = False
        esa_geom = esa_row['geometry'].buffer(0)
        for idx_cop, cop_row in cophub_df_subset.iterrows():
            cop_geom = cop_row['geometry'].buffer(0)
            if esa_geom.intersects(cop_geom):
                download_rows.append({
                    'title': cop_row["title"],
                    'start_date': cop_row['start_date'],
                    'sensor': cop_row['sensor'],
                    'relative_orbit': cop_row['relative_orbit'],
                    'geometry': cop_row['geometry'],
                    'esa_geometry': esa_row['geometry'],
                })
    cop_download_df = gpd.GeoDataFrame(download_rows, columns=column_names)
    subset_cop_download_df = cop_download_df.drop_duplicates(['title'])
    if save_file:
        subset_cop_download_df.to_csv(
            Path(outdir).joinpath('cophub_download_list.csv'))
    return subset_cop_download_df
def yield_top_point_slices(
    data: geopandas.GeoDataFrame
) -> Generator[geopandas.GeoDataFrame, None, None]:
    """Yields slices of the GeoDataFrame containing the top FRP point and the
    corresponding nearby points.

    Assumes `data` has a default RangeIndex, so that index labels can double
    as positional offsets for `iloc`.
    """
    start = 0
    for index, row in data.iterrows():
        if index == 0:
            # avoid yielding first row by itself
            continue
        if row.is_top_point is True:
            stop = index
            yield data.iloc[start:stop, :]
            start = stop
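# A minimal sketch of yield_top_point_slices, assuming geopandas; the
# is_top_point flags mark where each new group of FRP points begins.
import geopandas
from shapely.geometry import Point

data = geopandas.GeoDataFrame({
    "is_top_point": [True, False, True, False],
}, geometry=[Point(i, i) for i in range(4)])
for chunk in yield_top_point_slices(data):
    print(chunk.index.tolist())
# prints [0, 1]: the slice from the first top point up to the next one;
# note the trailing rows [2, 3] are not yielded by this generator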
def __init__(self, fault_geodataframe: gpd.GeoDataFrame,
             exclude_regions: List[Polygon] = None):
    for field in required_fields:
        if field not in fault_geodataframe.columns:
            raise ValueError("Missing required field: {}".format(field))
    for field in expected_fields:
        if field not in fault_geodataframe.columns:
            print("Warning: missing expected field: {}".format(field))

    self._faults = []

    # If appropriate, clip out data that fall within exclude_regions
    if exclude_regions is not None:
        assert isinstance(exclude_regions, list)
        assert all([isinstance(a, Polygon) for a in exclude_regions])
        exclude_regions_nztm = []
        # Check that polygons are in NZTM, otherwise convert them
        for poly in exclude_regions:
            x, y = poly.exterior.xy
            if all(np.array(y) < 0):
                # Assume in WGS (Lon Lat), convert to NZTM
                new_x, new_y = transform_wgs2nztm.transform(
                    np.array(x), np.array(y))
                new_poly = Polygon([(xi, yi) for xi, yi in zip(new_x, new_y)])
                exclude_regions_nztm.append(new_poly)
            else:
                # Assume NZTM, do nothing
                exclude_regions_nztm.append(poly)

        # Make list of faults outside region
        trimmed_fault_ls = []
        for i, row in fault_geodataframe.iterrows():
            if not any([
                    row.geometry.within(poly) for poly in exclude_regions_nztm
            ]):
                trimmed_fault_ls.append(row)
        trimmed_fault_gdf = gpd.GeoDataFrame(trimmed_fault_ls)
    else:
        trimmed_fault_gdf = fault_geodataframe

    # Sort alphabetically by name
    sorted_df = trimmed_fault_gdf.sort_values("FZ_Name")
    # Reset index to line up with alphabetical sorting
    sorted_df = sorted_df.reset_index(drop=True)
    for i, row in sorted_df.iterrows():
        self.add_fault(row)
    self.df = sorted_df
def build(self, geo_df: GeoDataFrame):
    gdp_list = []
    df_columns = list(geo_df.columns)
    for i, row in geo_df.iterrows():
        polygons: List[Polygon] = row["geometry"].geoms
        # TODO: Check for non-polygon entries
        for poly in polygons:
            meta = {
                column: row[column]
                for column in filter(lambda x: x != "geometry", df_columns)
            }
            gdp_list.append(GeoDataPoint(meta, poly))
    self.strtree.build(gdp_list)
def bboxes_per_year(
    df: geopandas.GeoDataFrame, box_date: date,
    surrounding_metres: int) -> List[Tuple[EEBoundingBox, date]]:
    boxes: List[Tuple[EEBoundingBox, date]] = []
    for _, row in df.iterrows():
        boxes.append((
            bounding_box_from_centre(
                mid_lat=row["lat"],
                mid_lon=row["lon"],
                surrounding_metres=surrounding_metres,
            ),
            box_date,
        ))
    return boxes
def create_get_urls(county_gdf: gpd.GeoDataFrame) -> List[str]:
    """
    Create a list of urls to issue GET requests against.

    Args:
        county_gdf: GeoDataFrame representing Indiana counties

    Returns:
        List of formatted urls to issue GET requests
    """
    urls = []
    for index, row in county_gdf.iterrows():
        urls.append(config.ARDA_ESRI_REST_ENDPOINT.format(
            xmin=row.minx, ymin=row.miny, xmax=row.maxx, ymax=row.maxy))
    return urls
def feature_collection(self, ft_name, **kwargs):
    """ Call bdq wfs feature_collection and format the result to a pandas DataFrame """
    global cache

    cv_list = None
    if 'ts' in kwargs:
        cv_list = kwargs['ts']
        del kwargs['ts']

    fc = self.wfs.feature_collection(ft_name, **kwargs)
    geo_data = pd.DataFrame(fc['features'])
    geo_data = GeoDataFrame(geo_data, geometry='geometry',
                            crs=fc['crs']['properties']['name'])

    metadata = {'total': fc['total'], 'total_features': fc['total_features']}

    # retrieve coverage attributes
    if cv_list is not None:
        # TODO: parallelize these calls
        for cv in cv_list:
            if type(cv['attributes']) is str:
                cv['attributes'] = tuple([cv['attributes']])
            # create columns as object to support lists
            for c in cv['attributes']:
                name = '{}.{}'.format(cv['coverage'], c)
                geo_data[name] = ''
                geo_data[name] = geo_data[name].astype(object)
            s_date = None
            if 'start_date' in cv:
                s_date = cv['start_date']
            e_date = None
            if 'end_date' in cv:
                e_date = cv['end_date']
            for idx, row in geo_data.iterrows():
                ts, ts_metadata = self.time_series(cv['coverage'], cv['attributes'],
                                                   row['geometry'].y, row['geometry'].x,
                                                   s_date, e_date)
                for c in cv['attributes']:
                    name = '{}.{}'.format(cv['coverage'], c)
                    # DataFrame.set_value was removed from pandas; .at is the
                    # scalar setter with the same semantics
                    if s_date == e_date and s_date is not None:
                        if ts_metadata['total'] == 1:
                            geo_data.at[idx, name] = ts[c].values.tolist()[0]
                        else:
                            geo_data.at[idx, name] = None
                    else:
                        geo_data.at[idx, name] = ts[c].values.tolist()

    return geo_data, metadata
def prepare_neighbor_net(gdf: GeoDataFrame, nbr: dict):
    for index, row in gdf.iterrows():
        # use the 'name' column (also referenced below) as the node key
        name = row["name"]
        index = name
        region = gdf[gdf.name == index]
        print(index)
        nbr[index] = {"coords": baricenter(region)}
        disjoints = gdf.geometry.disjoint(row.geometry)
        print("disjoint", row, disjoints)
        # regions that are NOT disjoint from this one touch or overlap it
        neighbors = gdf[~disjoints].name.tolist()
        # neighbors = gdf[disjoints].name.tolist()
        if index in neighbors:
            neighbors.remove(index)
        print(index, neighbors)
        nbr[index]["nbr"] = neighbors
        nbr[index]["count"] = len(neighbors)
def build(self, geo_df: GeoDataFrame):
    self.gt = GeoTrie(self.gh_len)
    df_columns = list(geo_df.columns)
    for i, row in geo_df.iterrows():
        polygons: List[Polygon] = row["geometry"].geoms
        # TODO: Check for non-polygon entries
        for poly in polygons:
            meta = {
                column: row[column]
                for column in filter(lambda x: x != "geometry", df_columns)
            }
            gdp = GeoDataPoint(meta, poly)
            geos = self.__gh_intersecting(poly)
            for gh in geos:
                self.gt.insert(gh, gdp)
def get_total_obs_eqs(bin_gdf: gpd.GeoDataFrame, prospective: bool = False) -> list:
    """
    Returns a list of all of the observed earthquakes within the model domain.
    """
    obs_eqs = []
    for i, row in bin_gdf.iterrows():
        sb = row.SpacemagBin

        if prospective is False:
            for mb in sb.observed_earthquakes.values():
                obs_eqs.extend(mb)
        else:
            for mb in sb.prospective_earthquakes.values():
                obs_eqs.extend(mb)
    return obs_eqs
def get_source_bins(bin_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Returns a subset of all of the spatial bins, where each bin actually
    contains earthquake sources.

    :param bin_gdf: GeoDataFrame of bins
    :returns: GeoDataFrame of bins with sources
    """
    source_list = []
    for i, row in bin_gdf.iterrows():
        cum_mfd = row.SpacemagBin.get_rupture_mfd(cumulative=True)
        if sum(cum_mfd.values()) > 0:
            source_list.append(i)

    source_bin_gdf = bin_gdf.loc[source_list]
    return source_bin_gdf
def split_plots(
    geodataframe: gpd.GeoDataFrame,
    target_col: str,
    separator: str = ',',
) -> gpd.GeoDataFrame:
    """
    Splits plot rows into parts based on a separator.

    :param geodataframe: input GeoDataFrame
    :param target_col: column name to split on
    :param separator: separator, default ','
    :return: GeoDataFrame with split rows
    """
    new_rows = []
    for _, row in geodataframe.iterrows():
        for c in str(row[target_col]).split(separator):
            # copy the row so each part gets its own value; GeoDataFrame.append
            # was removed from pandas, so rows are collected and the frame is
            # built once at the end
            new_row = row.copy()
            new_row[target_col] = c
            new_rows.append(new_row)
    return gpd.GeoDataFrame(new_rows, crs=geodataframe.crs).reset_index(drop=True)
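# A minimal sketch of split_plots, assuming geopandas; the plot codes
# "1,2" are made up to show one row fanning out into two.
import geopandas as gpd
from shapely.geometry import Point

plots = gpd.GeoDataFrame({"code": ["1,2", "3"]},
                         geometry=[Point(0, 0), Point(1, 1)])
print(split_plots(plots, "code")["code"].tolist())  # ['1', '2', '3']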
def points2grids(gdf: geopandas.GeoDataFrame, shift: float = 1 / 32):
    '''
    Turns Livneh grid cell point geometry into a Livneh grid polygon.

    Inputs:
        (1) gdf: geopandas.GeoDataFrame containing the Livneh grid cell centroids.
        (2) shift: the distance from a grid cell centroid to the grid polygon
            edges. 1/32nd degree by default.

    Output: a geopandas.GeoDataFrame containing the Livneh grid polygon geometry.
    '''
    geometry: list = []
    for i, row in gdf.iterrows():
        w: float = float(row['lon']) - shift
        e: float = float(row['lon']) + shift
        n: float = float(row['lat']) + shift
        s: float = float(row['lat']) - shift
        lons, lats = [w, e, e, w], [n, n, s, s]
        geometry.append(Polygon(zip(lons, lats)))
    gdf['geometry'] = geometry
    return gdf
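# A quick sketch of points2grids, assuming geopandas/shapely; the single
# centroid coordinate is made up for illustration.
import geopandas

cells = geopandas.GeoDataFrame({"lat": [40.0], "lon": [-105.0]})
grids = points2grids(cells)
print(grids["geometry"][0].bounds)
# (-105.03125, 39.96875, -104.96875, 40.03125): a 1/16-degree square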