def test_explode_pandas_fallback_ignore_index(self):
    """Explode a list-valued column with ``ignore_index=True``.

    ``col1`` holds two-element lists, so every source row is expected to
    become two rows that repeat the source geometry unchanged, under a
    fresh default index. The column name is passed once positionally and
    once through the ``column`` keyword; both must give the same frame.
    """
    source = GeoDataFrame(
        {
            "col1": [["name1", "name2"], ["name3", "name4"]],
            "geometry": [
                MultiPoint([(1, 2), (3, 4)]),
                MultiPoint([(2, 1), (0, 0)]),
            ],
        },
        crs=4326,
    )
    expected = GeoDataFrame(
        {
            "col1": ["name1", "name2", "name3", "name4"],
            "geometry": [
                MultiPoint([(1, 2), (3, 4)]),
                MultiPoint([(1, 2), (3, 4)]),
                MultiPoint([(2, 1), (0, 0)]),
                MultiPoint([(2, 1), (0, 0)]),
            ],
        },
        crs=4326,
    )

    # First the positional spelling, then the keyword spelling.
    call_styles = ((("col1",), {}), ((), {"column": "col1"}))
    for positional, keywords in call_styles:
        result = source.explode(*positional, ignore_index=True, **keywords)
        assert_geodataframe_equal(result, expected)
def test_explode_pandas_multi_index_ignore_index(self, outer_index):
    """``ignore_index=True`` swaps a two-level input index for 0..n-1.

    ``outer_index`` is supplied by the test parametrisation and is used as
    the (constant) first level of the input index.
    """
    source_index = MultiIndex.from_arrays(
        [[outer_index] * 3, [1, 2, 3]],
        names=("first", "second"),
    )
    frame = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=[MultiPoint([(x, x), (x, 0)]) for x in range(3)],
        index=source_index,
    )

    result = frame.explode(ignore_index=True)

    # One Point per MultiPoint member, in input order:
    # (0, 0), (0, 0), (1, 1), (1, 0), (2, 2), (2, 0).
    single_points = GeoSeries([Point(x, y) for x in range(3) for y in (x, 0)])
    expected = GeoDataFrame(
        {"vals": [1, 1, 2, 2, 3, 3], "geometry": single_points}
    )
    expected = expected.set_index(Index(range(len(expected))))
    assert_frame_equal(result, expected)

    # index_parts is ignored if ignore_index=True
    result = frame.explode(ignore_index=True, index_parts=True)
    assert_frame_equal(result, expected)
def test_explode_duplicated_index(self):
    """Explode a frame whose index repeats a label (``[1, 1, 2]``).

    With ``index_parts=True`` the expected result keeps the repeated
    label in the outer level and restarts the part counter at 0 for every
    source row.
    """
    frame = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=[MultiPoint([(x, x), (x, 0)]) for x in range(3)],
        index=[1, 1, 2],
    )

    source_labels = [1, 1, 1, 1, 2, 2]
    part_numbers = [0, 1, 0, 1, 0, 1]
    parts_index = MultiIndex.from_arrays([source_labels, part_numbers])

    # (0, 0), (0, 0), (1, 1), (1, 0), (2, 2), (2, 0)
    single_points = [Point(x, y) for x in range(3) for y in (x, 0)]
    expected = GeoDataFrame(
        {"vals": [1, 1, 2, 2, 3, 3]},
        geometry=GeoSeries(single_points, index=parts_index),
        index=parts_index,
    )

    assert_geodataframe_equal(frame.explode(index_parts=True), expected)
def test_explode_order_mixed(self):
    """Explode a mix of multi-part and single-part rows.

    The input index ``[2, 9, 7]`` is deliberately unsorted; the expected
    outer level follows that same order, and the lone ``Point`` row still
    receives part number 0.
    """
    multi_rows = [MultiPoint([(x, x), (x, 0)]) for x in range(2)]
    frame = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=multi_rows + [Point(0, 10)],
        index=[2, 9, 7],
    )

    parts_index = MultiIndex.from_arrays([[2, 2, 9, 9, 7], [0, 1, 0, 1, 0]])

    # (0, 0), (0, 0), (1, 1), (1, 0) from the MultiPoints, then the Point.
    single_parts = [Point(x, y) for x in range(2) for y in (x, 0)]
    single_parts.append(Point(0, 10))
    expected = GeoDataFrame(
        {"vals": [1, 1, 2, 2, 3]},
        geometry=GeoSeries(single_parts, index=parts_index),
        index=parts_index,
    )

    assert_geodataframe_equal(frame.explode(index_parts=True), expected)
def test_explode_pandas_multi_index(self, outer_index):
    """``index_parts=True`` on a two-level index yields a three-level one.

    The expected index keeps the ``first``/``second`` levels of the input
    and gains an unnamed third level numbering the parts of each row.
    ``outer_index`` comes from the test parametrisation.
    """
    source_index = MultiIndex.from_arrays(
        [[outer_index] * 3, [1, 2, 3]],
        names=("first", "second"),
    )
    frame = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=[MultiPoint([(x, x), (x, 0)]) for x in range(3)],
        index=source_index,
    )

    result = frame.explode(index_parts=True)

    # (0, 0), (0, 0), (1, 1), (1, 0), (2, 2), (2, 0)
    single_points = GeoSeries([Point(x, y) for x in range(3) for y in (x, 0)])
    expected = GeoDataFrame(
        {"vals": [1, 1, 2, 2, 3, 3], "geometry": single_points}
    )
    # (outer, 1, 0), (outer, 1, 1), (outer, 2, 0), ... (outer, 3, 1)
    index_rows = [
        (outer_index, second, part)
        for second in (1, 2, 3)
        for part in (0, 1)
    ]
    expected = expected.set_index(
        MultiIndex.from_tuples(index_rows, names=["first", "second", None])
    )

    assert_frame_equal(result, expected)
def explode(gdf: gpd.GeoDataFrame, ratios: Iterable[str] = None) -> gpd.GeoDataFrame:
    """
    Explode MultiPolygon to multiple Polygon geometries.

    Args:
        gdf: GeoDataFrame with non-zero-area (Multi)Polygon geometries.
        ratios: Names of columns to rescale by the area fraction of the Polygon
            relative to the MultiPolygon. Any iterable of names (or a single
            name) is accepted. If provided, MultiPolygon cannot
            self-intersect. By default, the original value is used unchanged.

    Raises:
        ValueError: Geometry contains self-intersecting MultiPolygon.

    Returns:
        GeoDataFrame with each Polygon as a separate row in the GeoDataFrame.
        The index is the number of the source row in the input GeoDataFrame.
    """
    check_gdf(gdf)

    # Normalise `ratios` to a list: a tuple would be read by pandas as ONE
    # column key, and a generator is always truthy and cannot be reused.
    if ratios is None:
        ratios = []
    elif isinstance(ratios, str):
        ratios = [ratios]
    else:
        ratios = list(ratios)

    # Positional 0..n-1 index so the result can refer back to source rows.
    gdf = gdf.reset_index(drop=True)
    is_mpoly = gdf.geometry.geom_type == "MultiPolygon"
    if ratios and is_mpoly.any():
        multipolygons = gdf.geometry[is_mpoly]
        # Overlapping parts make the union smaller than the summed part areas.
        # NOTE(review): this is an exact float comparison - confirm that no
        # tolerance is wanted here.
        union_area = multipolygons.apply(shapely.ops.unary_union).area
        if (union_area != multipolygons.area).any():
            raise ValueError(
                "Geometry contains self-intersecting MultiPolygon")

    result = gdf.explode()
    # Depending on the geopandas version / `index_parts` default, explode()
    # returns either (source row, part number) or just the source row. Only
    # strip the part level when it is actually there.
    if result.index.nlevels > 1:
        result = result.droplevel(1)

    if ratios:
        # Area of each part divided by the area of the row it came from.
        fraction = (result.geometry.area.values /
                    gdf.geometry.area[result.index].values)
        result[ratios] = result[ratios].multiply(fraction, axis="index")
    return result[gdf.columns]
def test_explode_geodataframe(self, index_name):
    """Explode a frame holding one MultiPoint row and one Point row.

    The expected frame has one row per single-part geometry, repeats
    ``col`` for the parts of a multi-part row, and is indexed by
    (source row, part number). The outer level keeps ``index_name``
    (supplied by the test parametrisation); the part level is unnamed.
    """
    s = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)])
    df = GeoDataFrame({'col': [1, 2], 'geometry': s})
    df.index.name = index_name

    test_df = df.explode()

    expected_s = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
    # Same column order as the input frame ('col' before 'geometry'):
    # dict insertion order is what the frame gets, and assert_frame_equal
    # compares column order.
    expected_df = GeoDataFrame({'col': [1, 1, 2], 'geometry': expected_s})
    # Passed positionally because the second argument was renamed from
    # `labels` to `codes` in pandas; positional works under either name.
    expected_index = MultiIndex([[0, 1], [0, 1]],  # levels
                                [[0, 0, 1], [0, 1, 0]],  # labels/codes
                                names=[index_name, None])
    expected_df = expected_df.set_index(expected_index)
    assert_frame_equal(test_df, expected_df)
def test_explode_geodataframe_no_multiindex(self, index_name):
    """``index_parts=False`` keeps a flat index that repeats source labels.

    The two parts of row 0 are both expected under label 0, and the index
    keeps the name ``index_name`` supplied by the test parametrisation.
    """
    # GH1393
    geoms = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)])
    frame = GeoDataFrame({"level_1": [1, 2], "geometry": geoms})
    frame.index.name = index_name

    result = frame.explode(index_parts=False)

    expected = GeoDataFrame(
        {
            "level_1": [1, 1, 2],
            "geometry": GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)]),
        }
    )
    flat_index = Index([0, 0, 1], name=index_name)
    expected = expected.set_index(flat_index)

    assert_frame_equal(result, expected)
def process_terraces(bmp_gdf: geopandas.GeoDataFrame, name: str = "terrace"):
    """
    Split BMP features into single parts, smooth them, and split again.

    Steps, in order:
      1. explode ``bmp_gdf``, discard its index and keep only ``BMPCOLS``;
      2. run ``_fix_ints`` over the ``BMPCOLS`` entries starting with
         "Present";
      3. lower-case every column name and turn "present" into "isin";
      4. replace each geometry with ``_smooth_line(geometry)``;
      5. explode once more, name the two index levels ``obj_id`` and
         ``geo_id``, and move them into columns.

    ``name`` is accepted but not used by this function.

    NOTE(review): step 5 assumes the second ``explode()`` returns a
    two-level index (row, part) - confirm against the geopandas version in
    use, since that depends on the ``index_parts`` default.
    """
    # Steps 1-2
    single_parts = bmp_gdf.explode().reset_index(drop=True).loc[:, BMPCOLS]
    present_columns = filter(lambda c: c.startswith("Present"), BMPCOLS)
    cleaned = _fix_ints(single_parts, present_columns)

    # Step 3
    renamed = cleaned.rename(
        columns=lambda c: c.lower().replace("present", "isin"))

    # Step 4
    smoothed = renamed.assign(geometry=renamed.geometry.apply(_smooth_line))

    # Step 5
    re_exploded = smoothed.explode()
    labelled = re_exploded.rename_axis(["obj_id", "geo_id"], axis="index")
    points = labelled.reset_index()
    return points
def test_explode_geodataframe(self, index_name):
    """Default ``explode()`` on one MultiPoint row plus one Point row.

    Expects three single-part rows indexed by (source row, part number),
    with ``index_name`` (from the test parametrisation) kept on the outer
    level and no name on the part level.
    """
    geoms = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)])
    frame = GeoDataFrame({'col': [1, 2], 'geometry': geoms})
    frame.index.name = index_name

    result = frame.explode()

    expected = GeoDataFrame({
        'col': [1, 1, 2],
        'geometry': GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)]),
    })
    levels = [[0, 1], [0, 1]]
    codes = [[0, 0, 1], [0, 1, 0]]  # labels/codes
    expected = expected.set_index(
        MultiIndex(levels, codes, names=[index_name, None]))

    assert_frame_equal(result, expected)
def test_explode_order_no_multi(self):
    """Explode a frame that contains no multi-part geometry at all.

    With ``index_parts=True`` every row is expected to come back
    unchanged, in the original (unsorted) order, with a part number of 0
    added as a second index level.
    """
    source_labels = [2, 9, 7]
    frame = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=[Point(0, y) for y in range(3)],
        index=source_labels,
    )

    part_numbers = [0] * len(source_labels)
    expected = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=[Point(0, y) for y in range(3)],
        index=MultiIndex.from_arrays([source_labels, part_numbers]),
    )

    assert_geodataframe_equal(frame.explode(index_parts=True), expected)
def test_explode_geodataframe(self, index_name):
    """Check the frame produced by a plain ``explode()`` call.

    Input: a MultiPoint of two points followed by a single Point.
    Expected: three Point rows, ``col`` repeated for the two MultiPoint
    parts, and a two-level index whose outer level carries ``index_name``
    (from the test parametrisation) and whose inner level is unnamed.
    """
    multipart = MultiPoint([Point(1, 2), Point(2, 3)])
    source = GeoDataFrame(
        {"col": [1, 2], "geometry": GeoSeries([multipart, Point(5, 5)])}
    )
    source.index.name = index_name

    exploded = source.explode()

    wanted_geoms = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
    wanted = GeoDataFrame({"col": [1, 1, 2], "geometry": wanted_geoms})
    wanted_index = MultiIndex(
        [[0, 1], [0, 1]],  # levels
        [[0, 0, 1], [0, 1, 0]],  # labels/codes
        names=[index_name, None],
    )
    wanted = wanted.set_index(wanted_index)

    assert_frame_equal(exploded, wanted)
def test_explode_geodataframe_level_1(self, index_name):
    """Explode a frame whose data column is literally named ``level_1``.

    Presumably this guards against a clash with the name pandas gives an
    unnamed index level when it is reset - see GH1393 to confirm. The
    expected result is the usual (source row, part number) index with the
    ``level_1`` column intact.
    """
    # GH1393
    geoms = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)])
    frame = GeoDataFrame({"level_1": [1, 2], "geometry": geoms})
    frame.index.name = index_name

    result = frame.explode()

    expected = GeoDataFrame(
        {
            "level_1": [1, 1, 2],
            "geometry": GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)]),
        }
    )
    parts_index = MultiIndex(
        [[0, 1], [0, 1]],  # levels
        [[0, 0, 1], [0, 1, 0]],  # labels/codes
        names=[index_name, None],
    )
    expected = expected.set_index(parts_index)

    # On the older pandas branch the column order is pinned explicitly.
    if not compat.PANDAS_GE_024:
        expected = expected[["level_1", "geometry"]]

    assert_frame_equal(result, expected)
def aggregate_raw_PV_polygons_to_raw_PV_installations(
        self, raw_PV_polygons_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Aggregate raw PV polygons belonging to the same PV installation.

    Raw refers to the fact that the PV area is not corrected by the tilt
    angle. For each PV installation, we compute its raw area and a unique
    identifier.

    Note that the ``geometry`` column of the GeoDataFrame passed in is
    overwritten in place with the buffered polygons.

    Parameters
    ----------
    raw_PV_polygons_gdf : GeoPandas.GeoDataFrame
        GeoDataFrame which contains all the raw PV polygons which have been
        detected during the previous pipeline step. Must have a ``class``
        column (used as the dissolve key).

    Returns
    -------
    GeoPandas.GeoDataFrame
        GeoDataFrame with dissolved PV polygon geometries, one row per
        single-part polygon, plus ``raw_area`` and ``identifier`` columns.
    """
    # Buffer polygons, i.e. overwrite the original polygons with their buffered versions
    # Based on our experience, the buffer value should be within [1e-6, 1e-8] degrees
    raw_PV_polygons_gdf["geometry"] = raw_PV_polygons_gdf[
        "geometry"].buffer(1e-6)

    # Dissolve, i.e. aggregate, all PV polygons into one Multipolygon
    raw_PV_polygons_gdf = raw_PV_polygons_gdf.dissolve(by="class")

    # Explode multi-part geometries into multiple single geometries.
    # `level_1` is the part counter that reset_index() produces when
    # explode() returns a two-level index. When explode() returns a flat
    # index there is no such column, so a missing `level_1` is ignored
    # instead of raising KeyError.
    raw_PV_installations_gdf = (
        raw_PV_polygons_gdf.explode().reset_index().drop(
            columns=["level_1"], errors="ignore"))

    # Compute the raw area for each pv installation
    raw_PV_installations_gdf["raw_area"] = (
        raw_PV_installations_gdf["geometry"].to_crs(epsg=5243).area)

    # Create a unique identifier for each pv installation
    raw_PV_installations_gdf[
        "identifier"] = raw_PV_installations_gdf.index.map(
            lambda row_label: "polygon_" + str(row_label))

    return raw_PV_installations_gdf
def import_geodataframe(
    self,
    gdf: gpd.GeoDataFrame,
    tablename: str,
    gpd_kwargs: dict = None,
    uid_col: str = "uid",
    explode: bool = False,
) -> None:
    """
    Write a GeoDataFrame to a SQL table with a ``geom`` geometry column.

    The caller's GeoDataFrame is not modified; all work is done on a copy.

    TODO: option to use multipart features instead of exploding to singlepart

    Args:
        gdf: Data to write. Its CRS must render as ``"<authority>:<code>"``
            because the SRID is parsed from ``str(gdf.crs)``.
        tablename: Target table; split into schema and table name by
            ``helpers.convert_full_tablename_to_parts``.
        gpd_kwargs: Extra keyword arguments forwarded to ``gdf.to_sql``.
            ``None`` (the default) means no extra arguments.
        uid_col: Name of an existing id column; if present it is renamed
            to ``old_<uid_col>``.
        explode: If True, multipart geometries are exploded to singlepart
            before writing. If False and the data holds more than one
            geometry type, a warning is printed and nothing is written.
    """
    # A fresh dict per call instead of one shared mutable default.
    if gpd_kwargs is None:
        gpd_kwargs = {}

    gdf = gdf.copy()
    gdf = helpers.sanitize_df_for_sql(gdf)

    epsg_code = int(str(gdf.crs).split(":")[1])

    # Get a list of all geometry types in the dataframe
    geom_types = list(gdf.geometry.geom_type.unique())

    # If there are multi- and single-part features, explode to singlepart
    if explode:
        # Explode multipart to singlepart and reset the index
        gdf = gdf.explode(index_parts=True)
        gdf["explode"] = gdf.index.to_numpy()
        gdf = gdf.reset_index()
    else:
        if len(geom_types) > 1:
            print(f"Warning! This dataset has {geom_types=}")
            print("Run with explode=True")
            return None

    # Use the non-multi version of the geometry
    # (the shortest type name, e.g. "Polygon" rather than "MultiPolygon")
    geom_type_to_use = min(geom_types, key=len).upper()

    # Replace the 'geom' column with 'geometry'
    if "geom" in gdf.columns:
        gdf["geometry"] = gdf["geom"]
        gdf.drop(labels="geom", axis=1, inplace=True)

    # Drop the 'gid' column
    if "gid" in gdf.columns:
        gdf.drop(labels="gid", axis=1, inplace=True)

    # Rename 'uid' to 'old_uid'
    if uid_col in gdf.columns:
        gdf[f"old_{uid_col}"] = gdf[uid_col]
        gdf.drop(labels=uid_col, axis=1, inplace=True)

    # Build a 'geom' column using geoalchemy2
    # and drop the source 'geometry' column
    gdf["geom"] = gdf["geometry"].apply(
        lambda x: WKTElement(x.wkt, srid=epsg_code))
    gdf.drop(labels="geometry", axis=1, inplace=True)

    # Ensure that the target schema exists
    schema, tbl = helpers.convert_full_tablename_to_parts(tablename)
    self.schema_add(schema)

    # Write geodataframe to SQL database
    engine = sqlalchemy.create_engine(self.uri)
    try:
        gdf.to_sql(
            tbl,
            engine,
            schema=schema,
            dtype={"geom": Geometry(geom_type_to_use, srid=epsg_code)},
            **gpd_kwargs,
        )
    finally:
        # Dispose of the engine even when the write fails.
        engine.dispose()

    self.table_add_uid_column(tablename)
    self.gis_table_add_spatial_index(tablename)
def explode_multipolygons(polygons: geopandas.GeoDataFrame):
    """Return the result of calling ``explode()`` on ``polygons``.

    No arguments are passed to ``explode()``, so the layout of the
    resulting index follows the defaults of the installed geopandas.
    """
    exploded = polygons.explode()
    return exploded