示例#1
0
def test_from_dask_dataframe_with_column_name():
    """Convert a dask DataFrame whose geometry column is selected by name.

    The point geometries are stored in a column called "geoms" and that
    name is passed as ``geometry=`` to ``from_dask_dataframe``. The computed
    result must equal a GeoDataFrame built from the same pandas frame with
    "geoms" as its geometry column.
    """
    frame = pd.DataFrame({"x": [0, 1, 2, 3], "y": [1, 2, 3, 4]})
    frame["geoms"] = geopandas.points_from_xy(frame["x"], frame["y"])

    # Split into two partitions, then promote to a dask-geopandas frame.
    partitioned = dd.from_pandas(frame, npartitions=2)
    converted = dask_geopandas.from_dask_dataframe(
        partitioned,
        geometry="geoms",
    )

    reference = geopandas.GeoDataFrame(frame, geometry="geoms")
    computed = converted.compute()
    assert_geodataframe_equal(computed, reference)
示例#2
0
def test_from_dask_dataframe_with_dask_geoseries():
    """Convert a dask DataFrame, passing the geometry as a dask GeoSeries.

    The geometry is produced by ``dask_geopandas.points_from_xy`` from the
    "x" and "y" columns and handed to ``from_dask_dataframe`` directly. The
    computed geometry must match points built from the same pandas columns.
    """
    frame = pd.DataFrame({"x": [0, 1, 2, 3], "y": [1, 2, 3, 4]})
    partitioned = dd.from_pandas(frame, npartitions=2)

    lazy_points = dask_geopandas.points_from_xy(partitioned, "x", "y")
    converted = dask_geopandas.from_dask_dataframe(
        partitioned,
        geometry=lazy_points,
    )

    # NOTE(review): `frame` is a plain pandas DataFrame; this relies on
    # `set_geometry` being available on it (presumably attached by
    # geopandas on import) -- confirm.
    eager_points = geopandas.points_from_xy(frame["x"], frame["y"])
    reference = frame.set_geometry(eager_points)

    computed_geometry = converted.geometry.compute()
    assert_geoseries_equal(computed_geometry, reference.geometry)
示例#3
0
def test_points_from_xy():
    """Check ``dask_geopandas.points_from_xy`` against the geopandas version.

    First the lazily built points are compared element by element with the
    eagerly built ones. Then both are assigned to a "geometry" column and
    the frames are compared after converting to (dask-)GeoDataFrames.
    """
    xs = [1, 2, 3, 4, 5]
    ys = [4, 5, 6, 7, 8]
    reference_points = geopandas.points_from_xy(xs, ys)

    frame = pd.DataFrame({"x": xs, "y": ys})
    ddf = dd.from_pandas(frame, npartitions=2)
    lazy_points = dask_geopandas.points_from_xy(ddf)

    assert isinstance(lazy_points, dask_geopandas.GeoSeries)
    assert list(lazy_points) == list(reference_points)

    # Attach the points as a "geometry" column on both sides and compare
    # the resulting GeoDataFrames.
    frame["geometry"] = reference_points
    reference_gdf = geopandas.GeoDataFrame(frame)

    ddf["geometry"] = lazy_points
    computed = dask_geopandas.from_dask_dataframe(ddf).compute()
    assert_geodataframe_equal(computed, reference_gdf)
示例#4
0
def spatial_join_map_partition(points_filepath,
                               nuts_filepath,
                               blocksize=1_000_000):
    """Count point records per (nuts_id, amenity) via a partition-wise join.

    The points CSV is read lazily with dask, turned into a dask-geopandas
    frame with point geometries built from its 'lon' and 'lat' columns, and
    each partition is left-joined spatially against the regions layer.

    Parameters
    ----------
    points_filepath
        Path passed to ``dd.read_csv``. The data must provide the 'lon',
        'lat', 'amenity' and 'osm_id' columns used below.
    nuts_filepath
        Path passed to ``gpd.read_file`` (GPKG driver). The layer must
        provide a 'nuts_id' column.
    blocksize
        Forwarded to ``dd.read_csv`` as ``blocksize``.

    Returns
    -------
    DataFrame
        The computed 'osm_id' counts per ('nuts_id', 'amenity') group, with
        the group keys as columns and the counts in a 'counts' column.
    """
    skipped_fields = [
        'levl_code', 'cntr_code', 'name_latn',
        'nuts_name', 'population'
    ]
    gdf_nuts = gpd.read_file(nuts_filepath,
                             ignore_fields=skipped_fields,
                             driver='GPKG')

    # Build the lazy point layer: CSV -> dask-geopandas frame -> geometry
    # from lon/lat -> CRS 4326.
    points = dd.read_csv(points_filepath, blocksize=blocksize)
    points = dask_geopandas.from_dask_dataframe(points)
    point_geometry = dask_geopandas.points_from_xy(points, 'lon', 'lat')
    points = points.set_geometry(point_geometry)
    points = points.set_crs(pyproj.CRS(4326))
    print(points.npartitions)

    # The per-partition function closes over its own copy of the regions.
    regions = gdf_nuts.copy()

    def _join_partition(partition):
        joined = sjoin(partition, regions, how='left')
        return joined[['nuts_id', 'amenity', 'osm_id']]

    output_meta = {
        'nuts_id': object,
        'amenity': object,
        'osm_id': object
    }
    joined_points = points.map_partitions(_join_partition, meta=output_meta)

    grouped_ids = joined_points.groupby(['nuts_id', 'amenity'])['osm_id']
    counts = grouped_ids.count().compute()

    return counts.reset_index(name='counts')