示例#1
0
def test_read_file_layer(tmp_path):
    """Read individual layers of a multi-layer GeoPackage back by name.

    A point frame and a polygon frame are written as the ``"points"`` and
    ``"polygons"`` layers of one ``.gpkg`` file under ``tmp_path``. Each layer
    is then loaded with ``dask_geopandas.read_file(..., layer=...)`` using two
    partitions, and the computed result must equal the frame that was written.
    """
    # A locally seeded generator keeps the polygon fixture identical between
    # runs (so a failure can be reproduced) without touching the global
    # ``random`` state shared with other tests.
    rng = random.Random(0)

    df_points = geopandas.GeoDataFrame(
        {
            "col": [1, 2, 3, 4],
            "geometry": geopandas.points_from_xy([1, 2, 3, 4], [2, 3, 4, 1]),
        },
        crs=4326,
    )
    df_polygons = geopandas.GeoDataFrame(
        {
            "col": [5, 6, 7, 8],
            "geometry": [
                # Three vertices per polygon, four polygons in total.
                Polygon([(rng.random(), rng.random()) for _ in range(3)])
                for _ in range(4)
            ],
        },
        crs=4326,
    )

    # Both frames go into the same file, distinguished only by layer name.
    path = tmp_path / "test_layers.gpkg"
    df_points.to_file(path, layer="points")
    df_polygons.to_file(path, layer="polygons")

    ddf_points = dask_geopandas.read_file(path, npartitions=2, layer="points")
    assert_geodataframe_equal(ddf_points.compute(), df_points)

    ddf_polygons = dask_geopandas.read_file(path, npartitions=2, layer="polygons")
    assert_geodataframe_equal(ddf_polygons.compute(), df_polygons)
示例#2
0
def test_read_file_columns():
    """Check column selection both at read time and through ``__getitem__``.

    Covers four cases against the ``naturalearth_lowres`` dataset:

    * ``columns=`` including the geometry column yields a
      ``dask_geopandas.GeoDataFrame`` with the source CRS;
    * ``columns=`` without the geometry column yields a plain
      ``dd.DataFrame``;
    * selecting a list of columns from an already-read frame keeps the
      geo type, partition count and CRS;
    * selecting a single column yields ``dd.Series`` or
      ``dask_geopandas.GeoSeries`` depending on the column.
    """
    path = geopandas.datasets.get_path("naturalearth_lowres")
    df = geopandas.read_file(path)

    # explicit column selection
    result = dask_geopandas.read_file(
        path, npartitions=4, columns=["pop_est", "geometry"]
    )
    assert isinstance(result, dask_geopandas.GeoDataFrame)
    assert result.npartitions == 4
    assert result.crs == df.crs
    assert len(result.columns) == 2
    assert_geodataframe_equal(result.compute(), df[["pop_est", "geometry"]])

    # only selecting non-geometry column
    result = dask_geopandas.read_file(path, npartitions=4, columns=["pop_est"])
    # Deliberately an exact type comparison rather than ``isinstance``: an
    # instance of any subclass of ``dd.DataFrame`` must not satisfy this check.
    # Type objects are compared by identity, hence ``is`` instead of ``==``.
    assert type(result) is dd.DataFrame
    assert len(result.columns) == 1
    assert result.npartitions == 4
    assert_frame_equal(result.compute(), df[["pop_est"]])

    # column selection through getitem
    ddf = dask_geopandas.read_file(path, npartitions=4)
    result = ddf[["pop_est", "geometry"]]
    assert isinstance(result, dask_geopandas.GeoDataFrame)
    assert result.npartitions == 4
    assert result.crs == df.crs
    assert_geodataframe_equal(result.compute(), df[["pop_est", "geometry"]])

    # only select non-geometry column
    result = ddf["pop_est"]
    assert isinstance(result, dd.Series)
    assert_series_equal(result.compute(), df["pop_est"])

    # only select geometry column
    result = ddf["geometry"]
    assert isinstance(result, dask_geopandas.GeoSeries)
    assert_geoseries_equal(result.compute(), df["geometry"])
示例#3
0
def test_read_file():
    """Exercise the ``npartitions`` / ``chunksize`` arguments of ``read_file``.

    Each argument on its own must produce a ``dask_geopandas.GeoDataFrame``
    with the expected partition count, the CRS of the eagerly-read frame and
    identical contents once computed. Passing neither or both arguments must
    raise ``ValueError``.
    """
    source = geopandas.datasets.get_path("naturalearth_lowres")
    expected = geopandas.read_file(source)

    # (keyword arguments, partition count they should produce)
    valid_cases = (
        ({"npartitions": 4}, 4),
        ({"chunksize": 100}, 2),
    )
    for kwargs, n_parts in valid_cases:
        ddf = dask_geopandas.read_file(source, **kwargs)
        assert isinstance(ddf, dask_geopandas.GeoDataFrame)
        assert ddf.npartitions == n_parts
        assert ddf.crs == expected.crs
        assert_geodataframe_equal(ddf.compute(), expected)

    # Neither argument, then both arguments: each is rejected with the same
    # error message.
    msg = "Exactly one of npartitions and chunksize must be specified"
    invalid_cases = ({}, {"npartitions": 4, "chunksize": 100})
    for kwargs in invalid_cases:
        with pytest.raises(ValueError, match=msg):
            dask_geopandas.read_file(source, **kwargs)
示例#4
0
def test_read_file_index():
    """The computed index of a partitioned read equals ``RangeIndex(0, n_rows)``.

    ``n_rows`` is the length of the same file read eagerly with geopandas.
    """
    source = geopandas.datasets.get_path("naturalearth_lowres")
    n_rows = len(geopandas.read_file(source))

    ddf = dask_geopandas.read_file(source, npartitions=4)
    computed_index = ddf.index.compute()

    # Element-wise comparison against the expected range; every position
    # must match.
    matches = computed_index == pd.RangeIndex(0, n_rows)
    assert matches.all()
示例#5
0
def test_read_file_divisions():
    """A four-partition read reports known divisions on the frame and its index."""
    source = geopandas.datasets.get_path("naturalearth_lowres")
    # Partition boundaries expected for this dataset when split four ways.
    expected_divisions = (0, 45, 90, 135, 176)

    ddf = dask_geopandas.read_file(source, npartitions=4)

    assert ddf.known_divisions
    assert ddf.index.divisions == expected_divisions
    assert ddf.divisions == expected_divisions