Example No. 1
def from_shapely(data):
    """Converts shapely based data formats to spatialpandas.GeoDataFrame.

    Args:
        data: A list of shapely objects or dictionaries containing
              shapely objects

    Returns:
        A GeoDataFrame containing the shapely geometry data.
    """

    import numpy as np
    # isscalar assumed from numpy; the original module may define its own helper
    from numpy import isscalar

    from spatialpandas import GeoDataFrame, GeoSeries
    from shapely.geometry.base import BaseGeometry

    if not data:
        pass  # empty input is returned unchanged
    elif all(isinstance(d, BaseGeometry) for d in data):
        data = GeoSeries(data).to_frame()
    elif all(
            isinstance(d, dict) and 'geometry' in d
            and isinstance(d['geometry'], BaseGeometry) for d in data):
        new_data = {col: [] for col in data[0]}
        for d in data:
            for col, val in d.items():
                new_data[col].append(
                    val if isscalar(val) or isinstance(val, BaseGeometry)
                    else np.asarray(val))
        new_data['geometry'] = GeoSeries(new_data['geometry'])
        data = GeoDataFrame(new_data)
    return data
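
A minimal usage sketch for from_shapely (the coordinates and the extra 'value' column are illustrative):

from shapely.geometry import Point

# A bare list of shapely geometries becomes a one-column GeoDataFrame
gdf = from_shapely([Point(0, 0), Point(1, 1)])

# Dictionaries with a 'geometry' key keep their other keys as columns
gdf = from_shapely([
    {'geometry': Point(0, 0), 'value': 1},
    {'geometry': Point(1, 1), 'value': 2},
])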
Example No. 2
def test_multipoint_cx_series_selection(gp_multipoint, rect):
    x0, y0, x1, y1 = rect
    expected = GeoSeries(gp_multipoint.cx[x0:x1, y0:y1], dtype='multipoint')

    sp_multipoint = GeoSeries(gp_multipoint)
    result = sp_multipoint.cx[x0:x1, y0:y1]
    assert_series_equal(expected, result, obj='GeoSeries')
Example No. 3
def test_parquet_dask(gp_multipoint, gp_multiline, tmp_path):
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })
    ddf = dd.from_pandas(df, npartitions=3)

    path = tmp_path / 'ddf.parq'
    ddf.to_parquet(path)
    ddf_read = read_parquet_dask(path)

    # Check type
    assert isinstance(ddf_read, DaskGeoDataFrame)

    # Check that partition bounds were loaded
    assert set(ddf_read._partition_bounds) == {'points', 'lines'}
    pd.testing.assert_frame_equal(
        ddf['points'].partition_bounds,
        ddf_read._partition_bounds['points'],
    )
    pd.testing.assert_frame_equal(
        ddf['lines'].partition_bounds,
        ddf_read._partition_bounds['lines'],
    )
Example No. 4
def test_pack_partitions(gp_multipoint, gp_multiline):
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf = dd.from_pandas(df, npartitions=3)

    # Pack partitions
    ddf_packed = ddf.pack_partitions(npartitions=4)

    # Check the number of partitions
    assert ddf_packed.npartitions == 4

    # Check that rows are now sorted in order of hilbert distance
    total_bounds = df.lines.total_bounds
    hilbert_distances = ddf_packed.lines.map_partitions(
        lambda s: s.hilbert_distance(total_bounds=total_bounds)
    ).compute().values

    # Compute expected hilbert distances
    expected_distances = np.sort(
        df.lines.hilbert_distance(total_bounds=total_bounds).values)

    np.testing.assert_equal(expected_distances, hilbert_distances)
Example No. 5
def test_points_intersects_multipolygon(gp_points, gp_multipolygon):
    # Get scalar MultiPolygon
    sg_multipolygon = gp_multipolygon[0]

    # Compute expected intersection
    expected = gp_points.intersects(sg_multipolygon)

    # Create spatialpandas objects
    multipolygon = MultiPolygon.from_shapely(sg_multipolygon)
    points = PointArray.from_geopandas(gp_points)
    points_series = GeoSeries(points, index=np.arange(10, 10 + len(points)))

    # Test Point.intersects
    result = np.array([
        point_el.intersects(multipolygon) for point_el in points
    ])
    np.testing.assert_equal(result, expected)

    # Test PointArray.intersects
    result = points.intersects(multipolygon)
    np.testing.assert_equal(result, expected)

    # Test PointArray.intersects with inds
    inds = np.flipud(np.arange(0, len(points)))
    result = points.intersects(multipolygon, inds)
    np.testing.assert_equal(result, np.flipud(expected))

    # Test GeoSeries.intersects
    pd.testing.assert_series_equal(
        points_series.intersects(multipolygon),
        pd.Series(expected, index=points_series.index)
    )
Example No. 6
def test_points_intersects_line(gp_points, gp_line):
    # Get scalar Line
    sg_line = gp_line[0]

    # Compute expected intersection
    expected = gp_points.intersects(sg_line)

    # Create spatialpandas objects
    line = Line.from_shapely(sg_line)
    points = PointArray.from_geopandas(gp_points)
    points_series = GeoSeries(points, index=np.arange(10, 10 + len(points)))

    # Test Point.intersects
    result = np.array([
        point_el.intersects(line) for point_el in points
    ])
    np.testing.assert_equal(result, expected)

    # Test PointArray.intersects
    result = points.intersects(line)
    np.testing.assert_equal(result, expected)

    # Test PointArray.intersects with inds
    inds = np.flipud(np.arange(0, len(points)))
    result = points.intersects(line, inds)
    np.testing.assert_equal(result, np.flipud(expected))

    # Test GeoSeries.intersects
    pd.testing.assert_series_equal(
        points_series.intersects(line),
        pd.Series(expected, index=points_series.index)
    )
Example No. 7
def test_points_intersects_multipoint(gp_points, gp_multipoint):
    # Get scalar MultiPoint
    sg_multipoint = gp_multipoint[0]
    if len(gp_points) > 0:
        # Append the last point of the multipoint so at least one point intersects
        gp_points = from_shapely(list(gp_points) + [gp_multipoint[0][-1]])

    # Compute expected intersection
    expected = gp_points.intersects(sg_multipoint)

    # Create spatialpandas PointArray
    multipoint = MultiPoint.from_shapely(sg_multipoint)
    points = PointArray.from_geopandas(gp_points)
    points_series = GeoSeries(points, index=np.arange(10, 10 + len(points)))

    # Test Point.intersects
    result = np.array([
        point_el.intersects(multipoint) for point_el in points
    ])
    np.testing.assert_equal(result, expected)

    # Test PointArray.intersects
    result = points.intersects(multipoint)
    np.testing.assert_equal(result, expected)

    # Test PointArray.intersects with inds
    inds = np.flipud(np.arange(0, len(points)))
    result = points.intersects(multipoint, inds)
    np.testing.assert_equal(result, np.flipud(expected))

    # Test GeoSeries.intersects
    pd.testing.assert_series_equal(
        points_series.intersects(multipoint),
        pd.Series(expected, index=points_series.index)
    )
Example No. 8
def test_multipoint_cx_series_selection_dask(gp_multipoint, rect):
    x0, y0, x1, y1 = rect
    expected = GeoSeries(gp_multipoint.cx[x0:x1, y0:y1], dtype='multipoint')

    sp_multipoint = dd.from_pandas(GeoSeries(gp_multipoint), npartitions=3)
    result = sp_multipoint.cx[x0:x1, y0:y1].compute()
    assert_series_equal(expected, result, obj='GeoSeries')
Example No. 9
def test_multipoint_cx_frame_selection(gp_multipoint, rect):
    x0, y0, x1, y1 = rect
    expected = GeoDataFrame(
        GeoSeries(gp_multipoint.cx[x0:x1, y0:y1], dtype='multipoint'))

    sp_multipoint = GeoSeries(gp_multipoint).to_frame()
    result = sp_multipoint.cx[x0:x1, y0:y1]
    assert_frame_equal(expected, result, obj='GeoDataFrame')
Example No. 10
def test_pack_partitions_to_parquet_glob(gp_multipoint1, gp_multiline1,
                                         gp_multipoint2, gp_multiline2,
                                         tmp_path):
    # Build dataframe1
    n = min(len(gp_multipoint1), len(gp_multiline1))
    df1 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint1[:n]),
        'lines': GeoSeries(gp_multiline1[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf1 = dd.from_pandas(df1, npartitions=3)
    path1 = tmp_path / 'ddf1.parq'
    ddf_packed1 = ddf1.pack_partitions_to_parquet(str(path1), npartitions=3)

    # Build dataframe2
    n = min(len(gp_multipoint2), len(gp_multiline2))
    df2 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint2[:n]),
        'lines': GeoSeries(gp_multiline2[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf2 = dd.from_pandas(df2, npartitions=3)
    path2 = tmp_path / 'ddf2.parq'
    ddf_packed2 = ddf2.pack_partitions_to_parquet(str(path2), npartitions=4)

    # Load both packed datasets with glob
    ddf_globbed = read_parquet_dask(tmp_path / "ddf*.parq", geometry="lines")

    # Check the number of partitions (< 7 can happen in the case of empty partitions)
    assert ddf_globbed.npartitions <= 7

    # Check contents
    expected_df = pd.concat([ddf_packed1.compute(), ddf_packed2.compute()])
    df_globbed = ddf_globbed.compute()
    pd.testing.assert_frame_equal(df_globbed, expected_df)

    # Check partition bounds
    expected_bounds = {
        'points': pd.concat([
            ddf_packed1._partition_bounds['points'],
            ddf_packed2._partition_bounds['points'],
        ]).reset_index(drop=True),
        'lines': pd.concat([
            ddf_packed1._partition_bounds['lines'],
            ddf_packed2._partition_bounds['lines'],
        ]).reset_index(drop=True),
    }
    expected_bounds['points'].index.name = 'partition'
    expected_bounds['lines'].index.name = 'partition'
    pd.testing.assert_frame_equal(expected_bounds['points'],
                                  ddf_globbed._partition_bounds['points'])

    pd.testing.assert_frame_equal(expected_bounds['lines'],
                                  ddf_globbed._partition_bounds['lines'])

    assert ddf_globbed.geometry.name == 'lines'
Example No. 11
def test_pack_partitions_to_parquet(gp_multipoint, gp_multiline,
                                    use_temp_format, tmp_path_factory):
    # mktemp returns a plain Path (Path is no longer a context manager)
    tmp_path = tmp_path_factory.mktemp("spatialpandas", numbered=True)

    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf = dd.from_pandas(df, npartitions=3)

    path = tmp_path / 'ddf.parq'
    if use_temp_format:
        (tmp_path / 'scratch').mkdir(parents=True, exist_ok=True)
        tempdir_format = str(tmp_path / 'scratch' /
                             'part-{uuid}-{partition:03d}')
    else:
        tempdir_format = None

    _retry_args = dict(wait_exponential_multiplier=10,
                       wait_exponential_max=20000,
                       stop_max_attempt_number=4)

    ddf_packed = ddf.pack_partitions_to_parquet(
        str(path),
        npartitions=12,
        tempdir_format=tempdir_format,
        _retry_args=_retry_args,
    )

    # Check the number of partitions (< 12 can happen in the case of empty partitions)
    assert ddf_packed.npartitions <= 12

    # Check that rows are now sorted in order of hilbert distance
    total_bounds = df.lines.total_bounds
    hilbert_distances = ddf_packed.lines.map_partitions(
        lambda s: s.hilbert_distance(total_bounds=total_bounds)
    ).compute().values

    # Compute expected hilbert distances
    expected_distances = np.sort(
        df.lines.hilbert_distance(total_bounds=total_bounds).values)

    np.testing.assert_equal(expected_distances, hilbert_distances)
    assert ddf_packed.geometry.name == 'points'

    # Read columns
    columns = ['a', 'lines']
    ddf_read_cols = read_parquet_dask(path, columns=columns)
    pd.testing.assert_frame_equal(ddf_read_cols.compute(),
                                  ddf_packed[columns].compute())
Example No. 12
def test_parquet(gp_point, gp_multipoint, gp_multiline, tmp_path):
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'point': GeoSeries(gp_point[:n]),
        'multipoint': GeoSeries(gp_multipoint[:n]),
        'multiline': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })

    path = tmp_path / 'df.parq'
    to_parquet(df, path)
    df_read = read_parquet(path)
    assert isinstance(df_read, GeoDataFrame)
    # all(df == df_read) would only iterate column labels, so compare contents
    pd.testing.assert_frame_equal(df, df_read)
Example No. 13
def test_parquet_columns(gp_point, gp_multipoint, gp_multiline, tmp_path):
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'point': GeoSeries(gp_point[:n]),
        'multipoint': GeoSeries(gp_multipoint[:n]),
        'multiline': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })

    path = tmp_path / 'df.parq'
    to_parquet(df, path)
    columns = ['a', 'multiline']
    df_read = read_parquet(str(path), columns=columns)
    assert isinstance(df_read, GeoDataFrame)
    pd.testing.assert_frame_equal(df[columns], df_read)
Example No. 14
def test_pack_partitions_to_parquet(gp_multipoint, gp_multiline,
                                    use_temp_format, tmp_path):
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf = dd.from_pandas(df, npartitions=3)

    path = tmp_path / 'ddf.parq'
    if use_temp_format:
        tempdir_format = str(tmp_path / 'scratch' /
                             'part-{uuid}-{partition:03d}')
    else:
        tempdir_format = None

    ddf_packed = ddf.pack_partitions_to_parquet(path,
                                                npartitions=12,
                                                tempdir_format=tempdir_format)

    # Check the number of partitions (< 12 can happen in the case of empty partitions)
    assert ddf_packed.npartitions <= 12

    # Check that rows are now sorted in order of hilbert distance
    total_bounds = df.lines.total_bounds
    hilbert_distances = ddf_packed.lines.map_partitions(
        lambda s: s.hilbert_distance(total_bounds=total_bounds)
    ).compute().values

    # Compute expected hilbert distances
    expected_distances = np.sort(
        df.lines.hilbert_distance(total_bounds=total_bounds).values)

    np.testing.assert_equal(expected_distances, hilbert_distances)
    assert ddf_packed.geometry.name == 'points'

    # Read columns
    columns = ['a', 'lines']
    ddf_read_cols = read_parquet_dask(path,
                                      columns=columns + ['hilbert_distance'])
    pd.testing.assert_frame_equal(ddf_read_cols.compute(),
                                  ddf_packed[columns].compute())
Example No. 15
def test_parquet(gp_point, gp_multipoint, gp_multiline, tmp_path):
    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'point': GeoSeries(gp_point[:n]),
        'multipoint': GeoSeries(gp_multipoint[:n]),
        'multiline': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })

    df.index.name = 'range_idx'

    path = tmp_path / 'df.parq'
    to_parquet(df, path)
    df_read = read_parquet(str(path),
                           columns=['point', 'multipoint', 'multiline', 'a'])
    assert isinstance(df_read, GeoDataFrame)
    pd.testing.assert_frame_equal(df, df_read)
    assert df_read.index.name == df.index.name
Example No. 16
def test_parquet_dask(gp_multipoint, gp_multiline, tmp_path_factory):
    # mktemp returns a plain Path (Path is no longer a context manager)
    tmp_path = tmp_path_factory.mktemp("spatialpandas", numbered=True)

    # Build dataframe
    n = min(len(gp_multipoint), len(gp_multiline))
    df = GeoDataFrame({
        'points': GeoSeries(gp_multipoint[:n]),
        'lines': GeoSeries(gp_multiline[:n]),
        'a': list(range(n))
    })
    ddf = dd.from_pandas(df, npartitions=3)

    path = tmp_path / 'ddf.parq'
    ddf.to_parquet(str(path))
    ddf_read = read_parquet_dask(str(path))

    # Check type
    assert isinstance(ddf_read, DaskGeoDataFrame)

    # Check that partition bounds were loaded for the nonempty partitions
    nonempty = np.nonzero(
        np.asarray(ddf.map_partitions(len).compute() > 0))[0]
    assert set(ddf_read._partition_bounds) == {'points', 'lines'}
    expected_partition_bounds = (
        ddf['points'].partition_bounds.iloc[nonempty].reset_index(drop=True))
    expected_partition_bounds.index.name = 'partition'

    pd.testing.assert_frame_equal(
        expected_partition_bounds,
        ddf_read._partition_bounds['points'],
    )

    expected_partition_bounds = (
        ddf['lines'].partition_bounds.iloc[nonempty].reset_index(drop=True))
    expected_partition_bounds.index.name = 'partition'
    pd.testing.assert_frame_equal(
        expected_partition_bounds,
        ddf_read._partition_bounds['lines'],
    )

    assert ddf_read.geometry.name == 'points'
Example No. 17
    def split(cls, dataset, start, end, datatype, **kwargs):
        from spatialpandas import GeoDataFrame, GeoSeries
        from ...element import Polygons

        objs = []
        if not len(dataset.data):
            return []
        xdim, ydim = cls.geom_dims(dataset)
        value_dims = [
            dim for dim in dataset.kdims + dataset.vdims
            if dim not in (xdim, ydim)
        ]
        row = dataset.data.iloc[0]
        col = cls.geo_column(dataset.data)
        geom_type = cls.geom_type(dataset)
        if datatype is not None:
            arr = geom_to_array(row[col], geom_type=geom_type)
            d = {(xdim.name, ydim.name): arr}
            d.update({dim.name: row[dim.name] for dim in value_dims})
            ds = dataset.clone(d, datatype=['dictionary'])

        holes = cls.holes(dataset) if cls.has_holes(dataset) else None
        for i, row in dataset.data.iterrows():
            if datatype is None:
                gdf = GeoDataFrame({
                    c: GeoSeries([row[c]]) if c == 'geometry' else [row[c]]
                    for c in dataset.data.columns
                })
                objs.append(dataset.clone(gdf))
                continue

            geom = row[col]
            gt = geom_type or get_geom_type(dataset.data, col)
            arr = geom_to_array(geom, geom_type=gt)
            d = {xdim.name: arr[:, 0], ydim.name: arr[:, 1]}
            d.update({dim.name: row[dim.name] for dim in value_dims})
            if datatype in ('dictionary', 'columns'):
                if holes is not None:
                    d[Polygons._hole_key] = holes[i]
                d['geom_type'] = gt
                objs.append(d)
                continue

            ds.data = d
            if datatype == 'array':
                obj = ds.array(**kwargs)
            elif datatype == 'dataframe':
                obj = ds.dframe(**kwargs)
            else:
                raise ValueError("%s datatype not supported" % datatype)
            objs.append(obj)
        return objs
Example No. 18
def test_multipoint_array_to_geopandas(gp_multipoint):
    result = GeoSeries(gp_multipoint, dtype='multipoint').to_geopandas()
    assert_series_equal(result, gp_multipoint)
Example No. 19
def test_pack_partitions_to_parquet_list_bounds(
    gp_multipoint1,
    gp_multiline1,
    gp_multipoint2,
    gp_multiline2,
    bounds,
    tmp_path,
):
    # Build dataframe1
    n = min(len(gp_multipoint1), len(gp_multiline1))
    df1 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint1[:n]),
        'lines': GeoSeries(gp_multiline1[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf1 = dd.from_pandas(df1, npartitions=3)
    path1 = tmp_path / 'ddf1.parq'
    ddf_packed1 = ddf1.pack_partitions_to_parquet(str(path1), npartitions=3)

    # Build dataframe2
    n = min(len(gp_multipoint2), len(gp_multiline2))
    df2 = GeoDataFrame({
        'points': GeoSeries(gp_multipoint2[:n]),
        'lines': GeoSeries(gp_multiline2[:n]),
        'a': list(range(n))
    }).set_geometry('lines')
    ddf2 = dd.from_pandas(df2, npartitions=3)
    path2 = tmp_path / 'ddf2.parq'
    ddf_packed2 = ddf2.pack_partitions_to_parquet(str(path2), npartitions=4)

    # Load both packed datasets from an explicit list of paths
    ddf_read = read_parquet_dask(
        [str(tmp_path / "ddf1.parq"),
         str(tmp_path / "ddf2.parq")],
        geometry="points",
        bounds=bounds)

    # Check the number of partitions (< 7 can happen in the case of empty partitions)
    assert ddf_read.npartitions <= 7

    # Check contents
    xslice = slice(bounds[0], bounds[2])
    yslice = slice(bounds[1], bounds[3])
    expected_df = pd.concat([
        ddf_packed1.cx_partitions[xslice, yslice].compute(),
        ddf_packed2.cx_partitions[xslice, yslice].compute()
    ])
    df_read = ddf_read.compute()
    pd.testing.assert_frame_equal(df_read, expected_df)

    # Compute expected partition bounds
    points_bounds = pd.concat([
        ddf_packed1._partition_bounds['points'],
        ddf_packed2._partition_bounds['points'],
    ]).reset_index(drop=True)

    x0, y0, x1, y1 = bounds
    x0, x1 = (x0, x1) if x0 <= x1 else (x1, x0)
    y0, y1 = (y0, y1) if y0 <= y1 else (y1, y0)
    partition_inds = ~((points_bounds.x1 < x0) | (points_bounds.y1 < y0) |
                       (points_bounds.x0 > x1) | (points_bounds.y0 > y1))
    points_bounds = points_bounds[partition_inds].reset_index(drop=True)

    lines_bounds = pd.concat([
        ddf_packed1._partition_bounds['lines'],
        ddf_packed2._partition_bounds['lines'],
    ]).reset_index(drop=True)[partition_inds].reset_index(drop=True)
    points_bounds.index.name = 'partition'
    lines_bounds.index.name = 'partition'

    # Check partition bounds
    pd.testing.assert_frame_equal(points_bounds,
                                  ddf_read._partition_bounds['points'])

    pd.testing.assert_frame_equal(lines_bounds,
                                  ddf_read._partition_bounds['lines'])

    # Check active geometry column
    assert ddf_read.geometry.name == 'points'
Example No. 20
def test_multiline_array_to_geopandas(gp_multiline):
    result = GeoSeries(gp_multiline, dtype='multiline').to_geopandas()
    assert_series_equal(result, gp_multiline)
Example No. 21
def to_spatialpandas(data, xdim, ydim, columns=[], geom='point'):
    """Converts list of dictionary format geometries to spatialpandas line geometries.

    Args:
        data: List of dictionaries representing individual geometries
        xdim: Name of x-coordinates column
        ydim: Name of y-coordinates column
        columns: List of columns to add
        geom: The geometry type; one of 'point', 'Line', 'Ring' or 'Polygon'

    Returns:
        A spatialpandas.GeoDataFrame version of the data
    """
    from spatialpandas import GeoSeries, GeoDataFrame
    from spatialpandas.geometry import (Point, Line, Polygon, Ring, LineArray,
                                        PolygonArray, PointArray,
                                        MultiLineArray, MultiPolygonArray,
                                        MultiPointArray, RingArray)
    from ...element import Polygons
    poly = any(Polygons._hole_key in d for d in data) or geom == 'Polygon'
    if poly:
        geom_type = Polygon
        single_array, multi_array = PolygonArray, MultiPolygonArray
    elif geom == 'Line':
        geom_type = Line
        single_array, multi_array = LineArray, MultiLineArray
    elif geom == 'Ring':
        geom_type = Ring
        single_array, multi_array = RingArray, MultiLineArray
    else:
        geom_type = Point
        single_array, multi_array = PointArray, MultiPointArray

    array_type = None
    hole_arrays, geom_arrays = [], []
    for geom in data:
        geom = dict(geom)
        if xdim not in geom or ydim not in geom:
            raise ValueError('Could not find geometry dimensions')
        xs, ys = geom.pop(xdim), geom.pop(ydim)
        xscalar, yscalar = isscalar(xs), isscalar(ys)
        if xscalar and yscalar:
            xs, ys = np.array([xs]), np.array([ys])
        elif xscalar:
            xs = np.full_like(ys, xs)
        elif yscalar:
            ys = np.full_like(xs, ys)
        geom_array = np.column_stack([xs, ys])

        if geom_type in (Polygon, Ring):
            geom_array = ensure_ring(geom_array)

        splits = np.where(
            np.isnan(geom_array[:, :2].astype('float')).sum(axis=1))[0]
        split_geoms = (np.split(geom_array, splits + 1)
                       if len(splits) else [geom_array])
        split_holes = geom.pop(Polygons._hole_key, None)
        if split_holes is not None:
            if len(split_holes) != len(split_geoms):
                raise DataError(
                    'Polygons with holes containing multi-geometries '
                    'must declare a list of holes for each geometry.',
                    SpatialPandasInterface)
            else:
                split_holes = [[ensure_ring(np.asarray(h)) for h in hs]
                               for hs in split_holes]

        geom_arrays.append(split_geoms)
        hole_arrays.append(split_holes)
        if geom_type is Point:
            if len(splits) > 1 or any(len(g) > 1 for g in split_geoms):
                array_type = multi_array
            elif array_type is None:
                array_type = single_array
        elif len(splits):
            array_type = multi_array
        elif array_type is None:
            array_type = single_array

    converted = defaultdict(list)
    for geom, arrays, holes in zip(data, geom_arrays, hole_arrays):
        parts = []
        for i, g in enumerate(arrays):
            if i != (len(arrays) - 1):
                g = g[:-1]
            if len(g) < (3 if poly else 2) and geom_type is not Point:
                continue
            if poly:
                parts.append([])
                subparts = parts[-1]
            else:
                subparts = parts
            subparts.append(g[:, :2])
            if poly and holes is not None:
                subparts += [np.array(h) for h in holes[i]]

        for c, v in geom.items():
            converted[c].append(v)

        if array_type is PointArray:
            parts = parts[0].flatten()
        elif array_type is MultiPointArray:
            parts = np.concatenate([sp.flatten() for sp in parts])
        elif array_type is multi_array:
            parts = [[ssp.flatten() for ssp in sp] if poly else sp.flatten()
                     for sp in parts]
        else:
            parts = ([np.asarray(sp).flatten() for sp in parts[0]]
                     if poly else parts[0].flatten())
        converted['geometry'].append(parts)

    if converted:
        geometries = converted['geometry']
        if array_type is PointArray:
            geometries = np.concatenate(geometries)
        geom_array = array_type(geometries)
        if poly:
            geom_array = geom_array.oriented()
        converted['geometry'] = GeoSeries(geom_array)
    else:
        converted['geometry'] = GeoSeries(single_array([]))
    return GeoDataFrame(converted, columns=['geometry'] + columns)
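
A minimal usage sketch, assuming the surrounding holoviews module this function is excerpted from (the 'x', 'y' and 'value' keys are illustrative):

data = [
    {'x': [0, 1, 2], 'y': [0, 1, 0], 'value': 'a'},
    {'x': [3, 4], 'y': [2, 3], 'value': 'b'},
]
# Each dictionary becomes one Line geometry with a 'value' column
gdf = to_spatialpandas(data, 'x', 'y', columns=['value'], geom='Line')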
Example No. 22
    def iloc(cls, dataset, index):
        from spatialpandas import GeoSeries
        from spatialpandas.geometry import MultiPointDtype
        rows, cols = index
        geom_dims = cls.geom_dims(dataset)
        geom_col = cls.geo_column(dataset.data)
        scalar = False
        columns = list(dataset.data.columns)
        if isinstance(cols, slice):
            cols = [d.name for d in dataset.dimensions()][cols]
        elif np.isscalar(cols):
            scalar = np.isscalar(rows)
            cols = [dataset.get_dimension(cols).name]
        else:
            cols = [dataset.get_dimension(d).name for d in index[1]]
        if not all(d in cols for d in geom_dims):
            raise DataError(
                "Cannot index a dimension which is part of the "
                "geometry column of a spatialpandas DataFrame.", cls)
        cols = list(
            unique_iterator([
                columns.index(geom_col) if c in geom_dims else columns.index(c)
                for c in cols
            ]))

        if not isinstance(dataset.data[geom_col].dtype, MultiPointDtype):
            if scalar:
                return dataset.data.iloc[rows[0], cols[0]]
            elif isscalar(rows):
                rows = [rows]
            return dataset.data.iloc[rows, cols]

        geoms = dataset.data[geom_col]
        count = 0
        new_geoms, indexes = [], []
        for i, geom in enumerate(geoms):
            length = int(len(geom.buffer_values) / 2)
            if np.isscalar(rows):
                if count <= rows < (count + length):
                    idx = (rows - count) * 2
                    data = geom.buffer_values[idx:idx + 2]
                    new_geoms.append(type(geom)(data))
                    indexes.append(i)
                    break
            elif isinstance(rows, slice):
                if rows.start is not None and rows.start > (count + length):
                    continue
                elif rows.stop is not None and rows.stop < count:
                    break
                start = None if rows.start is None else max(
                    rows.start - count, 0) * 2
                stop = None if rows.stop is None else min(
                    rows.stop - count, length) * 2
                if rows.step is not None:
                    dataset.param.warning(
                        ".iloc step slicing currently not supported for "
                        "the multi-tabular data format.")
                sliced = geom.buffer_values[start:stop]
                if len(sliced):
                    indexes.append(i)
                    new_geoms.append(type(geom)(sliced))
            else:
                sub_rows = [
                    v for r in rows
                    for v in ((r - count) * 2, (r - count) * 2 + 1)
                    if count <= r < (count + length)
                ]
                if sub_rows:
                    indexes.append(i)
                    idxs = np.array(sub_rows, dtype=int)
                    new_geoms.append(type(geom)(geom.buffer_values[idxs]))
            count += length

        new = dataset.data.iloc[indexes].copy()
        new[geom_col] = GeoSeries(new_geoms)
        return new
Example No. 23
def geohashes_to_geoseries(s: pd.Series) -> GeoSeries:
    """Create spatialpandas GeoSeries from geohashes."""
    s = pd.Series(s)
    return GeoSeries(PointArray(s.apply(geohash_decode_xy)), index=s.index)
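
The geohash_decode_xy helper is not defined above; a minimal sketch of one possible implementation, assuming the python-geohash package (whose geohash.decode returns a (lat, lon) pair) and that points are stored as (x, y) = (lon, lat):

import geohash

def geohash_decode_xy(gh):
    # Decode a geohash string into an (x, y) == (lon, lat) tuple
    lat, lon = geohash.decode(gh)
    return lon, lat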
Example No. 24
def test_multipolygon_array_to_geopandas(gp_multipolygon):
    result = GeoSeries(gp_multipolygon, dtype='multipolygon').to_geopandas()
    assert_series_equal(result, gp_multipolygon)
Example No. 25
def test_ring_array_to_geopandas(gp_ring):
    result = GeoSeries(gp_ring, dtype='ring').to_geopandas()
    assert_series_equal(result, gp_ring)