示例#1
0
    def _copy_type_metadata(self, other, include_index: bool = True):
        """
        Apply the type metadata of each column of `other` to the column at
        the same position in `self`, and optionally do the same for the index.

        See `ColumnBase._with_type_metadata` for more information.

        Parameters
        ----------
        other : object exposing `_data` and `_index`
            Source of the type metadata. Its columns are paired positionally
            with the columns of `self`.
        include_index : bool, default True
            When True, and both objects have an index, the index metadata is
            copied as well.

        Returns
        -------
        self
            The same object, updated in place.
        """
        paired_columns = zip(
            self._data.keys(), self._data.values(), other._data.values()
        )
        for label, own_col, src_col in paired_columns:
            # libcudf drops everything that makes a column a GeoColumn and
            # only hands back the underlying base data. There is no simple
            # NumericalColumn -> GeoColumn conversion, so the GeoColumn is
            # rebuilt by hand from the source column's geometry and metadata.
            if isinstance(src_col, GeoColumn):
                own_col = GeoColumn(
                    src_col._geo, src_col._meta, cudf.Index(own_col)
                )

            typed_col = own_col._with_type_metadata(src_col.dtype)
            self._data.set_by_label(label, typed_col, validate=False)

        if not include_index:
            return self
        if self._index is None or other._index is None:
            return self

        self._index._copy_type_metadata(other._index)

        # If `other` carries a CategoricalIndex but our index did not end up
        # as one, rebuild our index from its underlying column.
        categorical_index = cudf.core.index.CategoricalIndex
        source_is_categorical = isinstance(other._index, categorical_index)
        if source_is_categorical and not isinstance(
            self._index, categorical_index
        ):
            self._index = cudf.Index(self._index._column)

        return self
示例#2
0
 def __init__(
     self,
     data: Union[gpd.GeoSeries],
     index: Union[cudf.Index, pd.Index] = None,
     dtype=None,
     name=None,
     nan_as_null=True,
 ):
     """
     Build a GeoSeries from a GeoColumn, another GeoSeries, a GeoPandas
     GeoSeries, or a pandas Series.

     Parameters
     ----------
     data : GeoColumn, GeoSeries, geopandas GeoSeries or pandas Series
         The geometry data. A pandas Series is first converted to a
         GeoPandas GeoSeries.
     index : cudf.Index or pd.Index, optional
         Index to use. When omitted, the index of `data` is used if `data`
         is a (Geo)Pandas or GeoSeries object; otherwise a `RangeIndex`
         covering `len(data)` is created.
     dtype, name, nan_as_null
         Forwarded unchanged to the parent constructor.

     Raises
     ------
     TypeError
         If `data` is not one of the supported types.
     """
     # Condition data first: a plain pandas Series becomes a GeoPandas
     # GeoSeries, so its index is handled like any other GeoSeries below
     # instead of being silently replaced by a RangeIndex.
     if isinstance(data, pd.Series):
         data = gpGeoSeries(data)
     # Create column
     if isinstance(data, GeoColumn):
         column = data
     elif isinstance(data, GeoSeries):
         column = data._column
     elif isinstance(data, gpGeoSeries):
         adapter = GeoPandasAdapter(data)
         buffers = GeoArrowBuffers(adapter.get_geoarrow_host_buffers())
         pandas_meta = GeoMeta(adapter.get_geopandas_meta())
         column = GeoColumn(buffers, pandas_meta)
     else:
         raise TypeError(
             f"Incompatible object passed to GeoSeries ctor {type(data)}"
         )
     # Condition index only after the data type is validated, so unsupported
     # inputs raise the descriptive TypeError above rather than failing in
     # `len(data)`.
     if index is None:
         if isinstance(data, (gpGeoSeries, GeoSeries)):
             index = data.index
         else:
             index = cudf.RangeIndex(0, len(data))
     super().__init__(column, index, dtype, name, nan_as_null)
示例#3
0
    def __init__(self, data: gpGeoDataFrame = None):
        """
        Create a GPU GeoDataFrame, optionally populated from GeoPandas.

        Parameters
        ----------
        data : geopandas.geodataframe.GeoDataFrame, optional
            Source dataframe. Geometry columns are converted to `GeoColumn`
            objects; every other column is stored as given. When omitted,
            an empty GeoDataFrame is created.

        Raises
        ------
        ValueError
            If `data` is neither None nor a GeoPandas GeoDataFrame.
        """
        super().__init__()

        # Nothing to load: leave the frame empty.
        if data is None:
            return
        if not isinstance(data, gpGeoDataFrame):
            raise ValueError("Invalid type passed to GeoDataFrame ctor")

        self.index = data.index
        for label in data.columns:
            series = data[label]
            if not is_geometry_type(series):
                # Non-geometry columns are stored without conversion here.
                self._data[label] = series
                continue
            # Geometry columns go through the GeoArrow host buffers.
            adapter = GeoPandasAdapter(series)
            host_buffers = adapter.get_geoarrow_host_buffers()
            geo_meta = GeoMeta(adapter.get_geopandas_meta())
            self._data[label] = GeoColumn(
                GeoArrowBuffers(host_buffers), geo_meta
            )
示例#4
0
def test_points():
    """Two points built from a flat xy buffer round-trip to GeoPandas."""
    xy = [0, 1, 2, 3]
    buffers = GeoArrowBuffers({"points_xy": xy})

    # The raw coordinate buffer is exposed unchanged.
    cudf.testing.assert_series_equal(cudf.Series([0, 1, 2, 3]),
                                     buffers.points.xy)
    # Four coordinates make two (x, y) points.
    assert len(buffers.points) == 2

    column = GeoColumn(buffers)
    actual = GeoSeries(column).to_pandas()
    expected = gpGeoSeries([Point(0, 1), Point(2, 3)])
    pd.testing.assert_series_equal(actual, expected)
示例#5
0
def test_homogeneous_lines():
    """Four LineStrings built from xy/offset buffers match GeoPandas."""
    line_buffers = {
        "lines_xy": range(24),
        # Offsets 0, 6, 12, 18, 24: six coordinates per line.
        "lines_offsets": np.array(range(5)) * 6,
    }
    buffers = GeoArrowBuffers(line_buffers)

    cudf.testing.assert_series_equal(cudf.Series(range(24)), buffers.lines.xy)
    assert len(buffers.lines) == 4

    column = GeoColumn(buffers)
    actual = GeoSeries(column).to_pandas()
    expected = gpGeoSeries([
        LineString([[0, 1], [2, 3], [4, 5]]),
        LineString([[6, 7], [8, 9], [10, 11]]),
        LineString([[12, 13], [14, 15], [16, 17]]),
        LineString([[18, 19], [20, 21], [22, 23]]),
    ])
    pd.testing.assert_series_equal(actual, expected)
def test_multipoints():
    """Four MultiPoints built from xy/offset buffers match GeoPandas."""
    multipoint_buffers = {
        "mpoints_xy": np.arange(0, 16),
        # Four coordinates (two points) per multipoint.
        "mpoints_offsets": [0, 4, 8, 12, 16],
    }
    buffers = GeoArrowBuffers(multipoint_buffers)

    assert_eq(cudf.Series(np.arange(0, 16)), buffers.multipoints.xy)
    assert len(buffers.multipoints) == 4

    column = GeoColumn(buffers)
    expected = gpGeoSeries([
        MultiPoint([Point([0, 1]), Point([2, 3])]),
        MultiPoint([Point(4, 5), Point(6, 7)]),
        MultiPoint([Point(8, 9), Point(10, 11)]),
        MultiPoint([Point(12, 13), Point(14, 15)]),
    ])
    assert_eq(GeoSeries(column), expected)
示例#7
0
def test_polygons():
    """Polygons and one MultiPolygon built from buffers match GeoPandas."""
    # Ten rings of three points each; every ring is closed by appending its
    # first point again, giving eight values per ring.
    ring_rows = np.arange(60).reshape(10, 6)
    polygons_xy = np.array([
        np.concatenate((row[0:6], row[0:2]), axis=None) for row in ring_rows
    ])
    flat_xy = polygons_xy.flatten()

    buffers = GeoArrowBuffers({
        "polygons_xy": flat_xy,
        "polygons_polygons": np.array([0, 1, 3, 5, 7, 9, 10]),
        "polygons_rings": np.arange(11) * 8,
        "mpolygons": [2, 4],
    })

    cudf.testing.assert_series_equal(cudf.Series(polygons_xy.flatten()),
                                     buffers.polygons.xy)
    assert len(buffers.polygons) == 5

    column = GeoColumn(buffers)
    actual = GeoSeries(column).to_pandas()

    first = Polygon(((0, 1), (2, 3), (4, 5)))
    second = Polygon(
        ((6, 7), (8, 9), (10, 11)),
        [((12, 13), (14, 15), (16, 17))],
    )
    multi = MultiPolygon([
        (
            ((18, 19), (20, 21), (22, 23)),
            [((24, 25), (26, 27), (28, 29))],
        ),
        (
            ((30, 31), (32, 33), (34, 35)),
            [((36, 37), (38, 39), (40, 41))],
        ),
    ])
    fourth = Polygon(
        ((42, 43), (44, 45), (46, 47)),
        [((48, 49), (50, 51), (52, 53))],
    )
    fifth = Polygon(((54, 55), (56, 57), (58, 59)))
    expected = gpGeoSeries([first, second, multi, fourth, fifth])

    pd.testing.assert_series_equal(actual, expected)
def test_mixed_lines():
    """LineStrings mixed with one MultiLineString match GeoPandas."""
    line_buffers = {
        "lines_xy": range(24),
        "lines_offsets": np.array(range(5)) * 6,
        # With this entry the middle two lines are expected to come back as
        # a single MultiLineString (see the expected series below).
        "mlines": [1, 3],
    }
    buffers = GeoArrowBuffers(line_buffers)

    assert_eq(cudf.Series(range(24)), buffers.lines.xy)
    assert len(buffers.lines) == 3

    column = GeoColumn(buffers)
    leading = LineString([[0, 1], [2, 3], [4, 5]])
    grouped = MultiLineString([
        LineString([[6, 7], [8, 9], [10, 11]]),
        LineString([[12, 13], [14, 15], [16, 17]]),
    ])
    trailing = LineString([[18, 19], [20, 21], [22, 23]])
    assert_eq(GeoSeries(column), gpGeoSeries([leading, grouped, trailing]))
def test_points():
    """Two points built from a flat xy buffer compare equal to GeoPandas."""
    point_buffers = {"points_xy": [0, 1, 2, 3]}
    buffers = GeoArrowBuffers(point_buffers)

    assert_eq(cudf.Series([0, 1, 2, 3]), buffers.points.xy)
    # Four coordinates make two (x, y) points.
    assert len(buffers.points) == 2

    column = GeoColumn(buffers)
    expected = gpGeoSeries([Point(0, 1), Point(2, 3)])
    assert_eq(GeoSeries(column), expected)