Exemplo n.º 1
0
def prefixsum(vals):
    """Build the complete prefix-sum array for *vals*.

    For an input holding N values the result holds N + 1 values:
    entry 0 is always 0 and the final entry is the total of *vals*.
    Entries 1..N are filled by an inclusive "sum" scan over *vals*.
    """
    import cudf._lib as libcudf

    from cudf.core.column import NumericalColumn
    from cudf.core.buffer import Buffer

    def _as_plain_column(device_values):
        # Wrap a device array as a column with no null mask; both the scan
        # input and the scan output use the dtype of *vals*.
        return NumericalColumn(data=Buffer(device_values),
                               mask=None,
                               null_count=0,
                               dtype=vals.dtype)

    # One extra slot in front of the scan results for the leading zero.
    result = rmm.device_array(shape=vals.size + 1, dtype=vals.dtype)
    gpu_fill_value[1, 1](result[:1], 0)

    # The scan writes through the view result[1:], so result[0] stays 0.
    scan_input = _as_plain_column(vals)
    scan_output = _as_plain_column(result[1:])
    libcudf.reduce.scan(scan_input, scan_output, "sum", inclusive=True)
    return result
Exemplo n.º 2
0
def test_typecast_from_decimal(data, from_dtype, to_dtype):
    """Compare cudf's ``astype(to_dtype)`` against a pyarrow cast.

    *data* is first cast to *from_dtype*. The reference result comes from
    converting that to arrow and casting with ``safe=False``.
    """
    source = data.astype(from_dtype)
    arrow_reference = source.to_arrow().cast(to_dtype, safe=False)

    converted = source.astype(to_dtype)
    reference = cudf.Series(NumericalColumn.from_arrow(arrow_reference))

    assert_eq(converted, reference)
Exemplo n.º 3
0
 def as_column(self):
     """Return the values of this object as a NumericalColumn.

     A non-empty object is expanded with ``cudautils.arange`` from
     ``self._start`` to ``self._stop``; an empty one is backed by a
     zero-length device array. The column carries ``self.name``.
     """
     if len(self) == 0:
         # Nothing to enumerate: allocate an empty device array instead.
         values = rmm.device_array(0, dtype=self.dtype)
     else:
         values = cudautils.arange(self._start, self._stop, dtype=self.dtype)
     buf = Buffer(values)
     return NumericalColumn(data=buf, dtype=values.dtype, name=self.name)
Exemplo n.º 4
0
def test_typecast_from_decimal(data, from_dtype, to_dtype):
    """Check that casting from *from_dtype* to *to_dtype* matches pyarrow.

    The expected Series is built from the arrow array obtained with an
    unchecked (``safe=False``) pyarrow cast of the same starting values.
    """
    starting = data.astype(from_dtype)
    arrow_values = starting.to_arrow()
    arrow_casted = arrow_values.cast(to_dtype, safe=False)

    result = starting.astype(to_dtype)
    expected_column = NumericalColumn.from_arrow(arrow_casted)
    expected = cudf.Series(expected_column)

    assert_eq(result, expected)
Exemplo n.º 5
0
 def get_dt_field(self, field):
     """Return the datetime component *field* of the values as an index.

     The component is obtained from ``self._values.get_dt_field(field)``,
     re-wrapped as a NumericalColumn named ``self.name`` and passed to
     ``as_index``.
     """
     field_values = self._values.get_dt_field(field)
     # NOTE(review): column.column_empty_like always returns a plain Column,
     # but GenericIndex needs a NumericalColumn, so the pieces are re-wrapped
     # here. It is an open question how this should be handled properly.
     numeric_column = NumericalColumn(
         data=field_values.data,
         mask=field_values.mask,
         null_count=field_values.null_count,
         dtype=field_values.dtype,
         name=self.name,
     )
     return as_index(numeric_column)
Exemplo n.º 6
0
def build_column(data,
                 dtype,
                 mask=None,
                 offset=0,
                 children=(),
                 categories=None):
    """
    Construct the Column subclass that corresponds to *dtype*.

    Parameters
    ----------
    data : Buffer
        The data buffer. It is only forwarded to DatetimeColumn and
        NumericalColumn, so it can be None when constructing a
        StringColumn or CategoricalColumn.
    dtype
        The dtype associated with the Column to construct; it is
        normalized with ``pd.api.types.pandas_dtype`` before dispatch.
    mask : Buffer, optional
        The mask buffer
    offset : int, optional
    children : tuple, optional
        Forwarded to CategoricalColumn (which requires exactly one
        Column here) and to StringColumn.
    categories : Column, optional
        If constructing a CategoricalColumn, a Column containing
        the categories.
        NOTE(review): this argument is not read anywhere in the body.

    Raises
    ------
    ValueError
        If *dtype* is categorical and *children* does not hold exactly
        one element.
    TypeError
        If *dtype* is categorical and the single child is not a
        ColumnBase.
    """
    from cudf.core.column.numerical import NumericalColumn
    from cudf.core.column.datetime import DatetimeColumn
    from cudf.core.column.categorical import CategoricalColumn
    from cudf.core.column.string import StringColumn

    dtype = pd.api.types.pandas_dtype(dtype)

    # Categorical dtypes are checked first, before the dispatch on
    # ``dtype.type`` below.
    if is_categorical_dtype(dtype):
        if len(children) != 1:
            raise ValueError(
                "Must specify exactly one child column for CategoricalColumn")
        if not isinstance(children[0], ColumnBase):
            raise TypeError("children must be a tuple of Columns")
        return CategoricalColumn(dtype=dtype,
                                 mask=mask,
                                 offset=offset,
                                 children=children)

    scalar_type = dtype.type

    if scalar_type is np.datetime64:
        return DatetimeColumn(data=data, dtype=dtype, mask=mask, offset=offset)

    if scalar_type in (np.object_, np.str_):
        # String columns are built from children only; data is not passed.
        return StringColumn(mask=mask, offset=offset, children=children)

    # Every remaining dtype is treated as numerical.
    return NumericalColumn(data=data,
                           dtype=dtype,
                           mask=mask,
                           offset=offset)
Exemplo n.º 7
0
 def _find_segments(self):
     """Run ``cudautils.find_segments`` on ``self.gpu_values``.

     Returns a 2-tuple: the first result wrapped as a NumericalColumn,
     and the second result (the markers) passed through unchanged.
     """
     segments, markers = cudautils.find_segments(self.gpu_values)
     segment_column = NumericalColumn(data=Buffer(segments),
                                      dtype=segments.dtype)
     return segment_column, markers