示例#1
0
def bitwise_op_test(dtype, expect_fn, test_fn, nelem=128):
    """Check a binary libgdf operation against a host-side reference.

    Two random host arrays (``nelem`` elements of ``dtype``, produced by
    ``gen_rand``) are copied to the device and wrapped as gdf columns with
    ``ffi.NULL`` in the validity slot. ``test_fn(lhs, rhs, out)`` is run on
    those columns and the output buffer, copied back to the host, must equal
    ``expect_fn(host_lhs, host_rhs)`` exactly.
    """
    host_a = gen_rand(dtype, nelem)
    host_b = gen_rand(dtype, nelem)

    dev_a = rmm.to_device(host_a)
    dev_b = rmm.to_device(host_b)
    dev_out = rmm.device_array_like(dev_a)

    gdf_type = get_dtype(dtype)

    # One column view per device buffer, all sharing the same gdf dtype.
    views = []
    for dev_buf in (dev_a, dev_b, dev_out):
        view = new_column()
        libgdf.gdf_column_view(view, unwrap_devary(dev_buf), ffi.NULL, nelem,
                               gdf_type)
        views.append(view)
    col_a, col_b, col_out = views

    expect = expect_fn(host_a, host_b)
    test_fn(col_a, col_b, col_out)
    got = dev_out.copy_to_host()

    for label, values in (('got', got), ('expect', expect)):
        print(label)
        print(values)
    np.testing.assert_array_equal(expect, got)
示例#2
0
def test_output_dtype_mismatch():
    """Binary ops must reject a result column of a mismatching dtype.

    Both inputs are int32 columns while the result column is float32. The
    add, equality and bitwise-and generic operations are each expected to
    raise ``GDFError`` matching ``GDF_UNSUPPORTED_DTYPE``.
    """
    in_dtype_a = np.int32
    in_dtype_b = np.int32
    count = 5
    host_a = np.arange(count, dtype=in_dtype_a)
    host_b = np.arange(count, dtype=in_dtype_b)

    dev_a = rmm.to_device(host_a)
    dev_b = rmm.to_device(host_b)
    dev_out = rmm.device_array(dev_a.size, dtype=np.float32)

    col_a = new_column()
    col_b = new_column()
    col_out = new_column()

    libgdf.gdf_column_view(col_a, unwrap_devary(dev_a), ffi.NULL, count,
                           get_dtype(in_dtype_a))
    libgdf.gdf_column_view(col_b, unwrap_devary(dev_b), ffi.NULL, count,
                           get_dtype(in_dtype_b))
    libgdf.gdf_column_view(col_out, unwrap_devary(dev_out), ffi.NULL,
                           count, get_dtype(dev_out.dtype))

    # Every operation is checked the same way, one after the other.
    for op_name in ("gdf_add_generic",
                    "gdf_eq_generic",
                    "gdf_bitwise_and_generic"):
        binary_op = getattr(libgdf, op_name)
        with pytest.raises(GDFError) as raises:
            binary_op(col_a, col_b, col_out)
        raises.match("GDF_UNSUPPORTED_DTYPE")
示例#3
0
def logical_op_test(dtype, expect_fn, test_fn, nelem=128, gdf_dtype=None):
    """Check a binary libgdf comparison/logical operation against NumPy.

    Parameters
    ----------
    dtype
        NumPy dtype used to generate both random inputs with ``gen_rand``.
    expect_fn
        Host reference, called as ``expect_fn(h_lhs, h_rhs)``.
    test_fn
        Operation under test, called as ``test_fn(col_lhs, col_rhs,
        col_result)``.
    nelem
        Number of elements per input.
    gdf_dtype
        gdf type tag for the input columns; defaults to ``get_dtype(dtype)``.

    The result buffer is a boolean device array that is exposed to libgdf as
    a ``GDF_INT8`` column.
    """
    h_lhs = gen_rand(dtype, nelem)
    h_rhs = gen_rand(dtype, nelem)
    d_lhs = rmm.to_device(h_lhs)
    d_rhs = rmm.to_device(h_rhs)
    # np.bool_ is the NumPy boolean scalar type. The bare ``np.bool`` alias
    # is not available on every NumPy release (it was removed in 1.24).
    d_result = rmm.device_array(d_lhs.size, dtype=np.bool_)

    col_lhs = new_column()
    col_rhs = new_column()
    col_result = new_column()
    gdf_dtype = get_dtype(dtype) if gdf_dtype is None else gdf_dtype

    libgdf.gdf_column_view(col_lhs, unwrap_devary(d_lhs), ffi.NULL, nelem,
                           gdf_dtype)
    libgdf.gdf_column_view(col_rhs, unwrap_devary(d_rhs), ffi.NULL, nelem,
                           gdf_dtype)
    libgdf.gdf_column_view(col_result, unwrap_devary(d_result), ffi.NULL,
                           nelem, libgdf.GDF_INT8)

    expect = expect_fn(h_lhs, h_rhs)
    test_fn(col_lhs, col_rhs, col_result)

    got = d_result.copy_to_host()
    print(expect, got)
    np.testing.assert_equal(expect, got)
示例#4
0
def arith_op_test(dtype,
                  ulp,
                  expect_fn,
                  test_fn,
                  nelem=128,
                  non_zero_rhs=False):
    """Check a binary libgdf arithmetic operation within a ULP tolerance.

    Random operands of ``dtype`` are generated with ``gen_rand``; when
    ``non_zero_rhs`` is set the right operand is passed through
    ``fix_zeros`` before being uploaded. ``test_fn(lhs, rhs, out)`` runs on
    the device and its output may differ from ``expect_fn(lhs, rhs)`` by at
    most ``ulp`` units in the last place.
    """
    host_a = gen_rand(dtype, nelem)
    host_b = gen_rand(dtype, nelem)
    if non_zero_rhs:
        fix_zeros(host_b)

    dev_a = rmm.to_device(host_a)
    dev_b = rmm.to_device(host_b)
    dev_out = rmm.device_array_like(dev_a)

    gdf_type = get_dtype(dtype)

    # One column view per device buffer, all sharing the same gdf dtype.
    views = []
    for dev_buf in (dev_a, dev_b, dev_out):
        view = new_column()
        libgdf.gdf_column_view(view, unwrap_devary(dev_buf), ffi.NULL, nelem,
                               gdf_type)
        views.append(view)
    col_a, col_b, col_out = views

    expect = expect_fn(host_a, host_b)
    test_fn(col_a, col_b, col_out)
    got = dev_out.copy_to_host()

    for label, values in (('got', got), ('expect', expect)):
        print(label)
        print(values)
    np.testing.assert_array_max_ulp(expect, got, maxulp=ulp)
示例#5
0
def test_validity_add(dtype, nelem):
    """Add two masked columns and verify both the mask and the valid data.

    The result mask produced by ``gdf_validity_and`` must equal the bitwise
    AND of the two input masks, and the sum must match ``np.add`` on every
    element that the combined mask selects.
    """
    reference_add = np.add
    gdf_add = libgdf.gdf_add_generic

    # Element data
    host_a = gen_rand(dtype, nelem)
    host_b = gen_rand(dtype, nelem)
    dev_a = rmm.to_device(host_a)
    dev_b = rmm.to_device(host_b)
    dev_out = rmm.device_array_like(dev_a)

    # Validity bitmasks: one bit per element, rounded up to whole bytes
    mask_nbytes = (nelem + 8 - 1) // 8
    host_valid_a = gen_rand(np.int8, mask_nbytes)
    host_valid_b = gen_rand(np.int8, mask_nbytes)

    dev_valid_a = rmm.to_device(host_valid_a)
    dev_valid_b = rmm.to_device(host_valid_b)
    dev_valid_out = rmm.device_array_like(dev_valid_a)

    # Column views over (data, mask) pairs
    gdf_type = get_dtype(dtype)
    views = []
    for dev_buf, dev_valid in ((dev_a, dev_valid_a),
                               (dev_b, dev_valid_b),
                               (dev_out, dev_valid_out)):
        view = new_column()
        libgdf.gdf_column_view(view, unwrap_devary(dev_buf),
                               unwrap_devary(dev_valid), nelem, gdf_type)
        views.append(view)
    col_a, col_b, col_out = views

    libgdf.gdf_validity_and(col_a, col_b, col_out)

    expect = reference_add(host_a, host_b)
    gdf_add(col_a, col_b, col_out)
    got = dev_out.copy_to_host()

    # The output mask has to be the AND of both input masks
    expect_valids = host_valid_a & host_valid_b
    got_valids = dev_valid_out.copy_to_host()

    np.testing.assert_array_equal(expect_valids, got_valids)

    # Only compare the elements selected by the combined mask
    selected = buffer_as_bits(expect_valids.data)[:expect.size]
    expect_masked = expect[selected]
    got_masked = got[selected]

    for label, values in (('expect', expect_masked), ('got', got_masked)):
        print(label)
        print(values)

    np.testing.assert_array_equal(expect_masked, got_masked)
示例#6
0
def test_radixsort(nelem, descending, dtype):
    """Sort random keys with the libgdf radix sort and compare with NumPy.

    Keys are random values of ``dtype``; the values column is
    ``0..nelem-1`` as int64. After sorting, the keys must equal the keys in
    NumPy mergesort order (descending when requested) and the values must
    equal the corresponding argsort indices.
    """
    def expected_fn(key):
        # Use mergesort for stable sort
        # Negate the key for descending
        if issubclass(dtype, np.integer):
            # Integers are inverted bitwise because arithmetic negation
            # doesn't work on the smallest value of an integer type,
            # i.e. -((int8)-128) -> -128
            def negate_values(v):
                return ~v
        else:

            def negate_values(v):
                return -v

        sorted_idx = np.argsort(negate_values(key) if descending else key,
                                kind='mergesort')
        sorted_keys = key[sorted_idx]
        # Returns key, vals
        return sorted_keys, sorted_idx

    # Make data
    key = gen_rand(dtype, nelem)
    d_key = rmm.to_device(key)
    col_key = new_column()
    libgdf.gdf_column_view(col_key, unwrap_devary(d_key), ffi.NULL, nelem,
                           get_dtype(d_key.dtype))

    val = np.arange(nelem, dtype=np.int64)
    d_val = rmm.to_device(val)
    col_val = new_column()
    libgdf.gdf_column_view(col_val, unwrap_devary(d_val), ffi.NULL, nelem,
                           get_dtype(d_val.dtype))

    sizeof_key = d_key.dtype.itemsize
    sizeof_val = d_val.dtype.itemsize
    begin_bit = 0
    end_bit = sizeof_key * 8

    # Setup plan
    plan = libgdf.gdf_radixsort_plan(nelem, descending, begin_bit, end_bit)
    try:
        libgdf.gdf_radixsort_plan_setup(plan, sizeof_key, sizeof_val)
        # Sort
        libgdf.gdf_radixsort_generic(plan, col_key, col_val)
    finally:
        # Cleanup: free the plan even when setup or the sort itself raises
        libgdf.gdf_radixsort_plan_free(plan)

    # Check
    got_keys = d_key.copy_to_host()
    got_vals = d_val.copy_to_host()
    sorted_keys, sorted_vals = expected_fn(key)

    np.testing.assert_array_equal(sorted_keys, got_keys)
    np.testing.assert_array_equal(sorted_vals, got_vals)
示例#7
0
def test_from_offsets_dev_data():
    """Build an nvstrings object from buffers that live in device memory."""
    # Character codes of "apple" immediately followed by "pear"
    host_chars = np.array([97, 112, 112, 108, 101, 112, 101, 97, 114],
                          dtype=np.int8)
    # Entry boundaries: [0, 5), [5, 5) and [5, 9)
    host_offsets = np.array([0, 5, 5, 9], dtype=np.int32)
    # 0b101: bits 0 and 2 are set, bit 1 is clear
    host_bitmask = np.array([5], dtype=np.int8)

    dev_chars = rmm.to_device(host_chars)
    dev_offsets = rmm.to_device(host_offsets)
    dev_bitmask = rmm.to_device(host_bitmask)

    # NOTE(review): 3 is presumably the string count and 1 the null count,
    # with True marking the pointers as device pointers - confirm against
    # the nvstrings.from_offsets signature.
    s = nvstrings.from_offsets(dev_chars.device_ctypes_pointer.value,
                               dev_offsets.device_ctypes_pointer.value, 3,
                               dev_bitmask.device_ctypes_pointer.value, 1,
                               True)
    assert_eq(s, ['apple', None, 'pear'])
示例#8
0
def _request_transfer(key, remoteinfo):
    """Fetch the array stored under ``key`` from a remote peer.

    ``remoteinfo`` is formatted into a ``tcp://{0}:{1}`` endpoint for a REQ
    socket. When its first element equals ``_global_addr[0]`` an ``"IPC"``
    request is sent: the unpickled reply is entered as a context manager and
    its contents are copied into a freshly allocated device array.
    Otherwise a ``"NET"`` request is sent and the unpickled reply is
    uploaded with ``rmm.to_device``. In both cases ``_request_drop`` is
    called before the device array is returned.

    The socket and the ZeroMQ context created here are released on every
    exit path instead of being left to garbage collection.
    """
    logger.info("rebuild from: %s for %r", remoteinfo, key)

    context = zmq.Context()
    try:
        socket = context.socket(zmq.REQ)
        try:
            socket.connect("tcp://{0}:{1}".format(*remoteinfo))

            myaddr = _global_addr[0]
            theiraddr = remoteinfo[0]
            # NOTE(review): the replies below are unpickled straight from
            # the network; pickle.loads must only ever see trusted peers.
            if myaddr == theiraddr:
                # Same machine go by IPC
                logger.info("request by IPC")
                socket.send(pickle.dumps(("IPC", key)))
                rcv = socket.recv()
                ipch = pickle.loads(rcv)
                # Open IPC and copy to local context

                with ipch as data:
                    copied = rmm.device_array_like(data)
                    copied.copy_to_device(data)

                # Release
                _request_drop(socket, key)
                return copied
            else:
                # Different machine go by NET
                logger.info("request by NET: %s->%s", theiraddr, myaddr)
                socket.send(pickle.dumps(("NET", key)))
                rcv = socket.recv()
                output = rmm.to_device(pickle.loads(rcv))
                # Release
                _request_drop(socket, key)
                return output
        finally:
            socket.close()
    finally:
        context.term()
示例#9
0
def test_slice_from():
    """slice_from with per-string start positions given as a device pointer."""
    words = ["hello world", "holy accéntéd", "batman", None, ""]
    strs = nvstrings.to_device(words)

    start_positions = np.asarray([2, 3, -1, -1, -1], dtype=np.int32)
    d_starts = rmm.to_device(start_positions)

    got = strs.slice_from(starts=d_starts.device_ctypes_pointer.value)

    assert_eq(got, ['llo world', 'y accéntéd', '', None, ''])
示例#10
0
    def __getitem__(self, arg):
        """Index the column with a number, a slice or an array of positions.

        * A ``Number`` is converted to ``int`` and handed to
          ``element_indexing``.
        * A ``slice`` returns a new column through ``replace``. When the
          column has nulls, only unit steps are supported
          (``NotImplementedError`` otherwise) and the mask is sliced too.
        * A ``list`` or ``np.ndarray`` is uploaded to the device and, like a
          ``DeviceNDArray``, passed to ``take``.
        * Anything else raises ``NotImplementedError``.
        """
        if isinstance(arg, Number):
            return self.element_indexing(int(arg))

        if isinstance(arg, slice):
            if not (self.null_count > 0):
                # No nulls: only the data buffer needs slicing.
                return self.replace(data=self.data[arg])

            if arg.step is not None and arg.step != 1:
                raise NotImplementedError(arg)

            sliced_data = self.data[arg]
            # Expand the bitmask, slice it, then compact it again.
            expanded_mask = cudautils.expand_mask_bits(
                self.data.size,
                self.mask.to_gpu_array(),
            )
            sliced_mask = Buffer(
                cudautils.compact_mask_bytes(expanded_mask[arg]))
            return self.replace(data=sliced_data, mask=sliced_mask)

        if isinstance(arg, (list, np.ndarray)):
            arg = rmm.to_device(np.array(arg))

        if not isinstance(arg, DeviceNDArray):
            raise NotImplementedError(type(arg))
        return self.take(arg)
示例#11
0
def test_gpu_parse_arrow_int(dtype):
    """Round-trip two integer columns through GpuArrowReader.

    A record batch with ``depdelay`` and ``arrdelay`` columns of ``dtype``
    is serialized, its body is uploaded to the device, and the parsed
    ``depdelay`` column must keep its dtype and values.
    """
    dep_values = [0, 0, -3, -2, 11, 6, -7, -4, 4, -3]
    arr_values = [5, -3, 1, -2, 22, 11, -12, -5, 4, -9]

    def as_byte_array(raw):
        # Mutable host copy of the serialized bytes, viewed as np.byte.
        return np.ndarray(shape=len(raw),
                          dtype=np.byte,
                          buffer=bytearray(raw))

    pa_dep = pa.array(np.array(dep_values, dtype=dtype))
    pa_arr = pa.array(np.array(arr_values, dtype=dtype))
    batch = pa.RecordBatch.from_arrays([pa_dep, pa_arr],
                                       ['depdelay', 'arrdelay'])

    schema_bytes = batch.schema.serialize().to_pybytes()
    recordbatches_bytes = batch.serialize().to_pybytes()

    schema = as_byte_array(schema_bytes)
    rb_gpu_data = rmm.to_device(as_byte_array(recordbatches_bytes))

    columns = GpuArrowReader(schema, rb_gpu_data).to_dict()

    assert columns['depdelay'].dtype == dtype
    assert set(columns) == {"depdelay", "arrdelay"}
    assert list(columns['depdelay']) == dep_values
示例#12
0
def test_dataframe_setitem_from_masked_object():
    """NaN and None entries must be counted as nulls on every input path.

    Covers a Series built from a host array, a DataFrame built from pandas,
    a Series built from a device array, and a DataFrame column assigned
    from a Python list containing ``None``.
    """
    values = np.random.randn(100)
    nan_positions = np.zeros(100, dtype=bool)
    nan_positions[:20] = True
    np.random.shuffle(nan_positions)
    values[nan_positions] = np.nan

    from_host = Series(values)
    assert from_host.has_null_mask
    assert from_host.null_count == 20

    from_pandas = DataFrame.from_pandas(pd.DataFrame({'a': values}))
    assert from_pandas['a'].has_null_mask
    assert from_pandas['a'].null_count == 20

    from_device = Series(rmm.to_device(values))
    assert from_device.has_null_mask
    assert from_device.null_count == 20

    from_list = DataFrame()
    from_list['lst'] = [1, 2, None, 4, 5, 6, None, 8, 9]
    assert from_list['lst'].has_null_mask
    assert from_list['lst'].null_count == 2
示例#13
0
def test_prefixsum(dtype, nelem):
    """Compare the libgdf inclusive prefix sum with ``np.cumsum``."""
    # int8 inputs are restricted to a narrow range to keep data in range
    rand_kwargs = {'low': -2, 'high': 2} if dtype == np.int8 else {}
    host_data = gen_rand(dtype, nelem, **rand_kwargs)

    dev_data = rmm.to_device(host_data)
    dev_out = rmm.device_array(dev_data.size, dtype=dev_data.dtype)

    gdf_type = get_dtype(dtype)

    col_in = new_column()
    libgdf.gdf_column_view(col_in, unwrap_devary(dev_data), ffi.NULL, nelem,
                           gdf_type)

    col_out = new_column()
    libgdf.gdf_column_view(col_out, unwrap_devary(dev_out), ffi.NULL,
                           nelem, gdf_type)

    inclusive = True
    libgdf.gdf_prefixsum_generic(col_in, col_out, inclusive)

    expect = np.cumsum(dev_data.copy_to_host())
    got = dev_out.copy_to_host()
    if not inclusive:
        # An exclusive scan starts at 0 and is shifted by one element.
        expect = expect[:-1]
        assert got[0] == 0
        got = got[1:]

    np.testing.assert_array_equal(expect, got)
def test_gpu_parse_arrow_data():
    """Parse a serialized record batch from device memory.

    The schema stays on the host, the record batch body is uploaded to the
    device, and the reader must expose ``dest_lat`` / ``dest_lon`` with 23
    values each inside the expected coordinate bounds.
    """
    def as_byte_array(raw):
        # Mutable host copy of the serialized bytes, viewed as np.byte.
        return np.ndarray(shape=len(raw),
                          dtype=np.byte,
                          buffer=bytearray(raw))

    batch = make_gpu_parse_arrow_data_batch()
    schema_data = batch.schema.serialize().to_pybytes()
    recbatch_data = batch.serialize().to_pybytes()

    cpu_schema = as_byte_array(schema_data)
    cpu_data = as_byte_array(recbatch_data)
    gpu_data = rmm.to_device(cpu_data)
    del cpu_data

    # test reader
    reader = GpuArrowReader(cpu_schema, gpu_data)
    for position, column_name in enumerate(('dest_lat', 'dest_lon')):
        assert reader[position].name == column_name
    lat = reader[0].data.copy_to_host()
    lon = reader[1].data.copy_to_host()
    assert lat.size == 23
    assert lon.size == 23

    # Strict bounds: 27 < lat < 42 and -105 < lon < -76
    for coords, lower, upper in ((lat, 27, 42), (lon, -105, -76)):
        np.testing.assert_array_less(coords, upper)
        np.testing.assert_array_less(lower, coords)

    dct = reader.to_dict()
    np.testing.assert_array_equal(lat, dct['dest_lat'].to_array())
    np.testing.assert_array_equal(lon, dct['dest_lon'].to_array())
示例#15
0
def _make_hash_input(hash_input, ncols):
    """Yield gdf column views plus initial hash values for hashing tests.

    The first ``ncols`` entries of ``hash_input`` are uploaded to the device
    and each is wrapped in a column view without a validity mask. The
    yielded pair is ``(list_of_columns, initial_hash_values)``, where the
    initial hash values are ``0..ncols-1`` as uint32 on the device.
    """
    # Upload everything first; this list holds the device arrays for as
    # long as the generator is suspended at the yield below.
    dev_inputs = [rmm.to_device(hash_input[i]) for i in range(ncols)]

    views = []
    for idx, dev_col in enumerate(dev_inputs):
        host_col = hash_input[idx]
        view = new_column()
        libgdf.gdf_column_view(view, unwrap_devary(dev_col), ffi.NULL,
                               host_col.size,
                               get_dtype(host_col.dtype))
        views.append(view)

    seeds = rmm.to_device(np.arange(ncols, dtype=np.uint32))

    yield views, unwrap_devary(seeds)
示例#16
0
def test_gpu_parse_arrow_data():
    """Parse a serialized record batch using frombuffer-backed host arrays.

    The schema stays on the host, the record batch body is uploaded to the
    device, and the reader must expose ``dest_lat`` / ``dest_lon`` with 23
    values each inside the expected coordinate bounds.
    """
    batch = make_gpu_parse_arrow_data_batch()
    schema_data = batch.schema.serialize()
    recbatch_data = batch.serialize()

    # To ensure compatibility for OmniSci we're going to create this numpy
    # array to be read-only as that's how numpy arrays created from foreign
    # memory buffers will be set
    cpu_schema, cpu_data = (np.frombuffer(raw, dtype=np.uint8)
                            for raw in (schema_data, recbatch_data))
    gpu_data = rmm.to_device(cpu_data)
    del cpu_data

    # test reader
    reader = GpuArrowReader(cpu_schema, gpu_data)
    for position, column_name in enumerate(('dest_lat', 'dest_lon')):
        assert reader[position].name == column_name
    lat = reader[0].data.copy_to_host()
    lon = reader[1].data.copy_to_host()
    assert lat.size == 23
    assert lon.size == 23

    # Strict bounds: 27 < lat < 42 and -105 < lon < -76
    for coords, lower, upper in ((lat, 27, 42), (lon, -105, -76)):
        np.testing.assert_array_less(coords, upper)
        np.testing.assert_array_less(lower, coords)

    dct = reader.to_dict()
    np.testing.assert_array_equal(lat, dct['dest_lat'].to_array())
    np.testing.assert_array_equal(lon, dct['dest_lon'].to_array())
示例#17
0
文件: index.py 项目: zeichuan/cudf
    def __getitem__(self, index):
        """Index the range with a slice, a number, or array-like positions.

        * ``slice``: returns ``RangeIndex(0)`` when the slice selects
          nothing, otherwise ``index_from_range`` over the selected bounds
          shifted by ``self._start``.
        * ``Number``: returns the normalized position shifted by
          ``self._start``.
        * ``list`` / ``np.ndarray``: uploaded to the device and used to
          index the materialized column.
        * Anything else is converted with ``columnops.as_column`` (scalars
          are first cast with ``utils.min_signed_type``) and used to index
          the materialized column.
        """
        from numbers import Number

        if isinstance(index, slice):
            start, stop, step = index.indices(len(self))
            # range() yields the exact element count for any step. Plain
            # floor division of (stop - start) by step under-counts, e.g.
            # 0:1:2 selects one element but (1 - 0) // 2 == 0.
            if len(range(start, stop, step)) == 0:
                return RangeIndex(0)
            return index_from_range(start + self._start,
                                    stop + self._start,
                                    step)

        elif isinstance(index, Number):
            index = utils.normalize_index(index, len(self))
            index += self._start
            return index
        elif isinstance(index, (list, np.ndarray)):
            index = np.array(index)
            index = rmm.to_device(index)

        else:
            if pd.api.types.is_scalar(index):
                index = utils.min_signed_type(index)(index)
            index = columnops.as_column(index).data.mem

        return as_index(self.as_column()[index], name=self.name)
示例#18
0
def test_sum_of_squares(dtype, nelem):
    """Compare the libgdf sum-of-squares reduction with NumPy.

    For signed integer dtypes the comparison is skipped when the NumPy
    result falls outside ``[0, iinfo(dtype).max]``; other dtypes are
    compared with the precision listed in ``accuracy_for_dtype``.
    """
    host_data = gen_rand(dtype, nelem)
    dev_data = rmm.to_device(host_data)
    out_size = libgdf.gdf_reduce_optimal_output_size()
    dev_out = rmm.device_array(out_size, dtype=dev_data.dtype)

    col_in = new_column()
    gdf_type = get_dtype(dtype)

    libgdf.gdf_column_view(col_in, unwrap_devary(dev_data), ffi.NULL, nelem,
                           gdf_type)

    libgdf.gdf_sum_of_squares(col_in, unwrap_devary(dev_out), dev_out.size)
    # The reduced value is read from the first output slot.
    got = dev_out.copy_to_host()[0]
    expect = (host_data ** 2).sum()

    print('expect:', expect)
    print('got:', got)

    if np.dtype(dtype).kind != 'i':
        np.testing.assert_array_almost_equal(
            expect, got, decimal=accuracy_for_dtype[dtype])
        return

    if 0 <= expect <= np.iinfo(dtype).max:
        np.testing.assert_array_almost_equal(expect, got)
    else:
        print('overflow, passing')
示例#19
0
def read_data():
    """Load the ``ipums.pkl`` fixture and prepare it for GPU Arrow parsing.

    Returns
    -------
    tuple
        ``(df, schema, darr)``: the pandas DataFrame read from
        ``data/ipums.pkl`` next to this file, the serialized Arrow schema as
        a host ``np.byte`` array, and the serialized record batch uploaded
        to the device.

    The calling test is skipped when the pickle file does not exist. Any
    other failure while reading it propagates; previously it was only
    printed and the function then failed with a ``NameError`` on ``df``.
    """
    import pandas as pd

    basedir = os.path.dirname(__file__)
    datapath = os.path.join(basedir, "data", "ipums.pkl")
    try:
        df = pd.read_pickle(datapath)
    except FileNotFoundError:
        pytest.skip(".pkl file is not found")

    names = list(df.columns)
    arrays = [pa.Array.from_pandas(df[k]) for k in names]
    batch = pa.RecordBatch.from_arrays(arrays, names)
    schema = batch.schema.serialize().to_pybytes()
    schema = np.ndarray(shape=len(schema),
                        dtype=np.byte,
                        buffer=bytearray(schema))
    data = batch.serialize().to_pybytes()
    data = np.ndarray(shape=len(data), dtype=np.byte, buffer=bytearray(data))
    darr = rmm.to_device(data)
    return df, schema, darr
示例#20
0
文件: string.py 项目: yutiansut/cudf
    def element_indexing(self, arg):
        """Select one or more strings from the column.

        Parameters
        ----------
        arg
            A ``Number`` (negative values count from the end), a ``slice``,
            a ``list``, an ``np.ndarray`` (uploaded to the device first) or
            a ``DeviceNDArray`` of positions (cast to int32 and gathered).

        Returns
        -------
        The single host value when the selection has exactly one element,
        otherwise a column built with ``columnops.as_column``.

        Raises
        ------
        IndexError
            If a numeric index lies outside ``[-len(self), len(self))``.
        NotImplementedError
            For any other argument type.
        """
        if isinstance(arg, Number):
            arg = int(arg)
            size = len(self)
            if arg < 0:
                arg = size + arg
            # Reject both ends: without the lower bound, an index below
            # -len(self) would stay negative and be used as a wrapped index.
            if arg < 0 or arg > (size - 1):
                raise IndexError
            out = self._data[arg]
        elif isinstance(arg, slice):
            out = self._data[arg]
        elif isinstance(arg, list):
            out = self._data[arg]
        elif isinstance(arg, np.ndarray):
            gpu_arr = rmm.to_device(arg)
            return self.element_indexing(gpu_arr)
        elif isinstance(arg, DeviceNDArray):
            # NVStrings gather call expects an array of int32s
            arg = cudautils.astype(arg, np.dtype('int32'))
            if len(arg) > 0:
                gpu_ptr = get_ctype_ptr(arg)
                out = self._data.gather(gpu_ptr, len(arg))
            else:
                out = self._data.gather([])
        else:
            raise NotImplementedError(type(arg))

        if len(out) == 1:
            return out.to_host()[0]
        else:
            return columnops.as_column(out)
示例#21
0
def test_gpu_parse_arrow_cats():
    """Parse a record batch that contains a categorical column.

    Checks the dtypes of the ``idx``, ``name`` and ``weight`` columns, the
    set of category names, and every row against ``get_expected_values()``.
    """
    def as_byte_array(raw):
        # Mutable host copy of the serialized bytes, viewed as np.byte.
        return np.ndarray(shape=len(raw), dtype=np.byte,
                          buffer=bytearray(raw))

    batch = make_gpu_parse_arrow_cats_batch()
    schema_bytes = batch.schema.serialize().to_pybytes()
    recordbatches_bytes = batch.serialize().to_pybytes()

    schema = as_byte_array(schema_bytes)
    rb_gpu_data = rmm.to_device(as_byte_array(recordbatches_bytes))

    columns = GpuArrowReader(schema, rb_gpu_data).to_dict()

    sr_idx = columns['idx']
    sr_name = columns['name']
    sr_weight = columns['weight']

    assert sr_idx.dtype == np.int32
    assert sr_name.dtype == 'category'
    assert sr_weight.dtype == np.double
    assert set(sr_name) == {'apple', 'pear', 'orange', 'grape'}

    expected = get_expected_values()
    for row in range(len(sr_idx)):
        got_idx, got_name, got_weight = (sr_idx[row], sr_name[row],
                                         sr_weight[row])

        # the serialized data is not in order, so look rows up by index
        exp_idx, exp_name, exp_weight = expected[got_idx]

        assert got_idx == exp_idx
        assert got_name == exp_name
        np.testing.assert_almost_equal(got_weight, exp_weight)
示例#22
0
def cast_op_test(dtype, to_dtype, test_fn, nelem=128):
    """Check a libgdf cast against ``ndarray.astype``.

    Random data of ``dtype`` is uploaded, ``test_fn(col_in, col_out)``
    writes into an output column of ``to_dtype``, and the result must equal
    the host data converted with ``astype(to_dtype)``.
    """
    host_in = gen_rand(dtype, nelem).astype(dtype)
    dev_in = rmm.to_device(host_in)
    dev_out = rmm.device_array(dev_in.size, dtype=to_dtype)

    assert dev_in.dtype == dtype
    assert dev_out.dtype == to_dtype

    col_in = new_column()
    col_out = new_column()

    # Source column, then destination column
    for view, dev_buf, np_type in ((col_in, dev_in, dtype),
                                   (col_out, dev_out, to_dtype)):
        libgdf.gdf_column_view(view, unwrap_devary(dev_buf), ffi.NULL,
                               nelem, get_dtype(np_type))

    expect = host_in.astype(to_dtype)
    test_fn(col_in, col_out)

    got = dev_out.copy_to_host()

    for label, values in (('got', got), ('expect', expect)):
        print(label)
        print(values)
    np.testing.assert_equal(expect, got)
示例#23
0
def math_op_test(dtype,
                 ulp,
                 expect_fn,
                 test_fn,
                 nelem=128,
                 scale=1,
                 positive_only=False):
    """Check a unary libgdf math operation within a ULP tolerance.

    Random input from ``gen_rand`` is multiplied by ``scale`` and cast back
    to ``dtype``. ``test_fn(col_in, col_out)`` runs on the device and its
    output may differ from ``expect_fn(host_input)`` by at most ``ulp``
    units in the last place.
    """
    raw = gen_rand(dtype, nelem, positive_only=positive_only)
    host_in = (raw * scale).astype(dtype)
    dev_in = rmm.to_device(host_in)
    dev_out = rmm.device_array_like(dev_in)

    col_in = new_column()
    col_out = new_column()
    gdf_type = get_dtype(dtype)

    # Source column, then destination column
    for view, dev_buf in ((col_in, dev_in), (col_out, dev_out)):
        libgdf.gdf_column_view(view, unwrap_devary(dev_buf), ffi.NULL,
                               nelem, gdf_type)

    expect = expect_fn(host_in)
    test_fn(col_in, col_out)

    got = dev_out.copy_to_host()

    for label, values in (('got', got), ('expect', expect)):
        print(label)
        print(values)
    np.testing.assert_array_max_ulp(expect, got, maxulp=ulp)
示例#24
0
文件: utils.py 项目: yutiansut/cudf
def buffers_from_pyarrow(pa_arr, dtype=None):
    """Build the mask and data buffers for a pyarrow array.

    Parameters
    ----------
    pa_arr
        pyarrow array whose ``buffers()`` are converted.
    dtype
        Optional dtype used to view the data buffer. When ``None`` the
        pandas dtype of the array type is used (for a
        ``pa.DictionaryArray``, that of its indices).

    Returns
    -------
    tuple
        ``(pamask, padata)``. ``pamask`` is ``None`` when the first Arrow
        buffer is falsy, otherwise a ``Buffer`` over a device mask that the
        Arrow bitmap was copied into. ``padata`` is a ``Buffer`` over the
        data restricted to ``[offset, offset + len(pa_arr))``, or over an
        empty array when the second Arrow buffer is falsy.
    """
    from cudf.dataframe.buffer import Buffer
    from cudf.utils.cudautils import copy_array

    buffers = pa_arr.buffers()

    if buffers[0]:
        mask_dev_array = make_mask(len(pa_arr))
        arrow_dev_array = rmm.to_device(np.array(buffers[0]).view('int8'))
        copy_array(arrow_dev_array, mask_dev_array)
        pamask = Buffer(mask_dev_array)
    else:
        pamask = None

    # Compare against None explicitly: an np.dtype instance without fields
    # has length 0 and therefore tests false, which would silently discard
    # a dtype the caller asked for.
    if dtype is not None:
        new_dtype = dtype
    elif isinstance(pa_arr, pa.DictionaryArray):
        new_dtype = pa_arr.indices.type.to_pandas_dtype()
    else:
        new_dtype = pa_arr.type.to_pandas_dtype()

    if buffers[1]:
        padata = Buffer(
            np.array(buffers[1]).view(new_dtype)[pa_arr.offset:pa_arr.offset +
                                                 len(pa_arr)])
    else:
        padata = Buffer(np.empty(0, dtype=new_dtype))
    return (pamask, padata)
示例#25
0
def test_product(dtype, nelem):
    """Compare the libgdf product reduction with ``np.prod``.

    Signed integer inputs are all ones except for up to 30 randomly chosen
    positions, which are overwritten with a random value from [0, 2) cast to
    the integer dtype. Other dtypes use ``gen_rand``.
    """
    if np.dtype(dtype).kind == 'i':
        data = np.ones(nelem, dtype=dtype)
        # Set at most 30 items to [0..2) to keep the value within 2^32
        for _ in range(30):
            data[random.randrange(nelem)] = random.random() * 2
    else:
        data = gen_rand(dtype, nelem)

    print('max', data.max(), 'min', data.min())
    d_data = rmm.to_device(data)
    d_result = rmm.device_array(libgdf.gdf_reduce_optimal_output_size(),
                                dtype=d_data.dtype)

    col_data = new_column()
    gdf_dtype = get_dtype(dtype)

    libgdf.gdf_column_view(col_data, unwrap_devary(d_data), ffi.NULL, nelem,
                           gdf_dtype)

    libgdf.gdf_product(col_data, unwrap_devary(d_result), d_result.size)
    got = d_result.copy_to_host()[0]
    # np.prod is the supported spelling; the np.product alias is deprecated
    # and no longer exists in NumPy 2.0.
    expect = np.prod(data)

    print('expect:', expect)
    print('got:', got)

    np.testing.assert_array_almost_equal(expect, got)
示例#26
0
def test_gather_single_col():
    """Gathering from a column holding 0..99 must return the gather map."""
    source = columnops.as_column(np.arange(100), dtype=np.int32)
    positions = np.array([0, 1, 2, 3, 5, 8, 13, 21], dtype=np.int32)

    gathered = cpp_copying.apply_gather(source, rmm.to_device(positions))

    # Each source value equals its own position, so output == positions.
    np.testing.assert_array_equal(gathered.to_array(), positions)
示例#27
0
 def serialize(self):
     """Return ``(header, frames)`` with one frame holding ``0..size-1``.

     The frame is created through the library selected by ``self.kind``
     ("numba", "rmm" or "cupy"); any other kind keeps the plain NumPy
     array. The header is always an empty dict.
     """
     kind = self.kind
     host_range = np.arange(self.size)

     if kind == "numba":
         frame = cuda.to_device(host_range)
     elif kind == "rmm":
         frame = rmm.to_device(host_range)
     elif kind == "cupy":
         frame = cupy.asarray(host_range)
     else:
         frame = host_range

     return {}, [frame]
示例#28
0
def column_hash_values(column0, *other_columns, initial_hash_values=None):
    """Hash all values in the given columns.

    Parameters
    ----------
    column0, *other_columns
        Columns to hash together; the result has ``len(column0)`` entries.
    initial_hash_values
        Optional sequence of initial hash values. A non-empty sequence is
        uploaded to the device before hashing; ``None`` or an empty
        sequence is passed through unchanged.

    Returns a new NumericalColumn[int32]
    """
    columns = [column0] + list(other_columns)
    buf = Buffer(rmm.device_array(len(column0), dtype=np.int32))
    result = NumericalColumn(data=buf, dtype=buf.dtype)
    # Test for presence by length rather than truthiness: a NumPy array with
    # several elements raises on bool(), and a one-element array holding 0
    # would be treated as missing.
    if initial_hash_values is not None and len(initial_hash_values) > 0:
        initial_hash_values = rmm.to_device(initial_hash_values)
    cpp_hash.hash_columns(columns, result, initial_hash_values)
    return result
示例#29
0
def test_prefixsum_masked(dtype, nelem):
    """Compare the masked libgdf prefix sum with ``np.cumsum`` on valid rows.

    The input column carries a random validity bitmask. Only the positions
    that the mask selects are compared, with 4 decimals for float32 and 6
    for every other dtype.
    """
    # int8 inputs are restricted to a narrow range
    rand_kwargs = {'low': -2, 'high': 2} if dtype == np.int8 else {}
    host_data = gen_rand(dtype, nelem, **rand_kwargs)

    # One bit per element, rounded up to whole bytes
    mask_nbytes = (nelem + 8 - 1) // 8
    host_mask = gen_rand(np.int8, mask_nbytes)
    placeholder_mask = gen_rand(np.int8, mask_nbytes)

    dev_data = rmm.to_device(host_data)
    dev_mask = rmm.to_device(host_mask)

    dev_out = rmm.device_array(dev_data.size, dtype=dev_data.dtype)
    dev_out_mask = rmm.to_device(placeholder_mask)

    gdf_type = get_dtype(dtype)
    extra_info = ffi.new('gdf_dtype_extra_info*')
    extra_info.time_unit = libgdf.TIME_UNIT_NONE

    col_in = new_column()
    libgdf.gdf_column_view_augmented(col_in, unwrap_devary(dev_data),
                                     unwrap_devary(dev_mask), nelem, gdf_type,
                                     count_nulls(dev_mask, nelem),
                                     extra_info[0])

    col_out = new_column()
    libgdf.gdf_column_view(col_out, unwrap_devary(dev_out),
                           unwrap_devary(dev_out_mask), nelem, gdf_type)

    inclusive = True
    libgdf.gdf_prefixsum(col_in, col_out, inclusive)

    selected = buffer_as_bits(host_mask)[:nelem]
    expect = np.cumsum(host_data[selected])
    got = dev_out.copy_to_host()[selected]
    if not inclusive:
        # An exclusive scan starts at 0 and is shifted by one element.
        expect = expect[:-1]
        assert got[0] == 0
        got = got[1:]

    if dtype == np.float32:
        decimal = 4
    else:
        decimal = 6
    np.testing.assert_array_almost_equal(expect, got, decimal=decimal)
示例#30
0
def test_sum_masked(nelem):
    """Compare the libgdf sum of a masked float64 column with NumPy.

    The reference value is the NumPy sum over the elements selected by the
    random validity bitmask.
    """
    dtype = np.float64
    host_data = gen_rand(dtype, nelem)
    # One bit per element, rounded up to whole bytes
    host_mask = gen_rand(np.int8, (nelem + 8 - 1) // 8)

    dev_data = rmm.to_device(host_data)
    dev_mask = rmm.to_device(host_mask)
    out_size = libgdf.gdf_reduce_optimal_output_size()
    dev_out = rmm.device_array(out_size, dtype=dev_data.dtype)

    col_in = new_column()
    gdf_type = get_dtype(dtype)
    libgdf.gdf_column_view(col_in, unwrap_devary(dev_data),
                           unwrap_devary(dev_mask), nelem, gdf_type)
    libgdf.gdf_sum(col_in, unwrap_devary(dev_out), dev_out.size)

    # The reduced value is read from the first output slot.
    got = dev_out.copy_to_host()[0]
    selected = buffer_as_bits(host_mask)[:nelem]
    expect = host_data[selected].sum()

    np.testing.assert_almost_equal(expect, got)
示例#31
0
文件: test_rmm.py 项目: lucafuji/rmm
def array_tester(dtype, nelem):
    """Round-trip an array through RMM device memory.

    A host array of ``nelem`` elements filled with 3.2 (converted to
    ``dtype``) is uploaded, copied device-to-device into a second
    allocation, copied back to the host and compared with the original.
    """
    # data
    h_in = np.full(nelem, 3.2, dtype)

    d_in = rmm.to_device(h_in)
    d_result = rmm.device_array_like(d_in)

    d_result.copy_to_device(d_in)
    # copy_to_host() returns the host array, so nothing is pre-allocated.
    h_result = d_result.copy_to_host()

    print('expect')
    print(h_in)
    print('got')
    print(h_result)

    np.testing.assert_array_equal(h_result, h_in)
示例#32
0
文件: test_rmm.py 项目: lucafuji/rmm
def test_rmm_csv_log():
    """The RMM CSV log must contain the expected column header line.

    A small upload, allocation and device-to-device copy are performed
    before the log is requested.
    """
    dtype = np.int32
    nelem = 1024

    # Device activity that precedes the log request
    h_in = np.full(nelem, 3.2, dtype)
    d_in = rmm.to_device(h_in)
    d_result = rmm.device_array_like(d_in)
    d_result.copy_to_device(d_in)

    log_text = rmm.csv_log()

    print(log_text[:1000])

    expected_header = ("Event Type,Device ID,Address,Stream,Size (bytes),"
                       "Free Memory,Total Memory,Current Allocs,Start,End,"
                       "Elapsed,Location")
    assert log_text.find(expected_header) != -1