def bitwise_op_test(dtype, expect_fn, test_fn, nelem=128):
    """Check a binary libgdf operation against a host reference, exactly.

    Two random host arrays of ``dtype`` are copied to the device and wrapped
    in columns, ``test_fn(col_lhs, col_rhs, col_result)`` is invoked, and the
    device result is compared element-for-element with
    ``expect_fn(h_lhs, h_rhs)``.

    Parameters
    ----------
    dtype : element type used for both inputs and for the result column.
    expect_fn : callable taking the two host arrays, returning the expectation.
    test_fn : callable taking the lhs, rhs and result columns.
    nelem : number of elements per column.
    """
    host_lhs = gen_rand(dtype, nelem)
    host_rhs = gen_rand(dtype, nelem)

    dev_lhs = rmm.to_device(host_lhs)
    dev_rhs = rmm.to_device(host_rhs)
    dev_out = rmm.device_array_like(dev_lhs)

    col_lhs = new_column()
    col_rhs = new_column()
    col_result = new_column()
    gdf_dtype = get_dtype(dtype)

    # Every column shares nelem and gdf_dtype; ffi.NULL is passed in the
    # slot where the masked tests of this file pass a validity buffer.
    views = ((col_lhs, dev_lhs), (col_rhs, dev_rhs), (col_result, dev_out))
    for column, dev_buffer in views:
        libgdf.gdf_column_view(column, unwrap_devary(dev_buffer), ffi.NULL,
                               nelem, gdf_dtype)

    expect = expect_fn(host_lhs, host_rhs)
    test_fn(col_lhs, col_rhs, col_result)
    got = dev_out.copy_to_host()

    print('got', got, sep='\n')
    print('expect', expect, sep='\n')
    np.testing.assert_array_equal(expect, got)
def logical_op_test(dtype, expect_fn, test_fn, nelem=128, gdf_dtype=None):
    """Check a binary comparison-style libgdf operation against the host.

    Two random host arrays of ``dtype`` are copied to the device and wrapped
    in columns. The result buffer is a boolean device array that is exposed
    to libgdf as a ``GDF_INT8`` column. After ``test_fn`` runs, the device
    result is compared with ``expect_fn(h_lhs, h_rhs)``.

    Parameters
    ----------
    dtype : element type of the two input arrays.
    expect_fn : callable taking the two host arrays, returning the expectation.
    test_fn : callable taking the lhs, rhs and result columns.
    nelem : number of elements per column.
    gdf_dtype : optional explicit gdf dtype for the input columns; when
        ``None`` it is derived from ``dtype`` via ``get_dtype``.
    """
    h_lhs = gen_rand(dtype, nelem)
    h_rhs = gen_rand(dtype, nelem)
    d_lhs = rmm.to_device(h_lhs)
    d_rhs = rmm.to_device(h_rhs)
    # np.bool_ instead of the np.bool alias: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    d_result = rmm.device_array(d_lhs.size, dtype=np.bool_)

    col_lhs = new_column()
    col_rhs = new_column()
    col_result = new_column()
    if gdf_dtype is None:
        gdf_dtype = get_dtype(dtype)

    libgdf.gdf_column_view(col_lhs, unwrap_devary(d_lhs), ffi.NULL, nelem,
                           gdf_dtype)
    libgdf.gdf_column_view(col_rhs, unwrap_devary(d_rhs), ffi.NULL, nelem,
                           gdf_dtype)
    # The boolean result buffer is declared to libgdf as GDF_INT8.
    libgdf.gdf_column_view(col_result, unwrap_devary(d_result), ffi.NULL,
                           nelem, libgdf.GDF_INT8)

    expect = expect_fn(h_lhs, h_rhs)
    test_fn(col_lhs, col_rhs, col_result)
    got = d_result.copy_to_host()

    print(expect, got)
    np.testing.assert_equal(expect, got)
def test_sum_of_squares(dtype, nelem):
    """Compare ``libgdf.gdf_sum_of_squares`` with ``(data**2).sum()``.

    ``dtype`` and ``nelem`` are test parameters (their source is not visible
    in this function). For signed-integer dtypes the comparison is skipped
    when the host expectation falls outside ``[0, iinfo(dtype).max]``.
    """
    host_data = gen_rand(dtype, nelem)
    dev_data = rmm.to_device(host_data)
    dev_out = rmm.device_array(
        libgdf.gdf_reduction_get_intermediate_output_size(),
        dtype=dev_data.dtype)

    col_data = new_column()
    gdf_dtype = get_dtype(dtype)
    libgdf.gdf_column_view(col_data, unwrap_devary(dev_data), ffi.NULL,
                           nelem, gdf_dtype)

    libgdf.gdf_sum_of_squares(col_data, unwrap_devary(dev_out), dev_out.size)

    # Only the first slot of the output buffer is read back as the result.
    got = dev_out.copy_to_host()[0]
    expect = (host_data ** 2).sum()

    print('expect:', expect)
    print('got:', got)

    if np.dtype(dtype).kind != 'i':
        np.testing.assert_approx_equal(expect, got,
                                       significant=accuracy_for_dtype[dtype])
        return

    # Signed integers: an expectation outside the representable
    # non-negative range is treated as overflow and not compared.
    if 0 <= expect <= np.iinfo(dtype).max:
        np.testing.assert_array_almost_equal(expect, got)
    else:
        print('overflow, passing')
def test_prefixsum(dtype, nelem):
    """Compare ``libgdf.gdf_prefixsum`` (inclusive) with ``np.cumsum``.

    ``dtype`` and ``nelem`` are test parameters (their source is not visible
    in this function). Results are compared to 4 decimals for float32 and
    6 decimals otherwise.
    """
    # int8 inputs are drawn from [-2, 2) to keep data in range
    rand_kwargs = {'low': -2, 'high': 2} if dtype == np.int8 else {}
    data = gen_rand(dtype, nelem, **rand_kwargs)

    dev_in = rmm.to_device(data)
    dev_out = rmm.device_array(dev_in.size, dtype=dev_in.dtype)

    col_data = new_column()
    gdf_dtype = get_dtype(dtype)
    libgdf.gdf_column_view(col_data, unwrap_devary(dev_in), ffi.NULL, nelem,
                           gdf_dtype)

    col_result = new_column()
    libgdf.gdf_column_view(col_result, unwrap_devary(dev_out), ffi.NULL,
                           nelem, gdf_dtype)

    inclusive = True
    libgdf.gdf_prefixsum(col_data, col_result, inclusive)

    expect = np.cumsum(dev_in.copy_to_host())
    got = dev_out.copy_to_host()

    # Exclusive-scan handling; not exercised while `inclusive` is True.
    if not inclusive:
        expect = expect[:-1]
        assert got[0] == 0
        got = got[1:]

    if dtype == np.float32:
        decimal = 4
    else:
        decimal = 6
    np.testing.assert_array_almost_equal(expect, got, decimal=decimal)
def test_product(dtype, nelem):
    """Compare ``libgdf.gdf_product`` with NumPy's product of the same data.

    ``dtype`` and ``nelem`` are test parameters (their source is not visible
    in this function). Signed-integer inputs start as all ones with up to
    30 randomly chosen slots overwritten; other dtypes use ``gen_rand``.
    """
    if np.dtype(dtype).kind == 'i':
        data = np.ones(nelem, dtype=dtype)
        # Set at most 30 items to [0..2) to keep the value within 2^32
        for _ in range(30):
            data[random.randrange(nelem)] = random.random() * 2
    else:
        data = gen_rand(dtype, nelem)

    print('max', data.max(), 'min', data.min())

    d_data = rmm.to_device(data)
    d_result = rmm.device_array(
        libgdf.gdf_reduction_get_intermediate_output_size(),
        dtype=d_data.dtype)

    col_data = new_column()
    gdf_dtype = get_dtype(dtype)
    libgdf.gdf_column_view(col_data, unwrap_devary(d_data), ffi.NULL, nelem,
                           gdf_dtype)

    libgdf.gdf_product(col_data, unwrap_devary(d_result), d_result.size)

    # Only the first slot of the output buffer is read back as the result.
    got = d_result.copy_to_host()[0]
    # np.prod replaces the np.product alias, which was deprecated in
    # NumPy 1.25 and removed in NumPy 2.0.
    expect = np.prod(data)

    print('expect:', expect)
    print('got:', got)

    np.testing.assert_approx_equal(expect, got)
def math_op_test(dtype, ulp, expect_fn, test_fn, nelem=128, scale=1,
                 positive_only=False):
    """Check a unary libgdf math operation against a host reference.

    Random input (optionally positive-only) is multiplied by ``scale``, cast
    back to ``dtype`` and copied to the device. ``test_fn(col_data,
    col_result)`` is run and its output must be within ``ulp`` units in the
    last place of ``expect_fn(h_data)``.

    Parameters
    ----------
    dtype : element type of both the input and the result column.
    ulp : maximum allowed ULP distance between expectation and result.
    expect_fn : callable taking the host input, returning the expectation.
    test_fn : callable taking the data column and the result column.
    nelem : number of elements.
    scale : factor applied to the random values before the cast to ``dtype``.
    positive_only : forwarded to ``gen_rand``.
    """
    raw_values = gen_rand(dtype, nelem, positive_only=positive_only)
    host_in = (raw_values * scale).astype(dtype)

    dev_in = rmm.to_device(host_in)
    dev_out = rmm.device_array_like(dev_in)

    col_data = new_column()
    col_result = new_column()
    gdf_dtype = get_dtype(dtype)

    # Data column first, then result column; both share size and dtype.
    for column, dev_buffer in ((col_data, dev_in), (col_result, dev_out)):
        libgdf.gdf_column_view(column, unwrap_devary(dev_buffer), ffi.NULL,
                               nelem, gdf_dtype)

    expect = expect_fn(host_in)
    test_fn(col_data, col_result)
    got = dev_out.copy_to_host()

    print('got', got, sep='\n')
    print('expect', expect, sep='\n')
    np.testing.assert_array_max_ulp(expect, got, maxulp=ulp)
def cast_op_test(dtype, to_dtype, test_fn, nelem=128):
    """Check a libgdf cast operation against ``ndarray.astype``.

    Parameters
    ----------
    dtype : element type of the source column.
    to_dtype : element type of the destination column.
    test_fn : callable taking the data column and the result column.
    nelem : number of elements.
    """
    host_in = gen_rand(dtype, nelem).astype(dtype)
    dev_in = rmm.to_device(host_in)
    dev_out = rmm.device_array(dev_in.size, dtype=to_dtype)

    # Sanity-check that the device buffers carry the requested dtypes.
    assert dev_in.dtype == dtype
    assert dev_out.dtype == to_dtype

    col_data = new_column()
    col_result = new_column()

    # Source column uses the input dtype, destination the target dtype.
    libgdf.gdf_column_view(col_data, unwrap_devary(dev_in), ffi.NULL, nelem,
                           get_dtype(dtype))
    libgdf.gdf_column_view(col_result, unwrap_devary(dev_out), ffi.NULL,
                           nelem, get_dtype(to_dtype))

    expect = host_in.astype(to_dtype)
    test_fn(col_data, col_result)
    got = dev_out.copy_to_host()

    print('got', got, sep='\n')
    print('expect', expect, sep='\n')
    np.testing.assert_equal(expect, got)
def test_output_dtype_mismatch():
    """Binary ops on int32 inputs with a float32 output must raise GDFError.

    The add, eq and bitwise-and generic entry points are each expected to
    fail with a message matching ``GDF_UNSUPPORTED_DTYPE``.
    """
    lhs_dtype = np.int32
    rhs_dtype = np.int32
    nelem = 5

    host_lhs = np.arange(nelem, dtype=lhs_dtype)
    host_rhs = np.arange(nelem, dtype=rhs_dtype)
    dev_lhs = rmm.to_device(host_lhs)
    dev_rhs = rmm.to_device(host_rhs)
    # Deliberately a different dtype than the inputs.
    dev_out = rmm.device_array(dev_lhs.size, dtype=np.float32)

    col_lhs = new_column()
    col_rhs = new_column()
    col_result = new_column()

    libgdf.gdf_column_view(col_lhs, unwrap_devary(dev_lhs), ffi.NULL, nelem,
                           get_dtype(lhs_dtype))
    libgdf.gdf_column_view(col_rhs, unwrap_devary(dev_rhs), ffi.NULL, nelem,
                           get_dtype(rhs_dtype))
    libgdf.gdf_column_view(col_result, unwrap_devary(dev_out), ffi.NULL,
                           nelem, get_dtype(dev_out.dtype))

    failing_ops = (
        libgdf.gdf_add_generic,
        libgdf.gdf_eq_generic,
        libgdf.gdf_bitwise_and_generic,
    )
    for binary_op in failing_ops:
        with pytest.raises(GDFError) as raises:
            binary_op(col_lhs, col_rhs, col_result)
        raises.match("GDF_UNSUPPORTED_DTYPE")
def arith_op_test(dtype, ulp, expect_fn, test_fn, nelem=128,
                  non_zero_rhs=False):
    """Check a binary libgdf arithmetic operation against a host reference.

    Parameters
    ----------
    dtype : element type of both inputs and of the result column.
    ulp : maximum allowed ULP distance between expectation and result.
    expect_fn : callable taking the two host arrays, returning the expectation.
    test_fn : callable taking the lhs, rhs and result columns.
    nelem : number of elements per column.
    non_zero_rhs : when true, ``fix_zeros`` is applied to the right-hand
        operand before it is copied to the device.
    """
    host_lhs = gen_rand(dtype, nelem)
    host_rhs = gen_rand(dtype, nelem)
    if non_zero_rhs:
        # The return value is ignored, so fix_zeros is relied on to modify
        # host_rhs in place.
        fix_zeros(host_rhs)

    dev_lhs = rmm.to_device(host_lhs)
    dev_rhs = rmm.to_device(host_rhs)
    dev_out = rmm.device_array_like(dev_lhs)

    col_lhs = new_column()
    col_rhs = new_column()
    col_result = new_column()
    gdf_dtype = get_dtype(dtype)

    views = ((col_lhs, dev_lhs), (col_rhs, dev_rhs), (col_result, dev_out))
    for column, dev_buffer in views:
        libgdf.gdf_column_view(column, unwrap_devary(dev_buffer), ffi.NULL,
                               nelem, gdf_dtype)

    expect = expect_fn(host_lhs, host_rhs)
    test_fn(col_lhs, col_rhs, col_result)
    got = dev_out.copy_to_host()

    print('got', got, sep='\n')
    print('expect', expect, sep='\n')
    np.testing.assert_array_max_ulp(expect, got, maxulp=ulp)
def test_radixsort(nelem, descending, dtype):
    """Compare ``libgdf.gdf_radixsort_generic`` with a stable NumPy argsort.

    Random keys are sorted on the device together with an int64 value column
    holding ``0..nelem-1``; afterwards the keys must equal the host-sorted
    keys and the values must equal the host argsort indices.
    """

    def expected_fn(key):
        """Return ``(sorted_keys, sorted_indices)`` computed on the host."""
        is_integer = issubclass(dtype, np.integer)
        if not descending:
            order_key = key
        elif is_integer:
            # Integers are inverted bitwise instead of negated because
            # negation doesn't work on the smallest value of integer,
            # i.e. -((int8)-128) -> -128
            order_key = ~key
        else:
            order_key = -key
        # Use mergesort for stable sort
        order = np.argsort(order_key, kind='mergesort')
        # Returns key, vals
        return key[order], order

    # Make data
    key = gen_rand(dtype, nelem)
    dev_key = rmm.to_device(key)
    col_key = new_column()
    libgdf.gdf_column_view(col_key, unwrap_devary(dev_key), ffi.NULL, nelem,
                           get_dtype(dev_key.dtype))

    val = np.arange(nelem, dtype=np.int64)
    dev_val = rmm.to_device(val)
    col_val = new_column()
    libgdf.gdf_column_view(col_val, unwrap_devary(dev_val), ffi.NULL, nelem,
                           get_dtype(dev_val.dtype))

    key_bytes = dev_key.dtype.itemsize
    val_bytes = dev_val.dtype.itemsize
    # Sort on every bit of the key.
    first_bit = 0
    last_bit = key_bytes * 8

    # Setup plan
    plan = libgdf.gdf_radixsort_plan(nelem, descending, first_bit, last_bit)
    libgdf.gdf_radixsort_plan_setup(plan, key_bytes, val_bytes)
    # Sort
    libgdf.gdf_radixsort_generic(plan, col_key, col_val)
    # Cleanup
    libgdf.gdf_radixsort_plan_free(plan)

    # Check
    got_keys = dev_key.copy_to_host()
    got_vals = dev_val.copy_to_host()
    want_keys, want_vals = expected_fn(key)

    np.testing.assert_array_equal(want_keys, got_keys)
    np.testing.assert_array_equal(want_vals, got_vals)
def _make_hash_input(hash_input, ncols):
    """Yield device-backed columns for the first ``ncols`` input arrays.

    This is a generator that yields exactly once, producing a tuple of
    ``(list_of_columns, unwrapped_initial_hash_values)``. The initial hash
    values are ``arange(ncols)`` as uint32, copied to the device.
    NOTE(review): presumably wrapped as a context manager by a decorator
    that is not visible here -- confirm.
    """
    # Copy every input array to the device first. The list keeps the device
    # arrays referenced for as long as the generator is suspended at the
    # yield below.
    dev_arrays = [rmm.to_device(hash_input[idx]) for idx in range(ncols)]

    columns = []
    for idx, dev_ary in enumerate(dev_arrays):
        host_ary = hash_input[idx]
        column = new_column()
        libgdf.gdf_column_view(column, unwrap_devary(dev_ary), ffi.NULL,
                               host_ary.size, get_dtype(host_ary.dtype))
        columns.append(column)

    initial_hash_values = rmm.to_device(np.arange(ncols, dtype=np.uint32))
    yield columns, unwrap_devary(initial_hash_values)
def test_unsupported_dtype_error():
    """``gdf_sin_generic`` must raise ``GDF_UNSUPPORTED_DTYPE`` here.

    The input buffer holds float32 values but its column is declared as
    ``GDF_INT32``. The result column is declared as ``GDF_FLOAT32`` with
    ``nelem + 10`` elements.
    """
    nelem = 128
    host_in = np.random.random(nelem).astype(np.float32)
    dev_in = rmm.to_device(host_in)
    dev_out = rmm.device_array_like(dev_in)

    col_data = new_column()
    col_result = new_column()

    # Input column: declared dtype intentionally differs from the buffer's.
    libgdf.gdf_column_view(col_data, unwrap_devary(dev_in), ffi.NULL, nelem,
                           libgdf.GDF_INT32)
    libgdf.gdf_column_view(col_result, unwrap_devary(dev_out), ffi.NULL,
                           nelem + 10, libgdf.GDF_FLOAT32)

    with pytest.raises(GDFError) as excinfo:
        libgdf.gdf_sin_generic(col_data, col_result)

    # The exception text must be exactly the error name.
    assert 'GDF_UNSUPPORTED_DTYPE' == str(excinfo.value)
def test_col_mismatch_error():
    """``gdf_sin_generic`` must raise ``GDF_COLUMN_SIZE_MISMATCH`` here.

    Both columns are declared ``GDF_FLOAT32``, but the result column is
    declared with ``nelem + 10`` elements while the input has ``nelem``.
    """
    nelem = 128
    host_in = np.random.random(nelem).astype(np.float32)
    dev_in = rmm.to_device(host_in)
    dev_out = rmm.device_array_like(dev_in)

    col_data = new_column()
    col_result = new_column()

    libgdf.gdf_column_view(col_data, unwrap_devary(dev_in), ffi.NULL, nelem,
                           libgdf.GDF_FLOAT32)
    # Result column: declared size intentionally differs from the input's.
    libgdf.gdf_column_view(col_result, unwrap_devary(dev_out), ffi.NULL,
                           nelem + 10, libgdf.GDF_FLOAT32)

    with pytest.raises(GDFError) as excinfo:
        libgdf.gdf_sin_generic(col_data, col_result)

    # The exception text must be exactly the error name.
    assert 'GDF_COLUMN_SIZE_MISMATCH' == str(excinfo.value)
def test_prefixsum_masked(dtype, nelem):
    """Compare ``libgdf.gdf_prefixsum`` on a masked column with the host.

    ``dtype`` and ``nelem`` are test parameters (their source is not visible
    in this function). The expectation is ``np.cumsum`` over the elements
    selected by the input mask, compared with the device result at those
    same positions.
    """
    # int8 inputs are drawn from [-2, 2), as in the unmasked variant.
    rand_kwargs = {'low': -2, 'high': 2} if dtype == np.int8 else {}
    data = gen_rand(dtype, nelem, **rand_kwargs)

    # One mask bit per element, rounded up to whole bytes.
    mask_bytes = (nelem + 8 - 1) // 8
    mask = gen_rand(np.int8, mask_bytes)
    dummy_mask = gen_rand(np.int8, mask_bytes)

    dev_in = rmm.to_device(data)
    dev_mask = rmm.to_device(mask)
    dev_out = rmm.device_array(dev_in.size, dtype=dev_in.dtype)
    # The result column's mask starts out as random bytes.
    dev_out_mask = rmm.to_device(dummy_mask)

    gdf_dtype = get_dtype(dtype)
    extra_dtype_info = ffi.new('gdf_dtype_extra_info*')
    extra_dtype_info.time_unit = libgdf.TIME_UNIT_NONE

    col_data = new_column()
    libgdf.gdf_column_view_augmented(col_data,
                                     unwrap_devary(dev_in),
                                     unwrap_devary(dev_mask),
                                     nelem,
                                     gdf_dtype,
                                     count_nulls(dev_mask, nelem),
                                     extra_dtype_info[0])

    col_result = new_column()
    libgdf.gdf_column_view(col_result,
                           unwrap_devary(dev_out),
                           unwrap_devary(dev_out_mask),
                           nelem,
                           gdf_dtype)

    inclusive = True
    libgdf.gdf_prefixsum(col_data, col_result, inclusive)

    # Expand the mask bytes to one boolean per element, dropping padding.
    boolmask = buffer_as_bits(mask)[:nelem]
    expect = np.cumsum(data[boolmask])
    got = dev_out.copy_to_host()[boolmask]

    # Exclusive-scan handling; not exercised while `inclusive` is True.
    if not inclusive:
        expect = expect[:-1]
        assert got[0] == 0
        got = got[1:]

    if dtype == np.float32:
        decimal = 4
    else:
        decimal = 6
    np.testing.assert_array_almost_equal(expect, got, decimal=decimal)
def test_max(dtype, nelem):
    """Compare ``libgdf.gdf_max`` with ``ndarray.max`` on random data.

    ``dtype`` and ``nelem`` are test parameters (their source is not visible
    in this function).
    """
    host_data = gen_rand(dtype, nelem)
    dev_data = rmm.to_device(host_data)
    dev_out = rmm.device_array(
        libgdf.gdf_reduction_get_intermediate_output_size(),
        dtype=dev_data.dtype)

    col_data = new_column()
    gdf_dtype = get_dtype(dtype)
    libgdf.gdf_column_view(col_data, unwrap_devary(dev_data), ffi.NULL,
                           nelem, gdf_dtype)

    libgdf.gdf_max(col_data, unwrap_devary(dev_out), dev_out.size)

    # Only the first slot of the output buffer is read back as the result.
    got = dev_out.copy_to_host()[0]
    expect = host_data.max()

    print('expect:', expect)
    print('got:', got)

    assert expect == got
def test_sum(dtype, nelem):
    """Compare ``libgdf.gdf_sum`` with ``ndarray.sum`` on random data.

    ``dtype`` and ``nelem`` are test parameters (their source is not visible
    in this function). The host sum is converted with ``dtype(...)`` before
    the comparison; 4 significant digits are required for float32 and 6
    otherwise.
    """
    host_data = gen_rand(dtype, nelem)
    dev_data = rmm.to_device(host_data)
    dev_out = rmm.device_array(
        libgdf.gdf_reduction_get_intermediate_output_size(),
        dtype=dev_data.dtype)

    col_data = new_column()
    gdf_dtype = get_dtype(dtype)
    libgdf.gdf_column_view(col_data, unwrap_devary(dev_data), ffi.NULL,
                           nelem, gdf_dtype)

    libgdf.gdf_sum(col_data, unwrap_devary(dev_out), dev_out.size)

    # Only the first slot of the output buffer is read back as the result.
    got = dev_out.copy_to_host()[0]
    expect = dtype(host_data.sum())

    print('expect:', expect)
    print('got:', got)

    if dtype == np.float32:
        significant = 4
    else:
        significant = 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
def _call_hash_multi(api, ncols, col_input, magic, initial_hash_values,
                     nrows):
    """Invoke a multi-column hash entry point and return its host result.

    An int32 output column of ``nrows`` zeros is created on the device and
    ``api(ncols, col_input, magic, initial_hash_values, col_out)`` is called.
    The output buffer is then copied back, printed and returned.
    """
    host_out = np.zeros(nrows, dtype=np.int32)
    dev_out = rmm.to_device(host_out)

    col_out = new_column()
    libgdf.gdf_column_view(col_out, unwrap_devary(dev_out), ffi.NULL,
                           host_out.size, get_dtype(dev_out.dtype))

    api(ncols, col_input, magic, initial_hash_values, col_out)

    hashed_result = dev_out.copy_to_host()
    print(hashed_result)
    return hashed_result
def test_sum_masked(nelem):
    """Compare ``libgdf.gdf_sum`` on a masked float64 column with the host.

    ``nelem`` is a test parameter (its source is not visible in this
    function). The expectation is the sum of the elements selected by the
    mask bits.
    """
    dtype = np.float64
    host_data = gen_rand(dtype, nelem)
    # One mask bit per element, rounded up to whole bytes.
    host_mask = gen_rand(np.int8, (nelem + 8 - 1) // 8)

    dev_data = rmm.to_device(host_data)
    dev_mask = rmm.to_device(host_mask)
    dev_out = rmm.device_array(
        libgdf.gdf_reduction_get_intermediate_output_size(),
        dtype=dev_data.dtype)

    col_data = new_column()
    gdf_dtype = get_dtype(dtype)
    libgdf.gdf_column_view(col_data, unwrap_devary(dev_data),
                           unwrap_devary(dev_mask), nelem, gdf_dtype)

    libgdf.gdf_sum(col_data, unwrap_devary(dev_out), dev_out.size)

    # Only the first slot of the output buffer is read back as the result.
    got = dev_out.copy_to_host()[0]

    # Expand the mask bytes to one boolean per element, dropping padding.
    selected = buffer_as_bits(host_mask)[:nelem]
    expect = host_data[selected].sum()

    np.testing.assert_approx_equal(expect, got)
def test_digitize(num_rows, num_bins, right, dtype):
    """Compare ``libgdf.gdf_digitize`` with ``np.digitize``.

    All arguments are test parameters (their source is not visible in this
    function). The bin edges are random values that are sorted and
    de-duplicated, so the bins column may hold fewer than ``num_bins``
    entries.
    """
    host_values = gen_rand(dtype, num_rows)
    dev_values = rmm.to_device(host_values)
    col_in = new_column()
    libgdf.gdf_column_view(col_in, unwrap_devary(dev_values), ffi.NULL,
                           num_rows, get_dtype(dev_values.dtype))

    # Sorted, unique bin edges.
    raw_bins = gen_rand(dtype, num_bins)
    raw_bins.sort()
    host_bins = np.unique(raw_bins)
    dev_bins = rmm.to_device(host_bins)
    bins = new_column()
    libgdf.gdf_column_view(bins, unwrap_devary(dev_bins), ffi.NULL,
                           len(host_bins), get_dtype(dev_bins.dtype))

    # The output is a raw int32 device buffer, not a column.
    dev_out = rmm.to_device(np.zeros(num_rows, dtype=np.int32))
    libgdf.gdf_digitize(col_in, bins, right, unwrap_devary(dev_out))

    result = dev_out.copy_to_host()
    expected = np.digitize(host_values, host_bins, right)
    np.testing.assert_array_equal(expected, result)
def test_validity_add(dtype, nelem):
    """Check ``gdf_add_generic`` on two masked columns.

    ``dtype`` and ``nelem`` are test parameters (their source is not visible
    in this function). ``gdf_validity_and`` is run first; the result mask
    must equal the bitwise AND of the two input masks, and the sums are
    compared only at the positions that mask marks valid.
    """
    # data
    host_lhs = gen_rand(dtype, nelem)
    host_rhs = gen_rand(dtype, nelem)
    dev_lhs = rmm.to_device(host_lhs)
    dev_rhs = rmm.to_device(host_rhs)
    dev_out = rmm.device_array_like(dev_lhs)

    # valids: one bit per element, rounded up to whole bytes
    mask_bytes = (nelem + 8 - 1) // 8
    host_lhs_valids = gen_rand(np.int8, mask_bytes)
    host_rhs_valids = gen_rand(np.int8, mask_bytes)
    dev_lhs_valids = rmm.to_device(host_lhs_valids)
    dev_rhs_valids = rmm.to_device(host_rhs_valids)
    dev_out_valids = rmm.device_array_like(dev_lhs_valids)

    # columns
    col_lhs = new_column()
    col_rhs = new_column()
    col_result = new_column()
    gdf_dtype = get_dtype(dtype)

    views = (
        (col_lhs, dev_lhs, dev_lhs_valids),
        (col_rhs, dev_rhs, dev_rhs_valids),
        (col_result, dev_out, dev_out_valids),
    )
    for column, dev_values, dev_valids in views:
        libgdf.gdf_column_view(column, unwrap_devary(dev_values),
                               unwrap_devary(dev_valids), nelem, gdf_dtype)

    libgdf.gdf_validity_and(col_lhs, col_rhs, col_result)

    expect = np.add(host_lhs, host_rhs)
    libgdf.gdf_add_generic(col_lhs, col_rhs, col_result)
    got = dev_out.copy_to_host()

    # Ensure validity mask is matching
    expect_valids = host_lhs_valids & host_rhs_valids
    got_valids = dev_out_valids.copy_to_host()
    np.testing.assert_array_equal(expect_valids, got_valids)

    # Masked data
    mask = buffer_as_bits(expect_valids.data)[:expect.size]
    expect_masked = expect[mask]
    got_masked = got[mask]

    print('expect', expect_masked, sep='\n')
    print('got', got_masked, sep='\n')

    np.testing.assert_array_equal(expect_masked, got_masked)
def test_segradixsort(nelem, num_segments, descending, dtype):
    """Compare the segmented radix sort with a per-segment NumPy argsort.

    All arguments are test parameters (their source is not visible in this
    function). ``num_segments`` distinct, sorted begin offsets are sampled
    from ``range(nelem)``; each segment ends where the next one begins and
    the last one ends at ``nelem``. Every segment is verified separately.
    """

    def expected_fn(key):
        """Return ``(sorted_keys, sorted_indices)`` computed on the host."""
        is_integer = issubclass(dtype, np.integer)
        if not descending:
            order_key = key
        elif is_integer:
            # Integers are inverted bitwise instead of negated because
            # negation doesn't work on the smallest value of integer,
            # i.e. -((int8)-128) -> -128
            order_key = ~key
        else:
            order_key = -key
        # Use mergesort for stable sort
        order = np.argsort(order_key, kind='mergesort')
        # Returns key, vals
        return key[order], order

    def make_segments(n, k):
        """Return ``k`` distinct offsets from ``range(n)`` in ascending order."""
        return sorted(random.sample(list(range(n)), k))

    begin_offsets = np.asarray(make_segments(nelem, num_segments),
                               dtype=np.uint32)
    end_offsets = np.asarray(begin_offsets.tolist()[1:] + [nelem],
                             dtype=begin_offsets.dtype)

    # Make data
    key = gen_rand(dtype, nelem)
    dev_key = rmm.to_device(key)
    col_key = new_column()
    libgdf.gdf_column_view(col_key, unwrap_devary(dev_key), ffi.NULL, nelem,
                           get_dtype(dev_key.dtype))

    val = np.arange(nelem, dtype=np.int64)
    dev_val = rmm.to_device(val)
    col_val = new_column()
    libgdf.gdf_column_view(col_val, unwrap_devary(dev_val), ffi.NULL, nelem,
                           get_dtype(dev_val.dtype))

    dev_begin_offsets = rmm.to_device(begin_offsets)
    dev_end_offsets = rmm.to_device(end_offsets)

    key_bytes = dev_key.dtype.itemsize
    val_bytes = dev_val.dtype.itemsize
    # Sort on every bit of the key.
    first_bit = 0
    last_bit = key_bytes * 8

    # Setup plan
    plan = libgdf.gdf_segmented_radixsort_plan(nelem, descending, first_bit,
                                               last_bit)
    libgdf.gdf_segmented_radixsort_plan_setup(plan, key_bytes, val_bytes)

    # Sort
    libgdf.gdf_segmented_radixsort_generic(plan, col_key, col_val,
                                           num_segments,
                                           unwrap_devary(dev_begin_offsets),
                                           unwrap_devary(dev_end_offsets))

    # Cleanup
    libgdf.gdf_segmented_radixsort_plan_free(plan)

    # Check
    got_keys = dev_key.copy_to_host()
    got_vals = dev_val.copy_to_host()

    # Check a segment at a time
    for seg_begin, seg_end in zip(begin_offsets, end_offsets):
        segment = key[seg_begin:seg_end]
        exp_keys, exp_vals = expected_fn(segment)
        # argsort indices are segment-relative; shift them to absolute
        # positions to match the device values.
        exp_vals += seg_begin
        np.testing.assert_array_equal(exp_keys, got_keys[seg_begin:seg_end])
        np.testing.assert_array_equal(exp_vals, got_vals[seg_begin:seg_end])