def test_searchsorted(side, obj_class, vals_class):
    """Check ``searchsorted`` on a cudf object against the pandas result.

    Two masked float64 series of 1000 elements are built from ``gen_rand``
    and ``random_bitmask``. The first one is sorted and used as the object
    being searched; the second one supplies the values to locate.

    Parameters
    ----------
    side
        Forwarded as the second argument of both ``searchsorted`` calls.
    obj_class
        ``"index"`` or ``"column"`` trigger the special handling below; any
        other value leaves the searched object as a Series.
    vals_class
        ``"index"`` triggers the special handling below for the values.
    """
    size = 1000

    # Keep this generation order: data/mask for the searched column first,
    # then data/mask for the values.
    column_data = gen_rand("float64", size)
    column_mask = random_bitmask(size)
    values_data = gen_rand("float64", size)
    values_mask = random_bitmask(size)

    haystack = cudf.Series.from_masked_array(column_data, column_mask)
    needles = cudf.Series.from_masked_array(values_data, values_mask)
    haystack = haystack.sort_values()

    # The searched object may be a Series, an Index, or a Column.
    # NOTE(review): the return value of reset_index() is discarded here and
    # below, so unless reset_index mutates in place the "index" cases still
    # run on a Series -- confirm the intent.
    if obj_class == "index":
        haystack.reset_index(drop=True)
    elif obj_class == "column":
        haystack = haystack._column

    # The values may be a Series or an Index.
    if vals_class == "index":
        needles.reset_index(drop=True)

    expect = haystack.to_pandas().searchsorted(needles.to_pandas(), side)
    got = haystack.searchsorted(needles, side)

    assert_eq(expect, cupy.asnumpy(got))
def test_sum_decimal(dtype, nelem):
    """Compare the sum of a cudf Series cast to ``dtype`` with pandas.

    The input is ``nelem`` values from ``gen_rand("int64", ...)`` divided by
    100 and rendered as strings. The reference result sums the same strings
    converted to ``Decimal`` inside a pandas Series.
    """
    np.random.seed(0)

    scaled = gen_rand("int64", nelem) / 100
    data = [str(value) for value in scaled]

    reference = pd.Series([Decimal(text) for text in data])
    expected = reference.sum()

    converted = cudf.Series(data).astype(dtype)
    got = converted.sum()

    assert_eq(expected, got)
def test_sum_of_squares_decimal(dtype):
    """Compare ``sum_of_squares`` on a cudf Series cast to ``dtype`` with pandas.

    Three values from ``gen_rand("int8", 3)`` are divided by 10 and rendered
    as strings. The reference squares and sums the matching ``Decimal``
    values through pandas.
    """
    np.random.seed(0)

    scaled = gen_rand("int8", 3) / 10
    data = [str(value) for value in scaled]

    reference = pd.Series([Decimal(text) for text in data])
    expected = reference.pow(2).sum()

    converted = cudf.Series(data).astype(dtype)
    got = converted.sum_of_squares()

    assert_eq(expected, got)
def test_product_decimal(dtype):
    """Compare ``product`` on a cudf Series cast to ``dtype`` with pandas.

    Three values from ``gen_rand("int8", 3)`` are divided by 10 and rendered
    as strings. The reference multiplies the matching ``Decimal`` values
    through pandas.
    """
    np.random.seed(0)

    scaled = gen_rand("int8", 3) / 10
    data = [str(value) for value in scaled]

    reference = pd.Series([Decimal(text) for text in data])
    expected = reference.product()

    converted = cudf.Series(data).astype(dtype)
    got = converted.product()

    assert_eq(expected, got)
def test_sum(dtype, nelem):
    """Check that ``Series.sum`` agrees with the sum of the source array.

    The comparison uses 4 significant digits for float32 and 6 for every
    other dtype.
    """
    scalar_type = cudf.dtype(dtype).type
    data = gen_rand(scalar_type, nelem)

    got = Series(data).sum()
    expect = data.sum()

    if scalar_type == np.float32:
        digits = 4
    else:
        digits = 6
    np.testing.assert_approx_equal(expect, got, significant=digits)
def test_max(dtype, nelem):
    """Check that ``Series.max`` equals the maximum of the source array.

    The expected value is the array maximum converted to the scalar type
    obtained from ``cudf.dtype(dtype).type``.
    """
    scalar_type = cudf.dtype(dtype).type
    data = gen_rand(scalar_type, nelem)

    got = Series(data).max()
    expect = scalar_type(data.max())

    assert expect == got
def test_cummin(dtype, nelem):
    """Compare ``cummin`` between cudf and pandas.

    The check runs twice on the same data: once on a standalone series and
    once on a series read back from column ``"a"`` of a DataFrame. Results
    are compared to 4 decimals for float32 and 6 decimals otherwise.
    """
    # int8 input is generated with low=-2, high=2 to keep data in range.
    bounds = {"low": -2, "high": 2} if dtype == np.int8 else {}
    data = gen_rand(dtype, nelem, **bounds)

    places = 4 if dtype == np.float32 else 6

    # Standalone series.
    device_series = cudf.Series(data)
    host_series = pd.Series(data)
    np.testing.assert_array_almost_equal(
        device_series.cummin().to_numpy(),
        host_series.cummin(),
        decimal=places,
    )

    # Named series taken from a DataFrame column.
    device_frame = cudf.DataFrame()
    device_frame["a"] = cudf.Series(data)
    host_frame = pd.DataFrame()
    host_frame["a"] = pd.Series(data)
    np.testing.assert_array_almost_equal(
        device_frame.a.cummin().to_numpy(),
        host_frame.a.cummin(),
        decimal=places,
    )
def test_sum_masked(nelem):
    """Check ``Series.sum`` on a float64 series built with a validity mask.

    The expected value is the sum of the source elements selected by the
    expanded mask (viewed as booleans).
    """
    dtype = np.float64
    data = gen_rand(dtype, nelem)

    mask = utils.random_bitmask(nelem)
    expanded = utils.expand_bits_to_bytes(mask)
    bitmask = expanded[:nelem]
    null_count = utils.count_zero(bitmask)

    got = Series.from_masked_array(data, mask, null_count).sum()

    keep = np.asarray(bitmask, dtype=np.bool_)[: data.size]
    expect = data[keep].sum()

    # dtype is fixed to float64 above, so the comparison always uses
    # 6 significant digits.
    np.testing.assert_approx_equal(expect, got, significant=6)
def test_product(dtype, nelem):
    """Check that ``Series.product`` agrees with ``pandas.Series.product``.

    Integer dtypes (kind ``"u"`` or ``"i"``) use an array of ones in which up
    to 30 randomly chosen slots are overwritten; all other dtypes use
    ``gen_rand``. The comparison uses 4 significant digits for float32 and
    6 otherwise.
    """
    np.random.seed(0)
    scalar_type = cudf.dtype(dtype).type

    is_integer = cudf.dtype(scalar_type).kind in {"u", "i"}
    if not is_integer:
        data = gen_rand(scalar_type, nelem)
    else:
        data = np.ones(nelem, dtype=scalar_type)
        # Set at most 30 items to [0..2) to keep the value within 2^32.
        for _ in range(30):
            # Draw the value before the position so the random stream is
            # consumed in the same order as a single assignment statement
            # (right-hand side first, then the subscript).
            replacement = np.random.uniform() * 2
            position = np.random.randint(low=0, high=nelem, size=1)
            data[position] = replacement

    got = Series(data).product()
    expect = pd.Series(data).product()

    if scalar_type == np.float32:
        digits = 4
    else:
        digits = 6
    np.testing.assert_approx_equal(expect, got, significant=digits)
def test_sum_of_squares(dtype, nelem):
    """Check ``sum_of_squares`` on a Series and on a one-column DataFrame.

    The reference value is ``(data ** 2).sum()`` computed on the source
    array. For integer dtypes the comparison is skipped when the reference
    falls outside ``[0, iinfo(dtype).max]``; for other dtypes the number of
    significant digits comes from ``accuracy_for_dtype``.
    """
    scalar_type = cudf.dtype(dtype).type
    data = gen_rand(scalar_type, nelem)

    sr = Series(data)
    df = cudf.DataFrame(sr)

    got = sr.sum_of_squares()
    got_df = df.sum_of_squares()
    expect = (data ** 2).sum()

    is_integer = cudf.dtype(scalar_type).kind in {"u", "i"}

    if not is_integer:
        digits = accuracy_for_dtype[scalar_type]
        np.testing.assert_approx_equal(expect, got, significant=digits)
        np.testing.assert_approx_equal(
            expect, got_df.iloc[0], significant=digits
        )
        return

    if not (0 <= expect <= np.iinfo(scalar_type).max):
        # The reference itself is out of range, so there is nothing
        # meaningful to compare against.
        print("overflow, passing")
        return

    np.testing.assert_array_almost_equal(expect, got)
    np.testing.assert_array_almost_equal(expect, got_df.iloc[0])
def math_op_test(
    dtype,
    fn,
    nelem=128,
    test_df=False,
    positive_only=False,
    check_dtype=True,
):
    """Apply ``fn`` to matching pandas and cudf inputs and compare results.

    Parameters
    ----------
    dtype
        Passed to ``gen_rand`` and used to cast the generated values.
    fn
        Callable applied once to the pandas input and once to the cudf input.
    nelem
        Number of elements requested from ``gen_rand``.
    test_df
        When true, ``fn`` receives single-column DataFrames (column key
        ``0``) instead of Series.
    positive_only
        Forwarded to ``gen_rand``.
    check_dtype
        Forwarded to ``assert_eq``.
    """
    np.random.seed(0)

    raw = gen_rand(dtype, nelem, positive_only=positive_only)
    host_series = pd.Series(raw.astype(dtype))
    device_series = cudf.Series(host_series)

    if not test_df:
        device_input, host_input = device_series, host_series
    else:
        device_input = cudf.DataFrame()
        device_input[0] = device_series
        host_input = pd.DataFrame()
        host_input[0] = host_series

    expect = fn(host_input)
    got = fn(device_input)

    assert_eq(expect, got, check_dtype=check_dtype)