def test_validity_ceil(nelem):
    """Compare ``Series.ceil`` on a masked Series with ``np.ceil``.

    A Series is built from ``nelem`` random floats and a random bitmask.
    Nulls in the result are filled with a sentinel, and the same sentinel
    is written into the NumPy expectation wherever the expanded mask is
    zero, so the two arrays can be compared element by element.
    """
    # Input values and validity mask.
    values = np.random.random(nelem) * 100
    mask = utils.random_bitmask(nelem)
    valid_bytes = utils.expand_bits_to_bytes(mask)[:nelem]
    # The number of zero entries in the expanded mask is passed as null_count.
    sr = Series.from_masked_array(values, mask, utils.count_zero(valid_bytes))

    # Result, with nulls replaced by a sentinel.
    sentinel = -100000
    got = sr.ceil().fillna(sentinel).to_array()

    # Expectation: plain np.ceil, with the sentinel where the mask is zero.
    is_valid = np.asarray(valid_bytes, dtype=np.bool_)[:values.size]
    expect = np.ceil(values)
    expect[~is_valid] = sentinel

    # Check
    print('expect')
    print(expect)
    print('got')
    print(got)
    np.testing.assert_array_equal(expect, got)
def test_applymap_round(nelem, masked):
    """Check a rounding lambda passed to ``Series.applymap`` against ``np.round``.

    Parameters
    ----------
    nelem : int
        Number of random elements to generate.
    masked : bool
        When true, a random bitmask is applied to the Series and the
        corresponding entries of the reference data are set to NaN.
    """
    # Generate data
    np.random.seed(0)
    data = np.random.random(nelem) * 100

    if masked:
        # Make mask
        bitmask = utils.random_bitmask(nelem)
        # Use np.bool_: the bare ``np.bool`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        boolmask = np.asarray(
            utils.expand_bits_to_bytes(bitmask), dtype=np.bool_
        )[:nelem]
        # Rows with a zero mask entry become NaN in the reference data.
        data[~boolmask] = np.nan

    sr = Series(data)

    if masked:
        # Mask the Series
        sr = sr.set_mask(bitmask)

    # Call applymap with a round-half-up implementation built on floor().
    out = sr.applymap(
        lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x))
    )

    if masked:
        # Fill masked values so they line up with the NaNs written above.
        out = out.fillna(np.nan)

    # Check
    # NOTE(review): np.round rounds exact .5 ties to even while the lambda
    # rounds them up; presumably ties do not occur in this random data.
    expect = np.round(data)
    got = out.to_array()
    np.testing.assert_array_almost_equal(expect, got)
def test_validity_add(nelem, lhs_nulls, rhs_nulls):
    """Check ``lhs + rhs`` for every combination of masked/unmasked operands.

    Parameters
    ----------
    nelem : int
        Number of elements in each operand.
    lhs_nulls, rhs_nulls : str
        ``"some"`` builds that operand from a random bitmask; any other
        value builds an unmasked Series. The expected-mask logic below
        handles the combinations of ``"some"`` and ``"none"``.
    """
    np.random.seed(0)

    # LHS
    lhs_data = np.random.random(nelem)
    if lhs_nulls == "some":
        lhs_mask = utils.random_bitmask(nelem)
        lhs_bitmask = utils.expand_bits_to_bytes(lhs_mask)[:nelem]
        lhs_null_count = utils.count_zero(lhs_bitmask)
        assert lhs_null_count >= 0
        lhs = Series.from_masked_array(lhs_data, lhs_mask)
        # The Series must report the same null count as the mask implies.
        assert lhs.null_count == lhs_null_count
    else:
        lhs = Series(lhs_data)

    # RHS
    rhs_data = np.random.random(nelem)
    if rhs_nulls == "some":
        rhs_mask = utils.random_bitmask(nelem)
        rhs_bitmask = utils.expand_bits_to_bytes(rhs_mask)[:nelem]
        rhs_null_count = utils.count_zero(rhs_bitmask)
        assert rhs_null_count >= 0
        rhs = Series.from_masked_array(rhs_data, rhs_mask)
        assert rhs.null_count == rhs_null_count
    else:
        rhs = Series(rhs_data)

    # Result
    res = lhs + rhs

    # Expected mask: the AND of both masks when both operands are masked,
    # otherwise the mask of whichever operand is masked.
    # np.bool_ is used because the ``np.bool`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    if lhs_nulls == "some" and rhs_nulls == "some":
        res_mask = np.asarray(
            utils.expand_bits_to_bytes(lhs_mask & rhs_mask), dtype=np.bool_
        )[:nelem]
    elif lhs_nulls == "some" and rhs_nulls == "none":
        res_mask = np.asarray(
            utils.expand_bits_to_bytes(lhs_mask), dtype=np.bool_
        )[:nelem]
    elif lhs_nulls == "none" and rhs_nulls == "some":
        res_mask = np.asarray(
            utils.expand_bits_to_bytes(rhs_mask), dtype=np.bool_
        )[:nelem]

    # Fill NA values with a sentinel so both sides are plain arrays.
    na_value = -10000
    got = res.fillna(na_value).to_array()

    expect = lhs_data + rhs_data
    if lhs_nulls == "some" or rhs_nulls == "some":
        expect[~res_mask] = na_value

    np.testing.assert_array_equal(expect, got)
def test_serialize_masked_series():
    """Round-trip a masked Series through ``serialize``/``Series.deserialize``.

    The Series is built from 50 random values and a random bitmask; the
    deserialized result must compare equal to the original via ``assert_eq``.
    """
    nelem = 50
    values = np.random.random(nelem)
    mask = utils.random_bitmask(nelem)

    # The zero entries of the expanded mask give the null count.
    n_null = utils.count_zero(utils.expand_bits_to_bytes(mask)[:nelem])
    assert n_null >= 0

    original = cudf.Series.from_masked_array(values, mask, null_count=n_null)

    # serialize() output is unpacked as the positional args of deserialize().
    serialized = original.serialize()
    restored = cudf.Series.deserialize(*serialized)

    assert_eq(original, restored)
def test_serialize_masked_series():
    """Round-trip a masked Series through ``serialize``/``deserialize``.

    The Series is built from 50 random values and a random bitmask. The
    original and the round-tripped Series are both converted to pandas
    and compared there.
    """
    nelem = 50
    data = np.random.random(nelem)
    mask = utils.random_bitmask(nelem)
    bitmask = utils.expand_bits_to_bytes(mask)[:nelem]
    # The zero entries of the expanded mask give the null count.
    null_count = utils.count_zero(bitmask)
    assert null_count >= 0

    sr = cudf.Series.from_masked_array(data, mask, null_count=null_count)
    outsr = deserialize(*serialize(sr))

    # pd.testing is the public home of the assertion helpers;
    # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
    pd.testing.assert_series_equal(sr.to_pandas(), outsr.to_pandas())
def test_sum_masked(nelem):
    """Compare ``Series.sum`` on a masked Series with a NumPy sum.

    The reference sums only the entries whose expanded mask value is
    non-zero. The comparison uses ``assert_approx_equal`` with a number
    of significant digits chosen from the dtype.
    """
    dtype = np.float64
    values = gen_rand(dtype, nelem)
    mask = utils.random_bitmask(nelem)
    valid_bytes = utils.expand_bits_to_bytes(mask)[:nelem]
    # The number of zero entries in the expanded mask is passed as null_count.
    sr = Series.from_masked_array(values, mask, utils.count_zero(valid_bytes))

    got = sr.sum()

    # Reference: sum over the rows the mask marks as valid.
    is_valid = np.asarray(valid_bytes, dtype=np.bool_)[: values.size]
    expect = values[is_valid].sum()

    # float32 is compared to fewer significant digits than other dtypes.
    if dtype == np.float32:
        significant = 4
    else:
        significant = 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
def test_onehot_masked():
    """Check ``DataFrame.one_hot_encoding`` on a masked integer column.

    Rows with a zero mask entry are set to -1 in the reference array, so
    ``arr == k`` is False there for every category ``k``. Each encoded
    column is expected to equal 1 exactly where the reference array holds
    that category.
    """
    np.random.seed(0)
    high = 5
    size = 100
    arr = np.random.randint(low=0, high=high, size=size)
    bitmask = utils.random_bitmask(size)
    bytemask = np.asarray(
        utils.expand_bits_to_bytes(bitmask)[:size], dtype=np.bool_
    )
    # -1 is outside range(high), so masked rows match no category.
    arr[~bytemask] = -1

    df = DataFrame()
    df['a'] = Series(arr).set_mask(bitmask)

    out = df.one_hot_encoding(
        'a', cats=list(range(high)), prefix='a', dtype=np.int32
    )

    encoded = ('a_0', 'a_1', 'a_2', 'a_3', 'a_4')
    assert tuple(out.columns) == ('a',) + encoded

    for category, column in enumerate(encoded):
        # Convert explicitly with to_array() before handing the result to
        # NumPy, as the other tests here do, instead of relying on
        # implicit Series-to-ndarray conversion.
        np.testing.assert_array_equal(
            (out[column] == 1).to_array(), arr == category
        )
def test_onehot_masked():
    """Check ``DataFrame.one_hot_encoding`` on a masked integer column.

    Rows with a zero mask entry are set to -1 in the reference array, so
    they match none of the categories. Each encoded column, compared
    against 1 and converted with ``to_array()``, must equal the boolean
    array marking where the reference holds that category.
    """
    np.random.seed(0)
    high = 5
    size = 100
    arr = np.random.randint(low=0, high=high, size=size)

    bitmask = utils.random_bitmask(size)
    expanded = utils.expand_bits_to_bytes(bitmask)[:size]
    is_valid = np.asarray(expanded, dtype=np.bool_)
    # -1 is outside range(high), so masked rows match no category.
    arr[~is_valid] = -1

    df = DataFrame()
    df["a"] = Series(arr).set_mask(bitmask)

    out = df.one_hot_encoding(
        "a", cats=list(range(high)), prefix="a", dtype=np.int32
    )

    encoded_names = ("a_0", "a_1", "a_2", "a_3", "a_4")
    assert tuple(out.columns) == ("a",) + encoded_names

    # One assertion per category, in the same order as the columns.
    for category, name in enumerate(encoded_names):
        hits = (out[name] == 1).to_array()
        np.testing.assert_array_equal(hits, arr == category)