def test_validity_add(nelem, lhs_nulls, rhs_nulls):
    """Check that adding two Series combines their validity bitmasks.

    Each operand holds ``nelem`` random values and, when its ``*_nulls``
    argument is ``"some"``, a random packed bitmask. The sum's nulls are
    filled with a sentinel and compared against the NumPy sum of the raw
    data, where the same sentinel is written at every position whose bit in
    the expected result mask is zero.

    Parameters
    ----------
    nelem
        Number of elements in each operand.
    lhs_nulls, rhs_nulls
        ``"some"`` gives that operand a random bitmask; any other value
        builds it without a mask.
    """
    np.random.seed(0)

    def _make_operand(nulls):
        # Build one operand: (raw data, packed bitmask or None, Series).
        data = np.random.random(nelem)
        if nulls != "some":
            return data, None, Series(data)
        mask = utils.random_bitmask(nelem)
        bitmask = utils.expand_bits_to_bytes(mask)[:nelem]
        null_count = utils.count_zero(bitmask)
        assert null_count >= 0
        sr = Series.from_masked_array(data, mask)
        # Zero bits in the mask are what the Series must report as nulls.
        assert sr.null_count == null_count
        return data, mask, sr

    # LHS is built before RHS so the random draws keep a fixed order.
    lhs_data, lhs_mask, lhs = _make_operand(lhs_nulls)
    rhs_data, rhs_mask, rhs = _make_operand(rhs_nulls)

    # Result
    res = lhs + rhs

    # Expected packed mask: AND of both masks when both operands carry one,
    # otherwise whichever mask exists (None when neither operand has nulls).
    if lhs_mask is not None and rhs_mask is not None:
        packed = lhs_mask & rhs_mask
    elif lhs_mask is not None:
        packed = lhs_mask
    else:
        packed = rhs_mask

    # Fill NA values
    na_value = -10000
    got = res.fillna(na_value).to_array()
    expect = lhs_data + rhs_data
    if packed is not None:
        # np.bool_ is the NumPy scalar type; the bare ``np.bool`` alias was
        # deprecated and later removed from NumPy.
        res_mask = np.asarray(
            utils.expand_bits_to_bytes(packed), dtype=np.bool_
        )[:nelem]
        expect[~res_mask] = na_value

    np.testing.assert_array_equal(expect, got)
def test_serialize_masked_series():
    """Round-trip a masked Series through its own serialize/deserialize.

    A 50-element Series is built from random data and a random bitmask,
    serialized with ``Series.serialize`` and rebuilt with
    ``cudf.Series.deserialize``; the rebuilt Series must compare equal to
    the original under ``assert_eq``.
    """
    # NOTE(review): another function with this same name is defined later in
    # this source; if both live in one module, the later definition rebinds
    # the name and this test is never collected -- confirm and rename one.
    size = 50
    values = np.random.random(size)
    packed_mask = utils.random_bitmask(size)

    # Count zero entries in the expanded mask, trimmed to the element count.
    expanded = utils.expand_bits_to_bytes(packed_mask)[:size]
    n_nulls = utils.count_zero(expanded)
    assert n_nulls >= 0

    original = cudf.Series.from_masked_array(
        values, packed_mask, null_count=n_nulls
    )
    payload = original.serialize()
    restored = cudf.Series.deserialize(*payload)
    assert_eq(original, restored)
def test_serialize_masked_series():
    """Round-trip a masked Series through ``serialize``/``deserialize``.

    A 50-element Series is built from random data and a random bitmask,
    passed through the free functions ``serialize`` and ``deserialize``,
    and the pandas conversions of the original and the round-tripped Series
    are required to be equal.
    """
    nelem = 50
    data = np.random.random(nelem)
    mask = utils.random_bitmask(nelem)
    # Expand the packed mask and trim it to nelem entries before counting.
    bitmask = utils.expand_bits_to_bytes(mask)[:nelem]
    null_count = utils.count_zero(bitmask)
    assert null_count >= 0
    sr = cudf.Series.from_masked_array(data, mask, null_count=null_count)
    outsr = deserialize(*serialize(sr))
    # pd.testing is the public home of assert_series_equal; the older
    # pd.util.testing path was deprecated and later removed from pandas.
    pd.testing.assert_series_equal(sr.to_pandas(), outsr.to_pandas())
def test_sum_masked(nelem):
    """Check ``Series.sum`` on a masked Series against a NumPy reference.

    The reference is the sum of only those data elements whose entry in the
    expanded mask is truthy; the two sums must agree to a fixed number of
    significant digits.

    Parameters
    ----------
    nelem
        Number of elements to generate.
    """
    dtype = np.float64
    values = gen_rand(dtype, nelem)
    packed_mask = utils.random_bitmask(nelem)
    valid_bytes = utils.expand_bits_to_bytes(packed_mask)[:nelem]
    n_nulls = utils.count_zero(valid_bytes)

    series = Series.from_masked_array(values, packed_mask, n_nulls)
    got = series.sum()

    # Boolean selector over the data, built from the expanded mask.
    keep = np.asarray(valid_bytes, dtype=np.bool_)[: values.size]
    expect = values[keep].sum()

    # Looser tolerance is reserved for float32; dtype is float64 here.
    if dtype == np.float32:
        digits = 4
    else:
        digits = 6
    np.testing.assert_approx_equal(expect, got, significant=digits)