Пример #1
0
def test_validity_ceil(nelem):
    """Compare ``Series.ceil`` on a masked series against ``np.ceil``.

    Entries whose mask bit is zero are replaced by a sentinel in both the
    computed result and the NumPy reference, so only the valid slots have
    to agree numerically.

    Parameters
    ----------
    nelem : int
        Number of elements in the generated series.
    """
    sentinel = -100000

    # Random payload in [0, 100) plus a random bitmask for it.
    values = np.random.random(nelem) * 100
    packed_mask = utils.random_bitmask(nelem)
    unpacked_mask = utils.expand_bits_to_bytes(packed_mask)[:nelem]
    n_null = utils.count_zero(unpacked_mask)
    series = Series.from_masked_array(values, packed_mask, n_null)

    # Computed result, with nulls replaced by the sentinel.
    rounded = series.ceil()
    actual = rounded.fillna(sentinel).to_array()

    # NumPy reference, with the same slots replaced by the sentinel.
    is_valid = np.asarray(unpacked_mask, dtype=np.bool_)[:values.size]
    expected = np.ceil(values)
    expected[~is_valid] = sentinel

    # Dump both arrays to stdout before comparing.
    print('expect')
    print(expected)
    print('got')
    print(actual)

    np.testing.assert_array_equal(expected, actual)
Пример #2
0
def test_applymap_round(nelem, masked):
    """Check ``Series.applymap`` using a hand-written rounding lambda.

    The lambda is applied element-wise and the outcome is compared with
    ``np.round`` of the input data.

    Parameters
    ----------
    nelem : int
        Number of random elements to generate.
    masked : bool
        When true, a random bitmask is attached to the series and the
        corresponding input slots are set to NaN so the NumPy reference
        lines up with the null-filled output.
    """
    # Generate data
    np.random.seed(0)
    data = np.random.random(nelem) * 100

    if masked:
        # Make mask
        bitmask = utils.random_bitmask(nelem)
        # ``np.bool_`` is used instead of the ``np.bool`` alias: the alias
        # was deprecated in NumPy 1.20 and is absent from NumPy 1.24-1.26.
        boolmask = np.asarray(utils.expand_bits_to_bytes(bitmask),
                              dtype=np.bool_)[:nelem]
        data[~boolmask] = np.nan

    sr = Series(data)

    if masked:
        # Mask the Series
        sr = sr.set_mask(bitmask)

    # Call applymap
    # NOTE(review): this lambda rounds exact .5 fractions up, whereas
    # ``np.round`` rounds them to the nearest even value; the test relies on
    # the random data not containing exact halves.
    out = sr.applymap(lambda x: (floor(x) + 1
                                 if x - floor(x) >= 0.5 else floor(x)))

    if masked:
        # Fill masked values so they compare equal to the NaNs in ``data``
        out = out.fillna(np.nan)

    # Check
    expect = np.round(data)
    got = out.to_array()
    np.testing.assert_array_almost_equal(expect, got)
Пример #3
0
def test_validity_add(nelem, lhs_nulls, rhs_nulls):
    """Check null propagation when adding two series.

    Each operand is either a plain series or one built with a random
    bitmask. The sum is compared against the NumPy sum, with a sentinel
    written wherever the expected result mask is zero.

    Parameters
    ----------
    nelem : int
        Number of elements in each operand.
    lhs_nulls, rhs_nulls : str
        ``"some"`` builds that operand with a random bitmask; any other
        value builds it without one (the mask selection below only
        recognises ``"none"`` as the counterpart value).
    """
    np.random.seed(0)
    # LHS
    lhs_data = np.random.random(nelem)
    if lhs_nulls == "some":
        lhs_mask = utils.random_bitmask(nelem)
        lhs_bitmask = utils.expand_bits_to_bytes(lhs_mask)[:nelem]
        lhs_null_count = utils.count_zero(lhs_bitmask)
        assert lhs_null_count >= 0
        lhs = Series.from_masked_array(lhs_data, lhs_mask)
        assert lhs.null_count == lhs_null_count
    else:
        lhs = Series(lhs_data)
    # RHS
    rhs_data = np.random.random(nelem)
    if rhs_nulls == "some":
        rhs_mask = utils.random_bitmask(nelem)
        rhs_bitmask = utils.expand_bits_to_bytes(rhs_mask)[:nelem]
        rhs_null_count = utils.count_zero(rhs_bitmask)
        assert rhs_null_count >= 0
        rhs = Series.from_masked_array(rhs_data, rhs_mask)
        assert rhs.null_count == rhs_null_count
    else:
        rhs = Series(rhs_data)
    # Result
    res = lhs + rhs

    # Pick the packed mask the result is expected to carry: the bitwise AND
    # when both operands are masked, otherwise the mask of the masked side.
    if lhs_nulls == "some" and rhs_nulls == "some":
        expected_bits = lhs_mask & rhs_mask
    elif lhs_nulls == "some" and rhs_nulls == "none":
        expected_bits = lhs_mask
    elif lhs_nulls == "none" and rhs_nulls == "some":
        expected_bits = rhs_mask

    # Fill NA values
    na_value = -10000
    got = res.fillna(na_value).to_array()
    expect = lhs_data + rhs_data
    if lhs_nulls == "some" or rhs_nulls == "some":
        # ``np.bool_`` is used instead of the ``np.bool`` alias: the alias
        # was deprecated in NumPy 1.20 and is absent from NumPy 1.24-1.26.
        res_mask = np.asarray(
            utils.expand_bits_to_bytes(expected_bits), dtype=np.bool_
        )[:nelem]
        expect[~res_mask] = na_value

    np.testing.assert_array_equal(expect, got)
Пример #4
0
def test_serialize_masked_series():
    """Round-trip a masked series through ``serialize``/``deserialize``.

    A 50-element series with a random bitmask is serialized with its own
    ``serialize`` method, rebuilt with ``cudf.Series.deserialize``, and the
    rebuilt series is compared with the original.
    """
    size = 50
    values = np.random.random(size)
    packed_mask = utils.random_bitmask(size)
    unpacked_mask = utils.expand_bits_to_bytes(packed_mask)[:size]
    n_null = utils.count_zero(unpacked_mask)
    assert n_null >= 0

    original = cudf.Series.from_masked_array(
        values, packed_mask, null_count=n_null
    )
    payload = original.serialize()
    restored = cudf.Series.deserialize(*payload)
    assert_eq(original, restored)
Пример #5
0
def test_serialize_masked_series():
    """Round-trip a masked series through ``serialize``/``deserialize``.

    A 50-element series with a random bitmask is passed through the
    free-standing ``serialize`` and ``deserialize`` functions; the original
    and the rebuilt series are then compared after conversion to pandas.
    """
    nelem = 50
    data = np.random.random(nelem)
    mask = utils.random_bitmask(nelem)
    bitmask = utils.expand_bits_to_bytes(mask)[:nelem]
    null_count = utils.count_zero(bitmask)
    assert null_count >= 0
    sr = cudf.Series.from_masked_array(data, mask, null_count=null_count)
    outsr = deserialize(*serialize(sr))
    # ``pd.testing`` is the public home of the assertion helpers;
    # ``pd.util.testing`` was deprecated in pandas 1.0 and removed in 2.0.
    pd.testing.assert_series_equal(sr.to_pandas(), outsr.to_pandas())
Пример #6
0
def test_sum_masked(nelem):
    """Check ``Series.sum`` on a masked series against a NumPy reference.

    The reference sums only the input values whose mask bit is set.

    Parameters
    ----------
    nelem : int
        Number of elements in the generated series.
    """
    dtype = np.float64
    values = gen_rand(dtype, nelem)

    packed_mask = utils.random_bitmask(nelem)
    unpacked_mask = utils.expand_bits_to_bytes(packed_mask)[:nelem]
    n_null = utils.count_zero(unpacked_mask)

    series = Series.from_masked_array(values, packed_mask, n_null)
    actual = series.sum()

    # Reference: sum over the slots flagged as valid by the mask.
    is_valid = np.asarray(unpacked_mask, dtype=np.bool_)[: values.size]
    expected = values[is_valid].sum()

    # Fewer significant digits are required for float32 than for other
    # dtypes.
    if dtype == np.float32:
        digits = 4
    else:
        digits = 6
    np.testing.assert_approx_equal(expected, actual, significant=digits)
Пример #7
0
def test_onehot_masked():
    """Check ``DataFrame.one_hot_encoding`` on a column carrying a bitmask.

    Slots whose mask bit is zero are overwritten with ``-1`` in the host
    array, so they match none of the categories ``0..4`` in the reference
    comparisons.
    """
    np.random.seed(0)
    n_cats = 5
    n_rows = 100
    codes = np.random.randint(low=0, high=n_cats, size=n_rows)
    packed_mask = utils.random_bitmask(n_rows)
    unpacked_mask = utils.expand_bits_to_bytes(packed_mask)[:n_rows]
    is_valid = np.asarray(unpacked_mask, dtype=np.bool_)
    codes[~is_valid] = -1

    frame = DataFrame()
    frame['a'] = Series(codes).set_mask(packed_mask)

    encoded = frame.one_hot_encoding(
        'a',
        cats=list(range(n_cats)),
        prefix='a',
        dtype=np.int32,
    )

    indicator_cols = ('a_0', 'a_1', 'a_2', 'a_3', 'a_4')
    assert tuple(encoded.columns) == ('a',) + indicator_cols
    # Each indicator column must be set exactly where the code equals its
    # category index.
    for category, column in enumerate(indicator_cols):
        np.testing.assert_array_equal(encoded[column] == 1, codes == category)
Пример #8
0
def test_onehot_masked():
    """Check ``DataFrame.one_hot_encoding`` on a column carrying a bitmask.

    Slots whose mask bit is zero are overwritten with ``-1`` in the host
    array, so they match none of the categories ``0..4``. Each indicator
    column is converted with ``to_array`` before being compared with the
    NumPy reference.
    """
    np.random.seed(0)
    n_cats = 5
    n_rows = 100
    codes = np.random.randint(low=0, high=n_cats, size=n_rows)
    packed_mask = utils.random_bitmask(n_rows)
    unpacked_mask = utils.expand_bits_to_bytes(packed_mask)[:n_rows]
    is_valid = np.asarray(unpacked_mask, dtype=np.bool_)
    codes[~is_valid] = -1

    frame = DataFrame()
    frame["a"] = Series(codes).set_mask(packed_mask)

    encoded = frame.one_hot_encoding(
        "a",
        cats=list(range(n_cats)),
        prefix="a",
        dtype=np.int32,
    )

    indicator_cols = ("a_0", "a_1", "a_2", "a_3", "a_4")
    assert tuple(encoded.columns) == ("a",) + indicator_cols
    # Each indicator column must be set exactly where the code equals its
    # category index.
    for category, column in enumerate(indicator_cols):
        is_hot = (encoded[column] == 1).to_array()
        np.testing.assert_array_equal(is_hot, codes == category)