예제 #1
0
def test_clip_to_seconds_series():
    """Clipping a series of nanosecond timestamps yields one whole-second value.

    The series holds 10000 consecutive nanosecond ticks starting at ``stamp``
    seconds past the epoch, so every entry lies within the same second.
    """
    m = IndexMap()
    stamp = 1234567890
    # Nanosecond epoch values need 64 bits; request int64 explicitly rather
    # than relying on the platform's default ``int`` width.
    k = pd.date_range(pd.to_datetime(stamp, unit='s'),
                      periods=10000,
                      freq='ns').to_series().astype('int64')
    clipped = m.clip_to_seconds(k).unique()
    assert len(clipped) == 1
    assert clipped[0] == stamp
예제 #2
0
def test_update(mocker):
    """Check that ``IndexMap.update`` stores every key with a distinct value.

    ``hash_`` is replaced by a seeded pseudo-random stand-in so the test is
    deterministic. The stand-in draws ``len(k)`` values from a range only ten
    times larger, presumably so that hash collisions occur and must be
    resolved by ``update`` (confirm against the ``IndexMap`` implementation).
    """
    m = IndexMap()
    keys = generate_keys(10000)

    def hash_mock(k, salt=0):
        # A different salt gives a different, but still reproducible, draw.
        seed = 123456
        rs = np.random.RandomState(seed=seed + salt)
        return pd.Series(rs.randint(0, len(k) * 10, size=len(k)), index=k)

    # pytest-mock does not support using ``mocker.patch`` results as context
    # managers; the fixture undoes the patch itself when the test finishes.
    mocker.patch.object(m, 'hash_', side_effect=hash_mock)

    m.update(keys)
    assert len(m) == len(keys), "All keys not in mapping"
    assert m._map.index.difference(keys).empty, "All keys not in mapping"
    assert len(m._map.unique()) == len(keys), "Duplicate values in mapping"

    # Can't have duplicate keys.
    with pytest.raises(KeyError):
        m.update(keys)

    new_unique_keys = generate_keys(1000).difference(keys)
    m.update(new_unique_keys)
    expected_size = len(keys) + len(new_unique_keys)
    assert len(m) == expected_size, "All keys not in mapping"
    assert m._map.index.difference(
        keys.union(new_unique_keys)).empty, "All keys not in mapping"
    assert len(m._map.unique()) == expected_size, "Duplicate values in mapping"
def test_convert_to_ten_digit_int():
    """Exercise ``convert_to_ten_digit_int`` on each kind of input column.

    Datetime, integer and float columns must each produce the expected
    ten-digit integer; a string column must raise ``RandomnessError``.
    """
    index_map = IndexMap()
    target = 1234567890
    n_rows = 10000
    row_index = range(n_rows)

    timestamps = pd.date_range(
        pd.to_datetime(target, unit='s'), periods=n_rows, freq='ns',
    ).to_series()
    integers = pd.Series(target, index=row_index)
    floats = pd.Series(1.1234567890, index=row_index)
    strings = pd.Series('a', index=row_index)

    # Each convertible column is paired with the value its first unique
    # converted entry must equal.
    expectations = (
        (timestamps, target),
        (integers, 4072825790),
        (floats, target),
    )
    for column, expected in expectations:
        assert index_map.convert_to_ten_digit_int(column).unique()[0] == expected

    with pytest.raises(RandomnessError):
        index_map.convert_to_ten_digit_int(strings)
예제 #4
0
def test_spread_scalar():
    """``spread`` applied to the scalar 1234567890 must give 4072825790."""
    index_map = IndexMap()
    spread_value = index_map.spread(1234567890)
    assert spread_value == 4072825790
예제 #5
0
def test_clip_to_seconds_scalar():
    """Clipping a scalar nanosecond value keeps its leading ten digits."""
    index_map = IndexMap()
    timestamp = pd.to_datetime("2010-01-25 06:25:31.123456789")
    nanoseconds = timestamp.value
    # The expected result is the first ten decimal digits of the raw value.
    leading_ten_digits = int(str(nanoseconds)[:10])
    assert index_map.clip_to_seconds(nanoseconds) == leading_ten_digits
예제 #6
0
def test_digit_series():
    """``digit`` on a constant series returns one value per digit position.

    For the number 123456789 the expected digit at position ``p`` is
    ``9 - p`` for every ``p`` in 0..9.
    """
    index_map = IndexMap()
    numbers = pd.Series(123456789, index=range(10000))
    for position in range(10):
        distinct = index_map.digit(numbers, position).unique()
        assert len(distinct) == 1
        assert index_map.digit(numbers, position)[0] == 9 - position
예제 #7
0
def test_digit_scalar():
    """``digit`` on the scalar 123456789 returns ``9 - p`` at position ``p``."""
    index_map = IndexMap()
    number = 123456789
    for position in range(10):
        expected_digit = 9 - position
        assert index_map.digit(number, position) == expected_digit
예제 #8
0
def map_size_and_hashed_values(request):
    """Return a fresh ``IndexMap``'s ``map_size`` and the hashes of generated keys.

    ``request.param`` is unpacked as the positional arguments to
    ``generate_keys``; the resulting keys are hashed with ``IndexMap.hash_``.
    """
    generated_keys = generate_keys(*request.param)
    index_map = IndexMap()
    size = index_map.map_size
    hashed = index_map.hash_(generated_keys)
    return size, hashed
예제 #9
0
def test_shift_series():
    """``shift`` on a constant float series gives the single value 1234567890."""
    index_map = IndexMap()
    floats = pd.Series(1.1234567890, index=range(10000))
    distinct_count = len(index_map.shift(floats).unique())
    assert distinct_count == 1
    first_distinct = index_map.shift(floats).unique()[0]
    assert first_distinct == 1234567890
예제 #10
0
def test_shift_scalar():
    """``shift`` applied to the scalar 1.1234567890 must give 1234567890."""
    index_map = IndexMap()
    shifted = index_map.shift(1.1234567890)
    assert shifted == 1234567890
예제 #11
0
def test_spread_series():
    """``spread`` on a constant integer series gives the single value 4072825790."""
    index_map = IndexMap()
    integers = pd.Series(1234567890, index=range(10000))
    distinct_count = len(index_map.spread(integers).unique())
    assert distinct_count == 1
    first_distinct = index_map.spread(integers).unique()[0]
    assert first_distinct == 4072825790