Пример #1
0
def test_sample_k_bigger_than_bag_size():
    """Requesting k=4 samples from a 3-element bag must raise ValueError."""
    population = range(3)
    bag = db.from_sequence(population, npartitions=3)
    expected_message = "Sample larger than population or is negative"
    with pytest.raises(ValueError, match=expected_message):
        # Build and compute inside the context so the error is caught
        # whichever of the two steps raises it.
        oversized = random.sample(bag, k=4)
        oversized.compute()
Пример #2
0
def test_sample_k_equal_bag_size_with_unbalanced_partitions():
    """Sampling k=10 from 10 elements split (9, 1) returns each one once."""
    population = range(10)
    bag = db.from_sequence(population, partition_size=9)
    drawn = random.sample(bag, k=10).compute()
    sampled = list(drawn)
    partition_sizes = bag.map_partitions(len).compute()
    # Confirm the fixture really is unbalanced before checking the sample.
    assert partition_sizes == (9, 1)
    assert len(sampled) == 10
    for item in sampled:
        assert item in population
    # No element may be drawn twice.
    assert len(sampled) == len(set(sampled))
Пример #3
0
def test_sample_size_k_bigger_than_smallest_partition_size():
    """Sampling k=2 works when the smallest partition holds one element."""
    population = range(10)
    bag = db.from_sequence(population, partition_size=9)
    drawn = random.sample(bag, k=2).compute()
    sampled = list(drawn)
    partition_sizes = bag.map_partitions(len).compute()
    # The second partition (size 1) is smaller than k.
    assert partition_sizes == (9, 1)
    assert len(sampled) == 2
    for item in sampled:
        assert item in population
    # No element may be drawn twice.
    assert len(sampled) == len(set(sampled))
Пример #4
0
def test_sample_size_exactly_k():
    """Sampling k=2 from 20 elements yields exactly two distinct members."""
    population = range(20)
    bag = db.from_sequence(population, npartitions=3)
    drawn = random.sample(bag, k=2).compute()
    sampled = list(drawn)
    partition_sizes = bag.map_partitions(len).compute()
    assert partition_sizes == (7, 7, 6)
    assert len(sampled) == 2
    for item in sampled:
        assert item in population
    # No element may be drawn twice.
    assert len(sampled) == len(set(sampled))
Пример #5
0
def test_sample_empty_partition():
    """Sampling k=2 works when one of the bag's partitions is empty."""
    population = range(10)
    two_partitions = db.from_sequence(population, partition_size=9)
    bag = two_partitions.repartition(3)
    drawn = random.sample(bag, k=2).compute()
    sampled = list(drawn)
    partition_sizes = bag.map_partitions(len).compute()
    # Repartitioning into three leaves the middle partition empty.
    assert partition_sizes == (9, 0, 1)
    assert len(sampled) == 2
    for item in sampled:
        assert item in population
    # No element may be drawn twice.
    assert len(sampled) == len(set(sampled))
Пример #6
0
def test_sample_return_bag():
    """The uncomputed result of random.sample is itself a db.Bag."""
    population = range(20)
    bag = db.from_sequence(population, npartitions=3)
    result = random.sample(bag, k=2)
    assert isinstance(result, db.Bag)