示例#1
0
def test_local_shuffle(ray_start_regular_shared):
    """Exercise ``local_shuffle`` on a ParallelIterator.

    Checks, in order: items stay inside their original shard, two
    unseeded shuffles produce different orders, a buffer of size 1 leaves
    the order untouched, and the shuffled output of an alternating 0/1
    stream contains every adjacent pair with a frequency above 20%.
    """
    # confirm that no data disappears, and they all stay within the same shard
    it = from_range(8, num_shards=2).local_shuffle(shuffle_buffer_size=2)
    assert repr(it) == ("ParallelIterator[from_range[8, shards=2]" +
                        ".local_shuffle(shuffle_buffer_size=2, seed=None)]")
    shard_0 = it.get_shard(0)
    shard_1 = it.get_shard(1)
    assert set(shard_0) == {0, 1, 2, 3}
    assert set(shard_1) == {4, 5, 6, 7}

    # check that shuffling results in different orders
    it1 = from_range(100, num_shards=10).local_shuffle(shuffle_buffer_size=5)
    it2 = from_range(100, num_shards=10).local_shuffle(shuffle_buffer_size=5)
    assert list(it1.gather_sync()) != list(it2.gather_sync())

    # buffer size of 1 should not result in any shuffling
    it3 = from_range(10, num_shards=1).local_shuffle(shuffle_buffer_size=1)
    assert list(it3.gather_sync()) == list(range(10))

    # statistical test
    it4 = from_items([0, 1] * 10000,
                     num_shards=1).local_shuffle(shuffle_buffer_size=100)
    result = "".join(it4.gather_sync().for_each(str))
    freq_counter = Counter(zip(result[:-1], result[1:]))
    assert len(freq_counter) == 4
    # Normalise by the number of observed pairs. Dividing by
    # len(freq_counter) (always 4 here) would make the check pass for any
    # count >= 1 and therefore verify nothing about the distribution.
    total_pairs = sum(freq_counter.values())
    for pair, count in freq_counter.items():
        assert count / total_pairs > 0.2, (pair, count)
示例#2
0
文件: test_iter.py 项目: rlcyf/ray
def test_repartition_consistent(ray_start_regular_shared):
    """Two identical repartitions must yield the same per-shard contents."""
    # repartition should be deterministic
    first = from_range(9, num_shards=1).repartition(2)
    second = from_range(9, num_shards=1).repartition(2)

    assert first.num_shards() == 2
    assert second.num_shards() == 2

    # Compare shard by shard, ignoring the order inside each shard.
    for shard_index in (0, 1):
        left = set(first.get_shard(shard_index))
        right = set(second.get_shard(shard_index))
        assert left == right
示例#3
0
def test_repartition_consistent(ray_start_regular_shared):
    """Repartition is repeatable and its result can be unioned."""
    # repartition should be deterministic
    first = from_range(9, num_shards=1).repartition(2)
    second = from_range(9, num_shards=1).repartition(2)
    # union should work after repartition
    combined = first.union(second)

    assert first.num_shards() == 2
    assert second.num_shards() == 2
    for shard_index in (0, 1):
        left = set(first.get_shard(shard_index))
        right = set(second.get_shard(shard_index))
        assert left == right

    assert combined.num_shards() == 4
    # Gather in the same order as before: union first, then each operand.
    combined_items = set(combined.gather_async())
    first_items = set(first.gather_async())
    second_items = set(second.gather_async())
    assert combined_items == first_items | second_items
示例#4
0
def test_transform(ray_start_regular_shared):
    """``transform`` applies a generator function on parallel and local iterators."""

    def double(stream):
        # Emit each incoming item multiplied by two.
        for value in stream:
            yield value * 2

    def at_least_two(stream):
        # Drop everything below 2, pass the rest through unchanged.
        for value in stream:
            if value < 2:
                continue
            yield value

    doubled = from_range(4).transform(double)
    expected_repr = "ParallelIterator[from_range[4, shards=2].transform()]"
    assert repr(doubled) == expected_repr
    assert list(doubled.gather_sync()) == [0, 4, 2, 6]

    plain = from_range(4)
    filtered = plain.gather_sync().transform(at_least_two)
    assert list(filtered) == [2, 3]
示例#5
0
def test_local_shuffle(ray_start_regular_shared):
    """``MLDataset.local_shuffle`` reorders batches unless the buffer size is 1."""
    para_it = parallel_it.from_range(100).for_each(lambda x: [x])

    def gather_two_shuffles(batch_size, shuffle_buffer_size):
        # Build a dataset, shuffle it twice independently and collect both.
        dataset = ml_data.from_parallel_iter(para_it, batch_size=batch_size)
        first = dataset.local_shuffle(shuffle_buffer_size=shuffle_buffer_size)
        second = dataset.local_shuffle(shuffle_buffer_size=shuffle_buffer_size)
        first_frames = list(first.gather_sync())
        second_frames = list(second.gather_sync())
        return first_frames, second_frames

    def all_equal(frames_a, frames_b):
        # True when every pair of frames at the same position is equal.
        return all(a.equals(b) for a, b in zip(frames_a, frames_b))

    # batch_size larger than 1 and shuffle_buffer_size larger than 1
    l1, l2 = gather_two_shuffles(10, 5)
    assert not all_equal(l1, l2)

    # batch_size equals 1 and shuffle_buffer_size larger than 1
    l1, l2 = gather_two_shuffles(1, 5)
    assert not all_equal(l1, l2)

    # batch_size equals 1 and shuffle_buffer_size equals 1
    l1, l2 = gather_two_shuffles(1, 1)
    assert all_equal(l1, l2)
示例#6
0
def test_gather_async(ray_start_regular_shared):
    """``gather_async`` returns a LocalIterator holding every source item."""
    gathered = from_range(4).gather_async()
    expected_repr = (
        "LocalIterator[ParallelIterator[from_range[4, shards=2]]"
        ".gather_async()]")
    assert repr(gathered) == expected_repr
    # The test sorts before comparing, so it does not rely on arrival order.
    assert sorted(gathered) == [0, 1, 2, 3]
示例#7
0
def test_get_shard_optimized(ray_start_regular_shared):
    """``get_shard`` with ``batch_ms``/``num_async`` still yields each shard in order."""
    source = from_range(6, num_shards=3)

    # (shard_index, batch_ms, num_async) for each shard under test.
    settings = [(0, 25, 2), (1, 15, 3), (2, 5, 4)]
    # Create every shard iterator first, then consume them, as before.
    shards = [
        source.get_shard(shard_index=index, batch_ms=batch_ms, num_async=num_async)
        for index, batch_ms, num_async in settings
    ]

    expected_contents = [[0, 1], [2, 3], [4, 5]]
    for shard, expected in zip(shards, expected_contents):
        assert list(shard) == expected
示例#8
0
def test_duplicate(ray_start_regular_shared):
    """Duplicated local iterators can be transformed separately and unioned."""
    source = from_range(5, num_shards=1)

    copies = source.gather_sync().duplicate(2)
    batched, plain = copies
    batched = batched.batch(2)

    merged = batched.union(plain, deterministic=False)
    # Ask for more items than exist so the whole stream is drained.
    results = merged.take(20)
    expected = [0, [0, 1], 1, 2, [2, 3], 3, 4, [4]]
    assert results == expected
示例#9
0
def test_repartition_more(ray_start_regular_shared):
    """Repartitioning 2 shards into 3 spreads each source shard with stride 3."""
    repartitioned = from_range(100, 2).repartition(3)
    assert repartitioned.num_shards() == 3

    # Target shard k is expected to hold every third item of both source
    # halves, starting at offset k within each half.
    for shard_index in range(3):
        from_first_half = set(range(shard_index, 50, 3))
        from_second_half = set(range(50 + shard_index, 100, 3))
        actual = set(repartitioned.get_shard(shard_index))
        assert actual == from_first_half | from_second_half
示例#10
0
文件: test_iter.py 项目: rlcyf/ray
def test_repartition_less(ray_start_regular_shared):
    """Repartitioning 3 shards into 2 gives the expected repr and contents."""
    source = from_range(9, num_shards=3)
    repartitioned = source.repartition(2)

    expected_repr = ("ParallelIterator[from_range[9, "
                     "shards=3].repartition[num_partitions=2]]")
    assert repr(repartitioned) == expected_repr

    assert repartitioned.num_shards() == 2
    # Expected members of each resulting shard, compared as sets.
    expected_by_shard = {
        0: {0, 2, 3, 5, 6, 8},
        1: {1, 4, 7},
    }
    for shard_index, expected in expected_by_shard.items():
        assert set(repartitioned.get_shard(shard_index)) == expected
示例#11
0
def test_from_parallel_it(ray_start_regular_shared):
    """``from_parallel_iter`` batches a ParallelIterator into 2x1 frames."""
    para_it = parallel_it.from_range(4).for_each(lambda x: [x])
    dataset = ml_data.from_parallel_iter(para_it, batch_size=2)

    expected_repr = ("MLDataset[from_range[4, shards=2]"
                     ".for_each().batch(2).to_pandas()]")
    assert repr(dataset) == expected_repr

    frames = list(dataset.gather_sync())
    assert len(frames) == 2
    for frame in frames:
        assert frame.shape == (2, 1)

    # The flattened dataset values must match the same batching pipeline
    # applied directly to the parallel iterator.
    expected = para_it.flatten().batch(2).gather_sync().flatten()
    flattened = dataset.gather_sync().for_each(
        lambda x: x[0].to_list()).flatten()
    assert list(flattened) == list(expected)
示例#12
0
def test_repartition_less(ray_start_regular_shared):
    """A ``for_each`` chained after ``repartition`` is applied to every item."""
    source = from_range(9, num_shards=3)
    # chaining operations after a repartition should work
    doubled = source.repartition(2).for_each(lambda x: 2 * x)

    expected_repr = ("ParallelIterator[from_range[9, "
                     "shards=3].repartition[num_partitions=2].for_each()]")
    assert repr(doubled) == expected_repr

    assert doubled.num_shards() == 2
    # Expected (already doubled) members of each shard, compared as sets.
    expected_by_shard = {
        0: {0, 4, 6, 10, 12, 16},
        1: {2, 8, 14},
    }
    for shard_index, expected in expected_by_shard.items():
        assert set(doubled.get_shard(shard_index)) == expected
示例#13
0
def test_union(ray_start_regular_shared):
    """``MLDataset.union`` rejects mismatched repeat flags and tracks batch size."""

    def single_item_lists(repeat):
        # Positional arguments are passed through exactly as in the
        # original calls: from_range(4, 2, <repeat>).
        return parallel_it.from_range(4, 2, repeat).for_each(lambda x: [x])

    base = ml_data.from_parallel_iter(single_item_lists(False), True, 2, False)
    repeated = ml_data.from_parallel_iter(single_item_lists(True), True, 2, True)

    with pytest.raises(TypeError) as ex:
        base.union(repeated)
    assert "two MLDataset which have different repeated type" in str(ex.value)

    # union two MLDataset with same batch size
    same_batch = ml_data.from_parallel_iter(
        single_item_lists(False), True, 2, False)
    merged = base.union(same_batch)
    assert merged.batch_size == 2

    # union two MLDataset with different batch size
    other_batch = ml_data.from_parallel_iter(
        single_item_lists(False), True, 1, False)
    merged = base.union(other_batch)
    # batch_size 0 means batch_size unknown
    assert merged.batch_size == 0
示例#14
0
def test_metrics(ray_start_regular_shared):
    """Check ``StandardMetricsReporting`` output and pacing.

    Pulls two reports and asserts that each has a positive
    ``episode_reward_mean`` and that more than 2.4 seconds elapsed in
    total (the reporter is configured with ``min_iter_time_s`` of 2.5).
    """
    workers = make_workers(1)
    # Always stop the workers, even when an assertion fails, so a failing
    # run does not leave them alive in the shared Ray fixture.
    try:
        workers.foreach_worker(lambda w: w.sample())
        a = from_range(10, repeat=True).gather_sync()
        b = StandardMetricsReporting(
            a, workers, {
                "min_iter_time_s": 2.5,
                "metrics_smoothing_episodes": 10,
                "collect_metrics_timeout": 10,
            })

        start = time.time()
        res1 = next(b)
        assert res1["episode_reward_mean"] > 0, res1
        res2 = next(b)
        assert res2["episode_reward_mean"] > 0, res2
        assert time.time() - start > 2.4
    finally:
        workers.stop()
示例#15
0
    def test_metrics(self):
        """Check ``StandardMetricsReporting`` output and pacing.

        Pulls two reports and asserts that each has a positive
        ``episode_reward_mean`` and that more than 2.4 seconds elapsed in
        total (the reporter is configured with
        ``min_time_s_per_reporting`` of 2.5).
        """
        workers = make_workers(1)
        # Always stop the workers, even when an assertion fails, so a
        # failing run does not leave them alive for later tests.
        try:
            workers.foreach_worker(lambda w: w.sample())
            a = from_range(10, repeat=True).gather_sync()
            b = StandardMetricsReporting(
                a,
                workers,
                {
                    "min_time_s_per_reporting": 2.5,
                    "timesteps_per_iteration": 0,
                    "metrics_num_episodes_for_smoothing": 10,
                    "metrics_episode_collection_timeout_s": 10,
                    "keep_per_episode_custom_metrics": False,
                },
            )

            start = time.time()
            res1 = next(b)
            assert res1["episode_reward_mean"] > 0, res1
            res2 = next(b)
            assert res2["episode_reward_mean"] > 0, res2
            assert time.time() - start > 2.4
        finally:
            workers.stop()
示例#16
0
def test_from_range(ray_start_regular_shared):
    """``from_range(4)`` has the expected repr and ``gather_sync`` order."""
    numbers = from_range(4)
    expected_repr = "ParallelIterator[from_range[4, shards=2]]"
    assert repr(numbers) == expected_repr

    gathered = list(numbers.gather_sync())
    assert gathered == [0, 2, 1, 3]
示例#17
0
def test_batch(ray_start_regular_shared):
    """``batch(2)`` on a single shard groups items into pairs."""
    batched = from_range(4, 1).batch(2)
    expected_repr = "ParallelIterator[from_range[4, shards=1].batch(2)]"
    assert repr(batched) == expected_repr

    gathered = list(batched.gather_sync())
    assert gathered == [[0, 1], [2, 3]]
示例#18
0
def test_gather_async_optimized(ray_start_regular_shared):
    """``gather_async`` with ``batch_ms`` and ``num_async`` returns every item."""
    gathered = from_range(100).gather_async(batch_ms=100, num_async=4)
    # The result is sorted before comparing, so arrival order is not checked.
    expected = list(range(100))
    assert sorted(gathered) == expected
示例#19
0
def test_gather_async_queue(ray_start_regular_shared):
    """``gather_async`` with ``num_async=4`` returns every item."""
    gathered = from_range(100).gather_async(num_async=4)
    # The result is sorted before comparing, so arrival order is not checked.
    expected = list(range(100))
    assert sorted(gathered) == expected
示例#20
0
def test_chain(ray_start_regular_shared):
    """Two chained ``for_each`` calls both apply and both show in the repr."""
    doubled_once = from_range(4).for_each(lambda x: x * 2)
    doubled_twice = doubled_once.for_each(lambda x: x * 2)

    expected_repr = (
        "ParallelIterator[from_range[4, shards=2].for_each().for_each()]")
    assert repr(doubled_twice) == expected_repr

    gathered = list(doubled_twice.gather_sync())
    assert gathered == [0, 8, 4, 12]
示例#21
0
def test_filter(ray_start_regular_shared):
    """``filter`` keeps only the items for which the predicate is true."""
    below_three = from_range(4).filter(lambda x: x < 3)
    expected_repr = "ParallelIterator[from_range[4, shards=2].filter()]"
    assert repr(below_three) == expected_repr

    gathered = list(below_three.gather_sync())
    assert gathered == [0, 2, 1]
示例#22
0
def test_union_local(ray_start_regular_shared):
    """Union of two local iterators contains the items of both."""
    letters = from_items(["a", "b", "c"], 1).gather_async()
    digits = from_range(5, 2).for_each(str).gather_async()

    merged = letters.union(digits)
    # The result is sorted before comparing, so interleaving is not checked.
    expected = ["0", "1", "2", "3", "4", "a", "b", "c"]
    assert sorted(merged) == expected
示例#23
0
def test_combine(ray_start_regular_shared):
    """``combine`` flattens the list returned for each item into the stream."""
    duplicated = from_range(4, 1).combine(lambda x: [x, x])
    expected_repr = "ParallelIterator[from_range[4, shards=1].combine()]"
    assert repr(duplicated) == expected_repr

    gathered = list(duplicated.gather_sync())
    assert gathered == [0, 0, 1, 1, 2, 2, 3, 3]