Exemplo n.º 1
0
def test_play_the_game_less_badly():
    """Measure one bad seed twice in an episode of length two.

    The first measurement must leave the episode running with zero reward;
    the second must end the episode with a reward of 2.0.  After each step
    the counts computed from the environment state must show measurements
    for that single seed only.
    """
    env = BadSeeds01(seed_count=5, bad_seed_count=3, max_episode_length=2)
    target = env.bad_seed_indices[0]

    # first step: a measurement is recorded in row 0, no reward yet
    observed, done, payoff = env.execute(actions=target)
    assert observed[0, target] != 0.0
    assert done is False
    assert payoff == 0.0

    counts, seeds_measured = count_measurements(env.state)
    print(counts)
    expected_counts = np.zeros_like(counts)
    expected_counts[0, target] += 1.0
    assert np.all(counts == expected_counts)
    assert seeds_measured == 1

    # second step: same seed again, recorded in row 1, episode terminates
    observed, done, payoff = env.execute(actions=target)
    assert observed[1, target] != 0.0
    assert done is True
    assert payoff == 2.0

    counts, seeds_measured = count_measurements(env.state)
    print(counts)
    # the same seed has now been measured twice; no other seed was touched
    expected_counts[0, target] += 1.0
    assert np.all(counts == expected_counts)
    assert seeds_measured == 1
Exemplo n.º 2
0
def test_play_the_game_badly():
    """Measure every seed except the last, then re-measure the first seed.

    With five seeds and an episode length of five, the first four steps
    measure seeds 0..3 once each and the fifth step measures seed 0 again.
    The last seed is never measured, so the episode is expected to end with
    zero reward.
    """
    bad_seeds_01_env = BadSeeds01(seed_count=5,
                                  bad_seed_count=3,
                                  max_episode_length=5)
    seed_count = len(bad_seeds_01_env.all_seeds)

    # measure all seeds but the last seed
    for seed_i in range(seed_count - 1):
        # one seed is measured per step, so the time step equals the seed index
        time_i = seed_i
        next_state, terminal, reward = bad_seeds_01_env.execute(actions=seed_i)
        assert next_state[time_i, seed_i] != 0.0
        assert terminal is False
        assert reward == 0.0

        # measurement_counts looks like this
        #   time_i = 0: [1 0 0 0 0 ]
        #   time_i = 1: [1 1 0 0 0 ]
        #   ...
        #   time_i = 3: [1 1 1 1 0 ]
        measurement_counts, measured_seed_counts = count_measurements(
            bad_seeds_01_env.state)
        # seeds 0..seed_i inclusive have been measured exactly once; the
        # upper bound is seed_i + 1 so the seed measured in this very step
        # is verified too
        for seed_j in range(seed_i + 1):
            assert measurement_counts[0, seed_j] == 1
        # every later seed has not been measured yet
        for seed_j in range(seed_i + 1, seed_count):
            assert measurement_counts[0, seed_j] == 0
        assert measured_seed_counts == (seed_i + 1)

    # measure the first seed again
    # no reward because the last seed is never measured
    next_state, terminal, reward = bad_seeds_01_env.execute(actions=0)
    assert next_state[seed_count - 1, 0] != 0.0
    assert terminal is True
    assert reward == 0.0

    measurement_counts, measured_seed_counts = count_measurements(
        bad_seeds_01_env.state)
    assert np.all(measurement_counts == np.array([[2, 1, 1, 1, 0]]))
    assert measured_seed_counts == 4
Exemplo n.º 3
0
def test_count_measurements():
    """Check count_measurements on a hand-built 6 x 4 state.

    Each row of the state holds exactly one non-zero entry.  Seed 0 has one
    non-zero entry, seed 1 has three, seed 2 has two and seed 3 has none,
    so three distinct seeds are expected to be reported as measured.
    """
    # (time step, seed, value) for every non-zero entry of the state
    nonzero_entries = [
        (0, 1, 0.5),
        (1, 2, -0.5),
        (2, 0, 0.5),
        (3, 1, -0.5),
        (4, 2, 0.5),
        (5, 1, 0.5),
    ]
    grid = np.zeros((6, 4))
    for step, seed, value in nonzero_entries:
        grid[step, seed] = value

    per_seed_counts, distinct_seeds = count_measurements(
        time_steps_by_seeds_state=grid)

    expected_per_seed = np.array([1, 3, 2, 0])
    assert np.all(per_seed_counts == expected_per_seed)
    assert distinct_seeds == 3