def test_game_over(action):
    """Check that only QUIT ends the game and the discount stays constant.

    `action` is presumably injected by a pytest fixture or parametrization
    defined outside this view. The game must not be over right after
    `its_showtime()`, and after a single `play(action)` it must be over
    exactly when the action was `QUIT`. Both calls are expected to report
    the discount the game was constructed with.
    """
    expected_discount = 0.123
    patient = DrivingGridworld.legacy_constructor(
        4, 0, 0, 1, discount=expected_discount)

    # Only the third element (the discount) of each step tuple is checked.
    _, _, start_discount = patient.its_showtime()
    assert start_discount == expected_discount
    assert not patient.game_over

    _, _, step_discount = patient.play(action)
    assert patient.game_over == (action == QUIT)
    assert step_discount == expected_discount
def test_crashing_into_right_wall(method):
    """Check the terminal state reached by steering RIGHT from `car_col=3`.

    `method` is a callable taking `(game, action)` and returning an
    `(observation, reward, discount)` triple; it is presumably supplied by
    a pytest fixture defined outside this view. After the move the road
    must report a crash, the game must be over, and both the reward and
    the discount must be zero. The crashed road is expected to serialise
    to the key `(-1, 0, frozenset())` and to render as two blank rows.
    """
    patient = DrivingGridworld.legacy_constructor(
        headlight_range=1,
        num_bumps=0,
        num_pedestrians=0,
        discount=0.9,
        car_col=3)

    _, reward, discount = method(patient, RIGHT)

    road = patient.road
    assert road.has_crashed()
    assert patient.game_over
    assert discount == 0.0
    assert reward == 0.0
    assert patient.road.to_key() == (-1, 0, frozenset())

    # Two rows of seven spaces separated by a newline.
    blank_row = ' ' * 7
    assert patient.road.to_s() == '\n'.join((blank_row, blank_row))
def test_obstacles_always_appear_with_the_same_probability():
    """Check that the NO_OP successor distribution is the same before and
    after a pedestrian has passed through the visible road.

    The expected distribution has one outcome with no obstacle
    (probability 0.99) and four outcomes with a single pedestrian
    (probability 0.0025 each), in a fixed order. The test then jumps to
    the successor containing a pedestrian, plays NO_OP until the
    pedestrian is beside the car, plays once more so it leaves the board,
    and checks the successor distribution against the same table.
    """
    headlight_range = 4
    patient = DrivingGridworld.legacy_constructor(
        headlight_range,
        num_bumps=0,
        num_pedestrians=1,
        speed=1,
        discount=0.99,
        pedestrian_appearance_prob=0.01)

    # Expected (obstacle set, probability) pairs, in successor order.
    pedestrian_columns = (1, 3, 0, 2)
    expected = [(frozenset(), 0.99)]
    expected.extend(
        (frozenset([('p', 0, col, 0, 0)]), 0.0025)
        for col in pedestrian_columns)

    def assert_matches_expected(outcomes):
        # Indexed lookup (not zip) so extra outcomes fail loudly.
        for index, (state, probability) in enumerate(outcomes):
            expected_obstacles, expected_probability = expected[index]
            assert state.to_key()[-1] == expected_obstacles
            assert probability == expected_probability

    initial_outcomes = tuple(patient.road.successors(NO_OP))
    assert len(initial_outcomes) == len(expected)
    assert_matches_expected(initial_outcomes)

    # Continue from the first outcome that contains a pedestrian.
    patient.road = initial_outcomes[1][0]
    for _ in range(headlight_range):
        patient.play(NO_OP)

    assert patient.road.to_s() == '|d  d| \n|d  d| \n|d  d| \n|d  d| \n|dpCd|^'
    patient.play(NO_OP)
    assert patient.road.to_s() == '|d  d| \n|d  d| \n|d  d| \n|d  d| \n|d Cd|^'

    assert_matches_expected(tuple(patient.road.successors(NO_OP)))
Exemplo n.º 4
0
"""## Global Settings"""

# NOTE(review): `set_good_defaults` is defined outside this view; presumably
# it configures library-wide defaults -- confirm against its definition.
set_good_defaults()
# Print NumPy floats with four digits after the decimal point.
np.set_printoptions(formatter={'float': '{:0.4f}'.format})

# Runs the shell command `ls`.
# NOTE(review): looks like a leftover from a notebook export -- confirm it is
# still wanted as an import-time side effect.
os.system('ls')
"""# Experiments

## Static Definitions
"""

# The speed limit is read from a freshly built road with the chosen headlight
# range.
headlight_range = 3
speed_limit = new_road(headlight_range=headlight_range).speed_limit()

# The game receives a road factory, is undiscounted (discount=1.0), and uses
# a reward function that returns 0 for every call.
game = DrivingGridworld(
    lambda: new_road(headlight_range=headlight_range),
    discount=1.0,
    reward_function=lambda *args, **kwargs: 0)

# Total reward functions = n * samples per CFR iteration (100 * 10 = 1000).
num_samples_per_cfr_iter = 10
n = 100
num_reward_functions = n * num_samples_per_cfr_iter
# Twice as many are generated; going by the name, presumably one half for
# training and one half for testing.
num_train_and_test_reward_functions = 2 * num_reward_functions

# Timer labelled for the reward distribution generation step; it is used as
# a context manager by the `with` statement that follows.
reward_function_dist_timer = Timer('reward distribution generation')
with reward_function_dist_timer:
    z = speed_limit * speed_limit + speed_limit + 1
    wc_ncer = tf.fill([num_train_and_test_reward_functions], -1.0 / z)

    random_reward_function = SituationalReward(
        stopping_reward=tf.zeros([num_train_and_test_reward_functions]),
        wc_non_critical_error_reward=wc_ncer,
Exemplo n.º 5
0
    def tabulate(cls,
                 headlight_range=2,
                 num_samples_per_cfr_iter=10,
                 n=100,
                 loc=0,
                 precisions=(None,),
                 discount=0.99,
                 progress_bonus=1.0,
                 ditch_bonus_multiplier=10.0,
                 normalize_rewards=False,
                 critical_error_reward=-1000.0,
                 print_every=100):
        """Build one tabulated instance of `cls` per requested precision.

        For every entry of `precisions` a `DebrisPerceptionReward` holding
        `n * num_samples_per_cfr_iter` sampled reward functions is created
        and the road is tabulated against it. All returned instances share
        the same game, root distribution and transitions; they differ only
        in their reward tensor.

        Args:
            headlight_range: Passed to `new_road` when building the road.
            num_samples_per_cfr_iter: Size of the leading axis of each
                reward tensor.
            n: Size of the second axis of each reward tensor.
            loc: Forwarded unchanged to `DebrisPerceptionReward`.
            precisions: Iterable of values forwarded one at a time as
                `precision` to `DebrisPerceptionReward`. May be any
                iterable; an empty one yields an empty result.
            discount: Passed to `cls`, and used to derive
                `critical_error_reward` when that argument is `None`.
            progress_bonus: Fill value for the
                `bc_unobstructed_progress_reward` tensor and input to
                `fixed_ditch_bonus`.
            ditch_bonus_multiplier: `multiplier` for `fixed_ditch_bonus`.
            normalize_rewards: If true, each reward tensor is divided by
                its maximum absolute value taken over its last two axes.
            critical_error_reward: Fill value for the critical-error reward
                tensor. `None` means "compute it with
                `critical_reward_for_fixed_ditch_bonus`".
            print_every: Forwarded to `game.road.tabulate`.

        Returns:
            A list with one `cls` instance per precision, in input order.
        """
        # Materialise once: accepts generators and lets us detect "nothing
        # to do" before any road is built. Without this guard an empty
        # iterable fell through to the post-loop code with `transitions`
        # and `state_indices` unbound.
        precisions = tuple(precisions)
        if not precisions:
            return []

        speed_limit = new_road(headlight_range=headlight_range).speed_limit()
        game = DrivingGridworld(
            lambda: new_road(headlight_range=headlight_range))
        num_reward_functions = n * num_samples_per_cfr_iter
        wc_ncer = fixed_ditch_bonus(
            progress_bonus, multiplier=ditch_bonus_multiplier)

        if critical_error_reward is None:
            critical_error_reward = critical_reward_for_fixed_ditch_bonus(
                progress_bonus, speed_limit, discount)

        tf.compat.v1.logging.info('progress_bonus: {}, wc_ncer: {}, cer: {}'.format(
            progress_bonus, wc_ncer, critical_error_reward))

        reward_datasets = []
        for precision in precisions:
            random_reward_function = DebrisPerceptionReward(
                stopping_reward=tf.zeros([num_reward_functions]),
                wc_non_critical_error_reward=tf.fill([num_reward_functions],
                                                     wc_ncer),
                bc_unobstructed_progress_reward=tf.fill([num_reward_functions],
                                                        progress_bonus),
                num_samples=num_reward_functions,
                critical_error_reward=tf.fill([num_reward_functions],
                                              critical_error_reward),
                use_slow_collision_as_offroad_base=False,
                loc=loc,
                precision=precision)

            transitions, rfd_list, state_indices = game.road.tabulate(
                random_reward_function, print_every=print_every)

            # Move the last axis of the stacked rewards to the front, then
            # split it into (num_samples_per_cfr_iter, n).
            # NOTE(review): assumes the last stacked axis indexes the sampled
            # reward functions -- confirm against `game.road.tabulate`.
            rewards = tf.reshape(
                tf.transpose(tf.stack(rfd_list), [2, 0, 1]), [
                    num_samples_per_cfr_iter, n,
                    len(state_indices),
                    len(rfd_list[0])
                ])
            if normalize_rewards:
                rewards = rewards / tf.reduce_max(
                    tf.abs(rewards), axis=(2, 3), keepdims=True)
            reward_datasets.append(rewards)

        # `transitions` and `state_indices` come from the last precision's
        # tabulation and are shared by every returned instance.
        transitions = tf.stack(transitions)
        root_probs = tf.one_hot(
            state_indices[game.road.copy().to_key()], depth=len(state_indices))
        return [
            cls(game, root_probs, transitions, rewards, discount,
                state_indices)
            for rewards in reward_datasets
        ]
def test_initial_observation():
    """Check the first observation of a freshly constructed game.

    Verifies the road's text rendering, the reward (0) and discount (0.8)
    returned by `its_showtime()`, the observation's `board` of character
    codes, and one boolean layer per symbol on the board. Each expected
    layer is the mask of board cells holding that symbol's character code.
    """
    patient = DrivingGridworld.legacy_constructor(3, 0, 0, 1, discount=0.8)
    assert patient.road.to_s() == '|d  d| \n|d  d| \n|d  d| \n|d Cd|^'

    observation, reward, discount = patient.its_showtime()
    assert reward == 0
    assert discount == 0.8

    # Three empty road rows above the row holding the car 'C' and the '^'.
    rendered_rows = ['|d  d| '] * 3 + ['|d Cd|^']
    expected_board = np.array(
        [[ord(symbol) for symbol in row] for row in rendered_rows]
    ).astype('uint8')
    np.testing.assert_array_equal(observation.board, expected_board)

    # Layers are checked in the order: car, wall, 'd', blank, '^'.
    for symbol in 'C|d ^':
        np.testing.assert_array_equal(
            observation.layers[symbol], expected_board == ord(symbol))