Example No. 1
def test_does_not_allow_zero_velocity():
    with patch('rlbox.env.racetrack.rnd') as rnd:
        rnd.rand.return_value = 1  # rand() == 1: acceleration never randomly fails
        track = RaceTrack()
        track.pos = list(track.start_loc[0])
        pos = track.pos.copy()
        # zero acceleration from a standstill must not leave the car stuck:
        # the environment bumps the velocity to (1, 1)
        state, _ = track.step([0, 0])
        assert state == (pos[0] - 1, pos[1] + 1, 1, 1)
Example No. 2
def test_returns_to_start_position_if_out():
    with patch('rlbox.env.racetrack.rnd') as rnd:
        rnd.rand.return_value = 1
        track = RaceTrack()
        track.pos = list(track.start_loc[5])
        state, _ = track.step([1, 1])
        assert state[2:] == (0, 0)
        assert track.loc(track.pos) == track.START
Example No. 3
def test_moves_car_with_given_velocity():
    with patch('rlbox.env.racetrack.rnd') as rnd:
        rnd.rand.return_value = 1
        track = RaceTrack()
        start = track.pos.copy()
        state, reward = track.step([1, 0])
        assert state == (start[0] - 1, start[1], 1, 0)
        assert reward == -1
Example No. 4
def test_does_not_allow_velocity_out_of_bound():
    with patch('rlbox.env.racetrack.rnd') as rnd:
        rnd.rand.return_value = 1
        track = RaceTrack()
        track.pos = list(track.start_loc[0])
        track.vel = [4, 4]
        state, _ = track.step([1, 1])
        assert state[2:] == (4, 4)
Example No. 5
def test_fails_on_accelerate_with_probability():
    with patch('rlbox.env.racetrack.rnd') as rnd:
        rnd.rand.return_value = 0  # rand() == 0: force the random acceleration failure
        track = RaceTrack()
        track.pos = [track.pos[0] - 1, track.pos[1]]
        track.vel = [1, 0]
        pos = track.pos.copy()
        state, reward = track.step([1, 1])
        # the [1, 1] acceleration was dropped: the car keeps velocity (1, 0)
        assert state == (pos[0] - 1, pos[1], 1, 0)
        assert reward == -1
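The two mock values above only make sense if RaceTrack.step consults rnd.rand() before applying the requested acceleration. A minimal sketch of that noise check, assuming rnd is numpy.random imported in rlbox.env.racetrack and with NOISE_P as a made-up name for the failure probability (an illustration consistent with these tests, not the library's actual code):

import numpy.random as rnd

NOISE_P = 0.1  # hypothetical failure probability

def apply_action(vel, action):
    # Hypothetical sketch: with rand() mocked to 0 the acceleration is
    # dropped (as in the test above); mocked to 1 it is always applied.
    if rnd.rand() < NOISE_P:
        action = (0, 0)
    # velocity components stay clamped to the 0..4 range seen in the tests
    return [min(max(v + a, 0), 4) for v, a in zip(vel, action)]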
Example No. 6
def test_is_done_when_on_end_location():
    track = RaceTrack()
    assert not track.done()
    with patch('rlbox.env.racetrack.rnd') as rnd:
        rnd.rand.return_value = 0
        track.pos = [2, 14]
        track.vel = [1, 3]
        track.step([0, 1])
        assert track.done()
Example No. 7
    def worker(runs, env_cfg, task, param):
        environment = RaceTrack(**env_cfg)
        agent = core.Agent(
            AGENT_PROGRAM[task[0]](
                environment.act_spec,
                environment.obs_spec,
                **task[1]),
            lambda env: (env.obs, -1),
            lambda action, env: env.step(action))

        core.Run(agent, environment).start()

        key = {**env_cfg, 'runs': runs, 'alg': task[0], **task[1]}
        states, actions, rewards = environment.episode()
        run = param['run']
        SharedMem.dump(Testbed.key_for('states', **key), run, states)
        SharedMem.dump(Testbed.key_for('rewards', **key), run, rewards)
        SharedMem.dump(Testbed.key_for('actions', **key), run, actions)
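worker writes each run's episode into shared memory at the row given by param['run'], which suggests it is fanned out once per run. A hypothetical driver along those lines (the Pool usage and the sweep name are illustrative, not taken from the repository):

from multiprocessing import Pool

def sweep(runs, env_cfg, task):
    # one worker invocation per run; each dumps its episode into the
    # shared-memory row addressed by param['run']
    with Pool() as pool:
        pool.starmap(worker, [(runs, env_cfg, task, {'run': r})
                              for r in range(runs)])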
Example No. 8
def simulate():
    fig = plt.figure()
    track = RaceTrack()
    im = plt.imshow(track.print(),
                    origin='lower',
                    interpolation='none',
                    animated=True)
    plt.gca().invert_yaxis()
    agent = RandomAgent(track.act_spec, track.obs_spec)

    def step(_):
        if not track.done():
            track.step(agent(None, None))
            im.set_array(track.print())
        return im,

    # keep a reference to the animation; FuncAnimation is otherwise garbage-collected
    ani = animation.FuncAnimation(fig, step, interval=50, blit=True)
    plt.show()
Example No. 9
    def plot(self):
        store = Store(self.summary)

        params = {**self.env, 'runs': self.runs,
                  'alg': self.exe[0][0], **self.exe[0][1]}
        st = store[self.key_for('states', **params)]
        act = store[self.key_for('actions', **params)]
        rew = store[self.key_for('rewards', **params)]

        environment = RaceTrack(**self.env)

        # reuse a cached policy if one was stored on a previous run;
        # otherwise learn it from the recorded episodes
        if self.key_for('policy', **params) in store:
            pi = store[self.key_for('policy', **params)][:]
        else:
            agent_prog = RandomAgent(environment.act_spec, environment.obs_spec)
            pi = off_policy_monte_carlo(
                environment.act_spec,
                environment.obs_spec,
                agent_prog.policy(),
                stream(st, act, rew))
        store.replace(self.key_for('policy', **params), pi)
        store.close()
        agent = core.Agent(
            RandomAgent(environment.act_spec, environment.obs_spec, pi),
            lambda env: (environment.STATE_IDX[env.obs], -1),
            lambda action, env: env.step(environment.ACT_SPACE[action]))

        def step(_):
            # `environment` and `im` resolve at call time, so each pass of
            # the loop below rebinds them to a fresh track and image
            if not environment.done():
                agent.run(environment)
                im.set_array(environment.print())
            return im,

        for _ in range(5):
            fig = plt.figure()
            environment = RaceTrack(**self.env)
            im = plt.imshow(environment.print(),
                            origin='lower', interpolation='none',
                            animated=True)
            plt.gca().invert_yaxis()

            # keep a reference so the animation is not garbage-collected
            ani = animation.FuncAnimation(fig, step, interval=100, blit=True)
            plt.show()
Example No. 10
def test_prepares_action_space():
    track = RaceTrack()
    assert track.act_spec[0].shape == (2,)
    assert track.act_spec[0].lo == -1
    assert track.act_spec[0].hi == 1
Example No. 11
def test_initializes_with_zero_velocity():
    track = RaceTrack()
    assert track.vel == [0, 0]
Example No. 12
def test_initializes_on_start_location():
    track = RaceTrack()
    assert TRACK[track.pos[0], track.pos[1]] == RaceTrack.START
Example No. 13
def test_prepares_observation_space():
    track = RaceTrack()
    assert len(track.obs_spec) == 4
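Taken together, the examples pin down a small driving API: RaceTrack() starts on a START cell with velocity [0, 0], step(action) returns a (row, col, vel_row, vel_col) state tuple plus a -1 reward per move, and done() reports arrival at the end location. A minimal driving loop consistent with that API (the import path is inferred from the patch targets above; the step cap is a safety net, not part of the library):

from rlbox.env.racetrack import RaceTrack

track = RaceTrack()
total_reward = 0
for _ in range(1000):  # cap the episode in case the policy never finishes
    if track.done():
        break
    state, reward = track.step([1, 0])  # keep accelerating up the track
    total_reward += reward
print('episode return:', total_reward)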