Example #1
0
    def test_move(self):
        """GridWorld.move returns (new_state, reward, is_terminal) per transition."""
        layout = ' #P\nG #'
        world = GridWorld(layout, move_value=-1, die_value=-20, win_value=10)

        # Each case: (start, target, expected_state, expected_reward, expected_terminal)
        cases = [
            ((0, 0), (1, 0), (0, 0), -1, False),    # bump into wall: stay put
            ((0, 0), (1, 1), (1, 1), -1, False),    # step onto a free cell
            ((0, 0), (0, 1), (0, 1), 10, True),     # reach the goal: win reward, terminal
            ((0, 0), (2, 0), (2, 0), -20, True),    # step onto the pit: die penalty, terminal
            ((0, 0), (-1, 0), (0, 0), -1, False),   # out of bounds (negative index)
            ((0, 0), (10, 0), (0, 0), -1, False),   # out of bounds (index too large)
        ]

        for start, target, want_state, want_reward, want_terminal in cases:
            state, reward, terminal = world.move(start, target)
            self.assertEqual(state, want_state)
            self.assertEqual(reward, want_reward)
            self.assertEqual(terminal, want_terminal)
Example #2
0
File: qlearn.py  Project: rahular/rl

if __name__ == '__main__':
    # Q-learning training loop: run `max_iters` episodes of at most
    # `max_steps` steps each on a fixed-seed 8x8 GridWorld.
    max_steps = 100    # step budget per episode
    max_iters = 1000   # number of training episodes
    seed = random.randint(0, 100)  # one seed reused so every episode sees the same grid

    agent = qAgent()
    grid = GridWorld(size=8, force_fast=True, seed=seed)
    grid.show()
    print()

    # `episode` (was `iter`) avoids shadowing the builtin iter().
    for episode in range(max_iters):
        agent.set_grid(grid)
        i, j = 0, 0  # initial state: top-left corner
        cum_reward = 0
        for step in range(max_steps):
            action = agent.get_action(i, j)
            new_i, new_j = grid.move(i, j, action)
            # NOTE(review): reward is queried for the PRE-move state (i, j),
            # not (new_i, new_j) — confirm GridWorld.get_reward expects that.
            reward, is_final = grid.get_reward(i, j)
            cum_reward += reward
            agent.update_q(i, j, new_i, new_j, action, reward)
            if is_final:
                break
            i = new_i
            j = new_j
        if episode % 100 == 0:
            print(
                'Episode {} finished after {} steps with cumulative reward of {}'
                .format(episode, step, cum_reward))
        # Rebuild the grid with the same seed so the next episode starts
        # from an identical world.
        grid = GridWorld(size=8, force_fast=True, seed=seed)

    print()
    show_qtable(agent, grid.size)
Example #3
0
                # Debug trace of the current state (index and full state vector).
                print "currentstate:", currentstate_index, currentstate

                # Current position (considering the whole state vector).
                # The actual position is only columns 1 and 2.
                print "currentposition:", currentposition

                # Re-sample actions until one produces a valid move.
                # NOTE(review): this loops forever if no action is valid from
                # the current position — confirm that can't happen.
                validMove = False
                while not validMove:

                    action_index = agent.getAction('qlearning')
                    action = agent.getActions()[action_index]
                    print "newaction:", action_index, action

                    # Get the new position by executing the chosen action on the
                    # last two components (the position) of the current state.
                    # Presumably gridworld.move returns a falsy value for an
                    # invalid move — verify against its implementation.
                    newposition = gridworld.move(
                        agent.getCurrentPosition()[-2:], action)
                    print "newposition:", newposition

                    if newposition:
                        validMove = True
                        print "+ valid move"

                # Prepend the budget component to the position vector.
                newposition = np.append([agent.getBudgetState()], newposition)
                print "newposition with budget:", newposition

                # Reward the environment returns for this
                # (state, action, next-state) transition.
                current_reward = reward.reward(currentposition, action,
                                               newposition)
                print "reward:", current_reward