Example #1
def render(self):
    """Renders the learned Q-function and the learned policy."""
    if self.Q is None:
        # Lazily build Q by adding V to C[0] rearranged into an
        # (actions x states) array: primitive actions first, then options.
        self.Q = self.V + np.swapaxes(self.C[0, :, :], 0, 1).reshape(
            (self.actions.n_prim + self.actions.n_opt,
             self.GridWorld.n_states))
    gui.render_q(self.GridWorld, self.Q)  # Need a way to include options
    gui.render_policy(self.GridWorld,
                      self.policy)  # Need a way to include options
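
For intuition, the swapaxes/reshape step above turns an (n_states, n_actions) slice of C into an (n_actions, n_states) table that lines up with V. A minimal standalone sketch with illustrative shapes (3 states and 2 actions are made-up values, not from the original):

import numpy as np

n_states, n_actions = 3, 2
C = np.arange(6).reshape((1, n_states, n_actions))  # hypothetical cost tensor

# Swap the state and action axes so each row holds one action's values
# across all states, mirroring the reshape in render() above.
Q_part = np.swapaxes(C[0, :, :], 0, 1).reshape((n_actions, n_states))
print(Q_part)  # [[0 2 4]
               #  [1 3 5]]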
Example #2
################################################################################
import time
import numpy as np

# env (a GridWorld instance) and gui (its rendering module) are assumed to
# come from the accompanying gridworld code.
print(env.state2coord)
print(env.coord2state)
print(env.state_actions)
for i, el in enumerate(env.state_actions):
    print("s{}: {}".format(i, env.action_names[el]))

################################################################################
# Policy definition
# To represent a deterministic action, you can just use the index of the
# action. Recall that in the terminal states only action 0 (right) is
# defined.
# In this case, you can use gui.render_policy to visualize the policy.
################################################################################
pol = [1, 2, 0, 0, 1, 1, 0, 0, 0, 0, 3]
gui.render_policy(env, pol)
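
################################################################################
# A quick sanity check (a minimal sketch, using only the env.state_actions
# attribute printed above): verify that each chosen action is actually
# available in its state before rendering the policy.
################################################################################
for s, a in enumerate(pol):
    assert a in env.state_actions[s], \
        "action {} is not available in state s{}".format(a, s)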

################################################################################
# Simulate a trajectory
# You can use env.step(s, a, render=True) to visualize the transition.
################################################################################
env.render = True
state = 0
fps = 1
for i in range(5):
    action = np.random.choice(env.state_actions[state])
    nexts, reward, term = env.step(state, action)  # term flags terminal states
    state = nexts
    time.sleep(1. / fps)

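################################################################################
# A variation on the loop above (a minimal sketch): accumulate the discounted
# return along the trajectory and stop at a terminal state. gamma = 0.95 is
# an arbitrary illustrative discount, not a value from the original.
################################################################################
gamma = 0.95
state, total, discount = 0, 0.0, 1.0
for i in range(50):
    action = np.random.choice(env.state_actions[state])
    nexts, reward, term = env.step(state, action)
    total += discount * reward
    discount *= gamma
    state = nexts
    if term:  # third return value of env.step flags terminal states
        break
print("discounted return: {}".format(total))
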
################################################################################