Example #1
    def act(self, action_idx):
        """
        takes an action and executes it 

        returns (next_state_observation, reward, done, extra_info)
        """

        # convert the action index into the actual action
        action_idx = int(action_idx)
        
        action = self.actions[action_idx]

        self.game.act(action)

        self.frame += 1
    
        r = self.game.reward()

        config = self.game.observe()
        obs, info = config['observation']
        featurizers.grid_one_hot(self.game, obs)
        obs = np.array(obs)

       
        done = self.game.is_over() or self.frame >= self.max_frame

        total_reward = self.game.reward_so_far()

        return (obs, r, done, total_reward)
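
The act() method above depends on attributes initialized elsewhere in the wrapper. A minimal constructor sketch follows; the attribute names actions, max_frame, and frame come from the method body and game.all_possible_actions() appears in Example #5, but the argument names and the default cap are assumptions:

    def __init__(self, game, max_frame=1000):
        # Hypothetical constructor for the wrapper that owns act(); only the
        # attribute names used by act() are taken from the example above.
        self.game = game                             # a mazebase game instance
        self.actions = game.all_possible_actions()   # index -> action mapping used by act()
        self.max_frame = max_frame                   # cap on episode length (assumed default)
        self.frame = 0                               # frame counter advanced by act()
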
Example #2
    def reset(self):
        """
        reset the game and gives the first observation
        """
        self.game.reset()

        config = self.game.observe()
        
        obs, info = config['observation']
        featurizers.grid_one_hot(self.game, obs)
        obs = np.array(obs)
        
        return obs
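
Together, reset() and act() give a Gym-style rollout loop. A usage sketch, assuming a wrapper instance env that exposes the two methods above plus the actions list from Example #1; the random policy is only a placeholder:

import random

obs = env.reset()                                    # first observation of the episode
done = False
while not done:
    action_idx = random.randrange(len(env.actions))  # placeholder random policy
    obs, reward, done, total_reward = env.act(action_idx)
print("episode return:", total_reward)
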
Example #3
 def _observe(self):
     # returns s, r, is_done, info
     game_data = self.game.observe()
     state, info = game_data['observation']
     state = featurizers.grid_one_hot(self.game, state, np=np)
     state = state.transpose((2, 0, 1))  # images go in [C, H, W] shape for PyTorch
     return state, game_data['reward'], self.game.is_over(), info
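
Because _observe() already returns the state in [C, H, W] order, feeding it to a PyTorch model only needs a batch dimension. A sketch, assuming the wrapper above is available as env; policy_net is a placeholder name, not part of the example:

import numpy as np
import torch

state, reward, is_done, info = env._observe()
state_t = torch.from_numpy(np.ascontiguousarray(state)).float().unsqueeze(0)  # [1, C, H, W]
# logits = policy_net(state_t)   # any model that expects [N, C, H, W] input
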
Example #4
 def observe(self):
     game_observation = self.game.observe()
     # Logic borrowed from:
     # https://github.com/facebook/MazeBase/blob/23454fe092ecf35a8aab4da4972f231c6458209b/py/example.py#L192
     obs, info = game_observation[OBSERVATION]
     featurizers.grid_one_hot(self.game, obs)
     obs = np.array(obs)
     featurizers.vocabify(self.game, info)
     info = np.array(info)
     game_observation[OBSERVATION] = np.concatenate((obs, info),
                                                    2).flatten()
     is_episode_over = self.game.is_over()
     return Observation(id=game_observation[ID],
                        reward=game_observation[REWARD],
                        state=game_observation[OBSERVATION],
                        is_episode_over=is_episode_over)
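
The Observation namedtuple and the ID/REWARD/OBSERVATION keys used above are defined elsewhere in that wrapper's module. A plausible minimal definition, assuming the dict keys returned by game.observe() match the other examples on this page ('observation', 'reward'); the 'id' key is likewise an assumption:

from collections import namedtuple

# Key names inferred from the other examples here; treat them as assumptions.
ID, REWARD, OBSERVATION = 'id', 'reward', 'observation'

Observation = namedtuple('Observation',
                         ['id', 'reward', 'state', 'is_episode_over'])
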
Example #5
        return actions[ind]


frame = 0
game.display()
sleep(.1)
system('clear')
while True:
    print("r: {}\ttr: {} \tguess: {}".format(game.reward(),
                                             game.reward_so_far(),
                                             game.approx_best_reward()))
    config = game.observe()
    pp.pprint(config['observation'][1])
    # Featurize the observation into one-hot vectors
    obs, info = config['observation']
    featurizers.grid_one_hot(game, obs)
    obs = np.array(obs)
    featurizers.vocabify(game, info)
    info = np.array(info)
    config['observation'] = obs, info
    game.display()

    id = game.current_agent()
    actions = game.all_possible_actions()
    action = action_func(actions)
    game.act(action)

    sleep(.1)
    system('clear')
    print("\n")
    frame += 1
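
The stray return actions[ind] at the top of this example is the tail of the action_func called inside the loop; its head was cut off. A minimal interactive version consistent with that line (the prompt wording and the enumeration are assumptions):

def action_func(actions):
    # List the available actions and ask for the index of the one to take.
    print(list(enumerate(actions)))
    ind = int(input("index of the action to take: "))
    return actions[ind]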