def reset(self):
    """Begin a fresh episode and return its opening timestep.

    A new game is obtained from `self._game_factory`, started via its
    `its_showtime()` method, and the values it returns are handed to
    `self._update_for_game_step`.

    Returns:
        An `environment.TimeStep` carrying the adapter's current step type
        (set to `StepType.FIRST` at the top of this method), `None` for both
        reward and discount, and `self.last_observations` as the observation.
    """
    # A brand-new game replaces whatever was there; the episode is marked as
    # being at its first step before the game is actually started.
    self._current_game = self._game_factory()
    self._state = environment.StepType.FIRST

    # Starting the game yields the opening observations/reward/discount,
    # which are folded into the adapter's internal bookkeeping.
    first_obs, first_reward, first_discount = (
        self._current_game.its_showtime())
    self._update_for_game_step(first_obs, first_reward, first_discount)

    # The first timestep deliberately reports no reward and no discount.
    return environment.TimeStep(
        step_type=self._state,
        reward=None,
        discount=None,
        observation=self.last_observations,
    )
def step(self, action):
    """Play `action` in the current game and return the resulting timestep.

    Args:
        action: When the adapter's action size is 1, a float or a
            single-element array of any dimensionality. Otherwise an iterable
            with one such value per expected action.

    Returns:
        An `environment.TimeStep` holding the new step type, the last reward,
        the last discount and the last observations. If the previous episode
        had ended, or no game is underway, the action is not played and the
        result of `reset()` is returned instead.

    Raises:
        RuntimeError: if the number of supplied actions differs from the
            adapter's action size.
    """

    def to_scalar(value):
        # Collapse a float or single-element array into a plain Python scalar.
        return np.asarray(value).item()

    # Normalise the input into a flat list of scalars. (A single-element list
    # happens to work for the one-action case too, but that is not advertised.)
    if self._action_size == 1:
        scalars = [to_scalar(action)]
    else:
        scalars = [to_scalar(part) for part in action]

    if len(scalars) != self._action_size:
        raise RuntimeError("A pycolab Environment adapter's step method "
                           'was called with actions that were not compatible '
                           'with what the pycolab game expects.')

    # A finished episode has its internals cleared first; whenever no game is
    # underway afterwards, a new episode is started instead of playing.
    if self._state == environment.StepType.LAST:
        self._drop_last_episode()
    if self._current_game is None:
        return self.reset()

    # pycolab receives a bare scalar for one action, the full list otherwise.
    if self._action_size == 1:
        game_action = scalars[0]
    else:
        game_action = scalars
    new_obs, new_reward, new_discount = self._current_game.play(game_action)
    self._update_for_game_step(new_obs, new_reward, new_discount)

    # The episode is either over or still in progress after this move.
    self._state = (environment.StepType.LAST if self._game_over
                   else environment.StepType.MID)

    return environment.TimeStep(
        step_type=self._state,
        reward=self._last_reward,
        discount=self._last_discount,
        observation=self.last_observations,
    )