def __init__(
    self,
    state: AtariState = None,
    name: str = "MsPacman-v0",
    clone_seeds: bool = True,
    env: AtariEnv = None,
    fixed_steps: int = 1,
):
    """
    Environment class used for managing Atari games. It can be used as a
    perfect simulation or as an imperfect one, and it can handle either RGB
    images or RAM as observations.

    :param state: AtariState, optional; Initial state of the environment.
     If None, the environment is reset to create one.
    :param name: Name of the Atari environment to be created. See:
     https://gym.openai.com/envs#atari. It also works with "GameName-ram-v0"
     style environments.
    :param clone_seeds: bool; If True, clone the pseudo-random number
     generators of the emulator for a perfect simulation. False provides a
     stochastic simulation.
    :param env: OpenAI AtariEnv, optional; Use an already existing env
     instead of creating one.
    :param fixed_steps: The number of consecutive times that the action will
     be applied. This allows us to set the frequency at which the policy
     will play.
    """
    self._clone_seeds = clone_seeds
    self._cum_reward = 0
    if env is None and name:
        spec = gym_registry.spec(name)
        # not actually needed, but we feel safer
        spec.max_episode_steps = None
        spec.max_episode_time = None
        self._env = spec.make()
        self._name = name
    elif env is not None:
        self._env = env
        self._name = env.spec.id
    else:
        raise ValueError("An env or an env name must be specified")
    # Start from a placeholder AtariState; reset() then populates it in place.
    self._state = AtariState() if state is None else state
    if state is None:
        self._state = self.reset()
    super(AtariEnvironment, self).__init__(
        name=name, state=self.state, fixed_steps=fixed_steps
    )
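# A minimal construction sketch (illustrative, not part of the original
# module): AtariEnvironment is built from a gym environment name, and
# reset() returns a clone of the initial AtariState. Stepping logic lives
# in the parent class, so only construction and reset are shown here.
env = AtariEnvironment(name="MsPacman-v0", clone_seeds=True, fixed_steps=4)
initial_state = env.reset()  # a clone of the freshly reset AtariState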
import gym
import numpy as np
from gym.wrappers import Monitor
# Microstate and AtariState are defined elsewhere in this package.


class AtariMonitor:
    def __init__(
        self,
        name: str,
        directory: str,
        clone_seeds: bool = True,
        fixed_steps: int = 1,
        force: bool = False,
        resume=False,
        write_upon_reset=False,
        uid=None,
        mode=None,
    ):
        self.name = name
        self.state = AtariState()
        self.directory = directory
        self.force = force
        self.resume = resume
        self.write_upon_reset = write_upon_reset
        self.uid = uid
        self.mode = mode
        self.fixed_steps = fixed_steps
        self.clone_seeds = clone_seeds
        self._cum_reward = None
        self._env = self._init_monitor()

    @property
    def env(self) -> Monitor:
        return self._env

    def _init_monitor(self) -> Monitor:
        # Wrap the raw gym env in a Monitor so episodes are recorded to disk.
        env = gym.make(self.name)
        monitor = Monitor(
            env,
            directory=self.directory,
            force=self.force,
            resume=self.resume,
            write_upon_reset=self.write_upon_reset,
            uid=self.uid,
            mode=self.mode,
        )
        return monitor

    def _get_microstate(self) -> Microstate:
        # cloneSystemState also clones the emulator's pseudo-random number
        # generators, giving a perfectly deterministic simulation; cloneState
        # leaves them untouched, so the simulation stays stochastic.
        if self.clone_seeds:
            microstate = self.env.unwrapped.ale.cloneSystemState()
        else:
            microstate = self.env.unwrapped.ale.cloneState()
        return Microstate(self.env, microstate)

    def step(self, action: np.ndarray, fixed_steps: int = None) -> AtariState:
        fixed_steps = self.fixed_steps if fixed_steps is None else fixed_steps
        end = False
        _dead = False
        # Apply the same action fixed_steps times, accumulating the reward.
        for i in range(fixed_steps):
            observed, reward, _end, lives = self.env.step(action.argmax())
            end = end or _end
            _dead = _dead or reward < 0
            self._cum_reward += reward
            self.env.render()
            if end:
                break
        microstate = self._get_microstate()
        self.state.update_state(
            observed=observed,
            reward=self._cum_reward,
            end=end,
            lives=lives,
            microstate=microstate,
        )
        return self.state.create_clone()

    def reset(self) -> AtariState:
        observed = self.env.reset()
        microstate = self._get_microstate()
        self._cum_reward = 0
        self.state.update_state(
            observed=observed,
            reward=self._cum_reward,
            end=False,
            lives=-1000,  # sentinel; the real life count is unknown at reset
            microstate=microstate,
        )
        return self.state.create_clone()

    def skip_frames(self, n_frames: int = 0) -> AtariState:
        # Sample one random action and repeat it to fast-forward the game.
        action = np.zeros(self.env.action_space.n)
        ix = self.env.action_space.sample()
        action[ix] = 1
        for i in range(n_frames):
            self.step(action=action)
            if self.state.terminal:
                break
        return self.state.create_clone()
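# A minimal usage sketch for AtariMonitor (illustrative; the output directory
# is a hypothetical path). step() expects a one-hot action vector, since it
# calls action.argmax() internally, and the rollout is recorded to disk by the
# underlying gym Monitor.
import numpy as np

monitor = AtariMonitor(name="MsPacman-v0", directory="videos/mspacman", force=True)
state = monitor.reset()
state = monitor.skip_frames(n_frames=90)  # repeat one random action to fast-forward
while not state.terminal:
    action = np.zeros(monitor.env.action_space.n)
    action[monitor.env.action_space.sample()] = 1  # random one-hot action
    state = monitor.step(action)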