Example #1
    def __init__(
        self,
        state: AtariState = None,
        name: str = "MsPacman-v0",
        clone_seeds: bool = True,
        env: AtariEnv = None,
        fixed_steps: int = 1,
    ):
        """
        Environment class used for managing Atari games. It can be used as a perfect simulation, or
        as an imperfect one. It can handle rgb images, or ram as observations.
        :param name: Name of the atari environment to be created.
                     See: https://gym.openai.com/envs#atari works also with "GameName-ram-v0" like
                     environments.
        :param clone_seeds:  bool;
                 If true, clone the pseudo random number generators of the emulator for a
                 perfect simulation. False provides an stochastic simulation.
        :param env: Openai AtariEnv, optional; Use an already existing env instead of creating one.
        :param fixed_steps: The number of consecutive times that the action will be applied. This
                            allows us to set the frequency at which the policy will play.
        """
        self._clone_seeds = clone_seeds
        self._cum_reward = 0
        if env is None and name:
            spec = gym_registry.spec(name)
            # Disable gym's built-in episode limits so the environment never
            # truncates an episode on its own; not strictly needed, but safer.
            spec.max_episode_steps = None
            spec.max_episode_time = None

            self._env = spec.make()
            self._name = name
        elif env is not None:
            self._env = env
            self._name = env.spec.id
        else:
            raise ValueError("An env or an env name must be specified")
        # A state object must exist before reset(), which updates it in place.
        self._state = AtariState() if state is None else state
        if state is None:
            self._state = self.reset()

        super(AtariEnvironment, self).__init__(name=name,
                                               state=self.state,
                                               fixed_steps=fixed_steps)
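
A minimal usage sketch of the constructor above, assuming AtariEnvironment is importable; the game name and parameter values are illustrative, not taken from the source:

env = AtariEnvironment(
    name="MsPacman-ram-v0",  # "-ram-" suffix: RAM bytes as observations
    clone_seeds=True,        # clone the emulator RNG for a perfect simulation
    fixed_steps=4,           # repeat each action for 4 consecutive frames
)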
Example #2
import gym
import numpy as np
from gym.wrappers import Monitor  # classic gym (< 0.20) recording wrapper

# AtariState and Microstate are project-local helpers assumed importable.


class AtariMonitor:
    def __init__(
        self,
        name: str,
        directory: str,
        clone_seeds: bool = True,
        fixed_steps: int = 1,
        force: bool = False,
        resume=False,
        write_upon_reset=False,
        uid=None,
        mode=None,
    ):
        self.name = name
        self.state = AtariState()
        self.directory = directory
        self.force = force
        self.resume = resume
        self.write_upon_reset = write_upon_reset
        self.uid = uid
        self.mode = mode

        self.fixed_steps = fixed_steps
        self.clone_seeds = clone_seeds
        self._cum_reward = None
        self._env = self._init_monitor()

    @property
    def env(self) -> Monitor:
        return self._env

    def _init_monitor(self) -> Monitor:
        # Wrap the raw gym environment in a Monitor that records videos and
        # episode statistics to the target directory.
        env = gym.make(self.name)
        monitor = Monitor(
            env,
            directory=self.directory,
            force=self.force,
            resume=self.resume,
            write_upon_reset=self.write_upon_reset,
            uid=self.uid,
            mode=self.mode,
        )
        return monitor

    def _get_microstate(self) -> Microstate:
        # cloneSystemState also captures the emulator's pseudo-random number
        # generator, so restoring it replays the game deterministically;
        # cloneState omits the RNG, giving a stochastic simulation.
        if self.clone_seeds:
            microstate = self.env.unwrapped.ale.cloneSystemState()
        else:
            microstate = self.env.unwrapped.ale.cloneState()
        return Microstate(self.env, microstate)

    def step(self, action: np.ndarray, fixed_steps: int = None) -> AtariState:
        fixed_steps = self.fixed_steps if fixed_steps is None else fixed_steps
        end = False
        _dead = False
        # `action` is a one-hot vector; argmax recovers the discrete action.
        # The action is repeated for `fixed_steps` consecutive frames, with
        # the reward accumulated and an early exit when the episode ends.
        for i in range(fixed_steps):
            # gym's step returns (obs, reward, done, info); the info dict,
            # which carries the live count, is bound to `lives` here.
            observed, reward, _end, lives = self.env.step(action.argmax())
            end = end or _end
            _dead = _dead or reward < 0
            self._cum_reward += reward
            self.env.render()
            if end:
                break

        microstate = self._get_microstate()
        self.state.update_state(
            observed=observed, reward=self._cum_reward, end=end, lives=lives, microstate=microstate
        )
        return self.state.create_clone()

    def reset(self) -> AtariState:
        observed = self.env.reset()
        microstate = self._get_microstate()
        self._cum_reward = 0
        self.state.update_state(
            observed=observed,
            reward=self._cum_reward,
            end=False,
            lives=-1000,  # sentinel: live count unknown until the first step
            microstate=microstate,
        )
        return self.state.create_clone()

    def skip_frames(self, n_frames: int = 0) -> AtariState:
        # Sample a single random action once, encode it as a one-hot vector,
        # and apply it for up to `n_frames` steps to advance the game.
        action = np.zeros(self.env.action_space.n)
        ix = self.env.action_space.sample()
        action[ix] = 1
        for i in range(n_frames):
            self.step(action=action)
            if self.state.terminal:
                break
        return self.state.create_clone()
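
A minimal usage sketch for the monitor above, assuming AtariMonitor and its project-local dependencies are importable; the game name, recording directory, and step counts are illustrative, not from the source:

import numpy as np

monitor = AtariMonitor(name="MsPacman-v0", directory="/tmp/atari-videos", force=True)
state = monitor.reset()
state = monitor.skip_frames(n_frames=30)  # burn in with one random action

# step() expects a one-hot action vector and applies action.argmax().
action = np.zeros(monitor.env.action_space.n)
action[0] = 1  # index 0 is typically NOOP in ALE's action set
state = monitor.step(action, fixed_steps=4)

# The cloned microstates pair with ALE's restore calls, which rewind the
# emulator to a saved point (cloneSystemState/restoreSystemState also
# capture the RNG state):
ale = monitor.env.unwrapped.ale
saved = ale.cloneSystemState()
# ... play more steps ...
ale.restoreSystemState(saved)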