def step(self, action): action = action[0] # Force FIRE action to start episodes in games with lives if self._force_fire: obs, _, _, _ = self.env.env.step(1) self._force_fire = False while self._current_no_op > 0: obs, _, _, _ = self.env.env.step(0) self._current_no_op -= 1 obs, reward, absorbing, info = self.env.step(action) self._real_reset = absorbing if info['lives'] != self._lives: if self._episode_ends_at_life: absorbing = True self._lives = info['lives'] self._force_fire = self.env.unwrapped.get_action_meanings( )[1] == 'FIRE' self._state.append(preprocess_frame(obs, self._img_size)) return LazyFrames(list(self._state), self._history_length), reward, absorbing, info
def reset(self, state=None):
    """Reset the wrapped env and prime the frame history.

    The history deque is filled with `_history_length` independent copies
    of the first preprocessed frame, so the initial stacked observation
    is well-defined.

    Returns:
        LazyFrames stack built from the freshly filled history.
    """
    first_frame = preprocess_frame(self.env.reset(), self._img_size)
    copies = (deepcopy(first_frame) for _ in range(self._history_length))
    self._state = deque(copies, maxlen=self._history_length)
    return LazyFrames(list(self._state), self._history_length)
def step(self, action):
    """Step the wrapped env, binarizing positive rewards to 1.

    MiniGrid discounts rewards based on timesteps, but we need raw
    rewards, so any positive reward is clamped to 1.0; other rewards are
    merely cast to float.

    Returns:
        (LazyFrames stack of the last `_history_length` frames, reward,
        absorbing flag, info dict from the wrapped env).
    """
    frame, reward, absorbing, info = self.env.step(action)
    # Positive -> 1.0; otherwise just promote int to float.
    reward = 1. if reward > 0 else reward * 1.
    self._state.append(preprocess_frame(frame, self._img_size))
    stacked = LazyFrames(list(self._state), self._history_length)
    return stacked, reward, absorbing, info
def reset(self, state=None):
    """Reset the Atari wrapper, performing a full env reset only when the
    previous episode truly ended (`_real_reset`).

    On a real reset the frame history is refilled with copies of the
    first preprocessed frame and the life counter is restored to
    `_max_lives`. In all cases the FIRE-on-start flag and a fresh random
    count of initial no-op actions are re-armed for the next `step`.

    Returns:
        LazyFrames stack built from the current frame history.
    """
    if self._real_reset:
        self._state = preprocess_frame(self.env.reset(), self._img_size)
        # Fill the history with independent copies of the first frame.
        self._state = deque(
            [deepcopy(self._state) for _ in range(self._history_length)],
            maxlen=self._history_length)
        self._lives = self._max_lives
    # Press FIRE at the next step if this game's action 1 is FIRE.
    self._force_fire = self.env.unwrapped.get_action_meanings(
        )[1] == 'FIRE'
    # Random number of initial no-ops in [0, _max_no_op_actions].
    self._current_no_op = np.random.randint(self._max_no_op_actions + 1)
    return LazyFrames(list(self._state), self._history_length)