class Environment:
    """Wrapper around a GameManager that tracks rewards and running
    observation statistics.

    NOTE(review): relies on project-level ``GameManager``, ``Config`` and
    ``process_frame`` defined elsewhere in the package.
    """

    def __init__(self, display=Config.SHOW_MODE):
        self.game = GameManager(display=display)
        self.previous_state = None   # state observed before the latest step
        self.current_state = None    # most recently processed frame
        self.available = None        # action-availability info from the game
        self.total_reward = 0        # reward accumulated since last reset()
        self.envs_mean = None
        self.envs_std = None
        # Running exponentially-weighted statistics used by _observation().
        # (The original initialized num_steps twice; once is enough.)
        self.num_steps = 0
        self.state_mean = 0
        self.state_std = 0
        self.alpha = 0.9999  # EMA decay factor for the running statistics

    def get_num_actions(self):
        """Return the size of the game's action space."""
        return self.game.get_num_actions()

    def get_num_states(self):
        """Return the size of the game's state space."""
        return self.game.get_num_state()

    def _observation(self, observation):
        """Normalize *observation* using bias-corrected running mean/std.

        The correction term ``1 - alpha**num_steps`` (Adam-style) keeps
        early steps from being dominated by the zero-initialized stats.
        """
        self.num_steps += 1
        one_minus_alpha = 1 - self.alpha
        self.state_mean = self.state_mean * self.alpha + \
            observation.mean() * one_minus_alpha
        self.state_std = self.state_std * self.alpha + \
            observation.std() * one_minus_alpha
        correction = 1 - pow(self.alpha, self.num_steps)
        unbiased_mean = self.state_mean / correction
        unbiased_std = self.state_std / correction
        return (observation - unbiased_mean) / (unbiased_std + 1e-8)

    def reset(self):
        """Reset the game; return (initial processed state, available actions)."""
        self.total_reward = 0
        observation, available = self.game.reset()
        self.previous_state = None
        self.current_state = process_frame(observation)
        return self.current_state, available

    def step(self, action):
        """Advance the game by *action*.

        Returns (state, reward, done, available, envs_mean, envs_std) and
        updates the cached previous/current state pair.
        """
        observation, reward, done, available, envs_mean, envs_std = \
            self.game.step(action)
        self.available = available
        self.total_reward += reward
        self.envs_mean = envs_mean
        self.envs_std = envs_std
        self.previous_state = self.current_state
        self.current_state = process_frame(observation)
        return self.current_state, reward, done, available, envs_mean, envs_std
class Environment:
    """Atari environment wrapper that stacks preprocessed grayscale frames."""

    def __init__(self):
        self.game = GameManager(Config.ATARI_GAME, display=Config.PLAY_MODE)
        self.nb_frames = Config.STACKED_FRAMES
        # Bounded FIFO holding the most recent preprocessed frames.
        self.frame_q = Queue(maxsize=self.nb_frames)
        self.previous_states = []
        self.current_state = None
        self.total_reward = 0
        self.reset()

    @staticmethod
    def _rgb2gray(rgb):
        """Collapse an RGB image to luminance (ITU-R 601 weights)."""
        luminance_weights = [0.299, 0.587, 0.114]
        return np.dot(rgb[..., :3], luminance_weights)

    @staticmethod
    def _preprocess(image):
        """Grayscale, resize, and rescale a raw frame to roughly [-1, 1)."""
        gray = Environment._rgb2gray(image)
        resized = misc.imresize(
            gray, [Config.IMAGE_HEIGHT, Config.IMAGE_WIDTH], 'bilinear')
        return resized.astype(np.float32) / 128.0 - 1.0

    def _get_current_state(self):
        """Return queued frames stacked as HxWxC, or None until full."""
        if self.frame_q.full():
            stacked = np.array(self.frame_q.queue)
            # Move the frame axis last so channels index the stack depth.
            return np.transpose(stacked, [1, 2, 0])
        return None  # not enough frames collected yet

    def _update_frame_q(self, frame):
        """Push a preprocessed frame, evicting the oldest when at capacity."""
        if self.frame_q.full():
            self.frame_q.get()
        self.frame_q.put(Environment._preprocess(frame))

    def get_num_actions(self):
        """Size of the game's action space."""
        return self.game.get_num_actions()

    def num_basic_actions(self):
        """Number of basic (non-composite) actions in the game."""
        return self.game.num_basic_actions()

    def reset(self):
        """Restart the episode and clear all cached state."""
        self.total_reward = 0
        self.frame_q.queue.clear()
        self._update_frame_q(self.game.reset())
        self.previous_states = []
        self.current_state = None

    def step(self, actions):
        """Apply *actions*; return (rewards, done).

        Records the stacked state after each intermediate observation in
        ``previous_states`` (starting from the pre-step state) and leaves
        ``current_state`` at the final stacked state.
        """
        observations, rewards, done, _ = self.game.step(actions)
        self.total_reward += sum(rewards)
        self.previous_states = [self.current_state]
        for obs in observations[:-1]:
            self._update_frame_q(obs)
            self.previous_states.append(self._get_current_state())
        self._update_frame_q(observations[-1])
        self.current_state = self._get_current_state()
        return rewards, done
def get_num_actions(self):
    """Return the number of actions reported by GameManager.

    NOTE(review): this calls ``GameManager.get_num_actions()`` on the
    class rather than on an instance, while sibling wrappers in this file
    call ``self.game.get_num_actions()``. Confirm the target is a
    static/class method; otherwise this likely should be
    ``self.game.get_num_actions()``.
    """
    return GameManager.get_num_actions()