"""ALE random-agent demo.

Usage:
  ale_demo.py <rom_file> [--iters=<count>]
"""
# The usage block above is a minimal reconstruction inferred from the docopt keys
# ('<rom_file>', '--iters') used in main(); adjust the script name as needed.

import random

import docopt
import pygame
from ale_python_interface import ALEInterface  # newer installs: from ale_py import ALEInterface


def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')
    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))
    legal_actions = ale.getLegalActionSet()

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))


if __name__ == '__main__':
    main()
import sys
from random import randrange

from ale_py import ALEInterface

# Construct the ALE interface (not shown in the original snippet).
ale = ALEInterface()

# Load the ROM file
ale.loadROM(sys.argv[1])

# Get the list of available modes and difficulties
avail_modes = ale.getAvailableModes()
avail_diff = ale.getAvailableDifficulties()

print(f"Number of available modes: {len(avail_modes)}")
print(f"Number of available difficulties: {len(avail_diff)}")

# Get the list of legal actions
legal_actions = ale.getLegalActionSet()

# Play one episode in each mode and in each difficulty
for mode in avail_modes:
    for diff in avail_diff:
        ale.setDifficulty(diff)
        ale.setMode(mode)
        ale.reset_game()
        print(f"Mode {mode} difficulty {diff}:")

        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
        print(f"Episode ended with score: {total_reward}")
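# Hypothetical refactor sketch (not part of the original snippet): the random-play inner
# loop recurs across these examples, so it can be pulled into a small helper that returns
# the episode score after ale.setMode()/ale.setDifficulty() have been called.
def play_random_episode(ale, legal_actions):
    """Play one episode with a uniformly random policy and return its score."""
    ale.reset_game()
    total_reward = 0
    while not ale.game_over():
        total_reward += ale.act(legal_actions[randrange(len(legal_actions))])
    return total_reward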
import random

import numpy as np
from ale_python_interface import ALEInterface  # newer installs: from ale_py import ALEInterface
from scipy.misc import imresize  # deprecated in recent SciPy; an equivalent resize can substitute

# BaseEnvironment, ObservationPool, FramePool and the constants IMG_SIZE_X, IMG_SIZE_Y,
# NR_IMAGES, FRAMES_IN_POOL, ACTION_REPEAT and MAX_START_WAIT come from the surrounding project.


class AtariEmulator(BaseEnvironment):

    def __init__(self, actor_id, args):
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", args.random_seed * (actor_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = args.rom_path + "/" + args.game + ".bin"
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = args.random_start
        self.single_life_episodes = args.single_life_episodes
        self.call_on_new_frame = args.visualize

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((FRAMES_IN_POOL, self.screen_height, self.screen_width), dtype=np.uint8),
            self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def get_rgb_screen(self):
        self.ale.getScreenRGB(self.rgb_screen)
        return self.rgb_screen

    def on_new_frame(self, frame):
        pass

    def __new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess frame pool """
        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (IMG_SIZE_X, IMG_SIZE_Y), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only need to add the last FRAMES_IN_POOL frames to the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(NR_IMAGES):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal, self.lives

    def save_frame(self, frame):
        pass

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        else:
            return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
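# A minimal driver sketch for AtariEmulator above, assuming the surrounding project's
# pool classes and constants are importable and that `args` carries the fields read in
# __init__; the field values and the ROM name are placeholders, not part of the original code.
from types import SimpleNamespace

import numpy as np

args = SimpleNamespace(
    random_seed=1,               # combined with actor_id to seed ALE
    rom_path='./roms',           # directory containing <game>.bin
    game='breakout',             # illustrative ROM name
    random_start=True,           # no-op for a random number of frames after reset
    single_life_episodes=False,  # treat loss of life as terminal if True
    visualize=False,             # forward RGB frames to on_new_frame if True
)

emulator = AtariEmulator(actor_id=0, args=args)
num_actions = len(emulator.get_legal_actions())

observation = emulator.get_initial_state()  # stacked, preprocessed frames
terminal, total_reward = False, 0
while not terminal:
    # next() takes a one-hot action vector and applies np.argmax to pick the action index
    action = np.eye(num_actions)[np.random.randint(num_actions)]
    observation, reward, terminal, lives = emulator.next(action)
    total_reward += reward
print('Episode reward:', total_reward)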
import numpy as np

# Base class `Environment` comes from the surrounding RL framework
# (here assumed to be Tensorforce's tensorforce.environments.Environment).
from tensorforce.environments import Environment


class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        life_loss_terminal (bool): Signals a terminal state on loss of life
            (<span style="color:#00C000"><b>default</b></span>: false).
        life_loss_punishment (float): Punishment subtracted from the reward on loss of life
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(
        self, level, life_loss_terminal=False, life_loss_punishment=0.0,
        repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None
    ):
        super().__init__()

        from ale_py import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability', repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left,
        # Up Fire, Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire,
        # Down Right Fire, Down Left Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        # getScreenRGB() returns an array of shape (height, width, 3), so declare the
        # state shape accordingly.
        return dict(type='float', shape=(height, width, 3), min_value=0.0, max_value=1.0)

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        # screen = np.copy(self.environment.getScreenRGB(self.screen))
        screen = self.environment.getScreenRGB()
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        # self.screen = np.empty((width, height, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()
        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
        self.lives = next_lives
        return states, terminal, reward
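# A usage sketch for the ArcadeLearningEnvironment adapter above, assuming the framework's
# Environment base class is importable; the ROM path and parameter values are placeholders.
import random

environment = ArcadeLearningEnvironment(level='./roms/breakout.bin', frame_skip=4, seed=42)
num_actions = environment.actions()['num_values']

states = environment.reset()
terminal, episode_reward = False, 0.0
while not terminal:
    action = random.randrange(num_actions)  # random policy, for illustration only
    states, terminal, reward = environment.execute(actions=action)
    episode_reward += reward
print('Episode reward:', episode_reward)

environment.close()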