import numpy as np
from scipy.misc import imresize  # deprecated in newer SciPy; kept to match the original code


class AtariEnvironment(Environment):
    """Atari Environment Object"""

    def __init__(self, rom_path, action_repeat=4, death_end=True,
                 width_resize=84, height_resize=84, resize_mod='scale'):
        super(AtariEnvironment, self).__init__()
        self.action_repeat = action_repeat
        self.death_end = death_end
        self.width_resize = width_resize
        self.height_resize = height_resize
        self.resize_mod = resize_mod
        self.display = False

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        # ALE settings must be applied before loading the ROM.
        self.ale.setInt('random_seed', np.random.randint(1000))
        self.ale.setBool('display_screen', self.display)
        self.ale.loadROM(rom_path)
        self.action_set = self.ale.getMinimalActionSet()
        self.num_actions = len(self.action_set)
        self.start_lives = self.ale.lives()
        width, height = self.ale.getScreenDims()
        self.currentScreen = np.empty((height, width), dtype=np.uint8)
        self.reset()

    def reset(self):
        self.ale.reset_game()
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = False

    def step(self, action, repeat=None):
        repeat = self.action_repeat if repeat is None else repeat
        reward = 0
        for _ in range(repeat):
            reward += self.ale.act(self.action_set[action])
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = (self.death_end and self.ale.lives() < self.start_lives) \
            or self.ale.game_over()
        return reward

    def get_frame(self):
        if self.resize_mod == 'scale':
            return imresize(self.currentScreen,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
        elif self.resize_mod == 'crop':
            height, width = self.currentScreen.shape
            res = (height - width) // 2
            crop = self.currentScreen[res:(res + width), :]
            return imresize(crop, (self.width_resize, self.height_resize),
                            interp='bilinear')
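# Usage sketch (an assumption, not part of the original code): a random-action
# episode with the wrapper above. 'breakout.bin' is an illustrative ROM path.
if __name__ == '__main__':
    env = AtariEnvironment('breakout.bin')
    total_reward = 0
    while not env.terminal:
        total_reward += env.step(np.random.randint(env.num_actions))
        frame = env.get_frame()  # 84x84 preprocessed observation
    print('episode reward: %d' % total_reward)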
# Note: this method assumes the enclosing class subclasses ALEInterface,
# hence the explicit unbound calls.
def act_with_frame_skip(self, a):
    """Repeat one action for `frame_skip` frames; flag the episode as over
    on game over or (outside test mode) on loss of life."""
    reward = 0
    game_over = False
    lives = ALEInterface.lives(self)
    for _ in xrange(self.frame_skip):
        reward += ALEInterface.act(self, self.legal_actions[a])
        if ALEInterface.game_over(self) or \
                (not self.test_mode and ALEInterface.lives(self) < lives):
            game_over = True
    return reward, game_over
from copy import deepcopy

import numpy as np
from ale_python_interface import ALEInterface


class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability, rng):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.rng_source = rng
        self.rng = deepcopy(self.rng_source)
        self.ale = ALEInterface()
        # Set ALE's own repeat_action_probability from the default 0.25 to 0
        # so the emulator itself is deterministic; sticky actions are instead
        # re-implemented in act() with a controllable RNG.
        self.ale.setFloat('repeat_action_probability', 0.0)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def reset_action_seed(self):
        self.rng = deepcopy(self.rng_source)

    def set_action_seed(self, seed):
        self.rng = np.random.RandomState(seed)

    def act(self, action):
        # Sticky actions: with probability internal_action_repeat_prob,
        # ignore the requested action and repeat the previous one.
        actual_action = action
        if self.internal_action_repeat_prob > 0:
            if self.rng.uniform(0, 1) < self.internal_action_repeat_prob:
                actual_action = self.prev_action
        self.prev_action = actual_action
        return self.ale.act(actual_action)
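# Usage sketch (assumed, not from the original source): because the wrapper
# draws stickiness from its own copied RNG, resetting the action seed replays
# the same sticky-action sequence, which is useful for reproducibility tests.
# 'breakout.bin' is an illustrative ROM path.
rng = np.random.RandomState(123)
ale = ALEInterfaceWrapper(repeat_action_probability=0.25, rng=rng)
ale.loadROM('breakout.bin')
actions = ale.getMinimalActionSet()
first_run = [ale.act(actions[1]) for _ in range(10)]
ale.reset_game()
ale.reset_action_seed()  # restores the copied RNG state
second_run = [ale.act(actions[1]) for _ in range(10)]
assert first_run == second_run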
class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in xrange(self.steps_between_actions))
        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        # Breakout starts with 5 lives, so any life lost ends the episode.
        return self.ale.game_over() or self.ale.lives() < 5
class game(object):
    def __init__(self, display):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = display
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

    def act(self, action):
        return self.ale.act(action)

    def getState(self):
        return get_feature(self.ale.getScreen())

    def getScreen(self):
        return self.ale.getScreen()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def game_over(self):
        return self.ale.game_over()
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.ale = ALEInterface()
        # Forward the requested repeat_action_probability to ALE (the ALE
        # default is 0.25; pass 0.0 for fully deterministic actions).
        self.ale.setFloat('repeat_action_probability',
                          repeat_action_probability)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def act(self, action):
        # Unlike the RNG-controlled wrapper above, stickiness here is left
        # entirely to ALE, so the action is passed through unchanged.
        return self.ale.act(action)
class ArcadeLearningEnvironment(Environment):
    """
    [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment)
    adapter (specification key: `ale`, `arcade_learning_environment`).

    May require:
    ```bash
    sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake

    git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git
    cd Arcade-Learning-Environment

    mkdir build && cd build
    cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON ..
    make -j 4
    cd ..

    pip3 install .
    ```

    Args:
        level (string): ALE rom file
            (<span style="color:#C00000"><b>required</b></span>).
        life_loss_terminal (bool): Signals a terminal state on loss of life
            (<span style="color:#00C000"><b>default</b></span>: false).
        life_loss_punishment (float): Amount subtracted from the reward on
            loss of life
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        repeat_action_probability (float): Repeats last action with given
            probability
            (<span style="color:#00C000"><b>default</b></span>: 0.0).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without
            observing
            (<span style="color:#00C000"><b>default</b></span>: 1).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(self, level, life_loss_terminal=False,
                 life_loss_punishment=0.0, repeat_action_probability=0.0,
                 visualize=False, frame_skip=1, seed=None):
        from ale_python_interface import ALEInterface

        self.environment = ALEInterface()
        self.rom_file = level

        self.life_loss_terminal = life_loss_terminal
        self.life_loss_punishment = life_loss_punishment

        self.environment.setFloat(b'repeat_action_probability',
                                  repeat_action_probability)
        self.environment.setBool(b'display_screen', visualize)
        self.environment.setInt(b'frame_skip', frame_skip)
        if seed is not None:
            self.environment.setInt(b'random_seed', seed)

        # All set commands must be done before loading the ROM.
        self.environment.loadROM(rom_file=self.rom_file.encode())
        self.available_actions = tuple(self.environment.getLegalActionSet())

        # Full list of actions:
        # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right,
        # Down Left, Up Fire, Right Fire, Left Fire, Down Fire, Up Right Fire,
        # Up Left Fire, Down Right Fire, Down Left Fire

    def __str__(self):
        return super().__str__() + '({})'.format(self.rom_file)

    def states(self):
        width, height = self.environment.getScreenDims()
        return dict(type='float', shape=(height, width, 3))

    def actions(self):
        return dict(type='int', num_values=len(self.available_actions))

    def close(self):
        self.environment.__del__()
        self.environment = None

    def get_states(self):
        screen = np.copy(self.environment.getScreenRGB(screen_data=self.screen))
        screen = screen.astype(dtype=np.float32) / 255.0
        return screen

    def reset(self):
        self.environment.reset_game()
        width, height = self.environment.getScreenDims()
        self.screen = np.empty((height, width, 3), dtype=np.uint8)
        self.lives = self.environment.lives()
        return self.get_states()

    def execute(self, actions):
        reward = self.environment.act(action=self.available_actions[actions])
        terminal = self.environment.game_over()
        states = self.get_states()
        next_lives = self.environment.lives()
        if next_lives < self.lives:
            if self.life_loss_terminal:
                terminal = True
            elif self.life_loss_punishment > 0.0:
                reward -= self.life_loss_punishment
            self.lives = next_lives
        return states, terminal, reward
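# Usage sketch (assumed): the adapter follows a reset/execute protocol, so a
# random rollout looks like the following. 'breakout.bin' is illustrative.
env = ArcadeLearningEnvironment(level='breakout.bin', life_loss_terminal=True)
states = env.reset()
terminal, episode_reward = False, 0.0
while not terminal:
    action = np.random.randint(len(env.available_actions))
    states, terminal, reward = env.execute(action)
    episode_reward += reward
env.close()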
class AtariEnvironment:
    def __init__(self, args, outputDir):
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = 0
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Detect end of episode. I don't think I'm handling this right in
            # terms of the overall game loop (??)
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = 1
                break

            if self.gameNumber % self.screenCaptureFrequency == 0:
                dir = self.outputDir + '/screen_cap/game-%06d' % self.gameNumber
                if not os.path.isdir(dir):
                    os.makedirs(dir)
                self.ale.saveScreenPNG(dir + '/frame-%06d.png' %
                                       self.getEpisodeFrameNumber())

        # Max-pool over the last two frames to remove sprite flicker.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        # Environment steps vs ALE frames: frame number will probably be
        # 4x the step number because of the action repeat above.
        self.episodeStepNumber = 0
class Emulator:
    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed,
                 single_life_episodes=False):
        self.ale = ALEInterface()

        self.ale.setInt("random_seed", rseed * (actor_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat("repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt("frame_skip", 1)
        self.ale.setBool("color_averaging", False)
        self.ale.loadROM(rom_path + "/" + rom_name + ".bin")
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        #self.ale.setBool('display_screen', True)

        # Processed historical frames that will be fed in to the network
        # (i.e., four 84x84 images)
        self.screen_images_processed = np.zeros(
            (IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))
        self.rgb_screen = np.zeros(
            (self.screen_height, self.screen_width, 3), dtype=np.uint8)
        self.gray_screen = np.zeros(
            (self.screen_height, self.screen_width, 1), dtype=np.uint8)

        self.frame_pool = np.empty((2, self.screen_height, self.screen_width))
        self.current = 0
        self.lives = self.ale.lives()

        self.visualize = visualize
        self.visualize_processed = False
        self.windowname = rom_name + ' ' + str(actor_id)
        if self.visualize:
            logger.debug("Opening emulator window...")
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)
            logger.debug("Emulator window opened")

        if self.visualize_processed:
            logger.debug("Opening processed frame window...")
            cv2.startWindowThread()
            logger.debug("Processed frame window opened")
            cv2.namedWindow(self.windowname + "_processed")

        self.single_life_episodes = single_life_episodes

    def get_screen_image(self):
        """ Add screen (luminance) to frame pool """
        self.ale.getScreenGrayscale(self.gray_screen)
        self.ale.getScreenRGB(self.rgb_screen)
        self.frame_pool[self.current] = np.squeeze(self.gray_screen)
        self.current = (self.current + 1) % FRAMES_IN_POOL
        return self.rgb_screen

    def new_game(self):
        """ Restart game """
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if MAX_START_WAIT < 0:
            logger.debug("Cannot time travel yet.")
            sys.exit()
        elif MAX_START_WAIT > 0:
            wait = random.randint(0, MAX_START_WAIT)
        else:
            wait = 0
        for _ in xrange(wait):
            self.ale.act(self.legal_actions[0])

    def process_frame_pool(self):
        """ Preprocess frame pool """
        img = None
        if BLEND_METHOD == "max_pool":
            img = np.amax(self.frame_pool, axis=0)
        img = cv2.resize(img[:210, :], (84, 84),
                         interpolation=cv2.INTER_LINEAR)
        img = img.astype(np.float32)
        img *= (1.0 / 255.0)
        return img

    def action_repeat(self, a):
        """ Repeat action and grab screen into frame pool """
        reward = 0
        for i in xrange(ACTION_REPEAT):
            reward += self.ale.act(self.legal_actions[a])
            new_screen_image_rgb = self.get_screen_image()
        return reward, new_screen_image_rgb

    def get_reshaped_state(self, state):
        return np.reshape(state, (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES))

    def get_initial_state(self):
        """ Get the initial state """
        global MAX_START_WAIT  # reassigned below when the start wait kills us
        self.new_game()
        for step in xrange(NR_IMAGES):
            reward, new_screen_image_rgb = self.action_repeat(0)
            self.screen_images_processed[:, :, step] = \
                self.process_frame_pool()
            self.show_screen(new_screen_image_rgb)
        if self.is_terminal():
            MAX_START_WAIT -= 1
            return self.get_initial_state()
        return np.copy(self.screen_images_processed)

    def next(self, action):
        """ Get the next state, reward, and game over signal """
        reward, new_screen_image_rgb = self.action_repeat(np.argmax(action))
        self.screen_images_processed[:, :, 0:3] = \
            self.screen_images_processed[:, :, 1:4]
        self.screen_images_processed[:, :, 3] = self.process_frame_pool()
        self.show_screen(new_screen_image_rgb)
        terminal = self.is_terminal()
        self.lives = self.ale.lives()
        return np.copy(self.screen_images_processed), reward, terminal

    def show_screen(self, image):
        """ Show visuals for raw and processed images """
        if self.visualize:
            cv2.imshow(self.windowname, image[:210, :])
        if self.visualize_processed:
            cv2.imshow(self.windowname + "_processed",
                       self.screen_images_processed[:, :, 3])

    def is_terminal(self):
        if self.single_life_episodes:
            return self.is_over() or (self.lives > self.ale.lives())
        else:
            return self.is_over()

    def is_over(self):
        return self.ale.game_over()
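# Usage sketch (assumed): module-level constants such as IMG_SIZE_X/Y,
# NR_IMAGES, FRAMES_IN_POOL, ACTION_REPEAT, BLEND_METHOD and MAX_START_WAIT
# must exist before the class is used, e.g.:
IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES = 84, 84, 4
FRAMES_IN_POOL, ACTION_REPEAT = 2, 4
BLEND_METHOD, MAX_START_WAIT = "max_pool", 30

emulator = Emulator('../roms', 'pong', visualize=False, actor_id=0, rseed=7)
state = emulator.get_initial_state()  # (84, 84, 4) stacked frames
one_hot = np.eye(len(emulator.legal_actions))[0]  # next() expects a one-hot action
state, reward, terminal = emulator.next(one_hot)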
class AtariEmulator:
    def __init__(self, dims, history_length):
        ''' Initialize Atari environment '''
        # Parameters
        self.buffer_length = 2  # args.buffer_length
        self.screen_dims = dims
        self.frame_skip = 4  # args.frame_skip
        self.max_start_wait = 30  # args.max_start_wait
        self.history_length = history_length  # args.history_length
        self.start_frames_needed = self.buffer_length - 1 + \
            ((self.history_length - 1) * self.frame_skip)

        # Initialize ALE instance
        self.ale = ALEInterface()
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        # if args.watch:
        #     self.ale.setBool(b'sound', True)
        #     self.ale.setBool(b'display_screen', True)
        self.ale.loadROM(str.encode('../roms/pong.bin'))

        self.buffer = np.empty((self.buffer_length, 210, 160))
        self.current = 0
        self.action_set = self.ale.getMinimalActionSet()
        self.lives = self.ale.lives()
        self.reset()

    def get_possible_actions(self):
        ''' Return list of possible actions for game '''
        return self.action_set

    def get_screen(self):
        ''' Add screen to frame buffer '''
        self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale())
        self.current = (self.current + 1) % self.buffer_length

    def reset(self):
        self.ale.reset_game()
        self.lives = self.ale.lives()

        if self.max_start_wait < 0:
            print("ERROR: max start wait decreased beyond 0")
            sys.exit()
        elif self.max_start_wait <= self.start_frames_needed:
            wait = 0
        else:
            wait = random.randint(
                0, self.max_start_wait - self.start_frames_needed)
        for _ in range(wait):
            self.ale.act(self.action_set[0])

        # Fill frame buffer (grab one screen first so every slot holds a
        # real frame before preprocess() max-pools over the buffer)
        self.get_screen()
        for _ in range(self.buffer_length - 1):
            self.ale.act(self.action_set[0])
            self.get_screen()

        # get initial_states
        frame = self.preprocess()
        next_frame = frame  # in case history_length == 1
        state = [(frame, 0, 0, False)]
        for step in range(self.history_length - 1):
            next_frame, reward, terminal, _ = self.run_step(0)
            state.append((frame, 0, reward, terminal))
            frame = next_frame

        # make sure agent hasn't died yet
        if self.isTerminal():
            print("Agent lost during start wait. "
                  "Decreasing max_start_wait by 1")
            self.max_start_wait -= 1
            return self.reset()

        return state, next_frame

    def run_step(self, action):
        ''' Apply action to game and return next screen and reward '''
        raw_reward = 0
        for step in range(self.frame_skip):
            raw_reward += self.ale.act(self.action_set[action])
            self.get_screen()
        reward = np.clip(raw_reward, -1, 1)
        terminal = self.isTerminal()
        next_frame = self.preprocess()
        return (next_frame, reward, terminal, raw_reward)

    def preprocess(self):
        ''' Preprocess frame for agent '''
        img = np.amax(self.buffer, axis=0)
        return cv2.resize(img, self.screen_dims,
                          interpolation=cv2.INTER_LINEAR)

    def isTerminal(self):
        t = self.ale.game_over() or (self.lives > self.ale.lives())
        if t:
            self.lives = self.ale.lives()
        return t
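# Usage sketch (assumed): dims is the (width, height) passed to cv2.resize,
# and the emulator hardcodes the Pong ROM path above.
emulator = AtariEmulator(dims=(84, 84), history_length=4)
state, frame = emulator.reset()
for _ in range(100):
    frame, reward, terminal, raw_reward = emulator.run_step(
        random.randrange(len(emulator.get_possible_actions())))
    if terminal:
        state, frame = emulator.reset()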
class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s" % args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height
        self.mode = 'train'  # must be set before restart()/isTerminal() use it
        self.life_lost = False

    def numActions(self):
        return len(self.actions)

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the
        # game is in a terminal state due to a life loss but not yet game
        # over, then only the life loss flag is reset so that the next game
        # starts from the current state. Otherwise, the game is simply
        # initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
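# Usage sketch (assumed): an argparse-style namespace covering the fields the
# constructor reads; values and the ROM path are illustrative.
import random
from argparse import Namespace

args = Namespace(display_screen=False, frame_skip=4,
                 repeat_action_probability=0.0, color_averaging=True,
                 random_seed=123, record_screen_path=None,
                 record_sound_filename=None, minimal_action_set=True,
                 screen_width=84, screen_height=84)
env = ALEEnvironment('breakout.bin', args)
env.restart()
while not env.isTerminal():
    env.act(random.randrange(env.numActions()))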
class ALEEnvironment():
    def __init__(self, rom_file, args):
        self.ale = ALEInterface()
        self.histLen = 4

        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        #self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d" %
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d" %
                        len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height

        self.mode = "train"
        self.life_lost = False
        self.initScreen = self.getScreen()

        # Goal regions are [[x0, y0], [x1, y1]] boxes on the resized screen
        # (Montezuma's Revenge subgoals).
        self.goalSet = []
        self.goalSet.append([[70, 65], [74, 71]])  # lower right ladder 4
        self.goalSet.append([[11, 58], [15, 66]])  # lower left ladder 3
        self.goalSet.append([[7, 41], [11, 45]])   # key 5

        self.goalCenterLoc = []
        for goal in self.goalSet:
            goalCenter = [float(goal[0][0] + goal[1][0]) / 2,
                          float(goal[0][1] + goal[1][1]) / 2]
            self.goalCenterLoc.append(goalCenter)

        self.agentOriginLoc = [42, 33]
        self.agentLastX = 42
        self.agentLastY = 33
        self.reachedGoal = [0, 0, 0]
        self.histState = self.initializeHistState()

    def initializeHistState(self):
        histState = np.concatenate((self.getState(), self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        histState = np.concatenate((histState, self.getState()), axis=2)
        return histState

    def numActions(self):
        return len(self.actions)

    def resetGoalReach(self):
        self.reachedGoal = [0, 0, 0]

    def restart(self):
        # In test mode, the game is simply initialized. In train mode, if the
        # game is in a terminal state due to a life loss but not yet game
        # over, then only the life loss flag is reset so that the next game
        # starts from the current state. Otherwise, the game is simply
        # initialized.
        if (self.mode == 'test'
                or not self.life_lost  # `reset` called in the middle of an episode
                or self.ale.game_over()):  # all lives are lost
            self.ale.reset_game()
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def beginNextLife(self):
        self.life_lost = False
        self.reachedGoal = [0, 0, 0]
        for i in range(19):
            self.act(0)  # wait for initialization
        self.histState = self.initializeHistState()
        self.agentLastX = self.agentOriginLoc[0]
        self.agentLastY = self.agentOriginLoc[1]

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (not lives == self.ale.lives())
        currState = self.getState()
        self.histState = np.concatenate(
            (self.histState[:, :, 1:], currState), axis=2)
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getScreenRGB(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def getAgentLoc(self):
        # The agent sprite is (R, G, B) = (200, 72, 72); its mean pixel
        # position is used as the agent location.
        img = self.getScreenRGB()
        man = [200, 72, 72]
        mask = np.zeros(np.shape(img))
        mask[:, :, 0] = man[0]
        mask[:, :, 1] = man[1]
        mask[:, :, 2] = man[2]

        diff = img - mask
        indxs = np.where(diff == 0)
        diff[np.where(diff < 0)] = 0
        diff[np.where(diff > 0)] = 0
        diff[indxs] = 255
        if np.shape(indxs[0])[0] == 0:
            mean_x = self.agentLastX
            mean_y = self.agentLastY
        else:
            mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0]
            mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0]
        self.agentLastX = mean_x
        self.agentLastY = mean_y
        return (mean_x, mean_y)

    def distanceReward(self, lastGoal, goal):
        # Dense shaping reward: progress toward the current goal, normalized
        # by the distance between the previous and current goal centers.
        if lastGoal == -1:
            lastGoalCenter = self.agentOriginLoc
        else:
            lastGoalCenter = self.goalCenterLoc[lastGoal]
        goalCenter = self.goalCenterLoc[goal]
        agentX, agentY = self.getAgentLoc()
        dis = np.sqrt((goalCenter[0] - agentX) ** 2 +
                      (goalCenter[1] - agentY) ** 2)
        disLast = np.sqrt((lastGoalCenter[0] - agentX) ** 2 +
                          (lastGoalCenter[1] - agentY) ** 2)
        disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) ** 2 +
                           (goalCenter[1] - lastGoalCenter[1]) ** 2)
        return 0.001 * (disLast - dis) / disGoals

    # add color channel for input of network
    def getState(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return np.reshape(resized, (84, 84, 1))

    def getStackedState(self):
        return self.histState

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()

    def isGameOver(self):
        return self.ale.game_over()

    def isLifeLost(self):
        return self.life_lost

    def reset(self):
        self.ale.reset_game()
        self.life_lost = False

    def goalReached(self, goal):
        goalPosition = self.goalSet[goal]
        goalScreen = self.initScreen
        stateScreen = self.getScreen()
        count = 0
        for y in range(goalPosition[0][0], goalPosition[1][0]):
            for x in range(goalPosition[0][1], goalPosition[1][1]):
                if goalScreen[x][y] != stateScreen[x][y]:
                    count = count + 1
        # 30 is the total number of pixels of the agent sprite
        if float(count) / 30 > 0.3:
            self.reachedGoal[goal] = 1
            return True
        return False

    def goalNotReachedBefore(self, goal):
        if self.reachedGoal[goal] == 1:
            return False
        return True
class GameEnvironment:
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setBool('display_screen', settings['DISPLAY_SCREEN'])
        self.ale.setBool('sound', settings['SOUND'])
        self.ale.setBool('color_averaging', settings['COLOR_AVERAGING'])
        self.ale.setInt('random_seed', settings['RANDOM_SEED'])
        self.ale.setInt('frame_skip', settings['FRAME_SKIP'])
        self.ale.setFloat('repeat_action_probability',
                          settings['REPEAT_ACTION_PROB'])

        roms_dir = settings['ROMS_DIR']
        rom_name = settings['ROM_NAME']
        if rom_name.endswith('.bin'):
            self.name = rom_name[:-4]
            ROM = rom_name
        else:
            self.name = rom_name
            ROM = rom_name + '.bin'
        self.ale.loadROM(os.path.join(roms_dir, ROM))

        self.random_starts = settings['RANDOM_STARTS']
        self.rng = settings['RNG']

        if settings['MINIMAL_ACTION_SET']:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.n_actions = len(self.actions)
        self.width, self.height = self.ale.getScreenDims()
        self.observation = np.zeros((self.height, self.width), dtype='uint8')
        self.reward = None
        self.game_over = None
        self.terminal = None
        self.total_lives = None
        self.init()

    def init(self):
        self.restartGame()
        self.reward = 0
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        self.total_lives = self.lives()
        self.step(0)

    def getState(self):
        return self.observation, self.reward, self.terminal, self.game_over

    def step(self, action, training=False):
        self.reward = self.act(action)
        self.paint()
        lives = self.lives()
        self.game_over = self.gameOver()
        self.terminal = self.game_over
        # In training mode, losing a life ends the (pseudo-)episode.
        if training and (lives < self.total_lives):
            self.terminal = True
        self.total_lives = lives
        return self.getState()

    def newGame(self):
        self.init()
        for i in xrange(self.rng.randint(1, self.random_starts)):
            self.act(0)
            terminal = self.gameOver()
            if terminal:
                print "Warning: terminal state during random init"
        return self.step(0)

    def newTestGame(self):
        self.init()
        return self.getState()

    def paint(self):
        self.ale.getScreenGrayscale(self.observation)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def act(self, action):
        assert (action >= 0) and (action < self.n_actions)
        return self.ale.act(self.actions[action])

    def lives(self):
        return self.ale.lives()

    def restartGame(self):
        self.ale.reset_game()

    def gameOver(self):
        return self.ale.game_over()
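# Usage sketch (assumed): the constructor expects a flat settings dict; every
# value below is illustrative.
settings = {
    'DISPLAY_SCREEN': False, 'SOUND': False, 'COLOR_AVERAGING': True,
    'RANDOM_SEED': 123, 'FRAME_SKIP': 4, 'REPEAT_ACTION_PROB': 0.0,
    'ROMS_DIR': './roms', 'ROM_NAME': 'breakout', 'RANDOM_STARTS': 30,
    'RNG': np.random.RandomState(123), 'MINIMAL_ACTION_SET': True,
}
env = GameEnvironment(settings)
obs, reward, terminal, game_over = env.newGame()
while not terminal:
    obs, reward, terminal, game_over = env.step(
        env.rng.randint(env.n_actions), training=True)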
class ALEEnvironment(BaseEnvironment):
    """
    A wrapper of Arcade Learning Environment, which inherits all members of
    ``BaseEnvironment``.
    """
    # 63 games
    ADVENTURE = "adventure"
    AIR_RAID = "air_raid"
    ALIEN = "alien"
    AMIDAR = "amidar"
    ASSAULT = "assault"
    ASTERIX = "asterix"
    ASTEROIDS = "asteroids"
    ATLANTIS = "atlantis"
    BANK_HEIST = "bank_heist"
    BATTLE_ZONE = "battle_zone"
    BEAM_RIDER = "beam_rider"
    BERZERK = "berzerk"
    BOWLING = "bowling"
    BOXING = "boxing"
    BREAKOUT = "breakout"
    CARNIVAL = "carnival"
    CENTIPEDE = "centipede"
    CHOPPER_COMMAND = "chopper_command"
    CRAZY_CLIMBER = "crazy_climber"
    DEFENDER = "defender"
    DEMON_ATTACK = "demon_attack"
    DOUBLE_DUNK = "double_dunk"
    ELEVATOR_ACTION = "elevator_action"
    ENDURO = "enduro"
    FISHING_DERBY = "fishing_derby"
    FREEWAY = "freeway"
    FROSTBITE = "frostbite"
    GOPHER = "gopher"
    GRAVITAR = "gravitar"
    HERO = "hero"
    ICE_HOCKEY = "ice_hockey"
    JAMESBOND = "jamesbond"
    JOURNEY_ESCAPE = "journey_escape"
    KABOOM = "kaboom"
    KANGAROO = "kangaroo"
    KRULL = "krull"
    KUNGFU_MASTER = "kung_fu_master"
    MONTEZUMA = "montezuma_revenge"
    MS_PACMAN = "ms_pacman"
    NAME_THIS_GAME = "name_this_game"
    PHOENIX = "phoenix"
    PITFALL = "pitfall"
    PONG = "pong"
    POOYAN = "pooyan"
    PRIVATE_EYE = "private_eye"
    QBERT = "qbert"
    RIVERRAID = "riverraid"
    ROAD_RUNNER = "road_runner"
    ROBOTANK = "robotank"
    SEAQUEST = "seaquest"
    SKIING = "skiing"
    SOLARIS = "solaris"
    SPACE_INVADERS = "space_invaders"
    STAR_GUNNER = "star_gunner"
    TENNIS = "tennis"
    TIME_PILOT = "time_pilot"
    TUTANKHAM = "tutankham"
    UP_N_DOWN = "up_n_down"
    VENTURE = "venture"
    VIDEO_PINBALL = "video_pinball"
    WIZARD_OF_WOR = "wizard_of_wor"
    YARS_REVENGE = "yars_revenge"
    ZAXXON = "zaxxon"

    def __init__(self, rom_name, frame_skip=4, repeat_action_probability=0.,
                 max_episode_steps=10000, loss_of_life_termination=False,
                 loss_of_life_negative_reward=False,
                 bitwise_max_on_two_consecutive_frames=False, is_render=False,
                 seed=None, startup_policy=None, disable_actions=None,
                 num_of_sub_actions=-1,
                 state_processor=AtariProcessor(resize_shape=(84, 84),
                                                convert_to_grayscale=True)):
        os.environ['SDL_VIDEO_CENTERED'] = '1'

        file_exist = isfile(ALEEnvironment.get_rom_path(rom_name))
        if not file_exist:
            raise ValueError("Rom not found ! Please put rom " + rom_name +
                             ".bin into: " + ALEEnvironment.get_rom_path())

        self.__rom_name = rom_name
        self.__ale = ALEInterface()

        if frame_skip < 0:
            print("Invalid frame_skip param ! Set default frame_skip = 4")
            self.__frame_skip = 4
        else:
            self.__frame_skip = frame_skip

        if repeat_action_probability < 0 or repeat_action_probability > 1:
            raise ValueError("Invalid repeat_action_probability")
        else:
            self.__repeat_action_probability = repeat_action_probability

        self.__max_episode_steps = max_episode_steps
        self.__loss_of_life_termination = loss_of_life_termination
        self.__loss_of_life_negative_reward = loss_of_life_negative_reward
        self.__max_2_frames = bitwise_max_on_two_consecutive_frames

        # Max 2 frames only works with grayscale
        self.__grayscale = False
        if state_processor is not None and \
                type(state_processor) is AtariProcessor and \
                state_processor.get_grayscale():
            self.__grayscale = True

        if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale:
            self.__max_2_frames = True
        else:
            self.__max_2_frames = False

        self.__is_render = is_render
        self.__processor = state_processor

        if seed is None or seed <= 0 or seed >= 9999:
            if seed is not None and (seed < 0 or seed >= 9999):
                print("Invalid seed ! Default seed = randint(0, 9999)")
            self.__seed = np.random.randint(0, 9999)
            self.__random_seed = True
        else:
            self.__random_seed = False
            self.__seed = seed

        self.__current_steps = 0
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_lives = 0
        self.__action_reduction = num_of_sub_actions
        self.__scr_width, self.__scr_height, self.__action_set = \
            self.__init_ale()
        self.__prev_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_buffer = np.empty(
            (self.__scr_height, self.__scr_width, 3), dtype=np.uint8)
        self.__current_state = None
        self.__prev_state = None
        self.__startup_policy = startup_policy
        if disable_actions is None:
            self.__dis_act = []
        else:
            self.__dis_act = disable_actions

        if self.__processor is not None and \
                self.__processor.get_number_of_objectives() > 1:
            self.__multi_objs = True
        else:
            self.__multi_objs = False

    def get_processor(self):
        return self.__processor

    def __init_ale(self):
        self.__ale.setBool(b'display_screen', self.__is_render)

        if self.__max_2_frames and self.__frame_skip > 1:
            self.__ale.setInt(b'frame_skip', 1)
        else:
            self.__ale.setInt(b'frame_skip', self.__frame_skip)

        self.__ale.setInt(b'random_seed', self.__seed)
        self.__ale.setFloat(b'repeat_action_probability',
                            self.__repeat_action_probability)
        self.__ale.setBool(b'color_averaging', False)
        self.__ale.loadROM(
            ALEEnvironment.get_rom_path(self.__rom_name).encode())
        width, height = self.__ale.getScreenDims()
        return width, height, self.__ale.getMinimalActionSet()

    def clone(self):
        if self.__random_seed:
            seed = np.random.randint(0, 9999)
        else:
            seed = self.__seed
        return ALEEnvironment(self.__rom_name, self.__frame_skip,
                              self.__repeat_action_probability,
                              self.__max_episode_steps,
                              self.__loss_of_life_termination,
                              self.__loss_of_life_negative_reward,
                              self.__max_2_frames, self.__is_render, seed,
                              self.__startup_policy, self.__dis_act,
                              self.__action_reduction,
                              self.__processor.clone())

    def step_all(self, a):
        if isinstance(a, (list, np.ndarray)):
            if len(a) <= 0:
                raise ValueError('Empty action list !')
            a = a[0]
        self.__current_steps += 1
        act = self.__action_set[a]
        rew = self._step(act)
        next_state = self.get_state()
        _is_terminal = self.is_terminal()
        return next_state, rew, _is_terminal, self.__current_steps

    def reset(self):
        self.__ale.reset_game()
        self.__current_lives = self.__ale.lives()
        self.__is_life_lost = False
        self.__is_terminal = False
        self.__current_state = None
        self.__prev_state = None

        action_space = self.get_action_space()
        v_range, is_range = action_space.get_range()
        if len(v_range) > 1:
            self.step(1)

        # No-op steps under the startup policy
        if self.__startup_policy is not None:
            max_steps = int(self.__startup_policy.get_max_steps())
            for _ in range(max_steps):
                act = self.__startup_policy.step(self.get_state(),
                                                 action_space)
                self.step(act)

        # Start training from this point
        self.__current_steps = 0

        # Reset processor
        if self.__processor is not None:
            self.__processor.reset()

        return self.get_state()

    def _pre_step(self, act):
        if self.__max_2_frames and self.__frame_skip > 1:
            rew = 0
            for i in range(self.__frame_skip - 2):
                rew += self.__ale.act(act)
            self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer)

            rew += self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()

            if self.__processor is not None:
                self.__prev_state = self.__processor.process(
                    self.__prev_buffer)
                self.__current_state = self.__processor.process(
                    self.__current_buffer)
            else:
                self.__prev_state = self.__prev_buffer
                self.__current_state = self.__current_buffer
            # Bitwise max over the last two (processed) frames
            self.__current_state = np.maximum.reduce(
                [self.__prev_state, self.__current_state])
        else:
            rew = self.__ale.act(act)
            self.__current_buffer = self.__ale.getScreenRGB(
                self.__current_buffer)
            self.__is_terminal = self.__ale.game_over()
            if self.__processor is not None:
                self.__current_state = self.__processor.process(
                    self.__current_buffer)

        if self.__multi_objs and self.__processor is not None:
            all_rewards = self.__processor.get_rewards(rew)
            return all_rewards
        else:
            return rew

    def _step(self, act):
        # Disabled actions are mapped to no-op
        for i in range(len(self.__dis_act)):
            if act == self.__dis_act[i]:
                act = 0

        if not self.__loss_of_life_termination and \
                not self.__loss_of_life_negative_reward:
            if not self.__is_terminal:
                next_lives = self.__ale.lives()
                if next_lives < self.__current_lives:
                    act = 1
                    self.__current_lives = next_lives
            return self._pre_step(act)
        else:
            rew = self._pre_step(act)
            next_lives = self.__ale.lives()
            if next_lives < self.__current_lives:
                if self.__loss_of_life_negative_reward:
                    rew -= 1
                self.__current_lives = next_lives
                self.__is_life_lost = True
            return rew

    def get_state(self):
        if not self.__max_2_frames:
            if self.__processor is not None:
                return self.__current_state
            else:
                return self.__current_buffer
        else:
            return self.__current_state

    def is_terminal(self):
        if self.__loss_of_life_termination and self.__is_life_lost:
            return True
        elif self.__max_episode_steps is not None and \
                self.__current_steps > self.__max_episode_steps:
            return True
        else:
            return self.__is_terminal

    @staticmethod
    def get_rom_path(rom=None):
        if rom is None:
            return os.path.dirname(os.path.abspath(__file__)) + "/roms/"
        else:
            return os.path.dirname(
                os.path.abspath(__file__)) + "/roms/" + rom + ".bin"

    @staticmethod
    def list_all_roms():
        return [f for f in listdir(ALEEnvironment.get_rom_path())
                if isfile(join(ALEEnvironment.get_rom_path(), f))]

    def get_state_space(self):
        if self.__processor is None:
            shape = self.__current_buffer.shape
        else:
            shape = self.__processor.process(self.__current_buffer).shape
        min_value = np.zeros(shape, dtype=np.uint8)
        max_value = np.full(shape, 255)
        return Space(min_value, max_value, True)

    def get_action_space(self):
        if self.__action_reduction >= 1:
            return Space(0, self.__action_reduction - 1, True)
        else:
            return Space(0, len(self.__action_set) - 1, True)

    def step(self, act):
        if isinstance(act, (list, np.ndarray)):
            if len(act) <= 0:
                raise ValueError('Empty action list !')
            act = act[0]
        self.__current_steps += 1
        act = self.__action_set[act]
        rew = self._step(act)
        return rew

    def get_current_steps(self):
        return self.__current_steps

    def is_atari(self):
        return True

    def is_render(self):
        return self.__is_render

    def get_number_of_objectives(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_objectives()

    def get_number_of_agents(self):
        if self.__processor is None:
            return 1
        else:
            return self.__processor.get_number_of_agents()

    def get_state_processor(self):
        return self.__processor
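# Usage sketch (assumed): step_all() follows a (state, reward, terminal,
# steps) contract. The Space.get_range() semantics below are an assumption
# inferred from reset(); the ROM must be present in the wrapper's roms folder.
env = ALEEnvironment(ALEEnvironment.BREAKOUT, loss_of_life_termination=True)
state = env.reset()
terminal = False
while not terminal:
    v_range, _ = env.get_action_space().get_range()
    state, reward, terminal, steps = env.step_all(np.random.choice(v_range))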
def train_agent(gamepath, agent, n_episodes, display_screen, record_weights,
                reduce_exploration_prob_amount, n_frames_to_skip):
    """
    :description: trains an agent to play a game

    :type gamepath: string
    :param gamepath: path to the binary of the game to be played

    :type agent: subclass of RLAlgorithm
    :param agent: the algorithm/agent that learns to play the game

    :type n_episodes: int
    :param n_episodes: number of episodes of the game on which to train

    :type display_screen: bool
    :param display_screen: whether to render the game screen

    :type record_weights: bool
    :param record_weights: whether to periodically save weights and rewards

    :type reduce_exploration_prob_amount: float
    :param reduce_exploration_prob_amount: per-episode epsilon decrement

    :type n_frames_to_skip: int
    :param n_frames_to_skip: ALE frame skip
    """
    # load the ale interface to interact with
    ale = ALEInterface()
    ale.setInt('random_seed', 42)
    # all settings must be applied before loadROM
    ale.setInt("frame_skip", n_frames_to_skip)

    # display/recording settings, doesn't seem to work currently
    recordings_dir = './recordings/breakout/'
    # previously "USE_SDL"
    if display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            #ale.setString("record_screen_dir", recordings_dir)
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(gamepath)
    screen_preprocessor = screen_utils.RGBScreenPreprocessor()

    rewards = []
    best_reward = 0
    print('starting training...')
    for episode in xrange(n_episodes):
        action = 0
        reward = 0
        newAction = None
        total_reward = 0
        counter = 0
        lives = ale.lives()

        screen = np.zeros((32, 32, 3), dtype=np.int8)
        state = {"screen": screen,
                 "objects": None,
                 "prev_objects": None,
                 "prev_action": 0,
                 "action": 0}

        while not ale.game_over():
            # if newAction is None then we are training an off-policy
            # algorithm; otherwise, we are training an on-policy algorithm
            if newAction is None:
                action = agent.getAction(state)
            else:
                action = newAction

            reward += ale.act(action)
            if ale.lives() < lives:
                lives = ale.lives()
                reward -= 1
            total_reward += reward

            new_screen = ale.getScreenRGB()
            new_screen = screen_preprocessor.preprocess(new_screen)
            new_state = {"screen": new_screen,
                         "objects": None,
                         "prev_objects": state["objects"],
                         "prev_action": state["action"],
                         "action": action}
            newAction = agent.incorporateFeedback(state, action, reward,
                                                  new_state)
            state = new_state
            reward = 0

        rewards.append(total_reward)
        if total_reward > best_reward and record_weights:
            best_reward = total_reward
            print("Best reward: {}".format(total_reward))

        if episode % PRINT_TRAINING_INFO_PERIOD == 0:
            print '\n############################'
            print '### training information ###'
            print("Average reward: {}".format(np.mean(rewards)))
            print("Last 50: {}".format(
                np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:])))
            print("Exploration probability: {}".format(agent.explorationProb))
            print('action: {}'.format(action))
            print('size of weights dict: {}'.format(len(agent.weights)))
            print('current objects: {}'.format(state['objects']))
            print('previous objects: {}'.format(state['prev_objects']))
            avg_feat_weight = np.mean([v for k, v in
                                       agent.weights.iteritems()])
            print('average feature weight: {}'.format(avg_feat_weight))
            print '############################'
            print '############################\n'

        if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 \
                and record_weights:
            file_utils.save_rewards(
                rewards,
                filename='episode-{}-{}-rewards'.format(
                    episode, type(agent).__name__))
            file_utils.save_weights(
                agent.weights,
                filename='episode-{}-{}-weights'.format(
                    episode, type(agent).__name__))

        if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON:
            agent.explorationProb -= reduce_exploration_prob_amount

        print('episode: {} ended with score: {}'.format(episode,
                                                        total_reward))
        ale.reset_game()
    return rewards
global_episode = 0
logging = True
t = time.time()
num_episodes = 100000
initial_episode = global_episode
sess.run(sync_DQNT_op)

for episode in range(global_episode, num_episodes + global_episode):
    global state  # `state` is shared with preprocess() at module level
    state = np.zeros((1, 84, 84, config.buff_size), dtype=np.uint8)
    state = preprocess(ale.getScreenGrayscale(), state)
    R = 0
    ep_begin_t = time.time()
    terminal = False
    pseudo_terminal = False
    lives = ale.lives()
    episode_begining_step = global_step
    while not terminal:
        action = e_greedy_action(get_epsilon(), state)
        reward = ale.act(action_map[action])
        clipped_reward = max(-1, min(1, reward))
        R += reward
        pseudo_terminal = False
        if ale.game_over():
            terminal = True
        # A lost life (or game over) marks an episode boundary for replay.
        if lives != ale.lives() or terminal:
            lives = ale.lives()
            pseudo_terminal = True
        RM.add(state[0, :, :, config.buff_size - 1], action, clipped_reward,
               pseudo_terminal)
        update_params()
        state = preprocess(ale.getScreenGrayscale(), state)
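# An assumed example of get_epsilon(), not the original implementation: a
# linear annealing schedule over global_step is a common choice for DQN-style
# training loops like the one above.
def get_epsilon(start=1.0, end=0.1, anneal_steps=1000000):
    # Linearly interpolate from `start` to `end` over `anneal_steps` steps.
    frac = min(float(global_step) / anneal_steps, 1.0)
    return start + frac * (end - start)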
frameCount = 0
frameCountLast = frameCount
terminal = 0
testFlag = False
ale.reset_game()
trainStart = False
trainThread = threading.Thread(target=train)
trainThread.start()
# for frameCount in xrange(maxFrame):
life0 = ale.lives()
# rate = explorationRate if not testFlag else testExplorationRate

# perceive: epsilon-greedy action selection
if np.random.rand(1) > explorationRate:
    actionIndex = forward(memory.History, sess, Q_train)
    # actionIndex = np.argmax(sess.run(Q_train.y,
    #     feed_dict={Q_train.x_image: [memory.History]}), axis=1)
else:
    actionIndex = np.random.randint(n_actions)  # get action

# act and observe
reward = ale.act(legal_actions[actionIndex])
observe = Scale(ale.getScreenGrayscale())  # ~0.08s
life1 = ale.lives()
if life1 < life0:
    reward += -1
class Environment:
    """docstring for Environment"""

    BUFFER_LEN = 2
    EPISODE_FRAMES = 18000
    EPOCH_COUNT = 200
    EPOCH_STEPS = 250000
    EVAL_EPS = 0.001
    FRAMES_SKIP = 4
    FRAME_HEIGHT = 84
    FRAME_WIDTH = 84
    MAX_NO_OP = 30
    MAX_REWARD = 1

    def __init__(self, rom_name, rng, display_screen=False):
        self.api = ALEInterface()
        self.api.setInt('random_seed', rng.randint(333))
        self.api.setBool('display_screen', display_screen)
        self.api.setFloat('repeat_action_probability', 0.0)
        self.rom_name = rom_name
        self.display_screen = display_screen
        self.rng = rng
        self.repeat = Environment.FRAMES_SKIP
        self.buffer_len = Environment.BUFFER_LEN
        self.height = Environment.FRAME_HEIGHT
        self.width = Environment.FRAME_WIDTH
        self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
        self.merge_id = 0
        self.max_reward = Environment.MAX_REWARD
        self.eval_eps = Environment.EVAL_EPS
        self.log_dir = ''
        self.network_dir = ''

        self.api.loadROM('../rom/' + self.rom_name)
        self.minimal_actions = self.api.getMinimalActionSet()
        original_width, original_height = self.api.getScreenDims()
        self.merge_frame = np.zeros((self.buffer_len,
                                     original_height,
                                     original_width), dtype=np.uint8)

    def get_action_count(self):
        return len(self.minimal_actions)

    def train(self, agent, store_freq, folder=None, start_epoch=0):
        self._open_log_files(agent, folder)
        obs = np.zeros((self.height, self.width), dtype=np.uint8)
        epoch_count = Environment.EPOCH_COUNT

        for epoch in xrange(start_epoch, epoch_count):
            self.need_reset = True
            steps_left = Environment.EPOCH_STEPS

            print "\n" + "=" * 50
            print "Epoch #%d" % (epoch + 1)
            episode = 0
            train_start = time.time()
            while steps_left > 0:
                num_step, _ = self._run_episode(agent, steps_left, obs)
                steps_left -= num_step
                episode += 1
                if steps_left == 0 or episode % 10 == 0:
                    print "Finished episode #%d, steps_left = %d" \
                        % (episode, steps_left)
            train_end = time.time()

            valid_values = agent.get_validate_values()
            eval_values = self.evaluate(agent)
            test_end = time.time()

            train_time = train_end - train_start
            test_time = test_end - train_end
            step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
            print "\tFinished epoch #%d, episode trained = %d\n" \
                "\tValidate values = %.3f, evaluate reward = %.3f\n" \
                "\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
                % (epoch + 1, episode, valid_values, eval_values,
                   train_time, test_time, step_per_sec)

            self._update_log_files(agent, epoch + 1, episode, valid_values,
                                   eval_values, train_time, test_time,
                                   step_per_sec, store_freq)
            gc.collect()

    def evaluate(self, agent, episodes=30, obs=None):
        print "\n***Start evaluating"
        if obs is None:
            obs = np.zeros((self.height, self.width), dtype=np.uint8)
        sum_reward = 0.0
        sum_step = 0.0
        for episode in xrange(episodes):
            self.need_reset = True
            step, reward = self._run_episode(agent, self.episode_steps, obs,
                                             self.eval_eps, evaluating=True)
            sum_reward += reward
            sum_step += step
            print "Finished episode %d, reward = %d, step = %d" \
                % (episode + 1, reward, step)
        self.need_reset = True
        print "Average reward per episode = %.4f" % (sum_reward / episodes)
        print "Average step per episode = %.4f" % (sum_step / episodes)
        return sum_reward / episodes

    def _prepare_game(self):
        if self.need_reset or self.api.game_over():
            self.api.reset_game()
            self.need_reset = False
            if Environment.MAX_NO_OP > 0:
                num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
                    + self.buffer_len
                for _ in xrange(num_no_op):
                    self.api.act(0)
        for _ in xrange(self.buffer_len):
            self._update_buffer()

    def _run_episode(self, agent, steps_left, obs, eps=0.0, evaluating=False):
        self._prepare_game()

        start_lives = self.api.lives()
        step_count = 0
        sum_reward = 0
        is_terminal = False
        while step_count < steps_left and not is_terminal:
            self._get_screen(obs)
            action_id, _ = agent.get_action(obs, eps, evaluating)

            reward = self._repeat_action(self.minimal_actions[action_id])
            reward_clip = reward
            if self.max_reward > 0:
                reward_clip = np.clip(reward, -self.max_reward,
                                      self.max_reward)

            life_lost = not evaluating and self.api.lives() < start_lives
            is_terminal = self.api.game_over() or life_lost \
                or step_count + 1 >= steps_left

            agent.add_experience(obs, is_terminal, action_id, reward_clip,
                                 evaluating)
            sum_reward += reward
            step_count += 1

        return step_count, sum_reward

    def _update_buffer(self):
        self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
        self.merge_id = (self.merge_id + 1) % self.buffer_len

    def _repeat_action(self, action):
        reward = 0
        for i in xrange(self.repeat):
            reward += self.api.act(action)
            if i + self.buffer_len >= self.repeat:
                self._update_buffer()
        return reward

    def _get_screen(self, resized_frame):
        self._resize_frame(self.merge_frame.max(axis=0), resized_frame)

    def _resize_frame(self, src_frame, dst_frame):
        cv2.resize(src=src_frame, dst=dst_frame,
                   dsize=(self.width, self.height),
                   interpolation=cv2.INTER_LINEAR)

    def _open_log_files(self, agent, folder):
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]

        if folder is not None:
            self.log_dir = folder
            self.network_dir = self.log_dir + '/network'
        else:
            self.log_dir = '../run_results/' + base_rom_name + time_str
            self.network_dir = self.log_dir + '/network'

        info_name = get_next_name(self.log_dir, 'info', 'txt')
        git_name = get_next_name(self.log_dir, 'git-diff', '')

        try:
            os.stat(self.log_dir)
        except OSError:
            os.makedirs(self.log_dir)

        try:
            os.stat(self.network_dir)
        except OSError:
            os.makedirs(self.network_dir)

        with open(os.path.join(self.log_dir, info_name), 'w') as f:
            f.write('Commit: ' + subprocess.check_output(
                ['git', 'rev-parse', 'HEAD']))
            f.write('Run command: ')
            f.write(' '.join(pipes.quote(x) for x in sys.argv))
            f.write('\n\n')
            f.write(agent.get_info())
            write_info(f, Environment)
            write_info(f, agent.__class__)
            write_info(f, agent.network.__class__)

        # From https://github.com/spragunr/deep_q_rl/pull/49/files
        with open(os.path.join(self.log_dir, git_name), 'w') as f:
            f.write(subprocess.check_output(['git', 'diff', 'HEAD']))

        if folder is not None:
            return

        with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
            f.write("epoch,episode_train,validate_values,evaluate_reward"
                    ",train_time,test_time,steps_per_second\n")

        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
            f.write("epoch,available,free,buffers,cached"
                    ",available_readable,used_percent\n")
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (0, mem.available, mem.free, mem.buffers, mem.cached,
                     bytes2human(mem.available), mem.percent))

    def _update_log_files(self, agent, epoch, episode, valid_values,
                          eval_values, train_time, test_time, step_per_sec,
                          store_freq):
        print "Updating log files"
        with open(self.log_dir + '/results.csv', 'a') as f:
            f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" %
                    (epoch, episode, valid_values, eval_values,
                     train_time, test_time, step_per_sec))

        mem = psutil.virtual_memory()
        with open(self.log_dir + '/memory.csv', 'a') as f:
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (epoch, mem.available, mem.free, mem.buffers, mem.cached,
                     bytes2human(mem.available), mem.percent))

        agent.dump_network(self.network_dir + ('/%03d' % epoch) + '.npz')

        if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
                (store_freq > 0 and (epoch % store_freq == 0)):
            agent.dump_exp(self.network_dir + '/exp.npz')

    def _setup_record(self, network_file):
        file_name, _ = os.path.splitext(os.path.basename(network_file))
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        img_dir = os.path.dirname(network_file) + '/images_' \
            + file_name + time_str
        rom_name, _ = os.path.splitext(self.rom_name)
        out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
            + file_name + time_str + '.mov'
        print out_name

        try:
            os.stat(img_dir)
        except OSError:
            os.makedirs(img_dir)

        self.api.setString('record_screen_dir', img_dir)
        self.api.loadROM('../rom/' + self.rom_name)
        return img_dir, out_name

    def record_run(self, agent, network_file, episode_id=1):
        if episode_id > 1:
            self.evaluate(agent, episode_id - 1)
            system_state = self.api.cloneSystemState()

        img_dir, out_name = self._setup_record(network_file)

        if episode_id > 1:
            self.api.restoreSystemState(system_state)

        self.evaluate(agent, 1)
        script = \
            """
            {
                ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
            } || {
                avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
            }
            """ % (img_dir, out_name, img_dir, out_name)
        os.system(script)
t0 = time.time()
scoreEpisode = 0.0
cost_average = 0.0
frameCount = 0
frameCountLast = frameCount
terminal = 1
t0s = t1s = t2s = t3s = t4s = t5s = t6s = t7s = 0
ale.reset_game()
for frameCount in xrange(maxFrame):
    # t00 = time.time()
    lives = ale.lives()
    observe = Scale(ale.getScreen())  # ~0.08s
    # t1 = time.time()
    # t1s += t1 - t00
    if terminal:
        actionIndex = 1
    else:
        if np.random.rand(1) > explorationRate:
            [actionIndex, actionValue] = forward(
                [np.transpose(memory.History, [1, 2, 0])], Q_train, all=False)
        else:
            actionIndex = np.random.randint(len(legal_actions))  # get action
    # t2 = time.time()
    # t2s += t2 - t1
class AtariPlayer(gym.Env): """ A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings. Info: score: the accumulated reward in the current game gameOver: True when the current game is Over """ def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30, live_lost_as_eoe=True, max_num_frames=0): """ Args: rom_file: path to the rom frame_skip: skip every k frames and repeat the action viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. nullop_start: start with random number of null ops. live_losts_as_eoe: consider lost of lives as end of episode. Useful for training. max_num_frames: maximum number of frames per episode. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Error) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setInt(b"max_num_frames_per_episode", max_num_frames) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.action_space = spaces.Discrete(len(self.actions)) self.observation_space = spaces.Box( low=0, high=255, shape=(self.height, self.width)) self._restart_episode() def get_action_meanings(self): return [ACTION_MEANING[i] for i in self.actions] def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def _current_state(self): """ :returns: a gray-scale (h, w) uint8 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) cv2.waitKey(int(self.viz * 1000)) ret = ret.astype('float32') # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) return ret.astype('uint8') # to save some memory def _restart_episode(self): with _ALE_LOCK: self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def _reset(self): if self.ale.game_over(): self._restart_episode() return self._current_state() def _step(self, act): oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break isOver = self.ale.game_over() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives info = {'ale.lives': newlives} return self._current_state(), r, isOver, info
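The _current_state method above max-pools the current raw screen with the previous one before converting to grayscale, which removes the sprite flicker some Atari games exhibit. The same two steps in isolation, assuming (210, 160, 3) uint8 frames:

import cv2
import numpy as np

def flicker_free_gray(screen_rgb, prev_screen_rgb):
    # Pixel-wise max over the two most recent frames, then Rec.601 grayscale.
    pooled = np.maximum(screen_rgb, prev_screen_rgb)
    return cv2.cvtColor(pooled, cv2.COLOR_RGB2GRAY)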
class AtariEmulator: def __init__(self, args): ''' Initialize Atari environment ''' # Parameters self.buffer_length = args.buffer_length self.screen_dims = args.screen_dims self.frame_skip = args.frame_skip self.blend_method = args.blend_method self.reward_processing = args.reward_processing self.max_start_wait = args.max_start_wait self.history_length = args.history_length self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip) #Initialize ALE instance self.ale = ALEInterface() self.ale.setFloat(b'repeat_action_probability', 0.0) if args.watch: self.ale.setBool(b'sound', True) self.ale.setBool(b'display_screen', True) self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin')) self.buffer = np.empty((self.buffer_length, 210, 160)) self.current = 0 self.action_set = self.ale.getMinimalActionSet() self.lives = self.ale.lives() self.reset() def get_possible_actions(self): ''' Return list of possible actions for game ''' return self.action_set def get_screen(self): ''' Add screen to frame buffer ''' self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale()) self.current = (self.current + 1) % self.buffer_length def reset(self): self.ale.reset_game() self.lives = self.ale.lives() if self.max_start_wait < 0: print("ERROR: max start wait decreased beyond 0") sys.exit() elif self.max_start_wait <= self.start_frames_needed: wait = 0 else: wait = random.randint(0, self.max_start_wait - self.start_frames_needed) for _ in range(wait): self.ale.act(self.action_set[0]) # Fill frame buffer self.get_screen() for _ in range(self.buffer_length - 1): self.ale.act(self.action_set[0]) self.get_screen() # get initial_states state = [(self.preprocess(), 0, 0, False)] for step in range(self.history_length - 1): state.append(self.run_step(0)) # make sure agent hasn't died yet if self.isTerminal(): print("Agent lost during start wait. Decreasing max_start_wait by 1") self.max_start_wait -= 1 return self.reset() return state def run_step(self, action): ''' Apply action to game and return next screen and reward ''' raw_reward = 0 for step in range(self.frame_skip): raw_reward += self.ale.act(self.action_set[action]) self.get_screen() reward = None if self.reward_processing == 'clip': reward = np.clip(raw_reward, -1, 1) else: reward = raw_reward terminal = self.isTerminal() self.lives = self.ale.lives() return (self.preprocess(), action, reward, terminal, raw_reward) def preprocess(self): ''' Preprocess frame for agent ''' img = None if self.blend_method == "max": img = np.amax(self.buffer, axis=0) return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR) def isTerminal(self): return (self.isGameOver() or (self.lives > self.ale.lives())) def isGameOver(self): return self.ale.game_over()
class ALEEnvironment(BaseEnvironment): """ The :class:`ALEEnvironment` class takes care of the interface to the ALE and tries to do nothing else. It's meant for advanced users who need fine control over every aspect of the process. It has many functions that are simply wrappers of the underlying ALE but with pythonic names/usage. Parameters ---------- rom : str Specifies the path of the rom, e.g. 'dir_for_rom/rom.bin' display_screen : boolean Default False. Whether or not to show the game. True takes longer to run but can be fun to watch step_cap: int Default None. Maximum number of steps to run in an episode. Breakout can sometimes not return terminal even when the game has ended. This fixes that and will return terminal after stepping above this count """ def __init__(self, rom, resize_shape=(84, 84), skip_frame=1, repeat_action_probability=0.0, step_cap=None, loss_of_life_termination=False, loss_of_life_negative_reward=False, grayscale=True, display_screen=False, seed=np.random.RandomState()): # set up emulator self.ale = ALEInterface() if display_screen: self.ale.setBool(b'display_screen', True) self.ale.setInt(b'frame_skip', skip_frame) self.ale.setInt(b'random_seed', seed.randint(0, 9999)) self.ale.setFloat(b'repeat_action_probability', repeat_action_probability) self.ale.setBool(b'color_averaging', False) self.ale.loadROM(rom.encode()) # setup gamescreen object. Reusing one buffer is faster than recreating an empty array each time width, height = self.ale.getScreenDims() channels = 1 if grayscale else 3 self.grayscale = grayscale self.gamescreen = np.empty((height, width, channels), dtype=np.uint8) self.resize_shape = resize_shape self.skip_frame = skip_frame self.step_cap = step_cap self.curr_step_count = 0 # setup action converter # ALE returns legal action indexes, convert these to just numbers self.action_inds = self.ale.getMinimalActionSet() # setup lives self.loss_of_life_negative_reward = loss_of_life_negative_reward self.cur_lives = self.ale.lives() self.loss_of_life_termination = loss_of_life_termination self.life_lost = False def reset(self): self.ale.reset_game() self.cur_lives = self.ale.lives() self.life_lost = False self.curr_step_count = 0 def step(self, action): self.curr_step_count += 1 ale_action = self.action_inds[action] return self._step(ale_action) def _step(self, ale_action): if not self.loss_of_life_termination and not self.loss_of_life_negative_reward: return self.ale.act(ale_action) else: rew = self.ale.act(ale_action) new_lives = self.ale.lives() if new_lives < self.cur_lives: # if loss of life is negative reward subtract 1 from reward if self.loss_of_life_negative_reward: rew -= 1 self.cur_lives = new_lives self.life_lost = True return rew def get_state(self): if self.grayscale: self.gamescreen = self.ale.getScreenGrayscale(self.gamescreen) else: self.gamescreen = self.ale.getScreenRGB(self.gamescreen) # if resize_shape is None then don't resize if self.resize_shape is None: return self.gamescreen # if grayscale we remove the last dimension (channel) if self.grayscale: return imresize(self.gamescreen[:, :, 0], self.resize_shape) return imresize(self.gamescreen, self.resize_shape) def get_state_shape(self): return self.resize_shape def get_terminal(self): if self.loss_of_life_termination and self.life_lost: return True elif self.step_cap is not None and self.curr_step_count > self.step_cap: return True else: return self.ale.game_over() def get_num_actions(self): return len(self.action_inds)
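The loss_of_life_termination and loss_of_life_negative_reward options above implement a common DQN training trick: treat each lost life as an episode boundary (optionally penalized) without resetting the emulator. A sketch of just that bookkeeping, for any object exposing ALE's act/lives; all names here are illustrative:

def act_with_life_bookkeeping(ale, action, prev_lives, penalize_life_loss=False):
    # The caller decides whether life_lost should terminate the episode.
    reward = ale.act(action)
    new_lives = ale.lives()
    life_lost = new_lives < prev_lives
    if life_lost and penalize_life_loss:
        reward -= 1
    return reward, new_lives, life_lost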
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. """ def __init__(self, rom_file, viz=0, height_range=(None,None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_lost_as_eoe: consider loss of lives as end of episode. Useful for training. """ super(AtariPlayer, self).__init__() self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt("random_seed", self.rng.randint(0, 10000)) self.ale.setBool("showinfo", False) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: log_once() self.ale.setInt("frame_skip", 1) self.ale.setBool('color_averaging', False) # manual.pdf suggests otherwise. may need to check self.ale.setFloat('repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString('record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1],:] # 0.299, 0.587, 0.114,
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret def get_num_actions(self): """ :returns: the number of legal actions """ return len(self.actions) def restart_episode(self): if self.current_episode_score.count > 0: self.stats['score'].append(self.current_episode_score.sum) self.current_episode_score.reset() self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if isOver: self.restart_episode() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives return (r, isOver) def get_stat(self): try: return {'avg_score': np.mean(self.stats['score']), 'max_score': float(np.max(self.stats['score'])) } except ValueError: return {}
class ALE_ENVIRONMENT(object): def __init__(self, rom_file, session): self.load_rom(rom_file) self.session = session self.actions_set = self.ale.getMinimalActionSet().tolist() self.action_to_index = {a: i for i, a in enumerate(self.actions_set)} #self.state_dummy = 255*np.ones((1,config.STATE_DIM[0],config.STATE_DIM[1],config.SKIP_FRAMES)) self.states_dim = (config.STATE_DIM[0], config.STATE_DIM[1], config.SKIP_FRAMES) self.state_dummy = 255 * np.ones((1, np.prod(self.states_dim))) self.actions_dim = len(self.actions_set) self.preprocess_stack = deque([], config.SKIP_FRAMES) self.max_episode_len = config.MAX_EPISODE_LEN def load_rom(self, rom_file): self.ale = ALEInterface() self.ale.setInt(str.encode('random_seed'), 123) self.ale.setFloat(str.encode('repeat_action_probability'), 0.0) self.ale.setBool(str.encode('sound'), False) self.ale.setBool(str.encode('display_screen'), config.USE_SDL) self.ale.loadROM(str.encode(rom_file)) def generate_episodes(self, num_episodes, agent): print("Generating %d episodes" % num_episodes) episodes = [] lives = self.ale.lives() _, dist = agent.act(self.state_dummy) print("pi_theta for blank white state:", dist) for i in range(num_episodes): print("Starting episode %d" % i) episode_done = False states = [] actions_dist = [] actions = [0] rewards = [0] self.skip_frames(states, config.SKIP_FRAMES) t = 0 # step counter; distinct from the episode index i while not episode_done and t < self.max_episode_len: action_idx, action_dist = agent.act(states[-1]) reward = 0 for _ in range(config.SKIP_FRAMES): reward = reward + np.clip( self.ale.act(self.actions_set[action_idx]), -1, 1) self.preprocess_stack.append(self.ale.getScreenRGB()) t += 1 state = imf.preprocess(self.preprocess_stack, True) states.append(state) actions_dist.append([action_dist]) actions.append(action_idx) rewards.append(reward) episode_done = self.ale.game_over() or (self.ale.lives() < lives) episodes.append({ 'states': np.concatenate(states[1:]), 'actions_dist': np.concatenate(actions_dist), 'actions': np.array(actions[1:]), 'rewards': np.array(rewards[1:]) }) self.ale.reset_game() return episodes def reset(self): self.ale.reset_game() self.preprocess_stack = deque([], config.SKIP_FRAMES) def skip_frames(self, states, num_frames): # perform null-ops for _ in range(num_frames): self.ale.act(0) self.preprocess_stack.append(self.ale.getScreenRGB()) if len(self.preprocess_stack) < config.SKIP_FRAMES: self.ale.act(0) self.preprocess_stack.append(self.ale.getScreenRGB()) states.append(imf.preprocess(self.preprocess_stack, True))
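preprocess_stack above is a bounded deque, so appending a new screen silently evicts the oldest one. The pattern in isolation; the stack depth and frame shape are assumed, standing in for config.SKIP_FRAMES and the ALE screen:

from collections import deque
import numpy as np

STACK_DEPTH = 4  # assumed
stack = deque([], STACK_DEPTH)
for _ in range(STACK_DEPTH):
    stack.append(np.zeros((210, 160, 3), dtype=np.uint8))  # blank padding
stack.append(np.ones((210, 160, 3), dtype=np.uint8))  # oldest frame dropped
assert len(stack) == STACK_DEPTH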
class ALE(Environment): def __init__(self, rom, frame_skip=1, repeat_action_probability=0.0, loss_of_life_termination=False, loss_of_life_reward=0, display_screen=False, seed=np.random.RandomState()): """ Initialize ALE. Args: rom: Rom filename and directory. frame_skip: Repeat action for n frames. Default 1. repeat_action_probability: Repeats last action with given probability. Default 0. loss_of_life_termination: Signals a terminal state on loss of life. Default False. loss_of_life_reward: Reward/Penalty on loss of life (negative values are a penalty). Default 0. display_screen: Displays the emulator screen. Default False. seed: Random seed """ self.ale = ALEInterface() self.rom = rom self.ale.setBool(b'display_screen', display_screen) self.ale.setInt(b'random_seed', seed.randint(0, 9999)) self.ale.setFloat(b'repeat_action_probability', repeat_action_probability) self.ale.setBool(b'color_averaging', False) self.ale.setInt(b'frame_skip', frame_skip) # all set commands must be done before loading the ROM self.ale.loadROM(rom.encode()) # setup gamescreen object width, height = self.ale.getScreenDims() self.gamescreen = np.empty((height, width, 3), dtype=np.uint8) self.frame_skip = frame_skip # setup action converter # ALE returns legal action indexes, convert these to just numbers self.action_inds = self.ale.getMinimalActionSet() # setup lives self.loss_of_life_reward = loss_of_life_reward self.cur_lives = self.ale.lives() self.loss_of_life_termination = loss_of_life_termination self.life_lost = False def __str__(self): return 'ALE({})'.format(self.rom) def close(self): self.ale = None def reset(self): self.ale.reset_game() self.cur_lives = self.ale.lives() self.life_lost = False # clear gamescreen self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8) return self.current_state def execute(self, action): # convert action to ale action ale_action = self.action_inds[action] # get reward and process terminal & next state rew = self.ale.act(ale_action) if self.loss_of_life_termination or self.loss_of_life_reward != 0: new_lives = self.ale.lives() if new_lives < self.cur_lives: self.cur_lives = new_lives self.life_lost = True rew += self.loss_of_life_reward terminal = self.is_terminal state_tp1 = self.current_state return state_tp1, rew, terminal @property def states(self): return dict(shape=self.gamescreen.shape, type=float) @property def actions(self): return dict(continuous=False, num_actions=len(self.action_inds), names=self.action_names) @property def current_state(self): self.gamescreen = self.ale.getScreenRGB(self.gamescreen) return np.copy(self.gamescreen) @property def is_terminal(self): if self.loss_of_life_termination and self.life_lost: return True else: return self.ale.game_over() @property def action_names(self): action_names = [ 'No-Op', 'Fire', 'Up', 'Right', 'Left', 'Down', 'Up Right', 'Up Left', 'Down Right', 'Down Left', 'Up Fire', 'Right Fire', 'Left Fire', 'Down Fire', 'Up Right Fire', 'Up Left Fire', 'Down Right Fire', 'Down Left Fire' ] return np.asarray(action_names)[self.action_inds]
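A hedged usage sketch for the class above; the rom filename is illustrative:

import numpy as np

env = ALE('breakout.bin', frame_skip=4, loss_of_life_termination=True,
          seed=np.random.RandomState(0))
screen = env.reset()
next_screen, reward, terminal = env.execute(0)  # 0 indexes the minimal action set
env.close()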
def train(gamepath, n_episodes, display_screen, record_weights, reduce_exploration_prob_amount, n_frames_to_skip, exploration_prob, verbose, discount, learning_rate, load_weights, frozen_target_update_period, use_replay_mem): """ :description: trains an agent to play a game :type gamepath: string :param gamepath: path to the binary of the game to be played :type n_episodes: int :param n_episodes: number of episodes of the game on which to train display_screen : whether or not to display the screen of the game record_weights : whether or not to save the weights of the network reduce_exploration_prob_amount : amount to reduce exploration prob each episode; set to 0 to not reduce exploration_prob n_frames_to_skip : how frequently to determine a new action to use exploration_prob : probability of choosing a random action verbose : whether or not to print information about the run periodically discount : discount factor used in learning learning_rate : the scaling factor for the sgd update load_weights : whether or not to load weights for the network (set the files directly below) frozen_target_update_period : the number of episodes between resetting the target of the network """ # load the ale interface to interact with ale = ALEInterface() ale.setInt('random_seed', 42) # display/recording settings, doesn't seem to work currently recordings_dir = './recordings/breakout/' # previously "USE_SDL" if display_screen: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX #ale.setString("record_screen_dir", recordings_dir); elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) ale.loadROM(gamepath) ale.setInt("frame_skip", n_frames_to_skip) # real actions for breakout are [0,1,3,4] real_actions = ale.getMinimalActionSet() # use a list of actions [0,1,2,3] to index into the array of real actions actions = np.arange(len(real_actions)) # these theano variables are used to define the symbolic input of the network features = T.dvector('features') action = T.lscalar('action') reward = T.dscalar('reward') next_features = T.dvector('next_features') # load weights by file name # currently must be loaded by individual hidden layers if load_weights: hidden_layer_1 = file_utils.load_model('weights/hidden0_replay.pkl') hidden_layer_2 = file_utils.load_model('weights/hidden1_replay.pkl') else: # defining the hidden layer network structure # the n_hid of a prior layer must equal the n_vis of a subsequent layer # for q-learning the output layer must be of len(actions) hidden_layer_1 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, n_hid=NNET_INPUT_DIMENSION, layer_name='hidden1', activation='relu') hidden_layer_2 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, n_hid=NNET_INPUT_DIMENSION, layer_name='hidden2', activation='relu') # the third hidden layer is never written to disk (only hidden0/hidden1 are saved below), so build it fresh whether or not weights are loaded hidden_layer_3 = HiddenLayer(n_vis=NNET_INPUT_DIMENSION, n_hid=len(actions), layer_name='hidden3', activation='relu') # the output layer is currently necessary when using tanh units in the # hidden layer in order to prevent a theano warning # currently the relu unit setting of the hidden and output layers is leaky w/ alpha=0.01 output_layer = OutputLayer(layer_name='output', activation='relu') # pass a list of layers to the constructor of the network (here called "mlp") layers = [hidden_layer_1, hidden_layer_2, hidden_layer_3, output_layer] qnetwork = QNetwork(layers, discount=discount, learning_rate=learning_rate) # this call gets the symbolic output of the network # along with the parameter updates
expected loss, updates = qnetwork.get_loss_and_updates(features, action, reward, next_features) # this defines the theano symbolic function used to train the network # 1st argument is a list of inputs, here the symbolic variables above # 2nd argument is the symbolic output expected # 3rd argument is the dictionary of parameter updates # 4th argument is the compilation mode train_model = theano.function( [theano.Param(features, default=np.zeros(NNET_INPUT_DIMENSION)), theano.Param(action, default=0), theano.Param(reward, default=0), theano.Param(next_features, default=np.zeros(NNET_INPUT_DIMENSION))], outputs=loss, updates=updates, mode='FAST_RUN') sym_action = qnetwork.get_action(features) get_action = theano.function([features], sym_action) # some containers for collecting information about the training processes rewards = [] losses = [] best_reward = 4 sequence_examples = [] sampled_examples = [] # the preprocessor and feature extractor to use preprocessor = screen_utils.RGBScreenPreprocessor() feature_extractor = feature_extractors.NNetOpenCVBoundingBoxExtractor(max_features=MAX_FEATURES) if use_replay_mem: replay_mem = ReplayMemory() # main training loop, each episode is a full playthrough of the game for episode in xrange(n_episodes): # this implements the frozen target component of the network # by setting the frozen layers of the network to a copy of the current layers if episode % frozen_target_update_period == 0: qnetwork.frozen_layers = copy.deepcopy(qnetwork.layers) # some variables for collecting information about this particular run of the game total_reward = 0 action = 1 counter = 0 reward = 0 loss = 0 previous_param_0 = None # lives here is used for the reward heuristic of subtracting 1 from the reward # when we lose a life. currently commented out this functionality because # i think it might not be helpful. 
lives = ale.lives() # the initial state of the screen and state screen = np.zeros((preprocessor.dim, preprocessor.dim, preprocessor.channels)) state = { "screen" : screen, "objects" : None, "prev_objects": None, "features": np.zeros(MAX_FEATURES)} # start the actual play through of the game while not ale.game_over(): counter += 1 # get the current features, which is the representation of the state provided to # the "agent" (here just the network directly) features = state["features"] # epsilon greedy action selection (note that exploration_prob is reduced by # reduce_exploration_prob_amount after every game) if random.random() < exploration_prob: action = random.choice(actions) else: # to choose an action from the network, we fprop # the current state and take the argmax of the output # layer (i.e., the action that corresponds to the # maximum q value) action = get_action(features) # take the action and receive the reward reward += ale.act(real_actions[action]) # this is commented out because i think it might not be helpful if ale.lives() < lives: lives = ale.lives() reward -= 1 # get the next screen, preprocess it, initialize the next state next_screen = ale.getScreenRGB() next_screen = preprocessor.preprocess(next_screen) next_state = {"screen": next_screen, "objects": None, "prev_objects": state["objects"]} # get the features for the next state next_features = feature_extractor(next_state, action=None) if use_replay_mem: sars_tuple = (features, action, reward, next_features) replay_mem.store(sars_tuple) num_samples = 5 if replay_mem.isFull() else 1 for i in range(0, num_samples): random_train_tuple = replay_mem.sample() loss += train_model(*random_train_tuple) # collect for pca sequence_examples.append(list(sars_tuple[0]) + [sars_tuple[1]] \ + [sars_tuple[2]] + sars_tuple[3]) sequence_examples = sequence_examples[-100:] sampled_examples.append(list(random_train_tuple[0]) + [random_train_tuple[1]] \ + [random_train_tuple[2]] + random_train_tuple[3]) sampled_examples = sampled_examples[-100:] else: # call the train model function loss += train_model(features, action, reward, next_features) # prepare for the next loop through the game next_state["features"] = next_features state = next_state # weird counter value to avoid interaction with any other counter # loop that might be added, not necessary right now if verbose and counter % PRINT_TRAINING_INFO_PERIOD == 0: print('*' * 15 + ' training information ' + '*' * 15) print('episode: {}'.format(episode)) print('reward: \t{}'.format(reward)) print('avg reward: \t{}'.format(np.mean(rewards))) print 'avg reward (last 25): \t{}'.format(np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:])) print('action: \t{}'.format(real_actions[action])) print('exploration prob: {}'.format(exploration_prob)) param_info = [(p.eval(), p.name) for p in qnetwork.get_params()] for index, (val, name) in enumerate(param_info): if previous_param_0 is None and index == 0: previous_param_0 = val print('parameter {} value: \n{}'.format(name, val)) if index == 0: diff = val - previous_param_0 print('difference from previous param {}: \n{}'.format(name, diff)) print('features: \t{}'.format(features)) print('next_features: \t{}'.format(next_features)) scaled_sequence = preprocessing.scale(np.array(sequence_examples)) scaled_sampled = preprocessing.scale(np.array(sampled_examples)) pca = PCA() _ = pca.fit_transform(scaled_sequence) print('variance explained by first component for sequence: {}%'.format(pca. 
\ explained_variance_ratio_[0] * 100)) _ = pca.fit_transform(scaled_sampled) print('variance explained by first component for sampled: {}%'.format(pca. \ explained_variance_ratio_[0] * 100)) print('*' * 52) print('\n') # collect info and total reward and also reset the reward to 0 if we reach this point total_reward += reward reward = 0 # collect stats from this game run losses.append(loss) rewards.append(total_reward) # if we got a best reward, inform the user if total_reward > best_reward: best_reward = total_reward print("best reward!: {}".format(total_reward)) # record the weights if record_weights=True # must record the weights of the indiviual layers # only save hidden layers b/c output layer does not have weights if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights: file_utils.save_rewards(rewards) file_utils.save_model(qnetwork.layers[0], 'weights/hidden0_{}.pkl'.format(episode)) file_utils.save_model(qnetwork.layers[1], 'weights/hidden1_{}.pkl'.format(episode)) # reduce exploration policy over time if exploration_prob > MINIMUM_EXPLORATION_EPSILON: exploration_prob -= reduce_exploration_prob_amount # inform user of how the episode went and reset the game print('episode: {} ended with score: {}\tloss: {}'.format(episode, rewards[-1], losses[-1])) ale.reset_game() # return the list of rewards attained return rewards
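The loop above decays exploration_prob linearly once per episode until it reaches MINIMUM_EXPLORATION_EPSILON. An equivalent clamped form of that schedule; the floor value here is assumed:

MINIMUM_EXPLORATION_EPSILON = 0.1  # assumed value of the module-level constant

def decayed(exploration_prob, reduce_amount):
    # Linear per-episode decay with a hard floor.
    return max(MINIMUM_EXPLORATION_EPSILON, exploration_prob - reduce_amount)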
class AtariWrapper(): """ ALE wrapper that tries to mimic the options in the DQN paper including the preprocessing (except resizing/cropping) """ action_words = [ 'NOOP', 'UP', 'RIGHT', 'LEFT', 'DOWN', "UPRIGHT", "UPLEFT", "DOWNRIGHT", "DOWNLEFT" ] _action_set = [0, 2, 3, 4, 5, 6, 7, 8, 9] # Valid actions for ALE. # Possible actions are just a list from 0,num_valid_actions # We still need to map from the latter to the former when acting possible_actions = list(range(len(_action_set))) def __init__(self, rom_path, seed=123, frameskip=4, show_display=False, stack_num_states=4, concatenate_state_every=4): """ Parameters: Frameskip should be either a tuple (indicating a random range to choose from, with the top value excluded), or an int. It is also known as action repeat. stack_num_states: Number of dimensions/channels to have. concatenate_state_every: After how many frames should one channel be appended to state. Number is in terms of absolute frames independent of frameskip """ self.stack_num_states = stack_num_states self.concatenate_state_every = concatenate_state_every self.game_path = rom_path if not os.path.exists(self.game_path): raise IOError('You asked for rom %s but path %s does not exist' % (rom_path, self.game_path)) self.frameskip = frameskip try: self.ale = ALEInterface() except Exception as e: print( "ALEInterface could not be loaded. ale_python_interface import failed" ) raise e # Set some default options self.ale.setInt(b'random_seed', seed) self.ale.setBool(b'sound', False) self.ale.setBool(b'display_screen', show_display) self.ale.setFloat(b'repeat_action_probability', 0.) # Load the rom self.ale.loadROM(self.game_path) (self.screen_width, self.screen_height) = self.ale.getScreenDims() self.latest_frame_fifo = deque( maxlen=2) # Holds the two closest frames to max. self.state_fifo = deque(maxlen=stack_num_states) def _step(self, a, force_noop=False): """Perform one step of the environment. Automatically repeats the step self.frameskip number of times parameters: force_noop: Force it to perform a no-op ignoring the action supplied. """ assert a in self.possible_actions + [0] if force_noop: action, num_steps = 0, 1 else: action = self._action_set[a] if isinstance(self.frameskip, int): num_steps = self.frameskip else: num_steps = np.random.randint(self.frameskip[0], self.frameskip[1]) reward = 0.0 for i in range(num_steps): reward += self.ale.act(action) cur_frame = self.observe_raw(get_rgb=True) cur_frame_cropped = self.crop_frame(cur_frame) self.latest_frame_fifo.append(cur_frame_cropped) if i % self.concatenate_state_every == 0: curmax_frame = np.amax(self.latest_frame_fifo, axis=0) frame_lumi = self.convert_to_gray(curmax_frame) self.state_fifo.append(frame_lumi) # Transpose so we get HxWxC instead of CxHxW self.current_frame = np.array(np.transpose(self.state_fifo, (1, 2, 0))) return self.current_frame, reward, self.ale.game_over(), { "ale.lives": self.ale.lives() } def step(self, *args, **kwargs): """Performs one step of the environment """ lives_before = self.ale.lives() next_state, reward, done, info = self._step(*args, **kwargs) lives_after = self.ale.lives() # End the episode when a life is lost if lives_before > lives_after: done = True return next_state, reward, done, info def observe_raw(self, get_rgb=False): """Observe either RGB or Gray frames.
Initializing arrays forces it to not modify stale pointers """ if get_rgb: cur_frame_rgb = np.zeros( (self.screen_height, self.screen_width, 3), dtype=np.uint8) self.ale.getScreenRGB(cur_frame_rgb) return cur_frame_rgb else: cur_frame_gray = np.zeros((self.screen_height, self.screen_width), dtype=np.uint8) self.ale.getScreenGrayscale(cur_frame_gray) return cur_frame_gray def crop_frame(self, frame): """Simply crops a frame. Does nothing by default. """ return frame def convert_to_gray(self, img): """Get the luminance channel """ img_f = np.float32(img) img_lumi = 0.299*img_f[:,:,0] + \ 0.587*img_f[:,:,1] + \ 0.114*img_f[:,:,2] return np.uint8(img_lumi) def reset(self): """Reset the game """ self.ale.reset_game() s = self.observe_raw(get_rgb=True) s = self.crop_frame(s) # Populate missing frames with blank ones. for _ in range(self.stack_num_states - 1): self.state_fifo.append(np.zeros(shape=(s.shape[0], s.shape[1]))) self.latest_frame_fifo.append(s) # Push the latest frame frame_lumi = self.convert_to_gray(s) self.state_fifo.append(frame_lumi) self.state = np.transpose(self.state_fifo, (1, 2, 0)) return self.state def get_action_meanings(self): """Return in text what the actions correspond to. """ return [ACTION_MEANING[i] for i in self._action_set] def save_state(self): """Saves the current state and returns an identifier to the saved state """ return self.ale.cloneSystemState() def restore_state(self, ident): """Restore game state Restores the saved state of the system and performs a no-op so a new frame can be generated in case a restore is followed by an observe() """ self.ale.restoreSystemState(ident) self.step(0, force_noop=True)
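convert_to_gray above applies the Rec.601 luma weights (0.299, 0.587, 0.114), the same mix cv2's RGB2GRAY uses. Written as one dot product:

import numpy as np

REC601 = np.array([0.299, 0.587, 0.114], dtype=np.float32)

def to_luma(rgb_frame):
    # (H, W, 3) uint8 -> (H, W) uint8 luminance
    return np.uint8(np.float32(rgb_frame) @ REC601)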
class Agent(): def __init__(self, game, agent_type, display, load_model, record, test): self.name = game self.agent_type = agent_type self.ale = ALEInterface() self.ale.setInt(str.encode('random_seed'), np.random.randint(100)) self.ale.setBool(str.encode('display_screen'), display or record) if record: self.ale.setString(str.encode('record_screen_dir'), str.encode('./data/recordings/{}/{}/tmp/'.format(game, agent_type))) self.ale.loadROM(str.encode('./roms/{}.bin'.format(self.name))) self.action_list = list(self.ale.getMinimalActionSet()) self.frame_shape = np.squeeze(self.ale.getScreenGrayscale()).shape if test: self.name += '_test' if 'space_invaders' in self.name: # Account for blinking bullets self.frameskip = 2 else: self.frameskip = 3 self.frame_buffer = deque(maxlen=4) if load_model and not record: self.load_replaymemory() else: self.replay_memory = ReplayMemory(500000, 32) model_input_shape = self.frame_shape + (4,) model_output_shape = len(self.action_list) if agent_type == 'dqn': self.model = DeepQN( model_input_shape, model_output_shape, self.action_list, self.replay_memory, self.name, load_model ) elif agent_type == 'double': self.model = DoubleDQN( model_input_shape, model_output_shape, self.action_list, self.replay_memory, self.name, load_model ) else: self.model = DuelingDQN( model_input_shape, model_output_shape, self.action_list, self.replay_memory, self.name, load_model ) print('{} Loaded!'.format(' '.join(self.name.split('_')).title())) print('Displaying: ', display) print('Frame Shape: ', self.frame_shape) print('Frame Skip: ', self.frameskip) print('Action Set: ', self.action_list) print('Model Input Shape: ', model_input_shape) print('Model Output Shape: ', model_output_shape) print('Agent: ', agent_type) def training(self, steps): ''' Trains the agent for :steps number of weight updates. Returns the average model loss ''' loss = [] # Initialize frame buffer. np.squeeze removes empty dimensions e.g. 
if shape=(210,160,__) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) try: for step in range(steps): gameover = False initial_state = np.stack(self.frame_buffer, axis=-1) action = self.model.predict_action(initial_state) # Backup data if step % 5000 == 0: self.model.save_model() self.model.save_hyperparams() self.save_replaymemory() # If using a target model check for weight updates if hasattr(self.model, 'tau'): if self.model.tau == 0: self.model.update_target_model() self.model.tau = 10000 else: self.model.tau -= 1 # Frame skipping technique https://danieltakeshi.github.io/2016/11/25/frame-skipping-and-preprocessing-for-deep-q-networks-on-atari-2600-games/ lives_before = self.ale.lives() for _ in range(self.frameskip): self.ale.act(action) reward = self.ale.act(action) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) lives_after = self.ale.lives() if lives_after < lives_before: gameover = True # Taking advice from dude on reddit reward = -1 if self.ale.game_over(): gameover = True reward = -1 self.ale.reset_game() new_state = np.stack(self.frame_buffer, axis=-1) # Experiment with clipping rewards for stability purposes reward = np.clip(reward, -1, 1) self.replay_memory.add( initial_state, action, reward, gameover, new_state ) loss += self.model.replay_train() except: self.model.save_model() self.model.save_hyperparams() self.save_replaymemory() raise KeyboardInterrupt return np.mean(loss, axis=0) def simulate_random(self): print('Simulating game randomly') done = False total_reward = 0 while not done: action = np.random.choice(self.ale.getMinimalActionSet()) reward = self.ale.act(action) total_reward += reward if self.ale.game_over(): reward = -1 done = True reward = np.clip(reward, -1, 1) if reward != 0: print(reward) frames_survived = self.ale.getEpisodeFrameNumber() self.ale.reset_game() return total_reward, frames_survived def simulate_intelligent(self, evaluating=False): done = False total_score = 0 self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) while not done: state = np.stack(self.frame_buffer, axis=-1) action = self.model.predict_action(state, evaluating) for _ in range(self.frameskip): self.ale.act(action) # Remember, ale.act returns the increase in game score with this action total_score += self.ale.act(action) # Pushes oldest frame out self.frame_buffer.append(np.squeeze(self.ale.getScreenGrayscale())) if self.ale.game_over(): done = True frames_survived = self.ale.getEpisodeFrameNumber() print(' Game Over') print(' Frames Survived: ', frames_survived) print(' Score: ', total_score) print('===========================') self.ale.reset_game() return total_score, frames_survived def save_replaymemory(self): with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'wb') as f: pickle.dump(self.replay_memory, f, protocol=pickle.HIGHEST_PROTOCOL) print('Saved replay memory at ', datetime.now()) def load_replaymemory(self): try: with bz2.BZ2File('./data/{}/{}_replaymem.obj'.format(self.agent_type, self.name), 'rb') as f: self.replay_memory = pickle.load(f) print('Loaded replay memory at ', 
datetime.now()) except FileNotFoundError: print('No replay memory file found') raise KeyboardInterrupt
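save_replaymemory/load_replaymemory above persist the replay buffer as a bz2-compressed pickle. The round trip in isolation; the path argument is illustrative:

import bz2
import pickle

def save_compressed(obj, path):
    with bz2.BZ2File(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_compressed(path):
    with bz2.BZ2File(path, 'rb') as f:
        return pickle.load(f)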
class ALEEnvironment(Environment): def __init__(self, rom_file, args): from ale_python_interface import ALEInterface self.ale = ALEInterface() if args.display_screen: if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.ale.setBool('sound', True) self.ale.setBool('display_screen', True) self.ale.setInt('frame_skip', args.frame_skip) self.ale.setFloat('repeat_action_probability', args.repeat_action_probability) self.ale.setBool('color_averaging', args.color_averaging) if args.random_seed: self.ale.setInt('random_seed', args.random_seed) if args.record_screen_path: if not os.path.exists(args.record_screen_path): logger.info("Creating folder %s" % args.record_screen_path) os.makedirs(args.record_screen_path) logger.info("Recording screens to %s", args.record_screen_path) self.ale.setString('record_screen_dir', args.record_screen_path) if args.record_sound_filename: logger.info("Recording sound to %s", args.record_sound_filename) self.ale.setBool('sound', True) self.ale.setString('record_sound_filename', args.record_sound_filename) self.ale.loadROM(rom_file) if args.minimal_action_set: self.actions = self.ale.getMinimalActionSet() logger.info("Using minimal action set with size %d" % len(self.actions)) else: self.actions = self.ale.getLegalActionSet() logger.info("Using full action set with size %d" % len(self.actions)) logger.debug("Actions: " + str(self.actions)) self.screen_width = args.screen_width self.screen_height = args.screen_height self.mode = getattr(args, 'mode', 'train') # assumed to be 'train' or 'test'; restart() and isTerminal() below depend on it but the original never set it self.life_lost = False def numActions(self): return len(self.actions) def restart(self): # In test mode, the game is simply initialized. In train mode, if the game # is in terminal state due to a life loss but not yet game over, then only # life loss flag is reset so that the next game starts from the current # state. Otherwise, the game is simply initialized. if ( self.mode == 'test' or not self.life_lost or # `reset` called in a middle of episode self.ale.game_over() # all lives are lost ): self.ale.reset_game() self.life_lost = False def act(self, action): lives = self.ale.lives() reward = self.ale.act(self.actions[action]) self.life_lost = (lives != self.ale.lives()) return reward def getScreen(self): screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen, (self.screen_width, self.screen_height)) return resized def isTerminal(self): if self.mode == 'train': return self.ale.game_over() or self.life_lost return self.ale.game_over()
class AleEnv(): def __init__(self, rom, display_screen, use_env_frame_skip, frame_repeat): self.actions = None self.rom = rom self.display_screen = display_screen self.use_env_frame_skip = use_env_frame_skip self.frame_repeat = frame_repeat def initialize(self): self.ale = ALEInterface() self.ale.setInt("random_seed", random.randint(1, 1000)) if self.display_screen: self.ale.setBool('display_screen', True) if self.use_env_frame_skip: self.ale.setInt('frame_skip', self.frame_repeat) self.ale.setBool('color_averaging', True) self.ale.setFloat('repeat_action_probability', 0) self.ale.loadROM(self.rom) self.actions = self.ale.getMinimalActionSet() print('actions: %s' % self.actions) (self.screen_width, self.screen_height) = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) self.initialized = True def get_actions(self, rom=None): if self.actions is None and rom is not None: ale = ALEInterface() ale.loadROM(rom) self.actions = ale.getMinimalActionSet() return self.actions @property def state_dtype(self): return np.uint8 @property def continuous_action(self): return False def reset_game(self): self.ale.reset_game() def lives(self): return self.ale.lives() def getScreenRGB(self): return self.ale.getScreenRGB() def getState(self, debug_display=False, debug_input=None): screen = self.ale.getScreenGrayscale() if screen is not None and debug_display: debug_input.show(screen.reshape(screen.shape[0], screen.shape[1])) return screen.reshape(self.screen_height, self.screen_width) def act(self, action): return self.ale.act(action) def game_over(self): return self.ale.game_over() def finish(self): return
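A minimal random-agent loop against the raw interface, using only the calls the wrappers in this section rely on; the rom path is assumed:

import random
from ale_python_interface import ALEInterface

ale = ALEInterface()
ale.setInt('random_seed', 123)
ale.setFloat('repeat_action_probability', 0.0)
ale.loadROM('breakout.bin')  # assumed path
actions = ale.getMinimalActionSet()
episode_reward = 0
while not ale.game_over():
    episode_reward += ale.act(random.choice(actions))
ale.reset_game()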
class AtariEmulator: def __init__(self, args): ''' Initialize Atari environment ''' # Parameters self.buffer_length = args.buffer_length self.screen_dims = args.screen_dims self.frame_skip = args.frame_skip self.blend_method = args.blend_method self.reward_processing = args.reward_processing self.max_start_wait = args.max_start_wait self.history_length = args.history_length self.start_frames_needed = self.buffer_length - 1 + ( (args.history_length - 1) * self.frame_skip) #Initialize ALE instance self.ale = ALEInterface() self.ale.setFloat(b'repeat_action_probability', 0.0) if args.watch: self.ale.setBool(b'sound', True) self.ale.setBool(b'display_screen', True) self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin')) self.buffer = np.empty((self.buffer_length, 210, 160)) self.current = 0 self.action_set = self.ale.getMinimalActionSet() self.lives = self.ale.lives() self.reset() def get_possible_actions(self): ''' Return list of possible actions for game ''' return self.action_set def get_screen(self): ''' Add screen to frame buffer ''' self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale()) self.current = (self.current + 1) % self.buffer_length def reset(self): self.ale.reset_game() self.lives = self.ale.lives() if self.max_start_wait < 0: print("ERROR: max start wait decreased beyond 0") sys.exit() elif self.max_start_wait <= self.start_frames_needed: wait = 0 else: wait = random.randint( 0, self.max_start_wait - self.start_frames_needed) for _ in range(wait): self.ale.act(self.action_set[0]) # Fill frame buffer self.get_screen() for _ in range(self.buffer_length - 1): self.ale.act(self.action_set[0]) self.get_screen() # get initial_states state = [(self.preprocess(), 0, 0, False)] for step in range(self.history_length - 1): state.append(self.run_step(0)) # make sure agent hasn't died yet if self.isTerminal(): print( "Agent lost during start wait. Decreasing max_start_wait by 1" ) self.max_start_wait -= 1 return self.reset() return state def run_step(self, action): ''' Apply action to game and return next screen and reward ''' raw_reward = 0 for step in range(self.frame_skip): raw_reward += self.ale.act(self.action_set[action]) self.get_screen() reward = None if self.reward_processing == 'clip': reward = np.clip(raw_reward, -1, 1) else: reward = raw_reward terminal = self.isTerminal() self.lives = self.ale.lives() return (self.preprocess(), action, reward, terminal, raw_reward) def preprocess(self): ''' Preprocess frame for agent ''' img = None if self.blend_method == "max": img = np.amax(self.buffer, axis=0) return imresize(img, self.screen_dims) def isTerminal(self): return (self.isGameOver() or (self.lives > self.ale.lives())) def isGameOver(self): return self.ale.game_over()
class AleEnvironment(Environment): def __init__(self, rom_name, record_display=False, show_display=False, id=0, shrink=False, life_lost_as_end=True, use_grayscale=True): super(AleEnvironment, self).__init__() self.ale = ALEInterface() self.ale.setInt('random_seed', int(np.random.rand() * 100)) self.ale.setFloat('repeat_action_probability', 0.0) self.ale.setBool('color_averaging', False) self.record_display = record_display self.show_display = show_display if self.record_display: self.ale.setString('record_screen_dir', 'movie') elif self.show_display: self.display_name = rom_name + '_' + str(id) cv2.startWindowThread() cv2.namedWindow(self.display_name) self.ale.loadROM(rom_name) self.actions = self.ale.getMinimalActionSet() self.screen_width, self.screen_height = self.ale.getScreenDims() self.use_grayscale = use_grayscale if self.use_grayscale: self.screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8) else: self.screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8) self.prev_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8) self.shrink = shrink self.life_lost_as_end = life_lost_as_end self.lives_lost = False self.lives = self.ale.lives() def __enter__(self): return self def __exit__(self, exc_type, exc_value, traceback): cv2.destroyWindow(self.display_name) def act(self, action): reward = self.ale.act(self.actions[action]) if self.use_grayscale: screen = self.ale.getScreenGrayscale(self.screen) else: current_screen = self.ale.getScreenRGB(self.screen) screen = np.maximum(current_screen, self.prev_screen) self.prev_screen = current_screen # Rec. 709 luma: 0.2126 R + 0.7152 G + 0.0722 B screen = screen[:, :, 0] * 0.2126 + screen[:, :, 1] * 0.7152 + screen[:, :, 2] * 0.0722 screen = screen.astype(np.uint8) screen = np.reshape(screen, (self.screen_height, self.screen_width, 1)) state = self.preprocess(screen) self.lives_lost = self.ale.lives() < self.lives self.lives = self.ale.lives() return reward, state def is_end_state(self): if self.life_lost_as_end: return self.ale.game_over() or self.lives_lost else: return self.ale.game_over() def reset(self): if self.ale.game_over(): self.ale.reset_game() self.lives = self.ale.lives() self.lives_lost = False def available_actions(self): # return available indexes instead of actual action value return range(0, len(self.actions)) def preprocess(self, screen): if self.show_display and not self.record_display: cv2.imshow(self.display_name, screen) if self.shrink: resized = cv2.resize(screen, (84, 84)) else: resized = cv2.resize(screen, (84, 110)) resized = resized[18:102, :] scaled = resized.astype(np.float32) / 255.0 return scaled
class ALEEnvironment(): def __init__(self, config): self.history = History3D(config) self.history_length = config.history_length self.mode = config.mode self.life_lost = False self.terminal = False self.score = 0 from ale_python_interface import ALEInterface self.ale = ALEInterface() if config.display_screen: if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.ale.setBool('sound', False) self.ale.setBool('display_screen', True) self.ale.setInt('frame_skip', config.frame_skip) # number of frames to skip per step self.ale.setBool('color_averaging', config.color_averaging) if config.random_seed: # Random seed for repeatable experiments. self.ale.setInt('random_seed', config.random_seed) if config.record_screen_path: if not os.path.exists(config.record_screen_path): os.makedirs(config.record_screen_path) self.ale.setString('record_screen_dir', config.record_screen_path) if config.record_sound_filename: self.ale.setBool('sound', True) self.ale.setString('record_sound_filename', config.record_sound_filename) self.ale.loadROM(config.rom_file) if config.minimal_action_set: self.actions = self.ale.getMinimalActionSet() else: self.actions = self.ale.getLegalActionSet() self.screen_width = config.screen_width self.screen_height = config.screen_height def numActions(self): return len(self.actions) def new_game(self): state, terminal = self.reset() for _ in range(self.history_length + 1): self.history.add(state) return state, terminal, list(range(len(self.actions))) def reset(self): # In test mode, the game is simply initialized. In train mode, if the game # is in terminal state due to a life loss but not yet game over, then only # life loss flag is reset so that the next game starts from the current # state. Otherwise, the game is simply initialized. if (self.mode == 'test' or not self.life_lost # `reset` called in a middle of episode or self.ale.game_over()): # all lives are lost self.ale.reset_game() self.life_lost = False return self.getScreen(), self.isTerminal() def step(self, action): lives = self.ale.lives() reward = self.ale.act(self.actions[action]) self.life_lost = (lives != self.ale.lives()) self.score += reward self.current_state = self.getScreen() self.history.add(self.current_state) self.terminal = self.isTerminal() return reward, self.history.get(), self.terminal def getScreen(self): screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen / 255., (self.screen_width, self.screen_height)) return resized def isTerminal(self): if self.mode == 'train': return self.ale.game_over() or self.life_lost return self.ale.game_over()
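getScreen above divides by 255 before resizing, which makes cv2 interpolate float64 data; resizing the uint8 frame first and scaling afterwards is cheaper. A sketch of that variant, with an 84x84 target assumed:

import cv2
import numpy as np

def preprocess(gray_screen, width=84, height=84):
    # uint8 ALE frame -> float32 (height, width) in [0, 1]
    resized = cv2.resize(gray_screen, (width, height))
    return resized.astype(np.float32) / 255.0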
lastMemoryPointerPosition = 0 frameCount = 0 frameCountLast = frameCount terminal = 0 testFlag = False ale.reset_game() trainStart = False trainThread = threading.Thread(target=train) trainThread.start() # for frameCount in xrange(maxFrame): life0 = ale.lives() # rate = explorationRate if not testFlag else testExplorationRate # perceive if np.random.rand(1) > explorationRate: actionIndex = forward(memory.History, sess, Q_train) # actionIndex = np.argmax(sess.run(Q_train.y, feed_dict={Q_train.x_image: [memory.History]}), axis=1) else: actionIndex = np.random.randint(n_actions) # get action reward = ale.act(legal_actions[actionIndex]) # reward observe = Scale(ale.getScreenGrayscale()) # 0.08s life1 = ale.lives() if life1 < life0: reward -= 1
class AtariEnvironment(interfaces.Environment): def __init__(self, atari_rom, frame_skip=4, noop_max=30, terminate_on_end_life=False, random_seed=123, frame_history_length=4, use_gui=False, max_num_frames=500000, repeat_action_probability=0.0, record_screen_dir=None): self.ale = ALEInterface() self.ale.setInt('random_seed', random_seed) self.ale.setInt('frame_skip', 1) self.ale.setFloat('repeat_action_probability', 0.0) self.ale.setInt('max_num_frames_per_episode', max_num_frames) if record_screen_dir is not None: self.ale.setString('record_screen_dir', record_screen_dir) self.ale.loadROM(atari_rom) self.frame_skip = frame_skip self.repeat_action_probability = repeat_action_probability self.noop_max = noop_max self.terminate_on_end_life = terminate_on_end_life self.current_lives = self.ale.lives() self.is_terminal = False self.previous_action = 0 self.num_actions = len(self.ale.getMinimalActionSet()) w, h = self.ale.getScreenDims() self.screen_width = w self.screen_height = h self.zero_last_frames = [ np.zeros((84, 84), dtype=np.uint8), np.zeros((84, 84), dtype=np.uint8) ] self.last_two_frames = copy.copy(self.zero_last_frames) self.zero_history_frames = [ np.zeros((84, 84), dtype=np.uint8) for i in range(0, frame_history_length) ] self.frame_history = copy.copy(self.zero_history_frames) atari_actions = self.ale.getMinimalActionSet() self.atari_to_onehot = dict( list(zip(atari_actions, list(range(len(atari_actions)))))) self.onehot_to_atari = dict( list(zip(list(range(len(atari_actions))), atari_actions))) self.screen_image = np.zeros(self.screen_height * self.screen_width, dtype=np.uint8) self.use_gui = use_gui self.original_frame = np.zeros((h, w), dtype=np.uint8) self.refresh_time = datetime.timedelta(milliseconds=1000 / 60) self.last_refresh = datetime.datetime.now() if (self.use_gui): self.gui_screen = pygame.display.set_mode((w, h)) def getRAM(self, ram=None): return self.ale.getRAM(ram) def _get_frame(self): self.ale.getScreenGrayscale(self.screen_image) image = self.screen_image.reshape( [self.screen_height, self.screen_width, 1]) self.original_frame = image image = cv2.resize(image, (84, 84)) return image def perform_action(self, onehot_index_action): if self.repeat_action_probability > 0: if np.random.uniform() < self.repeat_action_probability: onehot_index_action = self.previous_action self.previous_action = onehot_index_action action = self.onehot_to_atari[onehot_index_action] state, action, reward, next_state, self.is_terminal = self.perform_atari_action( action) return state, onehot_index_action, reward, next_state, self.is_terminal def perform_atari_action(self, atari_action): state = self.get_current_state() reward = self._act(atari_action, self.frame_skip) if self.use_gui: self.refresh_gui() self.frame_history[:-1] = self.frame_history[1:] self.frame_history[-1] = np.max(self.last_two_frames, axis=0) next_state = self.get_current_state() return state, atari_action, reward, next_state, self.is_terminal def _act(self, ale_action, repeat): reward = 0 for i in range(repeat): reward += self.ale.act(ale_action) if i >= repeat - 2: self.last_two_frames = [ self.last_two_frames[1], self._get_frame() ] self.is_terminal = self.ale.game_over() # terminate the episode if current_lives has decreased lives = self.ale.lives() if self.current_lives != lives: if self.current_lives > lives and self.terminate_on_end_life: self.is_terminal = True self.current_lives = lives return reward def get_current_state(self): #return copy.copy(self.frame_history) return [x.copy() for x in 
self.frame_history] def get_actions_for_state(self, state): return [ self.atari_to_onehot[a] for a in self.ale.getMinimalActionSet() ] def reset_environment(self): self.last_two_frames = [self.zero_history_frames[0], self._get_frame()] if self.terminate_on_end_life: if self.ale.game_over(): self.ale.reset_game() else: self.ale.reset_game() self.current_lives = self.ale.lives() if self.noop_max > 0: num_noops = np.random.randint(self.noop_max + 1) self._act(0, num_noops) self.previous_action = 0 self.frame_history = copy.copy(self.zero_history_frames) self.frame_history[-1] = np.max(self.last_two_frames, axis=0) if self.use_gui: self.refresh_gui() def is_current_state_terminal(self): return self.is_terminal def refresh_gui(self): current_time = datetime.datetime.now() if (current_time - self.last_refresh) > self.refresh_time: self.last_refresh = current_time gui_image = np.tile( np.transpose(self.original_frame, axes=(1, 0, 2)), [1, 1, 3]) # gui_image = np.zeros((self.screen_width, self.screen_height, 3), dtype=np.uint8) # channel = np.random.randint(3) # gui_image[:,:,channel] = np.transpose(self.original_frame, axes=(1, 0, 2))[:,:,0] pygame.surfarray.blit_array(self.gui_screen, gui_image) pygame.display.update()
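perform_atari_action above shifts the frame history with in-place slice assignment instead of a deque. The same rolling update in isolation:

import numpy as np

def push_frame(frame_history, new_frame):
    # Drop the oldest frame, append the newest.
    frame_history[:-1] = frame_history[1:]
    frame_history[-1] = new_frame

history = [np.zeros((84, 84), dtype=np.uint8) for _ in range(4)]
push_frame(history, np.ones((84, 84), dtype=np.uint8))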
class AtariEnvironment: """ Environment for playing Atari games using ALE Interface """ def __init__(self, game_filename, **kwargs): """ Create an environment with the provided game """ pygame.init() self.screen = pygame.display.set_mode((160, 210)) self.fps_clock = pygame.time.Clock() self.show_while_training = True # Buffer for grabbing the screen from ALE self.screen_buffer = np.zeros((100800, ), np.uint8) # Create the ALE interface and load the game self.ale = ALEInterface() self.ale.setBool('color_averaging', True) self.ale.setFloat('repeat_action_probability', 0.0) self.ale.loadROM(game_filename) # Grab the set of available moves for this game self.move_list = self.ale.getMinimalActionSet() self.listeners = [] def update_screen(self): """ Grab the current screen from ALE (the pygame display code below is currently disabled) """ self.ale.getScreenRGB(self.screen_buffer) # if self.show_while_training: # game_screen = self.screen_buffer.reshape((210,160,3)) # game_screen = np.transpose(game_screen, (1,0,2)) # # game_surface = pygame.surfarray.make_surface(game_screen) # self.screen.blit(game_surface, (0,0)) # pygame.display.flip() def get_reduced_screen(self): """ Convert the current screen to an 84x84 np array of luminance values """ # Reshape the screen buffer to an appropriate shape game_screen = self.screen_buffer.reshape((210, 160, 3)) # Convert to luminosity gray_screen = np.dot(game_screen, np.array([0.299, 0.587, 0.114])).astype(np.uint8) gray_screen = ndimage.zoom(gray_screen, (0.4, 0.525)) return gray_screen def act(self, action): """ Perform an action on the environment """ ale_action = self.move_list[action] return self.ale.act(ale_action) def terminal(self): """ Return if the state is a terminal state """ return self.ale.game_over() def lives(self): """ How many lives are left """ return self.ale.lives() def reset_game(self): """ Reset the ALE game state """ self.ale.reset_game()
class AtariPlayer(RLEnvironment):
    """
    A wrapper for the Atari emulator.
    NOTE: will automatically restart when a real episode ends.
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with a random number of null ops
        :param live_lost_as_eoe: consider the loss of a life as end of episode.
            Useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = os.path.join(get_dataset_dir('atari_rom'), rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt("random_seed", self.rng.randint(0, 10000))
            self.ale.setBool("showinfo", False)
            self.ale.setInt("frame_skip", 1)
            self.ale.setBool('color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat('repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString('record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file)
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
        # weights 0.299, 0.587, 0.114 -- same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
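# Hypothetical rollout with the AtariPlayer defined above (the rom path is a
# placeholder). Each action() call repeats the chosen action frame_skip times,
# and with live_lost_as_eoe=True a lost life already reports isOver=True.
import numpy as np

player = AtariPlayer('breakout.bin', frame_skip=4, live_lost_as_eoe=True)
episode_reward, is_over = 0, False
while not is_over:
    state = player.current_state()                 # (h, w, 1) float32 frame
    a = np.random.randint(len(player.actions))     # random policy, for illustration
    reward, is_over = player.action(a)
    episode_reward += reward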
class AtariPlayer(RLEnvironment):
    """
    A wrapper for the Atari emulator.
    NOTE: will automatically restart when a real episode ends.
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action
        :param image_shape: (w, h)
        :param height_range: (h1, h2) to cut
        :param viz: visualization to be done.
            Set to 0 to disable.
            Set to a positive number to be the delay between frames to show.
            Set to a string to be a directory to store frames.
        :param nullop_start: start with a random number of null ops
        :param live_lost_as_eoe: consider the loss of a life as end of episode.
            Useful for training.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_dir('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
        except AttributeError:
            log_once()

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            # This ALE build expects bytes keys, hence the b'...' literals.
            self.ale.setInt(b"random_seed", self.rng.randint(0, 10000))
            self.ale.setBool(b"showinfo", False)
            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.startWindowThread()
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start
        self.height_range = height_range
        self.image_shape = image_shape

        self.current_episode_score = StatCounter()
        self.restart_episode()

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def current_state(self):
        """
        :returns: a gray-scale (h, w, 1) float32 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
        # weights 0.299, 0.587, 0.114 -- same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)
        ret = np.expand_dims(ret, axis=2)
        return ret

    def get_action_space(self):
        return DiscreteActionSpace(len(self.actions))

    def restart_episode(self):
        if self.current_episode_score.count > 0:
            self.stats['score'].append(self.current_episode_score.sum)
        self.current_episode_score.reset()
        self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def action(self, act):
        """
        :param act: an index of the action
        :returns: (reward, isOver)
        """
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        self.current_episode_score.feed(r)
        isOver = self.ale.game_over()
        if isOver:
            self.restart_episode()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        return (r, isOver)
def act(self, a):
    lives = ALEInterface.lives(self)
    reward = ALEInterface.act(self, self.legal_actions[a])
    # During training a lost life counts as game over; in test mode only a
    # real game over does.
    game_over = ALEInterface.game_over(self) or \
        (not self.test_mode and ALEInterface.lives(self) < lives)
    return reward, game_over
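# Hedged sketch of how the test_mode flag above changes episode boundaries:
# during training a lost life ends the learning episode so the agent sees the
# penalty promptly; during evaluation only a true game over counts. The class
# below is a self-contained stand-in, not part of the wrapper above.
class _EoEDemo:
    test_mode = False

    def ends_episode(self, game_over, lives_before, lives_after):
        return game_over or (not self.test_mode and lives_after < lives_before)

d = _EoEDemo()
assert d.ends_episode(False, 3, 2) is True    # training: life loss terminates
d.test_mode = True
assert d.ends_episode(False, 3, 2) is False   # evaluation: keep playing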
class Environment(object):

    def __init__(self, config):
        self.env = ALEInterface()
        self.env.setInt(b'random_seed', 123)
        self.env.loadROM('./breakout.bin')
        self.actionSet = self.env.getMinimalActionSet()
        screen_width, screen_height, self.action_repeat, self.random_start = \
            config.screen_width, config.screen_height, \
            config.action_repeat, config.random_start
        self.display = config.display
        self.dims = (screen_width, screen_height)
        self._screen = None
        self.reward = 0
        self.terminal = True

    def new_game(self, from_random_game=False):
        if self.lives == 0:
            self.env.reset_game()
        self._screen = self.env.getScreenRGB()
        self._step(0)
        self.render()
        return self.screen, 0, 0, self.terminal

    def new_random_game(self):
        # Take a random number of no-op steps so episodes start from varied states.
        self.new_game(True)
        for _ in range(random.randint(0, self.random_start - 1)):
            self._step(0)
        self.render()
        return self.screen, 0, 0, self.terminal

    def _step(self, action):
        self.reward = self.env.act(self.actionSet[action])
        self._screen = self.env.getScreenRGB()
        self.terminal = self.env.game_over()

    def _random_step(self):
        action = random.randint(0, self.action_size - 1)
        self._step(action)

    @property
    def screen(self):
        return cv2.resize(
            cv2.cvtColor(self._screen, cv2.COLOR_RGB2GRAY) / 255., self.dims)

    @property
    def action_size(self):
        return len(self.actionSet)

    @property
    def lives(self):
        return self.env.lives()

    @property
    def state(self):
        return self.screen, self.reward, self.terminal

    def render(self):
        if self.display:
            pass  # rendering hook; ALE can also display via 'display_screen'

    def after_act(self, action):
        self.render()
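# Minimal usage sketch for the Environment above, assuming a `config` object
# with the attributes read in __init__ and that ./breakout.bin exists; the
# Config class here is a hypothetical stand-in for the real config.
class Config:
    screen_width, screen_height = 84, 84
    action_repeat, random_start = 4, 30
    display = False

env = Environment(Config())
# new_random_game applies up to random_start no-ops, so successive episodes
# do not all begin from the same deterministic state.
screen, _, _, terminal = env.new_random_game()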
class AtariEmulator(BaseEnvironment):

    def __init__(self, emulator_id, game, resource_folder, random_seed=3,
                 random_start=True, single_life_episodes=False,
                 history_window=1, visualize=False, verbose=0, **unknown):
        if verbose >= 2:
            logging.debug('Emulator#{} received unknown args: {}'.format(
                emulator_id, unknown))
        self.emulator_id = emulator_id
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", random_seed * (emulator_id + 1))
        # For full control over explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        self.ale.setBool(b"display_screen", visualize)
        full_rom_path = resource_folder + "/" + game + ".bin"
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        # Some games stay frozen until FIRE is pressed, so remember whether
        # this game needs an initial FIRE action.
        self._have_to_fire = ('FIRE' in [ACTION_MEANING[a]
                                         for a in self.legal_actions])
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()

        self.random_start = random_start
        self.single_life_episodes = single_life_episodes
        self.call_on_new_frame = visualize
        self.history_window = history_window
        self.observation_shape = (self.history_window, IMG_SIZE_X, IMG_SIZE_Y)

        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.history = create_history_observation(self.history_window)
        self.frame_preprocessor = FramePreprocessor(self.gray_screen.shape,
                                                    FRAMES_IN_POOL)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance.
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return self.gray_screen

    def on_new_frame(self, frame):
        pass

    def __random_start_reset(self):
        """ Restart the game """
        self.ale.reset_game()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT + 1)
            for _ in range(wait):
                self.ale.act(self.get_noop())
            if self.__is_over():
                self.ale.reset_game()
        self.lives = self.ale.lives()

    def __new_game(self):
        self.__random_start_reset()
        if self._have_to_fire:
            # Take the FIRE action on reset for games that are frozen until firing.
            self.ale.act(self.legal_actions[1])
            if self.__is_over():
                self.__random_start_reset()
            self.ale.act(self.legal_actions[2])
            if self.__is_over():
                self.__random_start_reset()

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat the action and grab the screen into the frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only the last FRAMES_IN_POOL frames need to enter the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_preprocessor.new_frame(self.__get_screen_image())
        return reward

    def reset(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(self.history_window):
            _ = self.__action_repeat(0)
            self.history.new_observation(self.frame_preprocessor.get_processed())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.history.get_state(), None

    def next(self, action):
        """ Get the next state, reward, and game-over signal """
        reward = self.__action_repeat(action)
        self.history.new_observation(self.frame_preprocessor.get_processed())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        return self.history.get_state(), reward, terminal, None

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return self.legal_actions[0]

    def close(self):
        del self.ale
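# Hypothetical driver loop for the AtariEmulator above (the game name and
# resource folder are placeholders). With single_life_episodes=True,
# __is_terminal compares the cached life count against ale.lives(), so losing
# a single life already ends the episode.
import random

emulator = AtariEmulator(0, 'breakout', '/path/to/roms',
                         single_life_episodes=True)
state, _ = emulator.reset()
terminal = False
while not terminal:
    a = random.randrange(len(emulator.get_legal_actions()))
    state, reward, terminal, _ = emulator.next(a)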
class Env(object):

    def __init__(self, rom_path, rescale_size, noop_range, state_tensor_type,
                 state_frames=1, action_repeat=0):
        self._rompath = rom_path
        self._noop_range = noop_range
        self._state_tensor_type = state_tensor_type
        self._state_frames = state_frames
        self._action_repeat = action_repeat
        self._NOOP_ACTION = 0
        self.__init_ale()
        self.action_set = self._ale.getMinimalActionSet()
        self._w, self._h = self._ale.getScreenDims()
        self._frames_hist = []
        self._preprocessor = Preprocess(rescale_size)
        self.reset_game()

    def __init_ale(self):
        self._ale = ALEInterface()
        self._ale.setBool('display_screen', False)
        self._ale.loadROM(self._rompath)

    def __init_frames_hist(self):
        temp = self.get_current_screen()
        self._frames_hist = [temp] * (self._state_frames + 1)

    def __add_frame(self, frame):
        self._frames_hist.append(frame)
        # Keep only the most recent state_frames + 1 frames.
        self._frames_hist = self._frames_hist[-1 * (self._state_frames + 1):]

    def get_current_screen(self):
        temp = np.zeros((self._h, self._w, 3), dtype=np.uint8)
        self._ale.getScreenRGB(screen_data=temp)
        return temp

    def get_state(self):
        if self.__isdone():
            return None
        preprocessed = self._preprocessor.process_images(self._frames_hist)
        state_tensor = torch.from_numpy(preprocessed).type(self._state_tensor_type)
        if len(state_tensor.size()) == 3:
            # Add a batch dimension to a 3-d tensor.
            state_tensor = state_tensor.unsqueeze(0)
        return state_tensor

    def __isdone(self):
        return self._ale.lives() <= 0

    def take_action(self, action, clip_rewards=True):
        i = 0
        total_reward = 0.0
        while not self.__isdone() and i <= self._action_repeat:
            reward = self._ale.act(action)
            if clip_rewards:
                reward = min(1, max(reward, -1))
            total_reward += reward
            i += 1
        screen_img = self.get_current_screen()
        self.__add_frame(screen_img)
        return self.get_state(), total_reward, self.__isdone()

    def reset_game(self):
        self._ale.reset_game()
        self.__take_noop_actions()
        self.__init_frames_hist()
        return self.__isdone()

    def __take_noop_actions(self):
        noop_actions = random.randint(*self._noop_range)
        while noop_actions > 0:
            self._ale.act(self._NOOP_ACTION)
            noop_actions -= 1
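# The reward clipping inside take_action above maps every emulator reward into
# [-1, 1], which keeps gradient magnitudes comparable across games with very
# different score scales. A standalone sketch of the same clamp:
def clip_reward(r, low=-1, high=1):
    return min(high, max(r, low))

assert clip_reward(7) == 1      # e.g. a high-value brick in Breakout
assert clip_reward(-400) == -1  # large penalties are clamped too
assert clip_reward(0) == 0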
class AleEnv(object):
    """ALE wrapper for RL training.

    game_over_conditions={'points': (-1, 1)}: dict that describes all desired
    game-over conditions. Each key corresponds to a condition that is checked;
    the first condition met produces a game over.

        points: int or tuple of ints
            int: if x < 0, the game ends when the score is <= x;
                 if x >= 0, the game ends when the score is >= x
            tuple: the game ends if the score is <= x[0] or >= x[1]
        lives: int that ends the game when lives <= x
        frames: int that ends the game when the total number of frames >= x
        episodes: int that ends the game when the number of episodes >= x

    Use max_num_frames_per_episode to set the maximum episode length.
    """
    # will include timing and hidden functionality in future iterations

    def __init__(self, rom_file, display_screen=False, sound=False,
                 random_seed=0, game_over_conditions={}, frame_skip=1,
                 repeat_action_probability=0.25, max_num_frames_per_episode=0,
                 min_action_set=False, screen_color='gray', fps=60,
                 output_buffer_size=1, reduce_screen=False):
        # ALE instance and setup
        self.ale = ALEInterface()
        # TODO: check that the rom file exists; loading a missing rom will
        # crash the jupyter kernel otherwise
        self.ale.loadROM(str.encode(rom_file))

        self.ale.setBool(b'sound', sound)
        self.ale.setBool(b'display_screen', display_screen)

        if min_action_set:
            self.legal_actions = self.ale.getMinimalActionSet()
        else:
            self.legal_actions = self.ale.getLegalActionSet()

        self.ale.setInt(b'random_seed', random_seed)
        self.ale.setInt(b'frame_skip', frame_skip)
        self.frame_skip = frame_skip
        self.ale.setFloat(b'repeat_action_probability',
                          repeat_action_probability)
        self.ale.setInt(b'max_num_frames_per_episode',
                        max_num_frames_per_episode)
        # Reload so the settings above take effect.
        self.ale.loadROM(str.encode(rom_file))

        self.game_over_conditions = game_over_conditions
        self.screen_color = screen_color
        self.reduce_screen = reduce_screen
        self.d_frame = (fps ** -1) * self.frame_skip

        # set up output buffer
        self.output_buffer_size = output_buffer_size
        self.queue_size = self.output_buffer_size
        self._reset_params()

    def observe(self, flatten=False, expand_dim=False):
        out = np.stack([self.output_queue[i]
                        for i in range(self.output_buffer_size)])
        if flatten:
            out = out.flatten()
        else:
            out = np.squeeze(out)
        if expand_dim:
            return np.expand_dims(np.expand_dims(out, axis=0), axis=1)
        return out

    @property
    def width(self):
        return self.game_screen.shape[1]

    @property
    def height(self):
        return self.game_screen.shape[0]

    @property
    def game_over(self):
        return self._game_over()

    @property
    def actions(self):
        return self.legal_actions

    @property
    def lives(self):
        return self.ale.lives()

    def _update_game_screen(self):
        """ Grab the current screen, optionally downscaled to 110x84 and
        cropped to 84x84 (21 rows off the top, 5 off the bottom). """
        if self.screen_color in ('gray', 'grey'):
            self.game_screen = np.squeeze(self.ale.getScreenGrayscale())
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84))
                self.game_screen = self.game_screen[21:-5, :]
        elif self.screen_color in ('rgb', 'color'):
            self.game_screen = self.ale.getScreenRGB()
            if self.reduce_screen:
                self.game_screen = resize(self.game_screen,
                                          output_shape=(110, 84, 3))
                self.game_screen = self.game_screen[21:-5, :, :]

    def _reset_params(self):
        self.total_points = 0
        self.total_frames = 0
        self.curr_episode = 1
        self.prev_ep_frame_num = -float("inf")
        self._update_game_screen()
        self.output_queue = deque(
            np.zeros(shape=(self.queue_size - 1, self.height, self.width)),
            self.queue_size)
        self.output_queue.appendleft(self.game_screen)

    def reset(self):
        self.ale.reset_game()
        self._reset_params()

    def act(self, action):
        reward = self.ale.act(self.legal_actions[action])
        self._update_game_screen()
        self.output_queue.pop()
        self.output_queue.appendleft(self.game_screen)
        self.total_points += reward
        self.total_frames += self.frame_skip
        # The per-episode frame counter resets when a new episode starts.
        if self.ale.getEpisodeFrameNumber() <= self.prev_ep_frame_num:
            self.curr_episode += 1
        self.prev_ep_frame_num = self.ale.getEpisodeFrameNumber()
        return reward, self.d_frame, self.game_over

    def _game_over(self):
        if self.ale.game_over():
            return True
        for cond in self.game_over_conditions:
            if cond == 'points':
                threshold = self.game_over_conditions[cond]
                # note: the x < 0 case described in the docstring is not
                # handled by this branch
                if isinstance(threshold, int):
                    if self.total_points >= threshold:
                        return True
                elif isinstance(threshold, tuple):
                    if (self.total_points <= threshold[0]
                            or self.total_points >= threshold[1]):
                        return True
            elif cond == 'lives':
                if self.lives <= self.game_over_conditions[cond]:
                    return True
            elif cond == 'frames':
                if self.total_frames >= self.game_over_conditions[cond]:
                    return True
            elif cond == 'episodes':
                if self.curr_episode >= self.game_over_conditions[cond]:
                    return True
            else:
                raise RuntimeError("ERROR: Invalid game over condition")
        return False
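# Hypothetical usage of AleEnv's game_over_conditions (the rom path is a
# placeholder): end the game once the score reaches 10 points, lives drop to 2
# or below, or 10000 emulator frames have elapsed, whichever comes first.
env = AleEnv('/path/to/breakout.bin',
             game_over_conditions={'points': 10, 'lives': 2, 'frames': 10000},
             frame_skip=4)
env.reset()
while not env.game_over:
    reward, dt, done = env.act(0)   # NOOP policy, just for illustration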
frameCountLast = frameCount
terminal = 1
ale.reset_game()

# Run the training thread alongside the emulator loop.
trainThread = threading.Thread(target=train)
trainThread.start()

for frameCount in range(maxFrame):
    t00 = time.time()
    lives = ale.lives()
    observe = Scale(ale.getScreen())  # downsample the raw screen (~0.08 s)
    if terminal:
        actionIndex = 1  # FIRE to start a new episode
    else:
        if np.random.rand(1) > explorationRate:
            # Greedy action from the current Q-network.
            [actionIndex, actionValue] = forward(
                [np.transpose(memory.History, [1, 2, 0])], Q_train, all=False)
        else:
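# The loop above is cut off inside the epsilon-greedy branch. A hedged sketch
# of how the exploration arm typically completes (legal_actions as in the
# surrounding script; this is an assumption, not the original code):
#     else:
#         actionIndex = np.random.randint(len(legal_actions))
# i.e., with probability explorationRate a uniformly random action is taken;
# otherwise the greedy action from the Q-network is used.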
class AtariPlayer(gym.Env):
    """
    A wrapper for the ALE emulator, with configurations to mimic DeepMind DQN
    settings.

    Info:
        score: the accumulated reward in the current game
        gameOver: True when the current game is over
    """

    def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30,
                 live_lost_as_eoe=True, max_num_frames=0):
        """
        Args:
            rom_file: path to the rom
            frame_skip: skip every k frames and repeat the action
            viz: visualization to be done.
                Set to 0 to disable.
                Set to a positive number to be the delay between frames to show.
                Set to a string to be a directory to store frames.
            nullop_start: start with a random number of null ops.
            live_lost_as_eoe: consider the loss of a life as end of episode.
                Useful for training.
            max_num_frames: maximum number of frames per episode.
        """
        super(AtariPlayer, self).__init__()
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)

        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Error)
        except AttributeError:
            if execute_only_once():
                logger.warn("You're not using latest ALE")

        # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86
        with _ALE_LOCK:
            self.ale = ALEInterface()
            self.rng = get_rng(self)
            self.ale.setInt(b"random_seed", self.rng.randint(0, 30000))
            self.ale.setInt(b"max_num_frames_per_episode", max_num_frames)
            self.ale.setBool(b"showinfo", False)
            self.ale.setInt(b"frame_skip", 1)
            self.ale.setBool(b'color_averaging', False)
            # manual.pdf suggests otherwise.
            self.ale.setFloat(b'repeat_action_probability', 0.0)

            # viz setup
            if isinstance(viz, six.string_types):
                assert os.path.isdir(viz), viz
                self.ale.setString(b'record_screen_dir', viz)
                viz = 0
            if isinstance(viz, int):
                viz = float(viz)
            self.viz = viz
            if self.viz and isinstance(self.viz, float):
                self.windowname = os.path.basename(rom_file)
                cv2.namedWindow(self.windowname)

            self.ale.loadROM(rom_file.encode('utf-8'))
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()

        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

        self.action_space = spaces.Discrete(len(self.actions))
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(self.height, self.width, 1), dtype=np.uint8)
        self._restart_episode()

    def get_action_meanings(self):
        return [ACTION_MEANING[i] for i in self.actions]

    def _grab_raw_image(self):
        """
        :returns: the current 3-channel image
        """
        m = self.ale.getScreenRGB()
        return m.reshape((self.height, self.width, 3))

    def _current_state(self):
        """
        :returns: a gray-scale (h, w, 1) uint8 image
        """
        ret = self._grab_raw_image()
        # max-pooled over the last screen
        ret = np.maximum(ret, self.last_raw_screen)
        if self.viz:
            if isinstance(self.viz, float):
                cv2.imshow(self.windowname, ret)
                cv2.waitKey(int(self.viz * 1000))
        ret = ret.astype('float32')
        # weights 0.299, 0.587, 0.114 -- same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        return ret.astype('uint8')  # to save some memory

    def _restart_episode(self):
        with _ALE_LOCK:
            self.ale.reset_game()

        # random null-ops start
        n = self.rng.randint(self.nullop_start)
        self.last_raw_screen = self._grab_raw_image()
        for k in range(n):
            if k == n - 1:
                self.last_raw_screen = self._grab_raw_image()
            self.ale.act(0)

    def reset(self):
        if self.ale.game_over():
            self._restart_episode()
        return self._current_state()

    def step(self, act):
        oldlives = self.ale.lives()
        r = 0
        for k in range(self.frame_skip):
            if k == self.frame_skip - 1:
                self.last_raw_screen = self._grab_raw_image()
            r += self.ale.act(self.actions[act])
            newlives = self.ale.lives()
            if self.ale.game_over() or \
                    (self.live_lost_as_eoe and newlives < oldlives):
                break

        isOver = self.ale.game_over()
        if self.live_lost_as_eoe:
            isOver = isOver or newlives < oldlives
        info = {'ale.lives': newlives}
        return self._current_state(), r, isOver, info
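# Hypothetical rollout with the gym-style AtariPlayer above (the rom name is a
# placeholder). step() follows the gym convention of returning
# (observation, reward, done, info), so standard gym training loops apply.
player = AtariPlayer('breakout.bin', frame_skip=4)
obs = player.reset()
done, score = False, 0
while not done:
    a = player.action_space.sample()
    obs, r, done, info = player.step(a)
    score += r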
legal_actions = ale.getMinimalActionSet()

# Buffer for the grayscale screen (height x width)
width, height = ale.getScreenDims()
screen_buffer = np.empty((height, width), dtype=np.uint8)

# Some common settings
HISTORY_LENGTH = 4
MAX_STEPS = 1000000
MAX_EPOCHS = 10
MINIBATCH_SIZE = 32
LONG_PRESS_TIMES = 4
GAMMA = 0.9
EPSILON = 0.1
UPDATE_FREQUENCY = 4
MAX_LIVES = ale.lives()
episode_sum = 0
episode_sums = []

# History variables (replay memory)
images = RingBuffer(shape=(MAX_STEPS, 84, 84))
actions = RingBuffer(shape=(MAX_STEPS, 1))
rewards = RingBuffer(shape=(MAX_STEPS, 1))
terminals = RingBuffer(shape=(MAX_STEPS, 1))

# Define a DQN-style convnet (Keras 1 syntax)
model = Sequential()
model.add(Convolution2D(16, 8, 8, subsample=(4, 4),
                        input_shape=(HISTORY_LENGTH, 84, 84)))
model.add(Activation('relu'))
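# A hedged sketch of how the truncated network definition above typically
# continues, in the same Keras 1 syntax as the Convolution2D call already used
# (assumes Flatten and Dense are imported from keras.layers; the layer sizes
# follow the common DQN convnet, not necessarily this script's original ones):
model.add(Convolution2D(32, 4, 4, subsample=(2, 2)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dense(len(legal_actions)))  # one Q-value per legal action
model.compile(loss='mse', optimizer='rmsprop')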
class AtariEmulator(BaseEnvironment):

    def __init__(self, rom_addr, random_start=False, random_seed=6,
                 visualize=True, single_life=False):
        self.ale = ALEInterface()
        self.ale.setInt(b"random_seed", 2 * random_seed)
        # For full control over explicit action repeat (>= ALE 0.5.0)
        self.ale.setFloat(b"repeat_action_probability", 0.0)
        # Disable frame_skip and color_averaging
        # See: http://is.gd/tYzVpj
        self.ale.setInt(b"frame_skip", 1)
        self.ale.setBool(b"color_averaging", False)
        full_rom_path = rom_addr
        self.ale.loadROM(str.encode(full_rom_path))
        self.legal_actions = self.ale.getMinimalActionSet()
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.lives = self.ale.lives()
        self.writer = imageio.get_writer('breakout0.gif', fps=30)

        self.random_start = random_start
        self.single_life_episodes = single_life
        self.call_on_new_frame = visualize

        # Processed historical frames that will be fed into the network
        # (i.e., four 84x84 images)
        self.observation_pool = ObservationPool(
            np.zeros((84, 84, 4), dtype=np.uint8))
        self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3),
                                   dtype=np.uint8)
        self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1),
                                    dtype=np.uint8)
        self.frame_pool = FramePool(
            np.empty((2, self.screen_height, self.screen_width), dtype=np.uint8),
            self.__process_frame_pool)

    def get_legal_actions(self):
        return self.legal_actions

    def __get_screen_image(self):
        """
        Get the current frame luminance.
        :return: the current frame
        """
        self.ale.getScreenGrayscale(self.gray_screen)
        if self.call_on_new_frame:
            self.ale.getScreenRGB(self.rgb_screen)
            self.on_new_frame(self.rgb_screen)
        return np.squeeze(self.gray_screen)

    def on_new_frame(self, frame):
        pass

    def __new_game(self):
        """ Restart the game """
        self.ale.reset_game()
        self.lives = self.ale.lives()
        if self.random_start:
            wait = random.randint(0, MAX_START_WAIT)
            for _ in range(wait):
                self.ale.act(self.legal_actions[0])

    def __process_frame_pool(self, frame_pool):
        """ Preprocess the frame pool: max over frames, then resize to 84x84 """
        img = np.amax(frame_pool, axis=0)
        img = imresize(img, (84, 84), interp='nearest')
        img = img.astype(np.uint8)
        return img

    def __action_repeat(self, a, times=ACTION_REPEAT):
        """ Repeat the action and grab the screen into the frame pool """
        reward = 0
        for i in range(times - FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
        # Only the last FRAMES_IN_POOL frames need to enter the frame pool
        for i in range(FRAMES_IN_POOL):
            reward += self.ale.act(self.legal_actions[a])
            self.frame_pool.new_frame(self.__get_screen_image())
        return reward

    def get_initial_state(self):
        """ Get the initial state """
        self.__new_game()
        for step in range(4):
            _ = self.__action_repeat(0)
            self.observation_pool.new_observation(
                self.frame_pool.get_processed_frame())
        if self.__is_terminal():
            raise Exception('This should never happen.')
        return self.observation_pool.get_pooled_observations()

    def next(self, action):
        """ Get the next state, reward, and game-over signal """
        reward = self.__action_repeat(np.argmax(action))
        self.observation_pool.new_observation(
            self.frame_pool.get_processed_frame())
        terminal = self.__is_terminal()
        self.lives = self.ale.lives()
        observation = self.observation_pool.get_pooled_observations()
        return observation, reward, terminal

    def __is_terminal(self):
        if self.single_life_episodes:
            return self.__is_over() or (self.lives > self.ale.lives())
        return self.__is_over()

    def __is_over(self):
        return self.ale.game_over()

    def get_noop(self):
        return [1.0, 0.0]
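# Hypothetical episode step with this AtariEmulator (the rom path is a
# placeholder). Note that next() expects a one-hot action vector and applies
# np.argmax to it, which is also why get_noop() returns a one-hot list.
import numpy as np

emulator = AtariEmulator('/path/to/breakout.bin', visualize=False)
obs = emulator.get_initial_state()   # (84, 84, 4) uint8 frame stack
num_actions = len(emulator.get_legal_actions())
one_hot = np.eye(num_actions)[np.random.randint(num_actions)]
obs, reward, terminal = emulator.next(one_hot)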
class AtariEnvironment:
    """ Environment for playing Atari games using the ALE interface """

    def __init__(self, game_path, **kwargs):
        """ Create an environment with the provided game """
        # Optional parameters
        self.screen_size = kwargs.get('screen_size', (84, 84))
        self.random_seed = kwargs.get('seed', 0)

        # Buffer for grabbing the screen from ALE (210 * 160 * 3 bytes)
        self.screen_buffer = np.zeros((100800,), np.uint8)

        # Create the ALE interface and load the game
        self.ale = ALEInterface()
        self.ale.setBool('color_averaging', True)
        self.ale.setFloat('repeat_action_probability', 0.0)
        self.ale.setInt('random_seed', self.random_seed)
        self.ale.loadROM(game_path)

        # Grab the set of available moves for this game
        self.move_list = self.ale.getMinimalActionSet()
        self.num_actions = len(self.move_list)
        print("Number of Actions:", self.num_actions)

        self.listeners = []
        self.screen = None

    def get_state(self):
        """ Convert the current screen to an 84x84 np array of
        luminance values """
        self.ale.getScreenRGB(self.screen_buffer)
        # Reshape the screen buffer to (height, width, channels)
        game_screen = self.screen_buffer.reshape((210, 160, 3))
        # Convert to luminance and scale to the desired screen size
        gray_screen = np.dot(game_screen,
                             np.array([0.299, 0.587, 0.114])).astype(np.uint8)
        gray_screen = ndimage.zoom(gray_screen, (0.4, 0.525))
        return gray_screen

    def act(self, action):
        """ Perform an action on the environment """
        ale_action = self.move_list[action]
        return self.ale.act(ale_action)

    def terminal(self):
        """ Return whether the current state is terminal """
        return self.ale.game_over()

    def lives(self):
        """ How many lives are left """
        return self.ale.lives()

    def reset_game(self):
        """ Reset the game to its initial state """
        self.ale.reset_game()

    def display(self):
        """ Render the last grabbed screen via pygame """
        game_screen = self.screen_buffer.reshape((210, 160, 3))
        game_surf = pygame.surfarray.make_surface(game_screen)
        game_surf = pygame.transform.rotate(game_surf, -90)
        self.screen.blit(game_surf, (0, 0))
        pygame.display.flip()
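# Hypothetical usage of the AtariEnvironment above (the game path is a
# placeholder). get_state() grabs, grays, and downsamples the screen in one
# call, so a basic interaction step looks like:
env = AtariEnvironment('/path/to/breakout.bin')
env.reset_game()
frame = env.get_state()   # (84, 84) uint8 luminance image
reward = env.act(0)       # index into the minimal action set
done = env.terminal()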