class Emulator(object): FRAME_SKIP = 4 SCREEN_WIDTH = 84 SCREEN_HEIGHT = 84 def __init__(self, rom): self.ale = ALEInterface() self.max_num_frames_per_episode = 100000 #self.ale.getInt('max_num_frames_per_episode') self.ale.setInt('frame_skip', self.FRAME_SKIP) self.ale.loadROM('roms/' + rom) self.actions = self.ale.getMinimalActionSet() def reset(self): self.ale.reset_game() def image(self): screen = self.ale.getScreenGrayscale() screen = cv2.resize(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH)) return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH)) def act(self, action): return self.ale.act(action) def terminal(self): return self.ale.game_over()
class Emulate: def __init__(self, rom_file, display_screen=False,frame_skip=4,screen_height=84,screen_width=84,repeat_action_probability=0,color_averaging=True,random_seed=0,record_screen_path='screen_pics',record_sound_filename=None,minimal_action_set=True): self.ale = ALEInterface() if display_screen: if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.ale.setBool('sound', True) self.ale.setBool('display_screen', True) self.ale.setInt('frame_skip', frame_skip) self.ale.setFloat('repeat_action_probability', repeat_action_probability) self.ale.setBool('color_averaging', color_averaging) if random_seed: self.ale.setInt('random_seed', random_seed) self.ale.loadROM(rom_file) if minimal_action_set: self.actions = self.ale.getMinimalActionSet() else: self.actions = self.ale.getLegalActionSet() self.dims = (screen_width,screen_height) def numActions(self): return len(self.actions) def getActions(self): return self.actions def restart(self): self.ale.reset_game() def act(self, action): reward = self.ale.act(self.actions[action]) return reward def getScreen(self): screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen, self.dims) return resized def getScreenGray(self): screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen, self.dims) rotated = np.rot90(resized,k=1) return rotated def getScreenColor(self): screen = self.ale.getScreenRGB() resized = cv2.resize(screen, self.dims) rotated = np.rot90(resized,k=1) return rotated def isTerminal(self): return self.ale.game_over()
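# A minimal random-agent rollout, sketched here to show how a wrapper like Emulate
# above is typically driven; it uses ALEInterface directly. The package import and
# the ROM path 'roms/breakout.bin' are assumptions, not taken from the snippet.
import numpy as np
from ale_python_interface import ALEInterface

ale = ALEInterface()
ale.setInt('frame_skip', 4)                     # repeat each chosen action for 4 frames
ale.setFloat('repeat_action_probability', 0.0)  # deterministic action repeats
ale.loadROM('roms/breakout.bin')                # hypothetical ROM path
actions = ale.getMinimalActionSet()

total_reward = 0
while not ale.game_over():
    a = actions[np.random.randint(len(actions))]
    total_reward += ale.act(a)
ale.reset_game()
print('episode reward: %d' % total_reward)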
class pyrlcade_environment(object): def init(self,rom_file,ale_frame_skip): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode") self.ale.setInt("random_seed",123) self.ale.setBool("color_averaging",False) self.ale.setInt("frame_skip",ale_frame_skip) self.ale.loadROM(rom_file) self.legal_actions = self.ale.getMinimalActionSet() ram_size = self.ale.getRAMSize() self.ram = np.zeros((ram_size),dtype=np.uint8) self.ale.getRAM(self.ram) self.state = self.ale.getRAM(self.ram) def reset_state(self): self.ale.reset_game() def set_action(self,a): self.action = a def step(self): self.reward = self.ale.act(self.action) is_terminal = self.ale.game_over() return is_terminal def get_state(self): self.ale.getRAM(self.ram) return self.ram def get_reward(self): return self.reward
class Emulator(object): def __init__(self, settings): self.ale = ALEInterface() self.ale.setInt('frame_skip', settings['frame_skip']) self.ale.setInt('random_seed', np.random.RandomState().randint(1000)) self.ale.setBool('color_averaging', False) self.ale.loadROM('roms/' + settings['rom_name']) self.actions = self.ale.getMinimalActionSet() self.width = settings['screen_width'] self.height = settings['screen_height'] def reset(self): self.ale.reset_game() def image(self): screen = self.ale.getScreenGrayscale() screen = cv2.resize(screen, (self.height, self.width), interpolation=cv2.INTER_LINEAR) return np.reshape(screen, (self.height, self.width)) def full_image(self): screen = self.ale.getScreenRGB() return screen def act(self, action): return self.ale.act(self.actions[action]) def terminal(self): return self.ale.game_over()
def launch(): logging.basicConfig(level=logging.INFO) myArgs = getParameters() rom = myArgs.game full_rom_path = os.path.join(myArgs.base_rom_path,rom) rng = np.random.RandomState() ale = ALEInterface() ale.setInt('random_seed',38) ale.setBool('display_screen',myArgs.display_screen) ale.setInt('frame_skip',myArgs.frame_skip) ale.setFloat('repeat_action_probability',myArgs.repeat_action_probability) ale.loadROM(full_rom_path) valid_actions = ale.getMinimalActionSet() '''for episode in xrange(10): total_reward = 0 while not ale.game_over(): from random import randrange a = valid_actions[randrange(len(valid_actions))] ale.act(a) #print reward #print ale.getScreenRGB() #total_reward += reward #print 'Episode', episode, 'ended with score:', total_reward ale.reset_game() ''' memory_pool = ReplayMemory(myArgs.memory_size,rng) network_model = buildNetwork(myArgs.resized_height,myArgs.resized_width,myArgs.rmsp_epsilon,myArgs.rmsp_rho,myArgs.learning_rate,len(valid_actions)) ddqn = DDQN(network_model,valid_actions,myArgs.target_nn_update_frequency,myArgs.discount,myArgs.phi_len) agent = Agent(myArgs,ddqn,memory_pool,valid_actions,rng) train_agent = TrainMyAgent(myArgs,ale,agent,valid_actions,rng) train_agent.run()
class Environment: def __init__(self, rom_file, args): self.ale = ALEInterface() if args.display_screen: if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.ale.setBool('sound', True) self.ale.setBool('display_screen', True) self.ale.setInt('frame_skip', args.frame_skip) self.ale.setFloat('repeat_action_probability', args.repeat_action_probability) self.ale.setBool('color_averaging', args.color_averaging) if args.random_seed: self.ale.setInt('random_seed', args.random_seed) if args.record_screen_path: if not os.path.exists(args.record_screen_path): logger.info("Creating folder %s" % args.record_screen_path) os.makedirs(args.record_screen_path) logger.info("Recording screens to %s", args.record_screen_path) self.ale.setString('record_screen_dir', args.record_screen_path) if args.record_sound_filename: logger.info("Recording sound to %s", args.record_sound_filename) self.ale.setBool('sound', True) self.ale.setString('record_sound_filename', args.record_sound_filename) self.ale.loadROM(rom_file) if args.minimal_action_set: self.actions = self.ale.getMinimalActionSet() logger.info("Using minimal action set with size %d" % len(self.actions)) else: self.actions = self.ale.getLegalActionSet() logger.info("Using full action set with size %d" % len(self.actions)) logger.debug("Actions: " + str(self.actions)) self.dims = (args.screen_height, args.screen_width) def numActions(self): return len(self.actions) def restart(self): self.ale.reset_game() def act(self, action): reward = self.ale.act(self.actions[action]) return reward def getScreen(self): screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen, self.dims) return resized def isTerminal(self): return self.ale.game_over()
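# Sketch of the 84x84 grayscale preprocessing that getScreen above performs, written
# as a standalone function so it can be tested on a dummy frame; the (210, 160, 3)
# shape is the raw Atari screen size, and cv2/numpy are assumed to be available.
import numpy as np
import cv2

def preprocess(rgb_frame, size=(84, 84)):
    """Convert an RGB screen to a resized uint8 grayscale image."""
    gray = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, size, interpolation=cv2.INTER_AREA)

dummy = np.zeros((210, 160, 3), dtype=np.uint8)
print(preprocess(dummy).shape)  # (84, 84)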
def map_game_to_ALE(game_name, interactive): game_path = '/cvgl/u/nishith/MultiTaskRL/libs/DQN_ale/roms/' \ + game_name + '.bin' print game_path game = ALEInterface() if interactive: setup_display(game) game.loadROM(game_path) return game
class AtariMDP(MDP, Serializable): def __init__(self, rom_path, obs_type=OBS_RAM, frame_skip=4): Serializable.__init__(self, rom_path, obs_type, frame_skip) self.options = (rom_path, obs_type, frame_skip) self.ale = ALEInterface() self.ale.loadROM(rom_path) self._rom_path = rom_path self._obs_type = obs_type self._action_set = self.ale.getMinimalActionSet() self.frame_skip = frame_skip def get_image(self): return to_rgb(self.ale) def get_ram(self): return to_ram(self.ale) def game_over(self): return self.ale.game_over() def reset_game(self): return self.ale.reset_game() @property def n_actions(self): return len(self.action_set) def get_obs(self): if self._obs_type == OBS_RAM: return self.get_ram()[None,:] else: assert self._obs_type == OBS_IMAGE return self.get_image()[None,:,:,:] def step(self, a): reward = 0.0 action = self.action_set[a] for _ in xrange(self.frame_skip): reward += self.ale.act(action) ob = self.get_obs().reshape(1,-1) return ob, np.array([reward]), self.ale.game_over() # return: (states, observations) def reset(self): self.ale.reset_game() return self.get_obs() @property def action_set(self): return self._action_set def plot(self): import cv2 cv2.imshow("atarigame",self.get_image()) #pylint: disable=E1101 cv2.waitKey(10) #pylint: disable=E1101
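# Action-repeat sketch matching AtariMDP.step above: one chosen action is repeated
# for frame_skip emulator frames and the rewards are summed. The early game_over
# check and the _FakeALE stub are additions so the sketch runs without a ROM.
def skip_step(env, action, frame_skip=4):
    reward = 0.0
    for _ in range(frame_skip):
        reward += env.act(action)
        if env.game_over():  # stop early if the episode ended mid-skip
            break
    return reward

class _FakeALE(object):
    """Hypothetical stand-in exposing act() and game_over()."""
    def __init__(self):
        self.frames = 0
    def act(self, action):
        self.frames += 1
        return 1.0
    def game_over(self):
        return self.frames >= 10

print(skip_step(_FakeALE(), action=0))  # 4.0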
class Emulator: def __init__(self): self.ale = ALEInterface() # turn off the sound self.ale.setBool('sound', False) self.ale.setBool('display_screen', EMULATOR_DISPLAY) self.ale.setInt('frame_skip', FRAME_SKIP) self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY) self.ale.setBool('color_averaging', COLOR_AVERAGING) self.ale.setInt('random_seed', RANDOM_SEED) if RECORD_SCENE_PATH: self.ale.setString('record_screen_dir', RECORD_SCENE_PATH) self.ale.loadROM(ROM_PATH) self.actions = self.ale.getMinimalActionSet() logger.info("Actions: " + str(self.actions)) self.dims = DIMS #self.start_lives = self.ale.lives() def getActions(self): return self.actions def numActions(self): return len(self.actions) def restart(self): self.ale.reset_game() # can be omitted def act(self, action): reward = self.ale.act(self.actions[action]) return reward def getScreen(self): # why grayscale ? screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen, self.dims) # normalize #resized /= COLOR_SCALE return resized def isTerminal(self): # while training deepmind only ends when agent dies #terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives) return self.ale.game_over()
def init(): pygame.init() rom_path = '/Users/maciej/Development/atari-roms' ale = ALEInterface() ale.setInt('random_seed', 123) ale.setInt('frame_skip', 1) ale.setFloat("repeat_action_probability", 0) ale.loadROM(rom_path + '/space_invaders.bin') return ale
def _init_ale(rand_seed, rom_file): assert os.path.exists(rom_file), '%s does not exist.' % rom_file ale = ALEInterface() ale.setInt('random_seed', rand_seed) ale.setBool('showinfo', False) ale.setInt('frame_skip', 1) ale.setFloat('repeat_action_probability', 0.0) ale.setBool('color_averaging', False) ale.loadROM(rom_file) return ale
def peekActionSize(rom): if args.use_gym: import gym env = gym.make(args.gym_env) return env.action_space.n else: from ale_python_interface import ALEInterface ale = ALEInterface() ale.loadROM(rom.encode('ascii')) return len(ale.getMinimalActionSet())
def loadROM(self, rom_file): ALEInterface.loadROM(self, rom_file) if self.minimum_actions: self.legal_actions = self.getMinimalActionSet() else: self.legal_actions = self.getLegalActionSet() self.num_actions = len(self.legal_actions) self.setInt('frame_skip', self.frame_skip) if self.random_seed is not None: self.setInt('random_seed', self.random_seed) self.height, self.width = self.getScreenDims()
def init(game, display_screen=False, record_dir=None): if display_screen: import pygame pygame.init() ale = ALEInterface() ale.setBool('display_screen', display_screen) ale.setInt('random_seed', 123) ale.setFloat("repeat_action_probability", 0) if record_dir is not None: ale.setString("record_screen_dir", record_dir) ale.loadROM('{game}.bin'.format(game=game)) return ale
def init(display_screen=False): if display_screen: import pygame pygame.init() rom_path = '.' ale = ALEInterface() ale.setBool('display_screen', display_screen) ale.setInt('random_seed', 123) ale.setInt('frame_skip', 1) ale.setFloat("repeat_action_probability", 0) ale.loadROM(rom_path + '/space_invaders.bin') return ale
class Environment: def __init__(self, show_screen, history_length): self.ale = ALEInterface() self.ale.setInt('frame_skip', 4) self.history = None self.history_length = history_length if show_screen: self.display_screen() self.load_game() (screen_width, screen_height) = self.ale.getScreenDims() self.screen_data = np.empty((screen_height, screen_width, 1), dtype=np.uint8) # 210x160 screen data self.dims = (84, 84) # input size for neural network self.actions = [3, 0, 1, 4] # noop, left, right, fire, def display_screen(self): self.ale.setBool("display_screen", True) def turn_on_sound(self): self.ale.setBool("sound", True) def restart(self): """reset game""" self.ale.reset_game() def act(self, action): """:returns reward of an action""" return self.ale.act(self.actions[action]) def __get_screen(self): """:returns Grayscale thresholded resized screen image """ self.ale.getScreenGrayscale(self.screen_data) resized = cv2.resize(self.screen_data, self.dims) return resized def get_state(self): binary_screen = self.__get_screen() if self.history is None: self.history = deque(maxlen=self.history_length) for _ in range(self.history_length - 1): self.history.append(binary_screen) self.history.append(binary_screen) result = np.stack(self.history, axis=0) return result def isTerminal(self): """checks if game is over""" return self.ale.game_over() def load_game(self): """load game from file""" self.ale.loadROM("Breakout.bin")
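# Frame-history stacking as in Environment.get_state above: the last history_length
# preprocessed frames are kept in a deque and stacked into a single state array.
# Pure numpy sketch on dummy 84x84 frames.
import numpy as np
from collections import deque

history = deque(maxlen=4)
for _ in range(4):
    history.append(np.zeros((84, 84), dtype=np.uint8))  # dummy initial frames

new_frame = np.ones((84, 84), dtype=np.uint8)
history.append(new_frame)            # oldest frame is dropped automatically
state = np.stack(history, axis=0)    # shape (4, 84, 84)
print(state.shape)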
def init(display_screen=False, record_dir=None): if display_screen: import pygame pygame.init() rom_path = '.' ale = ALEInterface() ale.setBool('display_screen', display_screen) ale.setInt('random_seed', 123) ale.setFloat("repeat_action_probability", 0) if record_dir is not None: ale.setString("record_screen_dir", record_dir) ale.loadROM(rom_path + '/space_invaders.bin') return ale
class AleInterface(object): def __init__(self, game, args): self.game = game self.ale = ALEInterface() # if sys.platform == 'darwin': # self.ale.setBool('sound', False) # Sound doesn't work on OSX # elif sys.platform.startswith('linux'): # self.ale.setBool('sound', True) # self.ale.setBool('display_screen', True) # self.ale.setBool('display_screen', args.display_screen) self.ale.setInt('frame_skip', args.frame_skip) self.ale.setFloat('repeat_action_probability', args.repeat_action_probability) self.ale.setBool('color_averaging', args.color_averaging) self.ale.setInt('random_seed', args.random_seed) # # if rand_seed is not None: # self.ale.setInt('random_seed', rand_seed) rom_file = "./roms/%s.bin" % game if not os.path.exists(rom_file): print "not found rom file:", rom_file sys.exit(-1) self.ale.loadROM(rom_file) self.actions = self.ale.getMinimalActionSet() def get_actions_num(self): return len(self.actions) def act(self, action): reward = self.ale.act(self.actions[action]) return reward def get_screen_gray(self): return self.ale.getScreenGrayscale() def get_screen_rgb(self): return self.ale.getScreenRGB() def game_over(self): return self.ale.game_over() def reset_game(self): return self.ale.reset_game()
class emulator: def __init__(self, rom_name, vis,windowname='preview'): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode"); self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM('roms/' + rom_name ) self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() self.windowname = windowname for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i self.init_frame_number = 0 # print(self.legal_actions) self.screen_width,self.screen_height = self.ale.getScreenDims() print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height)) self.vis = vis if vis: cv2.startWindowThread() cv2.namedWindow(self.windowname) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): # Instead of resetting the game, we load a checkpoint and start from there. # self.ale.reset_game() self.ale.restoreState(self.ale.decodeState(checkpoints[random.randint(0,99)].astype('uint8'))) self.init_frame_number = self.ale.getFrameNumber() #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1)))) return self.get_image() def next(self, action_indx): reward = self.ale.act(action_indx) nextstate = self.get_image() # scipy.misc.imsave('test.png',nextstate) if self.vis: cv2.imshow(self.windowname,nextstate) return nextstate, reward, self.ale.game_over() def get_frame_number(self): return self.ale.getFrameNumber() - self.init_frame_number
def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4, frame_skip=4, treat_life_lost_as_terminal=True, crop_or_scale='scale', max_start_nullops=30, record_screen_dir=None): self.n_last_screens = n_last_screens self.treat_life_lost_as_terminal = treat_life_lost_as_terminal self.crop_or_scale = crop_or_scale self.max_start_nullops = max_start_nullops ale = ALEInterface() if seed is not None: assert seed >= 0 and seed < 2 ** 16, \ "ALE's random seed must be represented by unsigned int" else: # Use numpy's random state seed = np.random.randint(0, 2 ** 16) ale.setInt(b'random_seed', seed) ale.setFloat(b'repeat_action_probability', 0.0) ale.setBool(b'color_averaging', False) if record_screen_dir is not None: ale.setString(b'record_screen_dir', str.encode(record_screen_dir)) self.frame_skip = frame_skip if use_sdl: if 'DISPLAY' not in os.environ: raise RuntimeError( 'Please set DISPLAY environment variable for use_sdl=True') # SDL settings below are from the ALE python example if sys.platform == 'darwin': import pygame pygame.init() ale.setBool(b'sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool(b'sound', True) ale.setBool(b'display_screen', True) ale.loadROM(str.encode(rom_filename)) assert ale.getFrameNumber() == 0 self.ale = ale self.legal_actions = ale.getMinimalActionSet() self.initialize()
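# No-op random start sketch (the max_start_nullops idea above): after a reset, a
# random number of no-op actions is taken so that episodes do not always begin from
# exactly the same frame. Assumes an already configured ALEInterface-like object is
# passed in; action 0 is the ALE no-op.
import numpy as np

def noop_reset(ale, max_nullops=30, noop_action=0):
    ale.reset_game()
    for _ in range(np.random.randint(0, max_nullops + 1)):
        ale.act(noop_action)
        if ale.game_over():          # defensive; should not normally happen
            ale.reset_game()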
class Breakout(object): steps_between_actions = 4 def __init__(self): self.ale = ALEInterface() self.ale.setInt('random_seed', 123) self.ale.setBool("display_screen", False) self.ale.setBool("sound", False) self.ale.loadROM("%s/breakout.bin" % rom_directory) self.current_state = [ self.ale.getScreenRGB(), self.ale.getScreenRGB() ] def start_episode(self): self.ale.reset_game() def take_action(self, action): assert not self.terminated def step(): reward = self.ale.act(action) self.roll_state() return reward reward = sum(step() for _ in xrange(self.steps_between_actions)) return (reward, self.current_state) def roll_state(self): assert len(self.current_state) == 2 self.current_state = [self.current_state[1], self.ale.getScreenRGB()] assert len(self.current_state) == 2 @property def actions(self): return self.ale.getMinimalActionSet() @property def terminated(self): return self.ale.game_over() or self.ale.lives() < 5
def init_ale(rom, display): ale = ALEInterface() # Get & Set the desired settings ale.setInt(b'random_seed', 123) # Set USE_SDL to true to display the screen. ALE must be compilied # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. USE_SDL = display if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', display) # Load the ROM file ale.loadROM(rom) return ale
class emulator: def __init__(self, rom_name, vis): if vis: import cv2 self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode"); self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM('roms/' + rom_name ) self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i # print(self.legal_actions) self.screen_width,self.screen_height = self.ale.getScreenDims() print("width/height: " +str(self.screen_width) + "/" + str(self.screen_height)) self.vis = vis if vis: cv2.startWindowThread() cv2.namedWindow("preview") def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action_indx): reward = self.ale.act(action_indx) nextstate = self.get_image() # scipy.misc.imsave('test.png',nextstate) if self.vis: cv2.imshow('preview',nextstate) return nextstate, reward, self.ale.game_over()
class Game(): """ Wrapper around the ALEInterface class. """ def __init__(self, rom_file, sdl=False): self.ale = ALEInterface() # Setup SDL if sdl: if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool(b'sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.ale.setBool(b'sound', True) self.ale.setBool(b'display_screen', True) # Load rom self.ale.loadROM(str.encode(rom_file)) def get_action_set(self): return self.ale.getLegalActionSet() def get_minimal_action_set(self): return self.ale.getMinimalActionSet() def game_over(self): return self.ale.game_over() def act(self, action): return self.ale.act(action) def reset_game(self): self.ale.reset_game() def get_frame(self): return self.ale.getScreenRGB()
def get_random_baseline(gamepath): ale = ALEInterface() ale.setInt('random_seed', 42) recordings_dir = './recordings/breakout/' USE_SDL = True if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX #ale.setString("record_screen_dir", recordings_dir); elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) # Load the ROM file ale.loadROM(gamepath) # Get the list of legal actions legal_actions = ale.getLegalActionSet() # Play 10 episodes rewards = [] for episode in xrange(10): total_reward = 0 while not ale.game_over(): a = legal_actions[randrange(len(legal_actions))] reward = ale.act(a); total_reward += reward rewards.append(total_reward) #print 'Episode', episode, 'ended with score:', total_reward ale.reset_game() avg_reward = sum(rewards) / float(len(rewards)) return avg_reward
def ale_load_from_rom(rom_path, display_screen): rng = get_numpy_rng() try: from ale_python_interface import ALEInterface except ImportError as e: raise ImportError('Unable to import the python package of Arcade Learning Environment. ' \ 'ALE may not have been installed correctly. Refer to ' \ '`https://github.com/mgbellemare/Arcade-Learning-Environment` for some' \ 'installation guidance') ale = ALEInterface() ale.setInt(b'random_seed', rng.randint(1000)) if display_screen: import sys if sys.platform == 'darwin': import pygame pygame.init() ale.setBool(b'sound', False) # Sound doesn't work on OSX ale.setBool(b'display_screen', True) else: ale.setBool(b'display_screen', False) ale.setFloat(b'repeat_action_probability', 0) ale.loadROM(str.encode(rom_path)) return ale
class Atari: def __init__(self,rom_name): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode") self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM('./' +rom_name) self.screen_width,self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i #print len(self.legal_actions) self.windowname = rom_name #cv2.startWindowThread() #cv2.namedWindow(rom_name) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return image def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action): reward = self.ale.act(self.legal_actions[np.argmax(action)]) nextstate = self.get_image() #cv2.imshow(self.windowname,nextstate) if self.ale.game_over(): self.newGame() #print "reward %d" % reward return nextstate, reward, self.ale.game_over()
def get_num_actions(rom_path, rom_name): from ale_python_interface import ALEInterface filename = '{0}/{1}.bin'.format(rom_path, rom_name) ale = ALEInterface() ale.loadROM(filename) return len(ale.getMinimalActionSet())
def __init__(self, random_seed, frame_skip, repeat_action_probability, sound, display_screen, block_state_repr=None, enemy_state_repr=None, friendly_state_repr=None): ale = ALEInterface() # Get & Set the desired settings if random_seed is not None: ale.setInt('random_seed', random_seed) ale.setInt('frame_skip', frame_skip) ale.setFloat('repeat_action_probability', repeat_action_probability) if display_screen: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', sound) ale.setBool('display_screen', display_screen) # Load the ROM file ale.loadROM('qbert.bin') # Get the list of legal actions legal_actions = ale.getLegalActionSet() minimal_actions = ale.getMinimalActionSet() logging.debug('Legal actions: {}'.format( [action_number_to_name(a) for a in legal_actions])) logging.debug('Minimal actions: {}'.format( [action_number_to_name(a) for a in minimal_actions])) width, height = ale.getScreenDims() rgb_screen = np.empty([height, width, 3], dtype=np.uint8) ram_size = ale.getRAMSize() ram = np.zeros(ram_size, dtype=np.uint8) # ALE components self.ale = ale self.lives = ale.lives() self.rgb_screen = rgb_screen self.ram_size = ale.getRAMSize() self.ram = ram # Verbose state representation self.desired_color = COLOR_YELLOW self.block_colors = INITIAL_COLORS self.enemies = INITIAL_ENEMY_POSITIONS self.friendlies = INITIAL_FRIENDLY_POSITIONS self.discs = INITIAL_DISCS self.current_row, self.current_col = 0, 0 self.level = 1 self.enemy_present = False self.friendly_present = False self.block_state_repr = block_state_repr self.enemy_state_repr = enemy_state_repr self.friendly_state_repr = friendly_state_repr self.num_colored_blocks = 0
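# RAM-as-state sketch matching the buffers above: Atari 2600 games expose a 128-byte
# RAM, and ale.getRAM(ram) fills a caller-provided uint8 array in place. Shown on a
# dummy buffer so it runs without a ROM; the byte index and the normalisation are
# illustrative assumptions.
import numpy as np

ram = np.zeros(128, dtype=np.uint8)       # ale.getRAM(ram) would fill this in place
ram[63] = 7                               # hypothetical byte of interest
state = ram.astype(np.float32) / 255.0    # common scaling before feeding a network
print(state.shape, state[63])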
class Agent(object): def __init__(self): self._ale = ALEInterface() self._ale.setInt('random_seed', 123) self._ale.setFloat('repeat_action_probability', 0.0) self._ale.setBool('color_averaging', False) self._ale.loadROM('roms/enduro.bin') self._controller = Controller(self._ale) self._extractor = StateExtractor(self._ale) self._image = None self.curr_action = 0 def run(self, learn, episodes=1, draw=False): """ Implements the playing/learning loop. Args: learn(bool): Whether the self.learn() function should be called. episodes (int): The number of episodes to run the agent for. draw (bool): Whether to overlay the environment state on the frame. Returns: None """ if learn: self.init_Q() action = random.choice( self.getActionsSet()) # init_action for q_learning for e in range(episodes): # Observe the environment to set the initial state (grid, self._image) = self._extractor.run(draw=draw, scale=4.0) self.initialise(grid) num_frames = self._ale.getFrameNumber() # Each episode lasts 6500 frames while self._ale.getFrameNumber() - num_frames < 6500: # Take an action self.act(action) # Update the environment grid s_grid = grid (grid, self._image) = self._extractor.run(draw=draw, scale=4.0) self.sense(grid) s_next_grid = grid # Perform learning if required if learn: # self.learn(s_grid,s_next_grid) # for q learning action = self.learn(s_grid, s_next_grid, action) self.callback(learn, e + 1, self._ale.getFrameNumber() - num_frames) self.end_state(e) self._ale.reset_game() def getActionsSet(self): """ Returns the set of all possible actions """ return [Action.ACCELERATE, Action.RIGHT, Action.LEFT, Action.BREAK] def move(self, action): """ Executes the action and advances the game to the next state. Args: action (int): The action which should executed. Make sure to use the constants returned by self.getActionsSet() Returns: int: The obtained reward after executing the action """ return self._controller.move(action) def initialise(self, grid): """ Called at the beginning of each episode, mainly used for state initialisation. Args: grid (np.ndarray): 11x10 array with the initial environment grid. Returns: None """ raise NotImplementedError def act(self): """ Called at each loop iteration to choose and execute an action. Returns: None """ raise NotImplementedError def sense(self, grid): """ Called at each loop iteration to construct the new state from the update environment grid. Returns: None """ raise NotImplementedError def learn(self): """ Called at each loop iteration when the agent is learning. It should implement the learning procedure. Returns: None """ raise NotImplementedError def callback(self, learn, episode, iteration): """ Called at each loop iteration mainly for reporting purposes. Args: learn (bool): Indicates whether the agent is learning or not. episode (int): The number of the current episode. iteration (int): The number of the current iteration. Returns: None """ def get_surround(): raise NotImplementedError
default=True, type=bool_arg, help="Whether or not to start with 30 noops for each env. Default True", dest="random_start") return parser if __name__ == "__main__": args = get_arg_parser().parse_args() from atari_emulator import AtariEmulator from ale_python_interface import ALEInterface filename = args.rom_path + "/" + args.game + ".bin" ale_int = ALEInterface() ale_int.loadROM(str.encode(filename)) num_actions = len(ale_int.getMinimalActionSet()) args.num_actions = num_actions args.random_seed = 3 ray.init() create_environment = lambda i: AtariEmulator.remote(i, args) emulators = np.asarray([create_environment(i) for i in range(4)]) variables = [(np.asarray([ ray.get(emulator.get_initial_state.remote()) for emulator in emulators ], dtype=np.uint8)), (np.zeros(4, dtype=np.float32)), (np.asarray([False] * 4, dtype=np.float32)), (np.zeros((4, num_actions), dtype=np.float32))]
vf.main_model.load_state_dict(tc.load(model_path)) vf.update_model.load_state_dict(tc.load(model_path)) gpu_dtype = tc.cuda.FloatTensor cpu_dtype = tc.FloatTensor # device = tc.device("cuda:0" if tc.cuda.is_available() else "cpu") # vf = nn.Neural_Net().to(device) # get screen or not USE_SDL = False if USE_SDL: ale.setBool(b'display_screen', True) # load game rom file name_of_the_game = 'space_invaders' game_path = '/home/juna/Documents/Projects/atari_project/Arcade-Learning-Environment/roms/' + name_of_the_game + '.bin' ale.loadROM(game_path.encode()) minimal_actions = ale.getMinimalActionSet() print('minimal_actions :\n', minimal_actions) screen_data = None #initialize the state image = ale.getScreenGrayscale(screen_data) image = impre(name_of_the_game, image) state = tc.stack((image, image, image, image), dim=0).unsqueeze(0).type(gpu_dtype) del image memory_buffer = []
episode, type(agent).__name__)) if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON: agent.explorationProb -= reduce_exploration_prob_amount print('episode: {} ended with score: {}'.format(episode, total_reward)) ale.reset_game() return rewards if __name__ == '__main__': game = 'asterix.bin' gamepath = os.path.join('roms', game) agent = LEARNING_ALGORITHM ale = ALEInterface() ale.loadROM(gamepath) actions = ale.getMinimalActionSet() agent.actions = actions print actions if LOAD_WEIGHTS: agent.weights = file_utils.load_weights(WEIGHTS_FILENAME) rewards = train_agent( gamepath, agent, n_episodes=NUM_EPISODES, display_screen=DISPLAY_SCREEN, record_weights=RECORD_WEIGHTS, reduce_exploration_prob_amount=EXPLORATION_REDUCTION_AMOUNT, n_frames_to_skip=NUM_FRAMES_TO_SKIP)
def train_agent(gamepath, agent, n_episodes, display_screen, record_weights, reduce_exploration_prob_amount, n_frames_to_skip): """ :description: trains an agent to play a game :type gamepath: string :param gamepath: path to the binary of the game to be played :type agent: subclass RLAlgorithm :param agent: the algorithm/agent that learns to play the game :type n_episodes: int :param n_episodes: number of episodes of the game on which to train """ # load the ale interface to interact with ale = ALEInterface() ale.setInt('random_seed', 42) # display/recording settings, doesn't seem to work currently recordings_dir = './recordings/breakout/' # previously "USE_SDL" if display_screen: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX #ale.setString("record_screen_dir", recordings_dir); elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) ale.loadROM(gamepath) ale.setInt("frame_skip", n_frames_to_skip) screen_preprocessor = screen_utils.RGBScreenPreprocessor() rewards = [] best_reward = 0 print('starting training...') for episode in xrange(n_episodes): action = 0 reward = 0 newAction = None total_reward = 0 counter = 0 lives = ale.lives() screen = np.zeros((32, 32, 3), dtype=np.int8) state = { "screen": screen, "objects": None, "prev_objects": None, "prev_action": 0, "action": 0 } while not ale.game_over(): # if newAction is None then we are training an off-policy algorithm # otherwise, we are training an on policy algorithm if newAction is None: action = agent.getAction(state) else: action = newAction reward += ale.act(action) if ale.lives() < lives: lives = ale.lives() reward -= 1 total_reward += reward new_screen = ale.getScreenRGB() new_screen = screen_preprocessor.preprocess(new_screen) new_state = { "screen": new_screen, "objects": None, "prev_objects": state["objects"], "prev_action": state["action"], "action": action } newAction = agent.incorporateFeedback(state, action, reward, new_state) state = new_state reward = 0 rewards.append(total_reward) if total_reward > best_reward and record_weights: best_reward = total_reward print("Best reward: {}".format(total_reward)) if episode % PRINT_TRAINING_INFO_PERIOD == 0: print '\n############################' print '### training information ###' print("Average reward: {}".format(np.mean(rewards))) print("Last 50: {}".format( np.mean(rewards[-NUM_EPISODES_AVERAGE_REWARD_OVER:]))) print("Exploration probability: {}".format(agent.explorationProb)) print('action: {}'.format(action)) print('size of weights dict: {}'.format(len(agent.weights))) print('current objects: {}'.format(state['objects'])) print('previous objects: {}'.format(state['prev_objects'])) avg_feat_weight = np.mean( [v for k, v in agent.weights.iteritems()]) print('average feature weight: {}'.format(avg_feat_weight)) print '############################' print '############################\n' if episode != 0 and episode % RECORD_WEIGHTS_PERIOD == 0 and record_weights: file_utils.save_rewards(rewards, filename='episode-{}-{}-rewards'.format( episode, type(agent).__name__)) file_utils.save_weights(agent.weights, filename='episode-{}-{}-weights'.format( episode, type(agent).__name__)) if agent.explorationProb > MINIMUM_EXPLORATION_EPSILON: agent.explorationProb -= reduce_exploration_prob_amount print('episode: {} ended with score: {}'.format(episode, total_reward)) ale.reset_game() return rewards
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. Will automatically restart when a real episode ends (isOver might be just lost of lives but not game over). """ def __init__(self, rom_file, viz=0, height_range=(None, None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True, env_name="Boxing-v0"): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_losts_as_eoe: consider lost of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() # re def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) uint8 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1], :].astype( 'float32') # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret.astype('uint8') # to save some memory def get_action_space(self): return DiscreteActionSpace(len(self.actions)) def finish_episode(self): self.stats['score'].append(self.current_episode_score.sum) def restart_episode(self): self.current_episode_score.reset() with _ALE_LOCK: self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives if isOver: self.finish_episode() if self.ale.game_over(): self.restart_episode() return (r, isOver)
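# Flicker-removal sketch matching the max over the last two raw screens above: some
# Atari games draw sprites only on alternating frames, so consecutive screens are
# combined with an element-wise maximum before preprocessing. Pure numpy on dummy frames.
import numpy as np

prev_screen = np.zeros((210, 160, 3), dtype=np.uint8)
curr_screen = np.full((210, 160, 3), 64, dtype=np.uint8)
pooled = np.maximum(prev_screen, curr_screen)
print(pooled.max())  # 64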
14 #11111 fire up/down/left/right (invalid) ) if(len(sys.argv) < 2): print("Usage ./ale_python_test_pygame_player.py <ROM_FILE_NAME>") sys.exit() ale = ALEInterface() max_frames_per_episode = ale.getInt("max_num_frames_per_episode") ale.setInt("random_seed",123) random_seed = ale.getInt("random_seed") print("random_seed: " + str(random_seed)) ale.loadROM(sys.argv[1]) legal_actions = ale.getMinimalActionSet() print(legal_actions) (screen_width,screen_height) = ale.getScreenDims() print("width/height: " +str(screen_width) + "/" + str(screen_height)) (display_width,display_height) = (1024,420) #init pygame pygame.init() screen = pygame.display.set_mode((display_width,display_height)) pygame.display.set_caption("Arcade Learning Environment Player Agent Display") game_surface = pygame.Surface((screen_width,screen_height))
ale = ALEInterface() ale.setInt('random_seed', 123) ale.setInt("frame_skip",frameSkip) USE_SDL = True if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', False) ale.setBool('display_screen', display_screen) ale.loadROM("rom/Breakout.A26") legal_actions = ale.getMinimalActionSet() n_senses = 41*36 n_actions = len(legal_actions) temporal_window = 1 hiddenSize1 = 256 hiddenSize2 = 32 network_size = n_senses*(temporal_window) + n_actions*(temporal_window-1) dataRom = np.zeros([dataRomSize,n_senses + n_actions + 1],dtype= 'uint8') sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True)) x = tf.placeholder(tf.float32, [None, network_size])
class ALEEnvironment(): def __init__(self, rom_file, args): self.ale = ALEInterface() self.histLen = 4 if args.display_screen: if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): self.ale.setBool('sound', True) self.ale.setBool('display_screen', True) self.ale.setInt('frame_skip', args.frame_skip) self.ale.setFloat('repeat_action_probability', 0.0) self.ale.setBool('color_averaging', args.color_averaging) #if args.random_seed: # self.ale.setInt('random_seed', args.random_seed) self.ale.setInt( 'random_seed', 0) #hoang addition to fix the random seed across all environment self.ale.loadROM(rom_file) if args.minimal_action_set: self.actions = self.ale.getMinimalActionSet() logger.info("Using minimal action set with size %d" % len(self.actions)) else: self.actions = self.ale.getLegalActionSet() logger.info("Using full action set with size %d" % len(self.actions)) logger.debug("Actions: " + str(self.actions)) self.screen_width = args.screen_width self.screen_height = args.screen_height self.mode = "train" self.life_lost = False self.initSrcreen = self.getScreen() print("size of screen is:", self.initSrcreen.shape) im = Image.fromarray(self.initSrcreen) im.save('initial_screen.jpeg') im = Image.open('initial_screen.jpeg') pix = im.load() # print "devil's color", pix[13,62] # print "agent's color", pix[42,33] # exit() # draw = ImageDraw.Draw(im) # draw.rectangle([(37, 29),(48, 37)], outline = 'red') # draw.rectangle([(69, 68), (73, 71)], outline = 'white') # draw.rectangle([(7, 41), (11, 45)], outline = 'white') # draw.rectangle([(11, 58), (15, 66)], outline = 'white') # draw.rectangle([(70, 20), (73, 35)], outline='white') #right door # draw.rectangle([(11, 68), (15, 71)], outline='white') # im.save('first_subgoal_box.jpeg') # exit() # use this tool to get bounding box: http://nicodjimenez.github.io/boxLabel/annotate.html self.goalSet = [] # goal 0 self.goalSet.append([[69, 68], [ 73, 71 ]]) # Lower Right Ladder. This is the box for detecting first subgoal # self.goalSet.append([[11, 58], [15, 66]]) # lower left ladder 3 # self.goalSet.append([[11, 68], [15, 71]]) # lower left ladder 3 # goal 2 self.goalSet.append([[7, 41], [11, 45]]) # Key. This will be second sub goal self.goalSet.append([[11, 68], [15, 71]]) # lower left ladder 3 # goal 4 self.goalSet.append( [[69, 68], [73, 71]]) # Lower Right Ladder again, this will be the third subgoal # goal 6 self.goalSet.append([[70, 20], [73, 35]]) # Right Door. 
This will be the 4th subgoal self.goalCenterLoc = [] for goal in self.goalSet: goalCenter = [ float(goal[0][0] + goal[1][0]) / 2, float(goal[0][1] + goal[1][1]) / 2 ] self.goalCenterLoc.append(goalCenter) self.agentOriginLoc = [42, 33] self.agentLastX = 42 self.agentLastY = 33 self.devilLastX = 0 self.devilLastY = 0 self.reachedGoal = [0, 0, 0, 0, 0, 0, 0] self.histState = self.initializeHistState() def initializeHistState(self): histState = np.concatenate((self.getState(), self.getState()), axis=2) histState = np.concatenate((histState, self.getState()), axis=2) histState = np.concatenate((histState, self.getState()), axis=2) return histState def numActions(self): return len(self.actions) def resetGoalReach(self): self.reachedGoal = [0, 0, 0, 0, 0, 0, 0, 0] def restart(self): self.ale.reset_game() self.life_lost = False self.reachedGoal = [0, 0, 0, 0, 0, 0, 0] for i in range(19): self.act(0) #wait for initialization self.histState = self.initializeHistState() self.agentLastX = self.agentOriginLoc[0] self.agentLastY = self.agentOriginLoc[1] """ def restart(self): # In test mode, the game is simply initialized. In train mode, if the game # is in terminal state due to a life loss but not yet game over, then only # life loss flag is reset so that the next game starts from the current # state. Otherwise, the game is simply initialized. if ( self.mode == 'test' or not self.life_lost or # `reset` called in a middle of episode self.ale.game_over() # all lives are lost ): self.ale.reset_game() self.life_lost = False self.reachedGoal = [0, 0, 0] for i in range(19): self.act(0) #wait for initialization self.histState = self.initializeHistState() self.agentLastX = self.agentOriginLoc[0] self.agentLastY = self.agentOriginLoc[1] """ def beginNextLife(self): self.life_lost = False self.reachedGoal = [0, 0, 0, 0, 0, 0, 0] for i in range(19): self.act(0) #wait for initialization self.histState = self.initializeHistState() self.agentLastX = self.agentOriginLoc[0] self.agentLastY = self.agentOriginLoc[1] def act(self, action): lives = self.ale.lives() reward = self.ale.act(self.actions[action]) self.life_lost = (not lives == self.ale.lives()) currState = self.getState() self.histState = np.concatenate((self.histState[:, :, 1:], currState), axis=2) return reward def getScreen(self): screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen, (self.screen_width, self.screen_height)) return resized def getScreenRGB(self): screen = self.ale.getScreenRGB() resized = cv2.resize(screen, (self.screen_width, self.screen_height)) #resized = screen return resized def getAgentLoc(self, img): # img = self.getScreenRGB() man = [200, 72, 72] mask = np.zeros(np.shape(img)) mask[:, :, 0] = man[0] mask[:, :, 1] = man[1] mask[:, :, 2] = man[2] diff = img - mask indxs = np.where(diff == 0) diff[np.where(diff < 0)] = 0 diff[np.where(diff > 0)] = 0 diff[indxs] = 255 if (np.shape(indxs[0])[0] == 0): mean_x = self.agentLastX mean_y = self.agentLastY else: mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0] mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0] self.agentLastX = mean_x self.agentLastY = mean_y return (mean_x, mean_y) def getDevilLoc(self, img): # img = self.getScreenRGB() # man = [0, 16, 2] devilColor = [236, 236, 236] mask = np.zeros(np.shape(img)) mask[:, :, 0] = devilColor[0] mask[:, :, 1] = devilColor[1] mask[:, :, 2] = devilColor[2] diff = img - mask indxs = np.where(diff == 0) diff[np.where(diff < 0)] = 0 diff[np.where(diff > 0)] = 0 diff[indxs] = 255 if (np.shape(indxs[0])[0] == 0): mean_x = self.devilLastX 
mean_y = self.devilLastY else: mean_y = np.sum(indxs[0]) / np.shape(indxs[0])[0] mean_x = np.sum(indxs[1]) / np.shape(indxs[1])[0] self.devilLastX = mean_x self.devilLastY = mean_y return (mean_x, mean_y) def distanceReward(self, lastGoal, goal): if (lastGoal == -1): lastGoalCenter = self.agentOriginLoc else: lastGoalCenter = self.goalCenterLoc[lastGoal] goalCenter = self.goalCenterLoc[goal] agentX, agentY = self.getAgentLoc() dis = np.sqrt((goalCenter[0] - agentX) * (goalCenter[0] - agentX) + (goalCenter[1] - agentY) * (goalCenter[1] - agentY)) disLast = np.sqrt((lastGoalCenter[0] - agentX) * (lastGoalCenter[0] - agentX) + (lastGoalCenter[1] - agentY) * (lastGoalCenter[1] - agentY)) disGoals = np.sqrt((goalCenter[0] - lastGoalCenter[0]) * (goalCenter[0] - lastGoalCenter[0]) + (goalCenter[1] - lastGoalCenter[1]) * (goalCenter[1] - lastGoalCenter[1])) return 0.001 * (disLast - dis) / disGoals # add color channel for input of network def getState(self): screen = self.ale.getScreenGrayscale() resized = cv2.resize(screen, (self.screen_width, self.screen_height)) return np.reshape(resized, (84, 84, 1)) def getStackedState(self): return self.histState def isTerminal(self): if self.mode == 'train': return self.ale.game_over() or self.life_lost return self.ale.game_over() def isGameOver(self): return self.ale.game_over() def isLifeLost(self): return self.life_lost def reset(self): self.ale.reset_game() self.life_lost = False def goalReached(self, goal): # if goal in [0,2,4,6]: # those are original task where bounding boxes are used to detect the location of agents subset = [ 0, 2, 3, 4, 6 ] # those are original task where bounding boxes are used to detect the location of agents if goal in subset: # goal_index = goal/2 goal_index = subset.index(goal) goalPosition = self.goalSet[goal_index] goalScreen = self.initSrcreen stateScreen = self.getScreen() count = 0 for y in range(goalPosition[0][0], goalPosition[1][0]): for x in range(goalPosition[0][1], goalPosition[1][1]): if goalScreen[x][y] != stateScreen[x][y]: count = count + 1 # 30 is total number of pixels of agent if float(count) / 30 > 0.3: self.reachedGoal[goal] = 1 return True if goal == 1: # detect if agent is to the left of the devil # return self.agent_left_devil() return self.detect_left_ladder() ############## -- DML modified -- ########### # if goal == 4: # # detect if agent is to the right of the devil # # return self.agent_right_devil() # return self.detect_right_ladder() ################# -- end -- ########### if goal == 5: # detect if the agent is back to the original location return self.original_location_reached() return False def detect_right_ladder(self): goalPosition = self.goalSet[0] goalScreen = self.initSrcreen stateScreen = self.getScreen() count = 0 for y in range(goalPosition[0][0], goalPosition[1][0]): for x in range(goalPosition[0][1], goalPosition[1][1]): if goalScreen[x][y] != stateScreen[x][y]: count = count + 1 # 30 is total number of pixels of agent if float(count) / 30 > 0.3: goal = 5 self.reachedGoal[goal] = 1 return True return False def detect_left_ladder(self): goalPosition = self.goalSet[2] goalScreen = self.initSrcreen stateScreen = self.getScreen() count = 0 for y in range(goalPosition[0][0], goalPosition[1][0]): for x in range(goalPosition[0][1], goalPosition[1][1]): if goalScreen[x][y] != stateScreen[x][y]: count = count + 1 # 30 is total number of pixels of agent if float(count) / 30 > 0.3: goal = 5 self.reachedGoal[goal] = 1 return True return False def original_location_reached(self): img = 
self.getScreenRGB() (x, y) = self.getAgentLoc(img) # print "Agent's location:",x,y if abs(x - 42) <= 2 and abs(y - 33) <= 2: return True else: return False def pause(self): os.system('read -s -n 1 -p "Press any key to continue...\n"') def agent_left_devil(self): img = self.ale.getScreenRGB() (x, y) = self.getAgentLoc(img) (a, b) = self.getDevilLoc(img) # print "Agent's location:",x,y # print "Devil's location:", a,b if (a - x > 40) and (abs(y - b) <= 40): return True else: return False def agent_right_devil(self): img = self.getScreenRGB() (x, y) = self.getAgentLoc(img) (a, b) = self.getDevilLoc(img) # print "Agent's location:",x,y # print "Devil's location:",a,b # if (x-a > 25) and (abs(y-b) <= 40): if (x - a > 40) and (abs(y - b) <= 40): return True else: return False def goalNotReachedBefore(self, goal): if (self.reachedGoal[goal] == 1): return False return True
class ALEEnvironment(Environment): def __init__(self, rom_file, args): from ale_python_interface import ALEInterface self.ale = ALEInterface() # Set ALE configuration self.ale.setInt(b'frame_skip', args.frame_skip) self.ale.setFloat(b'repeat_action_probability', args.repeat_action_probability) self.ale.setBool(b'color_averaging', args.color_averaging) if args.random_seed: self.ale.setInt(b'random_seed', args.random_seed) if args.record_screen_path: if not os.path.exists(args.record_screen_path): os.makedirs(args.record_screen_path) self.ale.setString(b'record_screen_dir', args.record_screen_path.encode()) if args.record_sound_filename: self.ale.setBool(b'sound', True) self.ale.setString(b'record_sound_filename', args.record_sound_filename.encode()) # Load ROM self.ale.loadROM(rom_file.encode()) # Set game difficulty and mode (after loading) self.ale.setDifficulty(args.game_difficulty) self.ale.setMode(args.game_mode) # Whether to use minimum set or set if args.minimal_action_set: self.actions = self.ale.getMinimalActionSet() else: self.actions = self.ale.getLegalActionSet() # Life lost control self.life_lost = False # Initialize base class super(ALEEnvironment, self).__init__(args) def action_dim(self): return len(self.actions) def reset(self): # In test mode, the game is simply initialized. In train mode, if the game # is in terminal state due to a life loss but not yet game over, then only # life loss flag is reset so that the next game starts from the current # state. Otherwise, the game is simply initialized. if (self.mode == 'test' or not self.life_lost or # `reset` called in a middle of episode self.ale.game_over() # all lives are lost ): self.ale.reset_game() self.life_lost = False screen = self._get_state(self.ale.getScreenRGB()) return screen def step(self, action, action_b=0, ignore_screen=False): lives = self.ale.lives() # Act on environment reward = self.ale.act(self.actions[action], self.actions[action_b] + 18) # Check if life was lost self.life_lost = (not lives == self.ale.lives()) # Check terminal state terminal = (self.ale.game_over() or self.life_lost ) if self.mode == 'train' else self.ale.game_over() # Check if should ignore the screen (in case of RobotEnvironment) if ignore_screen: screen = None else: # Get screen from ALE screen = self._get_state(self.ale.getScreenRGB()) # Wait for next frame to start self.fps_control.wait_next_frame() return screen, reward, terminal
class AtariPlayer(gym.Env): """ A wrapper for ALE emulator, with configurations to mimic DeepMind DQN settings. Info: score: the accumulated reward in the current game gameOver: True when the current game is Over """ def __init__(self, rom_file, viz=0, frame_skip=4, nullop_start=30, live_lost_as_eoe=True, max_num_frames=0): """ Args: rom_file: path to the rom frame_skip: skip every k frames and repeat the action viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. nullop_start: start with random number of null ops. live_losts_as_eoe: consider lost of lives as end of episode. Useful for training. max_num_frames: maximum number of frames per episode. """ super(AtariPlayer, self).__init__() if not os.path.isfile(rom_file) and '/' not in rom_file: rom_file = get_dataset_path('atari_rom', rom_file) assert os.path.isfile(rom_file), \ "rom {} not found. Please download at {}".format(rom_file, ROM_URL) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Error) except AttributeError: if execute_only_once(): logger.warn("You're not using latest ALE") # avoid simulator bugs: https://github.com/mgbellemare/Arcade-Learning-Environment/issues/86 with _ALE_LOCK: self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt(b"random_seed", self.rng.randint(0, 30000)) self.ale.setInt(b"max_num_frames_per_episode", max_num_frames) self.ale.setBool(b"showinfo", False) self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b'color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat(b'repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString(b'record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file.encode('utf-8')) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.action_space = spaces.Discrete(len(self.actions)) self.observation_space = spaces.Box(low=0, high=255, shape=(self.height, self.width), dtype=np.uint8) self._restart_episode() def get_action_meanings(self): return [ACTION_MEANING[i] for i in self.actions] def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def _current_state(self): """ :returns: a gray-scale (h, w) uint8 image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): cv2.imshow(self.windowname, ret) cv2.waitKey(int(self.viz * 1000)) ret = ret.astype('float32') # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)[:, :] return ret.astype('uint8') # to save some memory def _restart_episode(self): with _ALE_LOCK: self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def reset(self): if self.ale.game_over(): self._restart_episode() return self._current_state() def render(self, *args, **kwargs): pass # visualization for this env is through the viz= argument when creating the player def step(self, act): oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break isOver = self.ale.game_over() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives info = {'ale.lives': newlives} return self._current_state(), r, isOver, info
class AtariEmulator: def __init__(self, rom, visualization = False, save = False, windowName = 'AtariGame'): self.ale = ALEInterface() # self.ale.setInt(b'frame_skip', 1) self.ale.setInt(b"random_seed", 123) # self.ale.setFloat(b'repeat_action_probability', 0) # default = 0.25 self.ale.loadROM(b'roms/' + rom) self.legalActions = self.ale.getMinimalActionSet() self.life_lost = False self.mode = 'train' self.visualization = visualization and not save self.windowName = windowName self.save = save self.totalReward = 0 if self.visualization: cv2.namedWindow(self.windowName) elif self.save: self.index = 0 self.bestReward = 0 self.totalReward = 0 if os.path.exists('result'): shutil.rmtree('result') if os.path.exists('best_result'): shutil.rmtree('best_result') if not os.path.exists('result'): os.mkdir('result') if not os.path.exists('best_result'): os.mkdir('best_result') def start(self): # In train mode: life_lost = True but game is not over, don't restart the game if self.mode == 'test' or not self.life_lost or self.ale.game_over(): self.ale.reset_game() self.life_lost = False return cv2.resize(self.ale.getScreenGrayscale(), (84, 110))[26:] def isTerminal(self): if self.mode == 'train': return self.ale.game_over() or self.life_lost return self.ale.game_over() def next(self, action): # index of action int legalActions lives = self.ale.lives() # the remaining lives reward = 0 for i in range(4): # action repeat reward += self.ale.act(self.legalActions[action]) self.life_lost = (lives != self.ale.lives()) # after action, judge life lost if self.mode == 'train' and self.life_lost: reward -= 1 if self.isTerminal(): break self.totalReward += reward state = self.ale.getScreenGrayscale() rgb_state = self.ale.getScreenRGB() if self.visualization: cv2.imshow(self.windowName, rgb_state) cv2.waitKey(10) elif self.save: cv2.imwrite(os.path.join('result', '%04d.png') % self.index, rgb_state) self.index += 1 if self.isTerminal(): print('Scores: %d, index: %d' % (self.totalReward, self.index)) if self.totalReward > self.bestReward: self.bestReward = self.totalReward copyDir('result', 'best_result') self.index = 0 self.totalReward = 0 return cv2.resize(state, (84, 110))[26:], reward, self.isTerminal() def setMode(self, mode): self.mode = mode def randomStart(self, s_t): channels = s_t.shape[-1] self.start() for i in range(np.random.randint(channels, 30) + 1): s_t_plus_1, r_t, isTerminal = self.next(0) s_t[..., 0:channels-1] = s_t[..., 1:channels] s_t[..., -1] = s_t_plus_1 if isTerminal: self.start()
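A short usage sketch for the emulator above, assuming the class is in scope; the ROM name is a placeholder (it is resolved under roms/), and the 4-frame history shape is an assumption matching randomStart's expectations:

# Sketch: training-mode rollout with the AtariEmulator above.
import numpy as np

emu = AtariEmulator(b'breakout.bin')
emu.setMode('train')
frame = emu.start()                        # cropped 84x84 gray-scale frame
s_t = np.stack([frame] * 4, axis=-1)       # 4-frame history fed to randomStart
emu.randomStart(s_t)                       # random-length no-op start
terminal = False
while not terminal:
    action = np.random.randint(len(emu.legalActions))
    frame, reward, terminal = emu.next(action)   # next() repeats the action 4 times internally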
saveData = False saveModel = False gamma = .99 learningRate = 0.00025 display_screen = False frameSkip = 4 ale = ALEInterface() ale.setInt('random_seed', 0) ale.setInt("frame_skip", frameSkip) ale.setBool('color_averaging', True) ale.setBool('sound', False) ale.setBool('display_screen', False) ale.setFloat("repeat_action_probability", 0.) t = ale.getFloat("repeat_action_probability") ale.loadROM("rom/breakout.bin") legal_actions = ale.getMinimalActionSet() width = 84 height = 84 memorySize = 1000000 maxEpisode = 10000000 maxFrame = 50000000 historyLength = 4 batchSize = 32 startLearningFrame = 50000 finalExplorationFrame = 1000000 # dummy = 30
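The startLearningFrame/finalExplorationFrame constants above imply a linear exploration anneal; a minimal sketch of that schedule follows (the initial and final rates of 1.0 and 0.1 are assumptions, they are not defined in the block above):

# Sketch: linear epsilon anneal over the window defined by the constants above.
def epsilon(frame, start=startLearningFrame, end=finalExplorationFrame, e0=1.0, e1=0.1):
    # e0/e1 are assumed initial/final exploration rates
    if frame <= start:
        return e0
    if frame >= end:
        return e1
    return e0 - (e0 - e1) * (frame - start) / (end - start)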
def main(): pygame.init() ale = ALEInterface() ale.setInt(b'random_seed', 123) ale.setBool(b'display_screen', True) ale.setInt(b'frame_skip', 4) # ale.setFloat(b'repeat_action_probability', .7) # ale.setBool(b'color_averaging', True) game = 'breakout' #ACKTR tasks#, 'space_invaders', 'seaquest', 'qbert', 'pong', 'beam_rider', 'breakout' rom = home + '/Documents/ALE/roms/supported/' + game + '.bin' ale.loadROM(str.encode(rom)) legal_actions = ale.getLegalActionSet() rewards, num_episodes = [], 5 config = [] agent = DQN_agent(config) for episode in range(num_episodes): total_reward = 0 exp_state = [] exp_action = 0 exp_reward = 0 exp_next_state = [] while not ale.game_over(): #Save frame frame = ale.getScreenGrayscale() frame = cv2.resize(frame, (84, 84)) exp_next_state.append(frame) #Make action action = random.choice(legal_actions) reward = ale.act(action) total_reward += reward exp_reward += reward #Make experience if len(exp_next_state) == 4: state_ready = np.reshape(np.stack(exp_next_state), [4 * 84, 84]) # cv2.imshow('image',state_ready) # cv2.waitKey(0) exp_action = action if len(exp_state) == 0: exp_state = exp_next_state else: experience = [ exp_state, exp_action, exp_reward, exp_next_state ] exp_reward = 0 exp_state = exp_next_state exp_next_state = [] print('Episode %d reward %d.' % (episode, total_reward)) rewards.append(total_reward) ale.reset_game() average = sum(rewards) / len(rewards) print('Average for %d episodes: %d' % (num_episodes, average))
class ALE(object): def __init__(self, init_seed, init_rand): self.ale = ALEInterface() self.ale.setInt(b'random_seed', init_seed) self.ale.setBool('display_screen', False) self.ale.setBool('sound', False) self.ale.setFloat(b'repeat_action_probability', 0.0) self.ale.loadROM('./breakout.bin') self.action_size = 4 self._screen = None self.reward = 0 self.terminal = True self.init_rand = init_rand #def setSetting(self, action_repeat, random_init_step, screen_type): def setSetting(self, action_repeat, screen_type): self.action_repeat = action_repeat self.screen_type = screen_type #self.random_init_step = random_init_step def _step(self, action): # debug transform if action == 2: action = 4 self.reward = self.ale.act(action) self.terminal = self.ale.game_over() if self.screen_type == 0: self._screen = self.ale.getScreenRGB() elif self.screen_type == 1: self._screen = self.ale.getScreenGrayscale() else: sys.stderr.write('screen_type error!') exit() def state(self): return self.reward, self.screen, self.terminal def act(self, action): cumulated_reward = 0 for _ in range(self.action_repeat): self._step(action) cumulated_reward += self.reward if self.terminal: break self.reward = cumulated_reward return self.state() def train_act(self, action): cumulated_reward = 0 for _ in range(self.action_repeat): self._step(action) cumulated_reward += self.reward if self.terminal: break self.reward = cumulated_reward return (self._screen, self.state()) def new_game(self): if self.ale.game_over(): self.ale.reset_game() if self.screen_type == 0: self._screen = self.ale.getScreenRGB() elif self.screen_type == 1: self._screen = self.ale.getScreenGrayscale() else: sys.stderr.write('screen_type error!') exit() self._step(0) #for _ in range(random.randint(0, self.random_init_step - 1)): for _ in range(self.init_rand): self._step(0) return self.screen @property def screen(self): return cv2.resize( cv2.cvtColor(self._screen, cv2.COLOR_RGB2GRAY) / 255., (84, 84))
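A minimal driving loop for the wrapper above; the action-repeat and screen-type values are assumptions, and the class itself hard-codes ./breakout.bin:

# Sketch: driving the ALE wrapper above with a no-frills random policy.
import numpy as np

env = ALE(init_seed=123, init_rand=10)
env.setSetting(action_repeat=4, screen_type=0)    # 0 = RGB frames internally (required by the screen property)
screen = env.new_game()                           # 84x84 gray-scale screen scaled to [0, 1]
terminal = False
while not terminal:
    action = np.random.randint(env.action_size)   # the wrapper remaps index 2 to ALE action 4
    reward, screen, terminal = env.act(action)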
class ArcadeLearningEnvironment(Environment): """ [Arcade Learning Environment](https://github.com/mgbellemare/Arcade-Learning-Environment) adapter (specification key: `ale`, `arcade_learning_environment`). May require: ```bash sudo apt-get install libsdl1.2-dev libsdl-gfx1.2-dev libsdl-image1.2-dev cmake git clone https://github.com/mgbellemare/Arcade-Learning-Environment.git cd Arcade-Learning-Environment mkdir build && cd build cmake -DUSE_SDL=ON -DUSE_RLGLUE=OFF -DBUILD_EXAMPLES=ON .. make -j 4 cd .. pip3 install . ``` Args: level (string): ALE rom file (<span style="color:#C00000"><b>required</b></span>). loss_of_life_termination: Signals a terminal state on loss of life (<span style="color:#00C000"><b>default</b></span>: false). loss_of_life_reward (float): Reward/Penalty on loss of life (negative values are a penalty) (<span style="color:#00C000"><b>default</b></span>: 0.0). repeat_action_probability (float): Repeats last action with given probability (<span style="color:#00C000"><b>default</b></span>: 0.0). visualize (bool): Whether to visualize interaction (<span style="color:#00C000"><b>default</b></span>: false). frame_skip (int > 0): Number of times to repeat an action without observing (<span style="color:#00C000"><b>default</b></span>: 1). seed (int): Random seed (<span style="color:#00C000"><b>default</b></span>: none). """ def __init__(self, level, life_loss_terminal=False, life_loss_punishment=0.0, repeat_action_probability=0.0, visualize=False, frame_skip=1, seed=None): super().__init__() from ale_python_interface import ALEInterface self.environment = ALEInterface() self.rom_file = level self.life_loss_terminal = life_loss_terminal self.life_loss_punishment = life_loss_punishment self.environment.setFloat(b'repeat_action_probability', repeat_action_probability) self.environment.setBool(b'display_screen', visualize) self.environment.setInt(b'frame_skip', frame_skip) if seed is not None: self.environment.setInt(b'random_seed', seed) # All set commands must be done before loading the ROM. self.environment.loadROM(rom_file=self.rom_file.encode()) self.available_actions = tuple(self.environment.getLegalActionSet()) # Full list of actions: # No-Op, Fire, Up, Right, Left, Down, Up Right, Up Left, Down Right, Down Left, Up Fire, # Right Fire, Left Fire, Down Fire, Up Right Fire, Up Left Fire, Down Right Fire, Down Left # Fire def __str__(self): return super().__str__() + '({})'.format(self.rom_file) def states(self): width, height = self.environment.getScreenDims() return dict(type='float', shape=(height, width, 3)) def actions(self): return dict(type='int', num_values=len(self.available_actions)) def close(self): self.environment.__del__() self.environment = None def get_states(self): screen = np.copy( self.environment.getScreenRGB(screen_data=self.screen)) screen = screen.astype(dtype=np.float32) / 255.0 return screen def reset(self): self.environment.reset_game() width, height = self.environment.getScreenDims() self.screen = np.empty((height, width, 3), dtype=np.uint8) self.lives = self.environment.lives() return self.get_states() def execute(self, actions): reward = self.environment.act(action=self.available_actions[actions]) terminal = self.environment.game_over() states = self.get_states() next_lives = self.environment.lives() if next_lives < self.lives: if self.life_loss_terminal: terminal = True elif self.life_loss_punishment > 0.0: reward -= self.life_loss_punishment self.lives = next_lives return states, terminal, reward
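A sketch of the execute/reset cycle for this adapter, assuming the ROM path placeholder below is replaced with a real file; note that execute returns (states, terminal, reward) in that order:

# Sketch: random rollout with the adapter above.
import numpy as np

env = ArcadeLearningEnvironment(level='breakout.bin', frame_skip=4)   # ROM path is a placeholder
states = env.reset()                               # (height, width, 3) float32 screen in [0, 1]
num_actions = env.actions()['num_values']
terminal = False
while not terminal:
    action = np.random.randint(num_actions)
    states, terminal, reward = env.execute(action)
env.close()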
def get_num_actions(rom_path, rom_name): from ale_python_interface import ALEInterface filename = rom_path + "/" + rom_name + ".bin" ale = ALEInterface() ale.loadROM(filename) return len(ale.getMinimalActionSet())
class KungFuMaster(object): def __init__( self, rom='/home/josema/AI/ALE/Arcade-Learning-Environment/Roms/kung_fu_master.bin', trainsessionname='test'): self.agent = None self.isAuto = True self.gui_visible = False self.userquit = False self.optimalPolicyUser = False # optimal policy set by user self.trainsessionname = trainsessionname self.elapsedtime = 0 # elapsed time for this experiment self.keys = 0 # Configuration self.pause = False # game is paused self.debug = False self.sleeptime = 0.0 self.command = 0 self.iteration = 0 self.cumreward = 0 self.cumreward100 = 0 # cum reward for statistics self.cumscore100 = 0 self.ngoalreached = 0 self.max_level = 1 self.hiscore = 0 self.hireward = -1000000 self.resfile = open("data/" + self.trainsessionname + ".dat", "a+") self.legal_actions = 0 self.rom = rom self.key_status = [] def init(self, agent): # init after creation (uses args set from cli) self.ale = ALEInterface() self.ale.setInt('random_seed', 123) ram_size = self.ale.getRAMSize() self.ram = np.zeros((ram_size), dtype=np.uint8) if (self.gui_visible): os.environ['SDL_VIDEO_CENTERED'] = '1' if sys.platform == 'darwin': pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): pygame.init() self.ale.setBool('sound', True) self.ale.setBool('display_screen', False) self.ale.loadROM(self.rom) self.legal_actions = self.ale.getLegalActionSet() if (self.gui_visible): (self.screen_width, self.screen_height) = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) (display_width, display_height) = (1024, 420) self.screen = pygame.display.set_mode( (display_width, display_height)) pygame.display.set_caption( "Reinforcement Learning - Sapienza - Jose M Salas") self.numpy_surface = np.zeros( (self.screen_height, self.screen_width, 3), dtype=np.uint8) self.game_surface = pygame.Surface( (self.screen_width, self.screen_height)) pygame.display.flip() #init clock self.clock = pygame.time.Clock() self.agent = agent self.nactions = len( self.legal_actions ) # 0: not moving, 1: left, 2: right, 3: up, 4: down for i in range(self.nactions): self.key_status.append(False) print(self.nactions) # ns = 89999 # Number of statuses if we use enemy type ram info without level number #FINAL ns = 489999 # Number of statuses if we use enemy type ram info ns = 4899999 # Number of statuses if we use enemy type ram info # ns = 48999 print('Number of states: %d' % ns) self.agent.init(ns, self.nactions) # 1 for RA not used here def initScreen(self): if (self.gui_visible): if sys.platform == 'darwin': pygame.init() self.ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): pygame.init() self.ale.setBool('sound', True) self.ale.setBool('display_screen', False) if (self.gui_visible): (self.screen_width, self.screen_height) = self.ale.getScreenDims() print("width/height: " + str(self.screen_width) + "/" + str(self.screen_height)) (display_width, display_height) = (1024, 420) self.screen = pygame.display.set_mode( (display_width, display_height)) pygame.display.set_caption( "Reinforcement Learning - Sapienza - Jose M Salas") self.numpy_surface = np.zeros( (self.screen_height, self.screen_width, 3), dtype=np.uint8) self.game_surface = pygame.Surface( (self.screen_width, self.screen_height)) pygame.display.flip() #init clock self.clock = pygame.time.Clock() def reset(self): self.pos_x = 0 self.pos_y = 0 # Kung fu master observations self.enemy_pos = 0 self.n_enemies = 0 self.my_pos = 0 
self.danger_pos = 0 self.danger_type = 0 self.enemy_type = 0 # 0, 1, 2, 3, 80, 81, 82, 40 self.blocked = 0 self.prev_blocked = 0 self.hold_hit = 0 self.time_left1 = 0 self.time_left2 = 0 self.my_energy = 39 self.previous_my_energy = 39 self.lifes = 3 self.previous_lifes = 3 self.got_hit = 0 self.got_blocked = 0 self.got_unblocked = 0 self.still_blocked = False self.starting_pos = 0 self.level = 1 self.score = 0 self.cumreward = 0 self.cumscore = 0 self.action_reward = 0 self.current_reward = 0 # accumulate reward over all events happened during this action until next different state self.prev_state = None # previous state self.firstAction = True # first action of the episode self.finished = False # episode finished self.newstate = True # new state reached self.numactions = 0 # number of actions in this episode self.iteration += 1 self.agent.optimal = self.optimalPolicyUser or ( self.iteration % 100 ) == 0 # False #(random.random() < 0.5) # choose greedy action selection for the entire episode def pair_function(self): # Combine the number of enemies, player blocked and danger type information into 7 different states if self.n_enemies > 0: self.danger_type = 0 # print (str(self.n_enemies) + " - " + str(self.danger_type) + ' - ' + str(self.blocked)) pair = (int)( (0.5 * (self.n_enemies + self.danger_type) * (self.n_enemies + self.danger_type + 1) + self.danger_type + 1) * (1 - (self.blocked / 128))) if pair > 8: return 5 #game not started yet else: return pair def enemy_type_s(self): if self.enemy_type > 127: return (self.enemy_type - 128 + 4) elif self.enemy_type == 64: return 8 else: return self.enemy_type def getstate(self): # print ('enemy type: ' + str(self.enemy_type_s()) + 'level: ' + str(self.level -1) ) x = (int)((self.level - 1) * 1000000 + self.pair_function() * 100000 + (self.enemy_type_s() * 10000) + np.rint(self.my_pos / 32) * 1000 + np.rint(self.enemy_pos / 32) * 100 + np.rint(self.danger_pos / 32) * 10 + np.rint(self.hold_hit / 16)) #3FINAL x = (int)((self.enemy_type_s()*1000) + (self.level-1)*100000 + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16)) #2NO LEVEL x = (int)((self.enemy_type_s()*1000) + self.pair_function()*10000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16)) #1NO ENEMY TYPE x = (int)((self.level-1)*10000 + self.pair_function()*1000 + np.rint(self.enemy_pos/32)*100 + np.rint(self.danger_pos/32)*10 + np.rint(self.hold_hit/16)) return x def goal_reached(self): #return (self.my_energy>0 and self.time_left1==0 and self.time_left2<5) #and self.my_energy==39) return (self.level == 5) def update(self, a): self.command = a # Update RAM self.ale.getRAM(self.ram) # Get info from RAM self.enemy_pos = self.ram[72] self.n_enemies = self.ram[91] self.danger_pos = self.ram[73] self.my_pos = self.ram[74] self.hold_hit = self.ram[77] self.enemy_type = self.ram[54] if self.level < self.ram[31]: self.starting_pos = self.ram[74] self.level = self.ram[31] self.max_level = max(self.level, self.max_level) # Danger/Enemy position: # 49 = no danger # 50 = danger approaching from left # 208 = danger approaching from right # ram[96] = 6, danger comes from top # ram[96] = 29, danger comes from bottom # ram[96] = 188, none if self.ram[96] == 6: self.danger_type = 0 elif self.ram[96] == 29: self.danger_type = 1 else: self.danger_type = 2 self.time_left1 = self.ram[27] self.time_left2 = self.ram[28] self.previous_my_energy = self.my_energy self.my_energy = self.ram[75] if 
self.my_energy < self.previous_my_energy and not self.still_blocked and self.ram[ 34] == 0: self.got_hit = STATES['GotHit'] else: self.got_hit = 0 self.previous_lifes = self.lifes self.lifes = self.ram[29] self.prev_blocked = self.blocked self.blocked = self.ram[61] if self.blocked > self.prev_blocked and not self.still_blocked: self.got_blocked = STATES['GotBlocked'] self.still_blocked = True self.got_unblocked = 0 elif self.blocked < self.prev_blocked and self.still_blocked: self.got_unblocked = STATES['GotUnblocked'] self.still_blocked = False self.got_blocked = 0 else: self.got_blocked = 0 self.got_unblocked = 0 # print ('enemy_pos=' +str(self.enemy_pos) + ' - danger_pos=' + str(self.danger_pos) + ' - my_position=' # + str(self.my_pos) + ' - my_energy=' + str(self.my_energy) + ' - blocked=' + str(self.blocked) + ' - danger_type=' + str(self.danger_type)) self.prev_state = self.getstate() # remember previous state # print " == Update start ",self.prev_state," action",self.command self.current_reward = 0 # accumulate reward over all events happened during this action until next different state #print('self.current_reward = 0') self.numactions += 1 # total number of actions axecuted in this episode # while (self.prev_state == self.getstate()): if (self.firstAction): self.starting_pos = self.ram[74] self.firstAction = False self.current_reward = self.ale.act(a) else: self.current_reward = self.ale.act(a) if self.ram[34] == 0: #only when playing if (a == 3 and self.starting_pos < self.my_pos) or ( a == 4 and self.starting_pos > self.my_pos): self.action_reward = STATES['MoveFW'] elif (a == 3 and self.starting_pos > self.my_pos) or ( a == 4 and self.starting_pos < self.my_pos): self.action_reward = STATES['MoveBW'] else: self.action_reward = STATES['NotMoving'] self.score += self.current_reward self.current_reward += self.action_reward # print('score= ' + str(self.score) + ' current reward=' +str(np.rint(self.current_reward))+ ' - energy=' + str(self.my_energy/39.0) + # ' - got_hot='+ str(self.got_hit) + ' - got_blocked=' + str(self.got_blocked) + ' - got_unblocked=' + str(self.got_unblocked)) # check if episode terminated #self.draw_screen if self.goal_reached(): self.current_reward += STATES['Alive'] self.ngoalreached += 1 #self.ale.reset_game() self.finished = True if (self.ale.game_over()): self.current_reward += STATES['Dead'] if self.level > 1: print('game over in level ' + str(self.level)) if self.my_energy > 0 and self.lifes == 3: print('Game over alive????') self.ale.reset_game() self.finished = True if self.level > 2: if self.gui_visible == False: self.gui_visible = True self.initScreen() #print " ** Update end ",self.getstate(), " prev ",self.prev_state def input(self): self.isPressed = False if self.gui_visible: for event in pygame.event.get(): if event.type == pygame.QUIT: return False if event.type == pygame.KEYDOWN: if event.key == pygame.K_SPACE: self.pause = not self.pause print "Game paused: ", self.pause elif event.key == pygame.K_a: self.isAuto = not self.isAuto self.sleeptime = int(self.isAuto) * 0.07 elif event.key == pygame.K_s: self.sleeptime = 1.0 self.agent.debug = False elif event.key == pygame.K_d: self.sleeptime = 0.07 self.agent.debug = False elif event.key == pygame.K_f: self.sleeptime = 0.005 self.agent.debug = False elif event.key == pygame.K_g: self.sleeptime = 0.0 self.agent.debug = False elif event.key == pygame.K_o: self.optimalPolicyUser = not self.optimalPolicyUser print "Best policy: ", self.optimalPolicyUser elif event.key == pygame.K_q: self.userquit 
= True print "User quit !!!" else: pressed = pygame.key.get_pressed() self.keys = 0 self.keys |= pressed[pygame.K_UP] self.keys |= pressed[pygame.K_DOWN] << 1 self.keys |= pressed[pygame.K_LEFT] << 2 self.keys |= pressed[pygame.K_RIGHT] << 3 self.keys |= pressed[pygame.K_z] << 4 self.command = key_action_tform_table[self.keys] self.key_status[self.command] = True if event.type == pygame.KEYUP: pressed = pygame.key.get_pressed() self.keys = 0 self.keys |= pressed[pygame.K_UP] self.keys |= pressed[pygame.K_DOWN] << 1 self.keys |= pressed[pygame.K_LEFT] << 2 self.keys |= pressed[pygame.K_RIGHT] << 3 self.keys |= pressed[pygame.K_z] << 4 self.command = key_action_tform_table[self.keys] self.key_status[self.command] = False if not (True in self.key_status): self.command = 0 return True def getUserAction(self): return self.command def getreward(self): r = np.rint( self.current_reward ) + self.got_hit + self.got_blocked + self.got_unblocked - np.rint( self.blocked / 128) self.cumreward += r return r def print_report(self, printall=False): toprint = printall ch = ' ' if (self.agent.optimal): ch = '*' toprint = True s = 'Iter %6d, sc: %3d, l: %d, na: %4d, r: %5d %c' % ( self.iteration, self.score, self.level, self.numactions, self.cumreward, ch) if self.score > self.hiscore: self.hiscore = self.score s += ' HISCORE ' toprint = True if self.cumreward > self.hireward: self.hireward = self.cumreward s += ' HIREWARD ' toprint = True if (toprint): print(s) self.cumreward100 += self.cumreward self.cumscore100 += self.score numiter = 100 if (self.iteration % numiter == 0): #self.doSave() pgoal = float(self.ngoalreached * 100) / numiter print( '----------------------------------------------------------------------------------------------------------------------' ) print( "%s %6d avg last 100: reward %d | score %.2f | level %d | p goals %.1f %%" % (self.trainsessionname, self.iteration, self.cumreward100 / 100, float(self.cumscore100) / 100, self.max_level, pgoal)) print( '----------------------------------------------------------------------------------------------------------------------' ) self.cumreward100 = 0 self.cumscore100 = 0 self.ngoalreached = 0 sys.stdout.flush() self.resfile.write( "%d,%d,%d,%d\n" % (self.score, self.cumreward, self.goal_reached(), self.numactions)) self.resfile.flush() def draw(self): if self.gui_visible: self.screen.fill((0, 0, 0)) self.ale.getScreenRGB(self.numpy_surface) pygame.surfarray.blit_array( self.game_surface, np.transpose(self.numpy_surface, (1, 0, 2))) # pygame.pixelcopy.array_to_surface(self.game_surface, np.transpose(self.numpy_surface,(1,0,2))) self.screen.blit( pygame.transform.scale2x( pygame.transform.scale( self.game_surface, (self.screen_height, self.screen_height))), (0, 0)) #Display ram bytes font = pygame.font.SysFont("Ubuntu Mono", 32) text = font.render("RAM: ", 1, (255, 208, 208)) self.screen.blit(text, (430, 10)) font = pygame.font.SysFont("Ubuntu Mono", 25) height = font.get_height() * 1.2 line_pos = 40 ram_pos = 0 while (ram_pos < 128): ram_string = ''.join([ "%02X " % self.ram[x] for x in range(ram_pos, min(ram_pos + 16, 128)) ]) text = font.render(ram_string, 1, (255, 255, 255)) self.screen.blit(text, (440, line_pos)) line_pos += height ram_pos += 16 #display current action font = pygame.font.SysFont("Ubuntu Mono", 32) text = font.render("Current Action: " + str(self.command), 1, (208, 208, 255)) height = font.get_height() * 1.2 self.screen.blit(text, (430, line_pos)) line_pos += height #display reward font = pygame.font.SysFont("Ubuntu Mono", 
30) text = font.render("Total Reward: " + str(self.cumreward), 1, (208, 255, 255)) self.screen.blit(text, (430, line_pos)) pygame.display.flip() # clock.tick(60.) else: return 0 def quit(self): self.resfile.close() pygame.quit()
class AtariEmulator: def __init__(self, args): ''' Initialize Atari environment ''' # Parameters self.buffer_length = args.buffer_length self.screen_dims = args.screen_dims self.frame_skip = args.frame_skip self.blend_method = args.blend_method self.reward_processing = args.reward_processing self.max_start_wait = args.max_start_wait self.history_length = args.history_length self.start_frames_needed = self.buffer_length - 1 + ((args.history_length - 1) * self.frame_skip) #Initialize ALE instance self.ale = ALEInterface() self.ale.setFloat(b'repeat_action_probability', 0.0) if args.watch: self.ale.setBool(b'sound', True) self.ale.setBool(b'display_screen', True) self.ale.loadROM(str.encode(args.rom_path + '/' + args.game + '.bin')) self.buffer = np.empty((self.buffer_length, 210, 160)) self.current = 0 self.action_set = self.ale.getMinimalActionSet() self.lives = self.ale.lives() self.reset() def get_possible_actions(self): ''' Return list of possible actions for game ''' return self.action_set def get_screen(self): ''' Add screen to frame buffer ''' self.buffer[self.current] = np.squeeze(self.ale.getScreenGrayscale()) self.current = (self.current + 1) % self.buffer_length def reset(self): self.ale.reset_game() self.lives = self.ale.lives() if self.max_start_wait < 0: print("ERROR: max start wait decreased beyond 0") sys.exit() elif self.max_start_wait <= self.start_frames_needed: wait = 0 else: wait = random.randint(0, self.max_start_wait - self.start_frames_needed) for _ in range(wait): self.ale.act(self.action_set[0]) # Fill frame buffer self.get_screen() for _ in range(self.buffer_length - 1): self.ale.act(self.action_set[0]) self.get_screen() # get initial_states state = [(self.preprocess(), 0, 0, False)] for step in range(self.history_length - 1): state.append(self.run_step(0)) # make sure agent hasn't died yet if self.isTerminal(): print("Agent lost during start wait. Decreasing max_start_wait by 1") self.max_start_wait -= 1 return self.reset() return state def run_step(self, action): ''' Apply action to game and return next screen and reward ''' raw_reward = 0 for step in range(self.frame_skip): raw_reward += self.ale.act(self.action_set[action]) self.get_screen() reward = None if self.reward_processing == 'clip': reward = np.clip(raw_reward, -1, 1) else: reward = raw_reward terminal = self.isTerminal() self.lives = self.ale.lives() return (self.preprocess(), action, reward, terminal, raw_reward) def preprocess(self): ''' Preprocess frame for agent ''' img = None if self.blend_method == "max": img = np.amax(self.buffer, axis=0) return cv2.resize(img, self.screen_dims, interpolation=cv2.INTER_LINEAR) def isTerminal(self): return (self.isGameOver() or (self.lives > self.ale.lives())) def isGameOver(self): return self.ale.game_over()
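The constructor above expects an argparse-style namespace; a construction sketch follows, where the field values are assumptions mirroring common DQN settings and rom_path/game are placeholders:

# Sketch: building the emulator above from a hand-made namespace.
from types import SimpleNamespace

args = SimpleNamespace(
    buffer_length=2, screen_dims=(84, 84), frame_skip=4,
    blend_method='max', reward_processing='clip',
    max_start_wait=30, history_length=4,
    watch=False, rom_path='roms', game='breakout')
emulator = AtariEmulator(args)                 # loads roms/breakout.bin and fills the frame buffer
history = emulator.reset()                     # list of per-step tuples used as the initial history
frame, action, reward, terminal, raw = emulator.run_step(0)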
import cv2 import random import threading import sys import time import os from ale_python_interface import ALEInterface from replayMemory import ReplayMemory from buildGraph import createQNetwork, build_train_op ale = ALEInterface() viz = False rom_name = "roms/Breakout.bin" ale.setBool('sound', False) ale.setBool('display_screen', viz) ale.setInt("frame_skip", 4) ale.loadROM(rom_name) legal_actions = ale.getMinimalActionSet() action_map = {} for i in range(len(legal_actions)): action_map[i] = legal_actions[i] action_num = len(action_map) class config: batch_size = args.batch_size action_num = action_num replay_memory_capacity = args.replay_memory_capacity steps_before_training = args.steps_before_training buff_size = 4 device = args.device gamma = args.gamma
class Emulator: def __init__(self, rom_path, rom_name, visualize, actor_id, rseed, single_life_episodes=False): self.ale = ALEInterface() self.ale.setInt("random_seed", rseed * (actor_id + 1)) # For fuller control on explicit action repeat (>= ALE 0.5.0) self.ale.setFloat("repeat_action_probability", 0.0) # Disable frame_skip and color_averaging # See: http://is.gd/tYzVpj self.ale.setInt("frame_skip", 1) self.ale.setBool("color_averaging", False) self.ale.loadROM(rom_path + "/" + rom_name + ".bin") self.legal_actions = self.ale.getMinimalActionSet() self.screen_width, self.screen_height = self.ale.getScreenDims() #self.ale.setBool('display_screen', True) # Processed historcal frames that will be fed in to the network # (i.e., four 84x84 images) self.screen_images_processed = np.zeros( (IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8) self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8) self.frame_pool = np.empty((2, self.screen_height, self.screen_width)) self.current = 0 self.lives = self.ale.lives() self.visualize = visualize self.visualize_processed = False self.windowname = rom_name + ' ' + str(actor_id) if self.visualize: logger.debug("Opening emulator window...") #from skimage import io #io.use_plugin('qt') cv2.startWindowThread() cv2.namedWindow(self.windowname) logger.debug("Emulator window opened") if self.visualize_processed: logger.debug("Opening processed frame window...") cv2.startWindowThread() logger.debug("Processed frame window opened") cv2.namedWindow(self.windowname + "_processed") self.single_life_episodes = single_life_episodes def get_screen_image(self): """ Add screen (luminance) to frame pool """ # [screen_image, screen_image_rgb] = [self.ale.getScreenGrayscale(), # self.ale.getScreenRGB()] self.ale.getScreenGrayscale(self.gray_screen) self.ale.getScreenRGB(self.rgb_screen) self.frame_pool[self.current] = np.squeeze(self.gray_screen) self.current = (self.current + 1) % FRAMES_IN_POOL return self.rgb_screen def new_game(self): """ Restart game """ self.ale.reset_game() self.lives = self.ale.lives() if MAX_START_WAIT < 0: logger.debug("Cannot time travel yet.") sys.exit() elif MAX_START_WAIT > 0: wait = random.randint(0, MAX_START_WAIT) else: wait = 0 for _ in xrange(wait): self.ale.act(self.legal_actions[0]) def process_frame_pool(self): """ Preprocess frame pool """ img = None if BLEND_METHOD == "max_pool": img = np.amax(self.frame_pool, axis=0) #img resize(img[:210, :], (84, 84)) img = cv2.resize(img[:210, :], (84, 84), interpolation=cv2.INTER_LINEAR) img = img.astype(np.float32) img *= (1.0 / 255.0) return img # Reduce height to 210, if not so #cropped_img = img[:210, :] # Downsample to 110x84 #down_sampled_img = resize(cropped_img, (84, 84)) # Crop to 84x84 playing area #stackable_image = down_sampled_img[:, 26:110] #return stackable_image def action_repeat(self, a): """ Repeat action and grab screen into frame pool """ reward = 0 for i in xrange(ACTION_REPEAT): reward += self.ale.act(self.legal_actions[a]) new_screen_image_rgb = self.get_screen_image() return reward, new_screen_image_rgb def get_reshaped_state(self, state): return np.reshape(state, (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) #return np.reshape(self.screen_images_processed, # (1, IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES)) def get_initial_state(self): """ Get the initial state """ self.new_game() for step in xrange(NR_IMAGES): reward, new_screen_image_rgb = self.action_repeat(0) 
self.screen_images_processed[:, :, step] = self.process_frame_pool() self.show_screen(new_screen_image_rgb) if self.is_terminal(): MAX_START_WAIT -= 1 return self.get_initial_state() return np.copy(self.screen_images_processed) #get_reshaped_state() def next(self, action): """ Get the next state, reward, and game over signal """ reward, new_screen_image_rgb = self.action_repeat(np.argmax(action)) self.screen_images_processed[:, :, 0:3] = \ self.screen_images_processed[:, :, 1:4] self.screen_images_processed[:, :, 3] = self.process_frame_pool() self.show_screen(new_screen_image_rgb) terminal = self.is_terminal() self.lives = self.ale.lives() return np.copy( self.screen_images_processed ), reward, terminal #get_reshaped_state(), reward, terminal def show_screen(self, image): """ Show visuals for raw and processed images """ if self.visualize: #io.imshow(image[:210, :], fancy=True) cv2.imshow(self.windowname, image[:210, :]) if self.visualize_processed: #io.imshow(self.screen_images_processed[:, :, 3], fancy=True) cv2.imshow(self.windowname + "_processed", self.screen_images_processed[:, :, 3]) def is_terminal(self): if self.single_life_episodes: return (self.is_over() or (self.lives > self.ale.lives())) else: return self.is_over() def is_over(self): return self.ale.game_over()
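A minimal loop for the emulator above; note that next() takes a one-hot action vector and applies np.argmax internally. Module-level constants (IMG_SIZE_X, NR_IMAGES, ACTION_REPEAT, MAX_START_WAIT) are assumed to be defined alongside the class, and the ROM path/name below are placeholders:

# Sketch: rollout with the PAAC-style Emulator above.
import numpy as np

emu = Emulator('roms', 'breakout', visualize=False, actor_id=0, rseed=1)
state = emu.get_initial_state()                    # stacked frames, e.g. (84, 84, 4)
num_actions = len(emu.legal_actions)
terminal = False
while not terminal:
    one_hot = np.zeros(num_actions)
    one_hot[np.random.randint(num_actions)] = 1.0  # next() applies np.argmax to this vector
    state, reward, terminal = emu.next(one_hot)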
if frame_pooling_style == "color_averaging": ale.setInt('frame_skip', agent_params["frame_skip"]) # Set USE_SDL to true to display the screen. ALE must be compilied # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. USE_SDL = False # True if USE_SDL: ale.setBool('display_screen', True) if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX # Load the ROM file ale.loadROM('../roms/' + sys.argv[1] + '.bin') #Get the list of available modes and difficulties avail_modes = ale.getAvailableModes() avail_diff = ale.getAvailableDifficulties() print 'Number of available modes: ', len(avail_modes) print 'Number of available difficulties: ', len(avail_diff) # Get the list of legal actions if use_minimal_actions: action_set = ale.getMinimalActionSet() else: action_set = ale.getLegalActionSet() agent_params["n_actions"] = len(action_set)
class Atari: def __init__(self, rom_dir): self.ale = ALEInterface() # Set settings self.ale.setInt("random_seed", 123) self.frame_skip = 1 self.ale.setInt("frame_skip", self.frame_skip) self.ale.setBool("display_screen", False) self.ale.setBool("sound", True) self.record_sound_for_user = True self.ale.setBool("record_sound_for_user", self.record_sound_for_user) # NOTE recording audio to file still works. But if both file recording and # record_sound_for_user are enabled, then only the latter is done # self.ale.setString("record_sound_filename", "") # Get settings self.ale.loadROM(rom_dir) self.action_count = 0 self.screen_width, self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getLegalActionSet() self.framerate = 60 # Should read from ALE settings technically self.samples_per_frame = 512 # Should read from ALE SoundExporter class technically self.audio_freq = self.framerate * self.samples_per_frame #/self.frame_skip self.all_audio = np.zeros((0, ), dtype=np.uint8) # Saving audio/video to disk for verification. self.save_to_file = True # NOTE set to False to test actual screen/audio query speed! if self.save_to_file: self.save_dir_av = './logs_av_seq_Example' # Save png sequence and audio wav file here self.save_dir_movies = './log_movies_Example' self.save_image_prefix = 'image_frames' self.save_audio_filename = 'audio_user_recorder.wav' self.create_save_dir(self.save_dir_av) def take_action(self): action = self.legal_actions[np.random.randint(self.legal_actions.size)] self.ale.act(action) def create_save_dir(self, directory): # Remove previous img/audio image logs if os.path.exists(directory): shutil.rmtree(directory) os.makedirs(directory) def get_image_and_audio(self): np_data_image = np.zeros(self.screen_width * self.screen_height * 3, dtype=np.uint8) if self.record_sound_for_user: np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8) self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio) # Also supports independent audio queries if user desires: # self.ale.getAudio(np_data_audio) else: # np_data_audio = 0 np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8) self.ale.getAudio(np_data_audio) self.ale.getScreenRGB(np_data_image) return np.reshape(np_data_image, (self.screen_height, self.screen_width, 3)), np.asarray(np_data_audio) def audio_to_mfcc(self, audio): mfcc_data = mfcc(signal=audio, samplerate=self.audio_freq, winlen=0.002, winstep=0.0006) mfcc_data = np.swapaxes(mfcc_data, 0, 1) # Time on x-axis # Normalization min_data = np.min(mfcc_data.flatten()) max_data = np.max(mfcc_data.flatten()) mfcc_data = (mfcc_data - min_data) / (max_data - min_data) return mfcc_data def save_image(self, image): number = str(self.action_count).zfill(6) scipy.misc.imsave( os.path.join(self.save_dir_av, self.save_image_prefix + number + '.png'), image) def save_audio(self, audio): wavfile.write(os.path.join(self.save_dir_av, self.save_audio_filename), self.audio_freq, audio) def save_movie(self, movie_name): # Use ffmpeg to convert the saved img sequences and audio to mp4 # Video recording command = [ "ffmpeg", '-y', # overwrite output file if it exists '-r', str(self.framerate), # frames per second '-i', os.path.join(self.save_dir_av, self.save_image_prefix + '%6d.png') # Video input comes from pngs ] # Audio if available if self.record_sound_for_user: command.extend([ '-i', os.path.join(self.save_dir_av, self.save_audio_filename) ]) # Audio input comes from wav # Codecs and output command.extend([ '-c:v', 'libx264', # Video codec '-c:a', 
'mp3', # Audio codec os.path.join(self.save_dir_movies, movie_name + '.mp4') # Output dir ]) # Make movie dir and write the mp4 if not os.path.exists(self.save_dir_movies): os.makedirs(self.save_dir_movies) sp.call( command ) # NOTE: needs ffmpeg! Will throw 'dir doesn't exist err' otherwise. def concat_image_audio(self, image, audio_mfcc): # Concatenates image and audio to test sync'ing in saved .mp4 audio_mfcc = scipy.misc.imresize(audio_mfcc, np.shape( image)) # Resize MFCC image to be same size as screen image cmap = plt.get_cmap('viridis') # Apply a colormap to spectrogram audio_mfcc = (np.delete(cmap(audio_mfcc), 3, 2) * 255.).astype( np.uint8) # Gray MFCC -> 4 channel colormap -> 3 channel colormap image = np.concatenate((image, audio_mfcc), axis=1) # Concat screen image and MFCC image return image def plot_mfcc(self, audio_mfcc): plt.clf() plt.imshow(audio_mfcc, interpolation='bilinear', cmap=plt.get_cmap('viridis')) plt.pause(0.001)
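A sketch of a capture loop for the audio/video class above; the ROM path, step count, and the external action_count increment are assumptions (the class as shown never advances that counter itself), and saving the movie requires ffmpeg on PATH:

# Sketch: record a short audio/video trace with the Atari class above.
import numpy as np

atari = Atari('roms/breakout.bin')                 # ROM path is a placeholder
for _ in range(600):                               # roughly ten seconds at 60 fps
    atari.take_action()
    image, audio = atari.get_image_and_audio()
    atari.all_audio = np.concatenate((atari.all_audio, audio))
    if atari.save_to_file:
        atari.save_image(image)
    atari.action_count += 1                        # assumed: advance the frame counter externally
if atari.save_to_file:
    atari.save_audio(atari.all_audio)
    atari.save_movie('breakout_demo')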
# print '' # initialization np.random.seed(SEED) ale = ALEInterface() if SEED is None: ale.setInt('random_seed', 0) else: ale.setInt('random_seed', SEED) ale.setInt("frame_skip",frameSkip) ale.setBool('color_averaging', True) ale.setBool('sound', False) ale.setBool('display_screen', False) ale.setFloat("repeat_action_probability", 0.0) ale.loadROM(romPath) legal_actions = ale.getMinimalActionSet() n_actions = len(legal_actions) opt.n_actions = n_actions explorationRateDelta = (initialExplorationRate - finalExplorationRate)/(finalExplorationFrame-startLearningFrame) explorationRate = initialExplorationRate + startLearningFrame*explorationRateDelta if networkType == "CNN": width = 84 height = 84 Dim = [height,width] else: width = 36 height = 41 Dim = [height*width]
class AtariPlayer(RLEnvironment): """ A wrapper for atari emulator. NOTE: will automatically restart when a real episode ends """ def __init__(self, rom_file, viz=0, height_range=(None, None), frame_skip=4, image_shape=(84, 84), nullop_start=30, live_lost_as_eoe=True): """ :param rom_file: path to the rom :param frame_skip: skip every k frames and repeat the action :param image_shape: (w, h) :param height_range: (h1, h2) to cut :param viz: visualization to be done. Set to 0 to disable. Set to a positive number to be the delay between frames to show. Set to a string to be a directory to store frames. :param nullop_start: start with random number of null ops :param live_losts_as_eoe: consider lost of lives as end of episode. useful for training. """ super(AtariPlayer, self).__init__() self.ale = ALEInterface() self.rng = get_rng(self) self.ale.setInt("random_seed", self.rng.randint(0, 10000)) self.ale.setBool("showinfo", False) try: ALEInterface.setLoggerMode(ALEInterface.Logger.Warning) except AttributeError: log_once() self.ale.setInt("frame_skip", 1) self.ale.setBool('color_averaging', False) # manual.pdf suggests otherwise. self.ale.setFloat('repeat_action_probability', 0.0) # viz setup if isinstance(viz, six.string_types): assert os.path.isdir(viz), viz self.ale.setString('record_screen_dir', viz) viz = 0 if isinstance(viz, int): viz = float(viz) self.viz = viz if self.viz and isinstance(self.viz, float): self.windowname = os.path.basename(rom_file) cv2.startWindowThread() cv2.namedWindow(self.windowname) self.ale.loadROM(rom_file) self.width, self.height = self.ale.getScreenDims() self.actions = self.ale.getMinimalActionSet() self.live_lost_as_eoe = live_lost_as_eoe self.frame_skip = frame_skip self.nullop_start = nullop_start self.height_range = height_range self.image_shape = image_shape self.current_episode_score = StatCounter() self.restart_episode() def _grab_raw_image(self): """ :returns: the current 3-channel image """ m = self.ale.getScreenRGB() return m.reshape((self.height, self.width, 3)) def current_state(self): """ :returns: a gray-scale (h, w, 1) image """ ret = self._grab_raw_image() # max-pooled over the last screen ret = np.maximum(ret, self.last_raw_screen) if self.viz: if isinstance(self.viz, float): #m = cv2.resize(ret, (1920,1200)) cv2.imshow(self.windowname, ret) time.sleep(self.viz) ret = ret[self.height_range[0]:self.height_range[1], :] # 0.299,0.587.0.114. 
same as rgb2y in torch/image ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.resize(ret, self.image_shape) ret = np.expand_dims(ret, axis=2) return ret def get_action_space(self): return DiscreteActionSpace(len(self.actions)) def restart_episode(self): if self.current_episode_score.count > 0: self.stats['score'].append(self.current_episode_score.sum) self.current_episode_score.reset() self.ale.reset_game() # random null-ops start n = self.rng.randint(self.nullop_start) self.last_raw_screen = self._grab_raw_image() for k in range(n): if k == n - 1: self.last_raw_screen = self._grab_raw_image() self.ale.act(0) def action(self, act): """ :param act: an index of the action :returns: (reward, isOver) """ oldlives = self.ale.lives() r = 0 for k in range(self.frame_skip): if k == self.frame_skip - 1: self.last_raw_screen = self._grab_raw_image() r += self.ale.act(self.actions[act]) newlives = self.ale.lives() if self.ale.game_over() or \ (self.live_lost_as_eoe and newlives < oldlives): break self.current_episode_score.feed(r) isOver = self.ale.game_over() if isOver: self.restart_episode() if self.live_lost_as_eoe: isOver = isOver or newlives < oldlives return (r, isOver) def get_stat(self): try: return { 'avg_score': np.mean(self.stats['score']), 'max_score': float(np.max(self.stats['score'])) } except ValueError: return {}
class GameState(object): def __init__(self, rand_seed, display=False): self.ale = ALEInterface() self.ale.setInt('random_seed', rand_seed) if display: self._setup_display() self.ale.loadROM(ROM) # height=210, width=160 self.screen = np.empty((210, 160, 1), dtype=np.uint8) no_action = 0 self.reward = self.ale.act(no_action) self.terminal = self.ale.game_over() # screen shape is (210, 160, 1) self.ale.getScreenGrayscale(self.screen) # reshape to (210, 160) reshaped_screen = np.reshape(self.screen, (210, 160)) # resize to height=110, width=84 resized_screen = cv2.resize(reshaped_screen, (84, 110)) x_t = resized_screen[18:102,:] x_t = x_t.astype(np.float32) x_t *= (1.0/255.0) self.s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2) # collect only the actions that are actually used self.real_actions = self.ale.getMinimalActionSet() def _setup_display(self): if sys.platform == 'darwin': import pygame pygame.init() self.ale.setBool('sound', False) elif sys.platform.startswith('linux'): self.ale.setBool('sound', True) self.ale.setBool('display_screen', True) def process(self, action): # map the index to one of the actually used actions (out of the 18 legal ones) real_action = self.real_actions[action] self.reward = self.ale.act(real_action) #self.reward = self.ale.act(action) self.terminal = self.ale.game_over() # screen shape is (210, 160, 1) self.ale.getScreenGrayscale(self.screen) # reshape to (210, 160) reshaped_screen = np.reshape(self.screen, (210, 160)) # height=210, width=160 # resize to height=110, width=84 resized_screen = cv2.resize(reshaped_screen, (84, 110)) x_t1 = resized_screen[18:102,:] x_t1 = np.reshape(x_t1, (84, 84, 1)) x_t1 = x_t1.astype(np.float32) x_t1 *= (1.0/255.0) self.s_t1 = np.append(x_t1, self.s_t[:,:,0:3], axis = 2) if self.terminal: self.ale.reset_game() def update(self): self.s_t = self.s_t1
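The class keeps a 4-frame stack internally; a sketch of the intended call pattern follows, assuming the module-level ROM constant points at a real ROM:

# Sketch: stepping the GameState above with random actions.
import numpy as np

gs = GameState(rand_seed=113)
for _ in range(1000):
    action = np.random.randint(len(gs.real_actions))
    gs.process(action)          # fills gs.s_t1, gs.reward, gs.terminal
    gs.update()                 # promote s_t1 to s_t for the next step
    if gs.terminal:
        break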
class ALEEnvironment(BaseEnvironment): """ A wrapper of Arcade Learning Environment, which inherits all members of ``BaseEnvironment``. """ # 63 games ADVENTURE = "adventure" AIR_RAID = "air_raid" ALIEN = "alien" AMIDAR = "amidar" ASSAULT = "assault" ASTERIX = "asterix" ASTEROIDS = "asteroids" ATLANTIS = "aslantis" BANK_HEIST = "bank_heist" BATTLE_ZONE = "battle_zone" BEAM_RIDER = "beam_rider" BERZERK = "berzerk" BOWLING = "bowling" BOXING = "boxing" BREAKOUT = "breakout" CARNIVAL = "carnival" CENTIPEDE = "centipede" CHOPPER_COMMAND = "chopper_command" CRAZY_CLIMBER = "crazy_climber" DEFENDER = "defender" DEMON_ATTACK = "demon_attack" DOUBLE_DUNK = "double_dunk" ELEVATOR_ACTION = "elevator_action" ENDURO = "enduro" FISHING_DERBY = "fishing_derby" FREEWAY = "freeway" FROSTBITE = "frostbite" GOPHER = "gopher" GRAVITAR = "gravitar" HERO = "hero" ICE_HOCKEY = "ice_hockey" JAMESBOND = "jamesbond" JOURNEY_ESCAPE = "journey_escape" KABOOM = "kaboom" KANGAROO = "kangaroo" KRULL = "krull" KUNGFU_MASTER = "kung_fu_master" MONTEZUMA = "montezuma_revenge" MS_PACMAN = "ms_pacman" UNKNOWN = "name_this_game" PHOENIX = "phoenix" PITFALL = "pitfall" PONG = "pong" POOYAN = "pooyan" PRIVATE_EYE = "private_eye" QBERT = "qbert" RIVERRAID = "riverraid" ROAD_RUNNER = "road_runner" ROBOTANK = "robotank" SEAQUEST = "seaquest" SKIING = "skiing" SOLARIS = "solaris" SPACE_INVADERS = "space_invaders" STAR_GUNNER = "star_gunner" TENNIS = "tennis" TIME_PILOT = "time_pilot" TUTANKHAM = "tutankham" UP_N_DOWN = "up_n_down" VENTURE = "venture" VIDEO_PINBALL = "video_pinball" WIZARD_OF_WOR = "wizard_of_wor" YARS_REVENGE = "yars_revenge" ZAXXON = "zaxxon" def __init__(self, rom_name, frame_skip=4, repeat_action_probability=0., max_episode_steps=10000, loss_of_life_termination=False, loss_of_life_negative_reward=False, bitwise_max_on_two_consecutive_frames=False, is_render=False, seed=None, startup_policy=None, disable_actions=None, num_of_sub_actions=-1, state_processor=AtariProcessor(resize_shape=(84, 84), convert_to_grayscale=True)): os.environ['SDL_VIDEO_CENTERED'] = '1' file_exist = isfile(ALEEnvironment.get_rom_path(rom_name)) if not file_exist: raise ValueError("Rom not found ! Please put rom " + rom_name + ".bin into: " + ALEEnvironment.get_rom_path()) self.__rom_name = rom_name self.__ale = ALEInterface() if frame_skip < 0: print("Invalid frame_skip param ! Set default frame_skip = 4") self.__frame_skip = 4 else: self.__frame_skip = frame_skip if repeat_action_probability < 0 or repeat_action_probability > 1: raise ValueError("Invalid repeat_action_probability") else: self.__repeat_action_probability = repeat_action_probability self.__max_episode_steps = max_episode_steps self.__loss_of_life_termination = loss_of_life_termination self.__loss_of_life_negative_reward = loss_of_life_negative_reward self.__max_2_frames = bitwise_max_on_two_consecutive_frames # Max 2 frames only work with grayscale self.__grayscale = False if state_processor is not None and type( state_processor ) is AtariProcessor and state_processor.get_grayscale(): self.__grayscale = True if self.__max_2_frames and self.__frame_skip > 1 and self.__grayscale: self.__max_2_frames = True else: self.__max_2_frames = False self.__is_render = is_render self.__processor = state_processor if seed is None or seed <= 0 or seed >= 9999: if seed is not None and (seed < 0 or seed >= 9999): print("Invalid seed ! 
Default seed = randint(0, 9999") self.__seed = np.random.randint(0, 9999) self.__random_seed = True else: self.__random_seed = False self.__seed = seed self.__current_steps = 0 self.__is_life_lost = False self.__is_terminal = False self.__current_lives = 0 self.__action_reduction = num_of_sub_actions self.__scr_width, self.__scr_height, self.__action_set = self.__init_ale( ) self.__prev_buffer = np.empty((self.__scr_height, self.__scr_width, 3), dtype=np.uint8) self.__current_buffer = np.empty( (self.__scr_height, self.__scr_width, 3), dtype=np.uint8) self.__current_state = None self.__prev_state = None self.__startup_policy = startup_policy if disable_actions is None: self.__dis_act = [] else: self.__dis_act = disable_actions if self.__processor.get_number_of_objectives() > 1: self.__multi_objs = True else: self.__multi_objs = False def get_processor(self): return self.__processor def __init_ale(self): self.__ale.setBool(b'display_screen', self.__is_render) if self.__max_2_frames and self.__frame_skip > 1: self.__ale.setInt(b'frame_skip', 1) else: self.__ale.setInt(b'frame_skip', self.__frame_skip) self.__ale.setInt(b'random_seed', self.__seed) self.__ale.setFloat(b'repeat_action_probability', self.__repeat_action_probability) self.__ale.setBool(b'color_averaging', False) self.__ale.loadROM( ALEEnvironment.get_rom_path(self.__rom_name).encode()) width, height = self.__ale.getScreenDims() return width, height, self.__ale.getMinimalActionSet() def clone(self): if self.__random_seed: seed = np.random.randint(0, 9999) else: seed = self.__seed return ALEEnvironment(self.__rom_name, self.__frame_skip, self.__repeat_action_probability, self.__max_episode_steps, self.__loss_of_life_termination, self.__loss_of_life_negative_reward, self.__max_2_frames, self.__is_render, seed, self.__startup_policy, self.__dis_act, self.__action_reduction, self.__processor.clone()) def step_all(self, a): if isinstance(a, (list, np.ndarray)): if len(a) <= 0: raise ValueError('Empty action list !') a = a[0] self.__current_steps += 1 act = self.__action_set[a] rew = self._step(act) next_state = self.get_state() _is_terminal = self.is_terminal() return next_state, rew, _is_terminal, self.__current_steps def reset(self): self.__ale.reset_game() self.__current_lives = self.__ale.lives() self.__is_life_lost = False self.__is_terminal = False self.__current_state = None self.__prev_state = None action_space = self.get_action_space() v_range, is_range = action_space.get_range() if len(v_range) > 1: self.step(1) # No op steps if self.__startup_policy is not None: max_steps = int(self.__startup_policy.get_max_steps()) for _ in range(max_steps): act = self.__startup_policy.step(self.get_state(), action_space) self.step(act) # Start training from this point self.__current_steps = 0 # Reset processor self.__processor.reset() return self.get_state() def _pre_step(self, act): if self.__max_2_frames and self.__frame_skip > 1: rew = 0 for i in range(self.__frame_skip - 2): rew += self.__ale.act(act) self.__prev_buffer = self.__ale.getScreenRGB( self.__prev_buffer) self.__prev_buffer = self.__ale.getScreenRGB(self.__prev_buffer) rew += self.__ale.act(act) self.__current_buffer = self.__ale.getScreenRGB( self.__current_buffer) self.__is_terminal = self.__ale.game_over() self.__prev_state = self.__processor.process(self.__prev_buffer) self.__current_state = self.__processor.process( self.__current_buffer) self.__current_state = np.maximum.reduce( [self.__prev_state, self.__current_state]) else: rew = self.__ale.act(act) 
self.__current_buffer = self.__ale.getScreenRGB( self.__current_buffer) self.__is_terminal = self.__ale.game_over() if self.__processor is not None: self.__current_state = self.__processor.process( self.__current_buffer) if self.__multi_objs and self.__processor is not None: all_rewards = self.__processor.get_rewards(rew) return all_rewards else: return rew def _step(self, act): for i in range(len(self.__dis_act)): if act == self.__dis_act[i]: act = 0 if not self.__loss_of_life_termination and not self.__loss_of_life_negative_reward: if not self.__is_terminal: next_lives = self.__ale.lives() if next_lives < self.__current_lives: act = 1 self.__current_lives = next_lives return self._pre_step(act) else: rew = self._pre_step(act) next_lives = self.__ale.lives() if next_lives < self.__current_lives: if self.__loss_of_life_negative_reward: rew -= 1 self.__current_lives = next_lives self.__is_life_lost = True return rew def get_state(self): if not self.__max_2_frames: if self.__processor is not None: return self.__current_state else: return self.__current_buffer else: return self.__current_state def is_terminal(self): if self.__loss_of_life_termination and self.__is_life_lost: return True elif self.__max_episode_steps is not None and self.__current_steps > self.__max_episode_steps: return True else: return self.__is_terminal @staticmethod def get_rom_path(rom=None): if rom is None: return os.path.dirname(os.path.abspath(__file__)) + "/roms/" else: return os.path.dirname( os.path.abspath(__file__)) + "/roms/" + rom + ".bin" @staticmethod def list_all_roms(): return [ f for f in listdir(ALEEnvironment.get_rom_path()) if isfile(join(ALEEnvironment.get_rom_path(), f)) ] def get_state_space(self): if self.__processor is None: shape = self.__current_buffer.shape else: shape = self.__processor.process(self.__current_buffer).shape min_value = np.zeros(shape, dtype=np.uint8) max_value = np.full(shape, 255) return Space(min_value, max_value, True) def get_action_space(self): if self.__action_reduction >= 1: return Space(0, self.__action_reduction - 1, True) else: return Space(0, len(self.__action_set) - 1, True) def step(self, act): if isinstance(act, (list, np.ndarray)): if len(act) <= 0: raise ValueError('Empty action list !') act = act[0] self.__current_steps += 1 act = self.__action_set[act] rew = self._step(act) return rew def get_current_steps(self): return self.__current_steps def is_atari(self): return True def is_render(self): return self.__is_render def get_number_of_objectives(self): if self.__processor is None: return 1 else: return self.__processor.get_number_of_objectives() def get_number_of_agents(self): if self.__processor is None: return 1 else: return self.__processor.get_number_of_agents() def get_state_processor(self): return self.__processor
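A sketch of the step_all loop for this environment, assuming the surrounding framework classes (BaseEnvironment, Space, AtariProcessor) are importable and breakout.bin sits in the roms/ directory; step_all returns the next state, reward, terminal flag, and the current step counter:

# Sketch: one episode with the ALEEnvironment above.
import numpy as np

env = ALEEnvironment(ALEEnvironment.BREAKOUT, frame_skip=4)
state = env.reset()
values, _ = env.get_action_space().get_range()     # discrete action indices exposed by Space
terminal = False
while not terminal:
    action = np.random.randint(len(values))
    state, reward, terminal, steps = env.step_all(action)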
15, #10111 fire up/down/left (invalid) 11, #11000 fire right 14, #11001 fire up/right 16, #11010 fire down/right 14, #11011 fire up/down/right (invalid) 11, #11100 fire left/right (invalid) 14, #11101 fire left/right/up (invalid) 16, #11110 fire left/right/down (invalid) 14 #11111 fire up/down/left/right (invalid) ) ale = ALEInterface() rom = b'../roms/breakout.bin' ale.loadROM(rom) legal_actions = ale.getMinimalActionSet() print(legal_actions) (screen_width,screen_height) = ale.getScreenDims() print("width/height: " +str(screen_width) + "/" + str(screen_height)) (display_width,display_height) = (1024,420) #init pygame pygame.init() screen = pygame.display.set_mode((display_width,display_height)) pygame.display.set_caption("Arcade Learning Environment Player Agent Display") game_surface = pygame.Surface((screen_width,screen_height))
class AtariEmulator(BaseEnvironment): def __init__(self, actor_id, args): self.ale = ALEInterface() self.ale.setInt(b"random_seed", args.random_seed * (actor_id + 1)) # For fuller control on explicit action repeat (>= ALE 0.5.0) self.ale.setFloat(b"repeat_action_probability", 0.0) # Disable frame_skip and color_averaging # See: http://is.gd/tYzVpj self.ale.setInt(b"frame_skip", 1) self.ale.setBool(b"color_averaging", False) full_rom_path = args.rom_path + "/" + args.game + ".bin" self.ale.loadROM(str.encode(full_rom_path)) self.legal_actions = self.ale.getMinimalActionSet() self.screen_width, self.screen_height = self.ale.getScreenDims() self.lives = self.ale.lives() self.random_start = args.random_start self.single_life_episodes = args.single_life_episodes self.call_on_new_frame = args.visualize # Processed historcal frames that will be fed in to the network # (i.e., four 84x84 images) self.observation_pool = ObservationPool(np.zeros((IMG_SIZE_X, IMG_SIZE_Y, NR_IMAGES), dtype=np.uint8)) self.rgb_screen = np.zeros((self.screen_height, self.screen_width, 3), dtype=np.uint8) self.gray_screen = np.zeros((self.screen_height, self.screen_width, 1), dtype=np.uint8) self.frame_pool = FramePool(np.empty((2, self.screen_height, self.screen_width), dtype=np.uint8), self.__process_frame_pool) def get_legal_actions(self): return self.legal_actions def __get_screen_image(self): """ Get the current frame luminance :return: the current frame """ self.ale.getScreenGrayscale(self.gray_screen) if self.call_on_new_frame: self.ale.getScreenRGB(self.rgb_screen) self.on_new_frame(self.rgb_screen) return np.squeeze(self.gray_screen) def on_new_frame(self, frame): pass def __new_game(self): """ Restart game """ self.ale.reset_game() self.lives = self.ale.lives() if self.random_start: wait = random.randint(0, MAX_START_WAIT) for _ in range(wait): self.ale.act(self.legal_actions[0]) def __process_frame_pool(self, frame_pool): """ Preprocess frame pool """ img = np.amax(frame_pool, axis=0) img = imresize(img, (84, 84), interp='nearest') img = img.astype(np.uint8) return img def __action_repeat(self, a, times=ACTION_REPEAT): """ Repeat action and grab screen into frame pool """ reward = 0 for i in range(times - FRAMES_IN_POOL): reward += self.ale.act(self.legal_actions[a]) # Only need to add the last FRAMES_IN_POOL frames to the frame pool for i in range(FRAMES_IN_POOL): reward += self.ale.act(self.legal_actions[a]) self.frame_pool.new_frame(self.__get_screen_image()) return reward def get_initial_state(self): """ Get the initial state """ self.__new_game() for step in range(NR_IMAGES): _ = self.__action_repeat(0) self.observation_pool.new_observation(self.frame_pool.get_processed_frame()) if self.__is_terminal(): raise Exception('This should never happen.') return self.observation_pool.get_pooled_observations() def next(self, action): """ Get the next state, reward, and game over signal """ reward = self.__action_repeat(np.argmax(action)) self.observation_pool.new_observation(self.frame_pool.get_processed_frame()) terminal = self.__is_terminal() self.lives = self.ale.lives() observation = self.observation_pool.get_pooled_observations() return observation, reward, terminal def __is_terminal(self): if self.single_life_episodes: return self.__is_over() or (self.lives > self.ale.lives()) else: return self.__is_over() def __is_over(self): return self.ale.game_over() def get_noop(self): return [1.0, 0.0]
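As with the earlier PAAC-style emulator, next() expects a one-hot action vector; a construction sketch follows, where the args fields and ROM location are assumptions and ObservationPool/FramePool are assumed to come from the same module:

# Sketch: constructing and stepping the emulator above.
from types import SimpleNamespace
import numpy as np

args = SimpleNamespace(random_seed=1, rom_path='roms', game='breakout',
                       random_start=True, single_life_episodes=False, visualize=False)
emu = AtariEmulator(actor_id=0, args=args)
obs = emu.get_initial_state()                      # (84, 84, 4) uint8 observation pool
num_actions = len(emu.get_legal_actions())
one_hot = np.zeros(num_actions)
one_hot[0] = 1.0                                   # no-op; next() applies np.argmax
obs, reward, terminal = emu.next(one_hot)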
def main(): if len(sys.argv) < 2: dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/K-P/ms_pacman.bin' else: dir_rom = sys.argv[1] ale = ALEInterface() # Get & Set the desired settings ale.setInt(b'random_seed', 123) # Set USE_SDL to true to display the screen. ALE must be compiled # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. USE_SDL = False if USE_SDL: # mac OS if sys.platform == 'darwin': pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) # Load the ROM file rom_file = str.encode(dir_rom) print('- Loading ROM - %s' % dir_rom) ale.loadROM(rom_file) print('- Complete loading ROM') (game_surface_width, game_surface_height) = ale.getScreenDims() print("game surface width/height: " + str(game_surface_width) + "/" + str(game_surface_height)) (display_width, display_height) = (800, 640) print('display width/height', (display_width, display_height)) available_action = ale.getLegalActionSet() print(available_action) # init pygame pygame.init() display_screen = pygame.display.set_mode((display_width, display_height)) pygame.display.set_caption( "Arcade Learning Environment Player Agent Display") # init clock clock = pygame.time.Clock() is_exit = False # Play 10 episodes for episode in range(10): if is_exit: break total_reward = 0 while not ale.game_over() and not is_exit: a = getActionFromKeyboard() # Apply an action and get the resulting reward reward = ale.act(a) total_reward += reward # clear screen display_screen.fill((0, 0, 0)) # render game surface renderGameSurface(ale, display_screen, (game_surface_width, game_surface_height)) # display related info displayRelatedInfo(display_screen, a, total_reward) pygame.display.flip() # process pygame event queue for event in pygame.event.get(): if event.type == pygame.QUIT: is_exit = True break if event.type == pygame.KEYDOWN and event.key == pygame.K_q: is_exit = True break # delay to 60fps clock.tick(60.) print('Episode %d ended with score: %d' % (episode, total_reward)) ale.reset_game()
# Set USE_SDL to true to display the screen. ALE must be compiled # with SDL enabled for this to work. On OSX, pygame init is used to # proxy-call SDL_main. USE_SDL = False if USE_SDL: if sys.platform == 'darwin': import pygame pygame.init() ale.setBool('sound', False) # Sound doesn't work on OSX elif sys.platform.startswith('linux'): ale.setBool('sound', True) ale.setBool('display_screen', True) # Load the ROM file rom_file = str.encode(sys.argv[1]) ale.loadROM(rom_file) # Get the list of legal actions legal_actions = ale.getLegalActionSet() # Play 10 episodes for episode in range(10): total_reward = 0 while not ale.game_over(): a = legal_actions[randrange(len(legal_actions))] # Apply an action and get the resulting reward reward = ale.act(a) total_reward += reward print('Episode %d ended with score: %d' % (episode, total_reward)) ale.reset_game()