class Environment:
    def __init__(self, rom_file, args):
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability', args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)
        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s", args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)
        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename', args.record_sound_filename)
        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d", len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d", len(self.actions))
        logger.debug("Actions: " + str(self.actions))

        self.dims = (args.screen_height, args.screen_width)

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        # Note: cv2.resize takes dsize as (width, height); with a square
        # target such as 84x84 the (height, width) tuple here is harmless.
        resized = cv2.resize(screen, self.dims)
        return resized

    def isTerminal(self):
        return self.ale.game_over()
class Emulate:
    def __init__(self, rom_file, display_screen=False, frame_skip=4,
                 screen_height=84, screen_width=84,
                 repeat_action_probability=0, color_averaging=True,
                 random_seed=0, record_screen_path='screen_pics',
                 record_sound_filename=None, minimal_action_set=True):
        self.ale = ALEInterface()
        if display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)
        self.ale.setInt('frame_skip', frame_skip)
        self.ale.setFloat('repeat_action_probability', repeat_action_probability)
        self.ale.setBool('color_averaging', color_averaging)
        if random_seed:
            self.ale.setInt('random_seed', random_seed)
        self.ale.loadROM(rom_file)

        if minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
        else:
            self.actions = self.ale.getLegalActionSet()

        self.dims = (screen_width, screen_height)

    def numActions(self):
        return len(self.actions)

    def getActions(self):
        return self.actions

    def restart(self):
        self.ale.reset_game()

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        return resized

    def getScreenGray(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def getScreenColor(self):
        screen = self.ale.getScreenRGB()
        resized = cv2.resize(screen, self.dims)
        rotated = np.rot90(resized, k=1)
        return rotated

    def isTerminal(self):
        return self.ale.game_over()
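# Usage sketch (not part of the original source): a random agent driving the
# Emulate wrapper above. The ROM filename 'breakout.bin' is an assumption, and
# numpy is assumed imported as np; only methods defined on Emulate are used.
if __name__ == '__main__':
    emu = Emulate('breakout.bin', display_screen=False)
    for episode in range(3):
        emu.restart()
        total_reward = 0
        while not emu.isTerminal():
            action = np.random.randint(emu.numActions())
            total_reward += emu.act(action)
        print('Episode %d reward: %d' % (episode, total_reward))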
def launch():
    logging.basicConfig(level=logging.INFO)
    myArgs = getParameters()
    rom = myArgs.game
    full_rom_path = os.path.join(myArgs.base_rom_path, rom)
    rng = np.random.RandomState()

    ale = ALEInterface()
    ale.setInt('random_seed', 38)
    ale.setBool('display_screen', myArgs.display_screen)
    ale.setInt('frame_skip', myArgs.frame_skip)
    ale.setFloat('repeat_action_probability', myArgs.repeat_action_probability)
    ale.loadROM(full_rom_path)
    valid_actions = ale.getMinimalActionSet()

    # Dead random-agent smoke test, kept commented out:
    # for episode in xrange(10):
    #     total_reward = 0
    #     while not ale.game_over():
    #         from random import randrange
    #         a = valid_actions[randrange(len(valid_actions))]
    #         ale.act(a)
    #         # total_reward += reward
    #     # print 'Episode', episode, 'ended with score:', total_reward
    #     ale.reset_game()

    memory_pool = ReplayMemory(myArgs.memory_size, rng)
    network_model = buildNetwork(myArgs.resized_height, myArgs.resized_width,
                                 myArgs.rmsp_epsilon, myArgs.rmsp_rho,
                                 myArgs.learning_rate, len(valid_actions))
    ddqn = DDQN(network_model, valid_actions,
                myArgs.target_nn_update_frequency, myArgs.discount,
                myArgs.phi_len)
    agent = Agent(myArgs, ddqn, memory_pool, valid_actions, rng)
    train_agent = TrainMyAgent(myArgs, ale, agent, valid_actions, rng)
    train_agent.run()
class env_atari:
    def __init__(self, params):
        self.params = params
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', np.random.randint(0, 500))
        self.ale.setFloat('repeat_action_probability', params['repeat_prob'])
        self.ale.setInt('frame_skip', params['frameskip'])
        self.ale.setBool('color_averaging', True)
        self.ale.loadROM('roms/' + params['rom'] + '.bin')
        self.actions = self.ale.getMinimalActionSet()
        self.action_space = c_action_space(len(self.actions))
        self.screen_width, self.screen_height = self.ale.getScreenDims()

    def reset(self):
        self.ale.reset_game()
        # Randomize the start state with a few no-op actions.
        seed = np.random.randint(0, 7)
        for i in range(seed):
            self.ale.act(0)
        return self.get_image()

    def step(self, action):
        reward = self.ale.act(self.actions[action])
        next_s = self.get_image()
        terminate = self.ale.game_over()
        return next_s, reward, float(terminate), 0

    def get_image(self):
        temp = np.zeros(self.screen_height * self.screen_width * 3,
                        dtype=np.uint8)
        self.ale.getScreenRGB(temp)
        # self.ale.getScreenGrayscale(temp)
        return temp.reshape((self.screen_height, self.screen_width, 3))
def __init__(self, game, seed=None, use_sdl=False, n_last_screens=4,
             frame_skip=4, treat_life_lost_as_terminal=True,
             crop_or_scale='scale', max_start_nullops=30,
             record_screen_dir=None):
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops

    # atari_py is used only to provide rom files. atari_py has its own
    # ale_python_interface, but it is obsolete.
    game_path = atari_py.get_game_path(game)

    ale = ALEInterface()
    if seed is not None:
        assert seed >= 0 and seed < 2 ** 16, \
            "ALE's random seed must be represented by unsigned int"
    else:
        # Use numpy's random state
        seed = np.random.randint(0, 2 ** 16)
    ale.setInt(b'random_seed', seed)
    ale.setFloat(b'repeat_action_probability', 0.0)
    ale.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        ale.setString(b'record_screen_dir',
                      str.encode(str(record_screen_dir)))
    self.frame_skip = frame_skip
    if use_sdl:
        if 'DISPLAY' not in os.environ:
            raise RuntimeError(
                'Please set DISPLAY environment variable for use_sdl=True')
        # SDL settings below are from the ALE python example
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool(b'sound', True)
        ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(str(game_path)))

    assert ale.getFrameNumber() == 0

    self.ale = ale
    self.legal_actions = ale.getMinimalActionSet()
    self.initialize()

    self.action_space = spaces.Discrete(len(self.legal_actions))
    one_screen_observation_space = spaces.Box(low=0, high=255,
                                              shape=(84, 84))
    self.observation_space = spaces.Tuple(
        [one_screen_observation_space] * n_last_screens)
def launch(args, defaults, description):
    """Execute a complete training run."""
    rec_screen = ""
    if "--nn-file" in args:
        temp_params = vars(load_params(args[args.index("--nn-file") + 1]))
        for p in temp_params:
            try:
                vars(defaults)[p.upper()] = temp_params[p]
            except:
                print "warning: parameter", p, "from param file doesn't exist."
        # rec_screen = args[args.index("--nn-file")+1][:-len("last_model.pkl")] + "/frames"
    parameters = process_args(args, defaults, description)
    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    parameters.rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)
    rng = np.random.RandomState(123456)
    folder_name = None if parameters.folder_name == "" else parameters.folder_name

    ale = ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))
    ale.setBool('display_screen', parameters.display_screen)
    ale.setString('record_screen_dir', rec_screen)

    trainer = Q_Learning(model_params=parameters, ale_env=ale,
                         folder_name=folder_name)
    trainer.train()
def main():
    arguments = docopt.docopt(__doc__, version='ALE Demo Version 1.0')
    pygame.init()

    ale = ALEInterface()
    ale.setInt(b'random_seed', 123)
    ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(arguments['<rom_file>']))

    legal_actions = ale.getLegalActionSet()
    width, height = ale.getScreenDims()
    print(width, height)
    frame = ale.getScreenRGB()
    frame = np.array(frame, dtype=float)

    rewards, num_episodes = [], int(arguments['--iters'] or 5)
    for episode in range(num_episodes):
        total_reward = 0
        while not ale.game_over():
            total_reward += ale.act(random.choice(legal_actions))
        print('Episode %d reward %d.' % (episode, total_reward))
        rewards.append(total_reward)
        ale.reset_game()

    average = sum(rewards) / len(rewards)
    print('Average for %d episodes: %d' % (num_episodes, average))
class Emulator(object):
    FRAME_SKIP = 4
    SCREEN_WIDTH = 84
    SCREEN_HEIGHT = 84

    def __init__(self, rom):
        self.ale = ALEInterface()
        self.max_num_frames_per_episode = 100000  # self.ale.getInt('max_num_frames_per_episode')
        self.ale.setInt('frame_skip', self.FRAME_SKIP)
        self.ale.loadROM('roms/' + rom)
        self.actions = self.ale.getMinimalActionSet()

    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))
        return np.reshape(screen, (self.SCREEN_HEIGHT, self.SCREEN_WIDTH))

    def act(self, action):
        return self.ale.act(action)

    def terminal(self):
        return self.ale.game_over()
class Emulator(object):
    def __init__(self, settings):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', settings['frame_skip'])
        self.ale.setInt('random_seed', np.random.RandomState().randint(1000))
        self.ale.setBool('color_averaging', False)
        self.ale.loadROM('roms/' + settings['rom_name'])
        self.actions = self.ale.getMinimalActionSet()
        self.width = settings['screen_width']
        self.height = settings['screen_height']

    def reset(self):
        self.ale.reset_game()

    def image(self):
        screen = self.ale.getScreenGrayscale()
        screen = cv2.resize(screen, (self.height, self.width),
                            interpolation=cv2.INTER_LINEAR)
        return np.reshape(screen, (self.height, self.width))

    def full_image(self):
        screen = self.ale.getScreenRGB()
        return screen

    def act(self, action):
        return self.ale.act(self.actions[action])

    def terminal(self):
        return self.ale.game_over()
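# Usage sketch (an assumption, not from the source): constructing the Emulator
# above with a plain settings dict; the key names mirror the ones the
# constructor reads, and the ROM name is hypothetical.
settings = {
    'frame_skip': 4,
    'rom_name': 'breakout.bin',   # resolved relative to ./roms/
    'screen_width': 84,
    'screen_height': 84,
}
emu = Emulator(settings)
emu.reset()
reward = emu.act(0)               # index into the minimal action set
frame = emu.image()               # 84x84 grayscale observation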
class Atari:
    def __init__(self, rom_dir):
        self.ale = ALEInterface()

        # Set settings
        self.ale.setInt("random_seed", 123)
        self.frame_skip = 4
        self.ale.setInt("frame_skip", self.frame_skip)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", True)
        self.record_sound_for_user = True
        self.ale.setBool("record_sound_for_user", self.record_sound_for_user)
        # NOTE recording audio to file still works. But if both file recording and
        # record_sound_for_user are enabled, then only the latter is done
        # self.ale.setString("record_sound_filename", "")

        # Get settings
        self.ale.loadROM(rom_dir)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getLegalActionSet()

        # Action count across all episodes
        self.action_count = 0
        self.start_time = time.time()
        self.reset()

    def reset(self):
        self.ale.reset_game()

    def take_action(self):
        action = self.legal_actions[np.random.randint(self.legal_actions.size)]
        self.ale.act(action)
        self.action_count += 1

    def print_fps(self, delta_t=500):
        if self.action_count % delta_t == 0:
            print '[atari.py] Frames/second: %f' % (
                self.action_count / (time.time() - self.start_time))
            print '[atari.py] Overall game frame count:', self.action_count * self.frame_skip
            print '---------'

    def get_image_and_audio(self):
        np_data_image = np.zeros(self.screen_width * self.screen_height * 3,
                                 dtype=np.uint8)
        if self.record_sound_for_user:
            np_data_audio = np.zeros(self.ale.getAudioSize(), dtype=np.uint8)
            self.ale.getScreenRGBAndAudio(np_data_image, np_data_audio)
            # Also supports independent audio queries if user desires:
            # self.ale.getAudio(np_data_audio)
        else:
            np_data_audio = 0
            self.ale.getScreenRGB(np_data_image)
        return (np.reshape(np_data_image,
                           (self.screen_height, self.screen_width, 3)),
                np.asarray(np_data_audio))
class ALE(object):
    def __init__(self, init_seed, init_rand):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', init_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.loadROM('./breakout.bin')
        self.action_size = 4
        self.screen = None
        self.reward = 0
        self.terminal = True
        self.init_rand = init_rand

    def setSetting(self, action_repeat, screen_type):
        self.action_repeat = action_repeat
        self.screen_type = screen_type

    def _step(self, action):
        self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()
        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()

    def state(self):
        return self.reward, self.screen, self.terminal

    def act(self, action):
        cumulated_reward = 0
        for _ in range(self.action_repeat):
            self._step(action)
            cumulated_reward += self.reward
            if self.terminal:
                break
        self.reward = cumulated_reward
        return self.state()

    def new_game(self):
        if self.ale.game_over():
            self.ale.reset_game()
        if self.screen_type == 0:
            self.screen = self.ale.getScreenRGB()
        elif self.screen_type == 1:
            self.screen = self.ale.getScreenGrayscale()
        else:
            sys.stderr.write('screen_type error!')
            exit()
        for _ in range(self.init_rand):
            self._step(0)
        return self.screen
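# Usage sketch (assumed calling convention, not from the source): the ALE
# wrapper above needs setSetting() before the first new_game()/act(), since
# _step() reads self.screen_type and act() reads self.action_repeat.
env = ALE(init_seed=123, init_rand=10)
env.setSetting(action_repeat=4, screen_type=1)  # 1 = grayscale frames
screen = env.new_game()
reward, screen, terminal = env.act(1)           # raw ALE action id (e.g. FIRE)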
class emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        # numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8)
        # self.ale.getScreenRGB(numpy_surface)
        # image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3))
        image = self.ale.getScreenRGB()
        image = np.reshape(image, (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image(), 0, False

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()

    def train(self):
        for episode in range(10):
            total_reward = 0
            frame_number = 0
            while not self.ale.game_over():
                a = self.legal_actions[random.randrange(len(self.legal_actions))]
                # Apply an action and get the resulting reward
                reward = self.ale.act(a)
                total_reward += reward
                screen = self.ale.getScreenRGB()
                screen = np.array(screen).reshape([self.screen_height,
                                                   self.screen_width, -1])
                frame_number = self.ale.getEpisodeFrameNumber()
                cv2.imshow("screen", screen / 255.0)
                cv2.waitKey(0)
                self.ale.saveScreenPNG("test_" + str(frame_number) + ".png")
            print('Episode %d ended with score: %d' % (episode, total_reward))
            print('Frame number is : ', frame_number)
            self.ale.reset_game()
class ALEGame(object):
    """Class linked to the Arcade Learning Environment"""

    def __init__(self, rand_seed, game_name):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', rand_seed)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', SKIPED_FRAMES)
        self.ale.loadROM(game_name.encode('ascii'))
        self.real_actions = self.ale.getMinimalActionSet()
        self.screen = np.empty((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
        self.reset()

    def preprocess_image(self, is_to_reshape=False):
        """Get image from the game and reshape it"""
        self.ale.getScreenGrayscale(self.screen)
        reshaped_screen = np.reshape(self.screen, (IMAGE_HEIGHT, IMAGE_WIDTH))
        x_t = skimage.transform.resize(reshaped_screen, (110, 84),
                                       preserve_range=True)
        x_t = x_t[18:102, :]
        if is_to_reshape:
            x_t = np.reshape(x_t, (84, 84, 1))
        x_t = x_t.astype(np.float32)
        x_t *= (1.0 / 255.0)
        return x_t

    def reset(self):
        """Resets the game and creates the first state"""
        self.ale.reset_game()
        self.act(0)
        x_t = self.preprocess_image()
        self.s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    def act(self, action):
        self.reward = self.ale.act(action)
        self.is_game_over = self.ale.game_over()

    def process_to_next_image(self, action):
        """Acts and gets the new state"""
        real_action = self.real_actions[action]
        self.act(real_action)
        x_t1 = self.preprocess_image(True)
        self.s_t1 = np.append(self.s_t[:, :, 1:], x_t1, axis=2)

    def update(self):
        self.s_t = self.s_t1
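# Illustration of the crop used by preprocess_image above, on dummy data (a
# sketch, not from the source): 210x160 grayscale -> resize to 110x84 -> keep
# rows 18:102, which yields an 84x84 frame scaled to [0, 1].
import numpy as np
import skimage.transform

dummy = np.random.randint(0, 256, (210, 160)).astype(np.uint8)
resized = skimage.transform.resize(dummy, (110, 84), preserve_range=True)
cropped = resized[18:102, :].astype(np.float32) / 255.0
assert cropped.shape == (84, 84)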
class AtariEnvironment(Environment):
    """Atari Environment Object"""

    def __init__(self, rom_path, action_repeat=4, death_end=True,
                 width_resize=84, height_resize=84, resize_mod='scale'):
        super(AtariEnvironment, self).__init__()
        self.action_repeat = action_repeat
        self.death_end = death_end
        self.width_resize = width_resize
        self.height_resize = height_resize
        self.resize_mod = resize_mod
        self.display = False

        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        # ALE settings must be applied before loadROM to take effect.
        self.ale.setInt('random_seed', np.random.randint(1000))
        self.ale.setBool('display_screen', self.display)
        self.ale.loadROM(rom_path)
        self.action_set = self.ale.getMinimalActionSet()
        self.num_actions = len(self.action_set)
        self.start_lives = self.ale.lives()
        width, height = self.ale.getScreenDims()
        self.currentScreen = np.empty((height, width), dtype=np.uint8)
        self.reset()

    def reset(self):
        self.ale.reset_game()
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = False

    def step(self, action, repeat=None):
        repeat = self.action_repeat if repeat is None else repeat
        reward = 0
        for _ in range(repeat):
            reward += self.ale.act(self.action_set[action])
        self.ale.getScreenGrayscale(self.currentScreen)
        self.terminal = (self.death_end and
                         self.ale.lives() < self.start_lives) or \
            self.ale.game_over()
        return reward

    def get_frame(self):
        if self.resize_mod == 'scale':
            return imresize(self.currentScreen,
                            (self.width_resize, self.height_resize),
                            interp='bilinear')
        elif self.resize_mod == 'crop':
            height, width = self.currentScreen.shape
            res = (height - width) // 2
            crop = self.currentScreen[res:(res + width), :]
            return imresize(crop, (self.width_resize, self.height_resize),
                            interp='bilinear')
class Emulator:
    def __init__(self):
        self.ale = ALEInterface()
        # turn off the sound
        self.ale.setBool('sound', False)
        self.ale.setBool('display_screen', EMULATOR_DISPLAY)
        self.ale.setInt('frame_skip', FRAME_SKIP)
        self.ale.setFloat('repeat_action_probability', REPEAT_ACTION_PROBABILITY)
        self.ale.setBool('color_averaging', COLOR_AVERAGING)
        self.ale.setInt('random_seed', RANDOM_SEED)
        if RECORD_SCENE_PATH:
            self.ale.setString('record_screen_dir', RECORD_SCENE_PATH)
        self.ale.loadROM(ROM_PATH)
        self.actions = self.ale.getMinimalActionSet()
        logger.info("Actions: " + str(self.actions))
        self.dims = DIMS
        # self.start_lives = self.ale.lives()

    def getActions(self):
        return self.actions

    def numActions(self):
        return len(self.actions)

    def restart(self):
        self.ale.reset_game()  # can be omitted

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def getScreen(self):
        # why grayscale ?
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, self.dims)
        # normalize
        # resized /= COLOR_SCALE
        return resized

    def isTerminal(self):
        # while training deepmind only ends when agent dies
        # terminate = DEATH_END and TRAIN and (self.ale.lives() < self.start_lives)
        return self.ale.game_over()
def init():
    pygame.init()
    rom_path = '/Users/maciej/Development/atari-roms'
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.setInt('frame_skip', 1)  # frame_skip is an int option, not a bool
    # Settings must precede loadROM to take effect.
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM(rom_path + '/space_invaders.bin')
    return ale
def _init_ale(rand_seed, rom_file):
    assert os.path.exists(rom_file), '%s does not exist.' % rom_file
    ale = ALEInterface()
    ale.setInt('random_seed', rand_seed)
    ale.setBool('showinfo', False)
    ale.setInt('frame_skip', 1)
    ale.setFloat('repeat_action_probability', 0.0)
    ale.setBool('color_averaging', False)
    ale.loadROM(rom_file)
    return ale
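# Usage sketch (ROM path is an assumption): _init_ale yields a deterministic,
# frame-accurate ALE (no frame skip, no sticky actions, no color averaging),
# which suits frame-by-frame inspection.
ale = _init_ale(rand_seed=0, rom_file='roms/pong.bin')
frame = ale.getScreenRGB()  # exactly one emulator frame advances per act()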
class Environment:
    def __init__(self, render=False):
        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 0)
        self.ale.setFloat(b'repeat_action_probability', 0.0)
        self.ale.setBool(b'color_averaging', True)
        self.ale.setInt(b'frame_skip', 4)
        self.ale.setBool(b'display_screen', render)
        self.ale.loadROM(ENV.encode('ascii'))
        self._screen = np.empty((210, 160, 1), dtype=np.uint8)
        self._no_op_max = 7

    def set_render(self, render):
        if not render:
            self.ale.setBool(b'display_screen', render)

    def reset(self):
        self.ale.reset_game()

        # randomize initial state
        if self._no_op_max > 0:
            no_op = np.random.randint(0, self._no_op_max + 1)
            for _ in range(no_op):
                self.ale.act(0)

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = screen[18:102, :]
        screen = screen.astype(np.float32)
        screen /= 255.0

        self.frame_buffer = np.stack((screen, screen, screen, screen), axis=2)
        return self.frame_buffer

    def act(self, action):
        # The agent's action index is offset into the raw ALE action ids.
        reward = self.ale.act(4 + action)
        done = self.ale.game_over()

        self.ale.getScreenGrayscale(self._screen)
        screen = np.reshape(self._screen, (210, 160))
        screen = cv2.resize(screen, (84, 110))
        screen = np.reshape(screen[18:102, :], (84, 84, 1))
        screen = screen.astype(np.float32)
        screen *= (1 / 255.0)

        self.frame_buffer = np.append(self.frame_buffer[:, :, 1:], screen, axis=2)
        return self.frame_buffer, reward, done, ""

    def close(self):
        self.ale.setBool(b'display_screen', False)
class Environment:
    def __init__(self, show_screen, history_length):
        self.ale = ALEInterface()
        self.ale.setInt('frame_skip', 4)
        self.history = None
        self.history_length = history_length
        if show_screen:
            self.display_screen()
        self.load_game()
        (screen_width, screen_height) = self.ale.getScreenDims()
        self.screen_data = np.empty((screen_height, screen_width, 1),
                                    dtype=np.uint8)  # 210x160 screen data
        self.dims = (84, 84)  # input size for neural network
        # ALE action ids: 3 = right, 0 = noop, 1 = fire, 4 = left
        self.actions = [3, 0, 1, 4]

    def display_screen(self):
        self.ale.setBool("display_screen", True)

    def turn_on_sound(self):
        self.ale.setBool("sound", True)

    def restart(self):
        """reset game"""
        self.ale.reset_game()

    def act(self, action):
        """:returns reward of an action"""
        return self.ale.act(self.actions[action])

    def __get_screen(self):
        """:returns grayscale thresholded resized screen image"""
        self.ale.getScreenGrayscale(self.screen_data)
        resized = cv2.resize(self.screen_data, self.dims)
        return resized

    def get_state(self):
        binary_screen = self.__get_screen()
        if self.history is None:
            self.history = deque(maxlen=self.history_length)
            for _ in range(self.history_length - 1):
                self.history.append(binary_screen)
        self.history.append(binary_screen)
        result = np.stack(self.history, axis=0)
        return result

    def isTerminal(self):
        """checks if game is over"""
        return self.ale.game_over()

    def load_game(self):
        """load game from file"""
        self.ale.loadROM("Breakout.bin")
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        self.init_frame_number = 0
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        # Instead of resetting the game, we load a checkpoint and start from
        # there. `checkpoints` is expected to exist at module level as an
        # array of 100 encoded ALE states.
        # self.ale.reset_game()
        self.ale.restoreState(self.ale.decodeState(
            checkpoints[random.randint(0, 99)].astype('uint8')))
        self.init_frame_number = self.ale.getFrameNumber()
        # self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint, (1009, 1))))
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png', nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        return self.ale.getFrameNumber() - self.init_frame_number
def init(game, display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    # Settings must precede loadROM to take effect.
    ale.setFloat("repeat_action_probability", 0)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM('{game}.bin'.format(game=game))
    return ale
def init(display_screen=False):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    ale.setInt('frame_skip', 1)  # frame_skip is an int option, not a bool
    # Settings must precede loadROM to take effect.
    ale.setFloat("repeat_action_probability", 0)
    ale.loadROM(rom_path + '/space_invaders.bin')
    return ale
class emulator(object):
    def __init__(self, rom_name, vis, frameskip=1, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", frameskip)
        romfile = str(ROM_PATH) + str(rom_name)
        if not os.path.exists(romfile):
            print('No ROM file found at "' + romfile +
                  '".\nAdjust ROM_PATH or double-check the file exists.')
        self.ale.loadROM(romfile)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        self.windowname = windowname
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname,
                            flags=cv2.WINDOW_AUTOSIZE)  # permit manual resizing

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png', nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
            if sys.platform == 'darwin':
                # if we don't do this, can hang on OS X
                cv2.waitKey(2)
        return nextstate, reward, self.ale.game_over()
class ALEInterfaceWrapper:
    def __init__(self, repeat_action_probability, rng):
        self.internal_action_repeat_prob = repeat_action_probability
        self.prev_action = 0
        self.rng_source = rng
        self.rng = deepcopy(self.rng_source)
        self.ale = ALEInterface()
        # This sets the probability from the default 0.25 to 0, ensuring
        # deterministic actions inside ALE itself; sticky actions are instead
        # reproduced by this wrapper's own seeded RNG in act().
        self.ale.setFloat('repeat_action_probability', 0.0)

    def getScreenRGB(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def getMinimalActionSet(self):
        return self.ale.getMinimalActionSet()

    def setInt(self, key, value):
        self.ale.setInt(key, value)

    def setFloat(self, key, value):
        self.ale.setFloat(key, value)

    def loadROM(self, rom):
        self.ale.loadROM(rom)

    def reset_action_seed(self):
        self.rng = deepcopy(self.rng_source)

    def set_action_seed(self, seed):
        self.rng = np.random.RandomState(seed)

    def act(self, action):
        actual_action = action
        if self.internal_action_repeat_prob > 0:
            if self.rng.uniform(0, 1) < self.internal_action_repeat_prob:
                actual_action = self.prev_action
        self.prev_action = actual_action
        return self.ale.act(actual_action)
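# Sketch of the reproducibility property above (the ROM path is an
# assumption): because sticky actions live in the wrapper's own seeded RNG
# rather than in ALE, resetting that RNG replays the same repeat pattern.
rng = np.random.RandomState(42)
wrapper = ALEInterfaceWrapper(repeat_action_probability=0.25, rng=rng)
wrapper.setInt('random_seed', 123)
wrapper.loadROM('./roms/breakout.bin')
actions = wrapper.getMinimalActionSet()
first = [wrapper.act(actions[1]) for _ in range(10)]
wrapper.reset_game()
wrapper.reset_action_seed()   # replay the exact same sticky-action draws
second = [wrapper.act(actions[1]) for _ in range(10)]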
def init(display_screen=False, record_dir=None):
    if display_screen:
        import pygame
        pygame.init()
    rom_path = '.'
    ale = ALEInterface()
    ale.setBool('display_screen', display_screen)
    ale.setInt('random_seed', 123)
    # Settings must precede loadROM to take effect.
    ale.setFloat("repeat_action_probability", 0)
    if record_dir is not None:
        ale.setString("record_screen_dir", record_dir)
    ale.loadROM(rom_path + '/space_invaders.bin')
    return ale
class AleInterface(object):
    def __init__(self, game, args):
        self.game = game
        self.ale = ALEInterface()

        # if sys.platform == 'darwin':
        #     self.ale.setBool('sound', False)  # Sound doesn't work on OSX
        # elif sys.platform.startswith('linux'):
        #     self.ale.setBool('sound', True)
        #     self.ale.setBool('display_screen', True)
        # self.ale.setBool('display_screen', args.display_screen)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)
        self.ale.setInt('random_seed', args.random_seed)
        # if rand_seed is not None:
        #     self.ale.setInt('random_seed', rand_seed)

        rom_file = "./roms/%s.bin" % game
        if not os.path.exists(rom_file):
            print "ROM file not found:", rom_file
            sys.exit(-1)
        self.ale.loadROM(rom_file)
        self.actions = self.ale.getMinimalActionSet()

    def get_actions_num(self):
        return len(self.actions)

    def act(self, action):
        reward = self.ale.act(self.actions[action])
        return reward

    def get_screen_gray(self):
        return self.ale.getScreenGrayscale()

    def get_screen_rgb(self):
        return self.ale.getScreenRGB()

    def game_over(self):
        return self.ale.game_over()

    def reset_game(self):
        return self.ale.reset_game()
class Atari:
    # Constructor
    def __init__(self, rom_name):
        # Step 1: load the game and set its parameters
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            b"max_num_frames_per_episode")
        self.ale.setInt(b"random_seed", 123)
        self.ale.setInt(b"frame_skip", 4)
        self.ale.loadROM(('game/' + rom_name).encode())
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i

        # Step 2: create the display window
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    # Returns an image from the emulator, already in the format expected by
    # our training algorithm.
    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    # Simply restarts the game.
    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    # Returns the observation of the state after a given action is taken.
    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        return nextstate, reward, self.ale.game_over()
class Emulator:
    def __init__(self, rom_name, vis):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        # debug print added by ben, may 2016:
        # print image
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png', nextstate)
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
def train():
    ale = ALEInterface()
    ale.setInt('random_seed', 123)
    ale.loadROM('roms/breakout.bin')
    legal_actions = ale.getLegalActionSet()

    total_reward = 0
    while not ale.game_over():
        a = legal_actions[randrange(len(legal_actions))]
        reward = ale.act(a)
        screen = ale.getScreenRGB()
        print(screen)
        plt.imshow(screen)
        plt.show()
        total_reward += reward
        print(total_reward)
    print('Episode end!')
def __init__(self, rom_path, n_last_screens=4, frame_skip=4,
             treat_life_lost_as_terminal=True, crop_or_scale='scale',
             max_start_nullops=30, record_screen_dir=None, render=False,
             max_episode_length=None, max_time=None):
    self.frame_skip = frame_skip
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops
    self.max_episode_length = max_episode_length
    self.max_time = max_time

    ale = ALEInterface()

    # Use numpy's random state
    seed = np.random.randint(0, 2 ** 16)
    ale.setInt(b'random_seed', seed)
    ale.setFloat(b'repeat_action_probability', 0.0)
    ale.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
    if render:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool(b'sound', True)
        ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(rom_path))

    self.ale = ale
    self.__exceed_max = False
    self.legal_actions = ale.getMinimalActionSet()
    self.reset()
class Atari:
    def __init__(self, rom_name):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('game/' + rom_name)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print len(self.legal_actions)
        self.windowname = rom_name
        cv2.startWindowThread()
        cv2.namedWindow(rom_name)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        # print "reward %d" % reward
        return nextstate, reward, self.ale.game_over()
class emulator:
    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        # When it starts
        self.ale.setInt("random_seed", 123)
        # Skipping 4 frames
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        print('Actions : %s' % self.legal_actions)
        self.action_map = dict()
        self.windowname = windowname
        # Raw atari frames, 210 * 160 pixel images
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        # Visualize
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        # Need to specify data type as uint8
        numpy_surface = np.zeros([self.screen_width * self.screen_height * 3],
                                 dtype=np.uint8)
        # get RGB values
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           [self.screen_height, self.screen_width, 3])
        return image

    def new_game(self):
        # Reset game and return the reset image value
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_index):
        # Get R(s,a)
        reward = self.ale.act(action_index)
        # Get image pixel value after taking an action
        next_state = self.get_image()
        if self.vis:
            cv2.imshow(self.windowname, next_state)
        # self.ale.game_over() returns True when game is over
        return next_state, reward, self.ale.game_over()
def main():
    if len(sys.argv) < 2:
        dir_rom = '/Users/lguan/Documents/Study/Research/Atari-2600-Roms/T-Z/Tennis.bin'
    else:
        dir_rom = sys.argv[1]

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = True
    if USE_SDL:
        # mac OS
        if sys.platform == 'darwin':
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    rom_file = str.encode(dir_rom)
    print('- Loading ROM - %s' % dir_rom)
    ale.loadROM(rom_file)
    print('- Complete loading ROM')

    legal_actions = ale.getMinimalActionSet()

    # Play 10 episodes
    for episode in range(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[np.random.randint(legal_actions.size)]
            # Apply an action and get the resulting reward
            reward = ale.act(a)
            total_reward += reward
        print('Episode %d ended with score: %d' % (episode, total_reward))
        ale.reset_game()
def __init__(self, rom_filename, seed=None, use_sdl=False, n_last_screens=4,
             frame_skip=4, treat_life_lost_as_terminal=True,
             crop_or_scale='scale', max_start_nullops=30,
             record_screen_dir=None):
    self.n_last_screens = n_last_screens
    self.treat_life_lost_as_terminal = treat_life_lost_as_terminal
    self.crop_or_scale = crop_or_scale
    self.max_start_nullops = max_start_nullops

    ale = ALEInterface()
    if seed is not None:
        assert seed >= 0 and seed < 2 ** 16, \
            "ALE's random seed must be represented by unsigned int"
    else:
        # Use numpy's random state
        seed = np.random.randint(0, 2 ** 16)
    ale.setInt(b'random_seed', seed)
    ale.setFloat(b'repeat_action_probability', 0.0)
    ale.setBool(b'color_averaging', False)
    if record_screen_dir is not None:
        ale.setString(b'record_screen_dir', str.encode(record_screen_dir))
    self.frame_skip = frame_skip
    if use_sdl:
        if 'DISPLAY' not in os.environ:
            raise RuntimeError(
                'Please set DISPLAY environment variable for use_sdl=True')
        # SDL settings below are from the ALE python example
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool(b'sound', True)
        ale.setBool(b'display_screen', True)
    ale.loadROM(str.encode(rom_filename))

    assert ale.getFrameNumber() == 0

    self.ale = ale
    self.legal_actions = ale.getMinimalActionSet()
    self.initialize()
class Atari: def __init__(self,rom_name): self.ale = ALEInterface() self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode") self.ale.setInt("random_seed",123) self.ale.setInt("frame_skip",4) self.ale.loadROM(rom_name) self.screen_width,self.screen_height = self.ale.getScreenDims() self.legal_actions = self.ale.getMinimalActionSet() self.action_map = dict() for i in range(len(self.legal_actions)): self.action_map[self.legal_actions[i]] = i print len(self.legal_actions) self.windowname = rom_name cv2.startWindowThread() cv2.namedWindow(rom_name) def preprocess(self, image): image = cv2.cvtColor(cv2.resize(image, (84, 110)), cv2.COLOR_BGR2GRAY) image = image[26:110,:] ret, image = cv2.threshold(image,1,255,cv2.THRESH_BINARY) return np.reshape(image,(84,84, 1)) def get_image(self): numpy_surface = np.zeros(self.screen_height*self.screen_width*3, dtype=np.uint8) self.ale.getScreenRGB(numpy_surface) image = np.reshape(numpy_surface, (self.screen_height, self.screen_width, 3)) return self.preprocess(image) def newGame(self): self.ale.reset_game() return self.get_image() def next(self, action): reward = self.ale.act(self.legal_actions[np.argmax(action)]) nextstate = self.get_image() cv2.imshow(self.windowname,nextstate) if self.ale.game_over(): self.newGame() #print "reward %d" % reward return nextstate, reward, self.ale.game_over()
class game(object):
    def __init__(self, display):
        self.ale = ALEInterface()

        # Get & Set the desired settings
        self.ale.setInt('random_seed', 123)

        # Set USE_SDL to true to display the screen. ALE must be compiled
        # with SDL enabled for this to work. On OSX, pygame init is used to
        # proxy-call SDL_main.
        USE_SDL = display
        if USE_SDL:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        # Load the ROM file
        self.ale.loadROM("ms_pacman.bin")

    def act(self, action):
        return self.ale.act(action)

    def getState(self):
        return get_feature(self.ale.getScreen())

    def getScreen(self):
        return self.ale.getScreen()

    def reset_game(self):
        self.ale.reset_game()

    def lives(self):
        return self.ale.lives()

    def game_over(self):
        return self.ale.game_over()
class Breakout(object):
    steps_between_actions = 4

    def __init__(self):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', 123)
        self.ale.setBool("display_screen", False)
        self.ale.setBool("sound", False)
        self.ale.loadROM("%s/breakout.bin" % rom_directory)
        self.current_state = [
            self.ale.getScreenRGB(), self.ale.getScreenRGB()
        ]

    def start_episode(self):
        self.ale.reset_game()

    def take_action(self, action):
        assert not self.terminated

        def step():
            reward = self.ale.act(action)
            self.roll_state()
            return reward

        reward = sum(step() for _ in xrange(self.steps_between_actions))
        return (reward, self.current_state)

    def roll_state(self):
        assert len(self.current_state) == 2
        self.current_state = [self.current_state[1], self.ale.getScreenRGB()]
        assert len(self.current_state) == 2

    @property
    def actions(self):
        return self.ale.getMinimalActionSet()

    @property
    def terminated(self):
        # Treat any lost life as terminal (Breakout starts with 5 lives).
        return self.ale.game_over() or self.ale.lives() < 5
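# Usage sketch for the Breakout wrapper above (random policy; assumes `random`
# is imported). take_action() already repeats each action for
# steps_between_actions emulator steps and returns (reward, state_pair).
game = Breakout()
game.start_episode()
while not game.terminated:
    action = random.choice(game.actions)
    reward, state = game.take_action(action)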
def initializeALE(romFile, rec_dir):
    ale = ALEInterface()

    max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)

    # Set record flags
    ale.setString(b'record_screen_dir', rec_dir + '/')
    ale.setString("record_sound_filename", rec_dir + "/sound.wav")
    # We set fragsize to 64 to ensure proper sound sync
    ale.setInt("fragsize", 64)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()
    return ale, actionSet
def initializeALE(romFile):
    ale = ALEInterface()

    ale.setInt("max_num_frames_per_episode", 18000)
    ale.setInt("random_seed", 123)
    ale.setFloat("repeat_action_probability", 0.0)
    ale.setInt("frame_skip", 5)

    random_seed = ale.getInt("random_seed")
    print("random_seed: " + str(random_seed))

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    ale.loadROM(romFile)
    actionSet = ale.getMinimalActionSet()
    return ale, actionSet
def init_ale(rom, display):
    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt(b'random_seed', 123)

    # Set USE_SDL to true to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = display
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', display)

    # Load the ROM file
    ale.loadROM(rom)
    return ale
class emulator:
    def __init__(self, rom_name, vis):
        if vis:
            import cv2
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow("preview")

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action_indx):
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png', nextstate)
        if self.vis:
            cv2.imshow('preview', nextstate)
        return nextstate, reward, self.ale.game_over()
class Atari:
    def __init__(self, rom_name):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt("max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('./' + rom_name)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        self.legal_actions = self.ale.getMinimalActionSet()
        self.action_map = dict()
        for i in range(len(self.legal_actions)):
            self.action_map[self.legal_actions[i]] = i
        # print len(self.legal_actions)
        self.windowname = rom_name
        # cv2.startWindowThread()
        # cv2.namedWindow(rom_name)

    def get_image(self):
        numpy_surface = np.zeros(self.screen_height * self.screen_width * 3,
                                 dtype=np.uint8)
        self.ale.getScreenRGB(numpy_surface)
        image = np.reshape(numpy_surface,
                           (self.screen_height, self.screen_width, 3))
        return image

    def newGame(self):
        self.ale.reset_game()
        return self.get_image()

    def next(self, action):
        reward = self.ale.act(self.legal_actions[np.argmax(action)])
        nextstate = self.get_image()
        # cv2.imshow(self.windowname, nextstate)
        if self.ale.game_over():
            self.newGame()
        # print "reward %d" % reward
        return nextstate, reward, self.ale.game_over()
def ale_load_from_rom(rom_path, display_screen):
    rng = get_numpy_rng()
    try:
        from ale_python_interface import ALEInterface
    except ImportError as e:
        raise ImportError('Unable to import the python package of Arcade '
                          'Learning Environment. ALE may not have been '
                          'installed correctly. Refer to '
                          '`https://github.com/mgbellemare/Arcade-Learning-Environment` '
                          'for some installation guidance')

    ale = ALEInterface()
    ale.setInt(b'random_seed', rng.randint(1000))
    if display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool(b'sound', False)  # Sound doesn't work on OSX
        ale.setBool(b'display_screen', True)
    else:
        ale.setBool(b'display_screen', False)
    ale.setFloat(b'repeat_action_probability', 0)
    ale.loadROM(str.encode(rom_path))
    return ale
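# Usage sketch (the ROM path is an assumption): ale_load_from_rom returns a
# fully configured ALEInterface, so the caller only pulls the action set and
# steps the emulator.
ale = ale_load_from_rom('roms/breakout.bin', display_screen=False)
actions = ale.getMinimalActionSet()
total = 0
while not ale.game_over():
    total += ale.act(actions[0])
ale.reset_game()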
def get_random_baseline(gamepath):
    ale = ALEInterface()
    ale.setInt('random_seed', 42)
    recordings_dir = './recordings/breakout/'
    USE_SDL = True
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
            # ale.setString("record_screen_dir", recordings_dir)
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(gamepath)

    # Get the list of legal actions
    legal_actions = ale.getLegalActionSet()

    # Play 10 episodes
    rewards = []
    for episode in xrange(10):
        total_reward = 0
        while not ale.game_over():
            a = legal_actions[randrange(len(legal_actions))]
            reward = ale.act(a)
            total_reward += reward
        rewards.append(total_reward)
        # print 'Episode', episode, 'ended with score:', total_reward
        ale.reset_game()
    avg_reward = sum(rewards) / float(len(rewards))
    return avg_reward
class GameState(object):
    def __init__(self, rand_seed, display=False):
        self.ale = ALEInterface()
        self.ale.setInt('random_seed', rand_seed)

        if display:
            self._setup_display()

        self.ale.loadROM(ROM)

        # height=210, width=160
        self.screen = np.empty((210, 160, 1), dtype=np.uint8)

        no_action = 0
        self.reward = self.ale.act(no_action)
        self.terminal = self.ale.game_over()

        # screen shape is (210, 160, 1)
        self.ale.getScreenGrayscale(self.screen)

        # reshape to (210, 160)
        reshaped_screen = np.reshape(self.screen, (210, 160))

        # resize to height=110, width=84
        resized_screen = cv2.resize(reshaped_screen, (84, 110))
        x_t = resized_screen[18:102, :]
        x_t = x_t.astype(np.float32)
        x_t *= (1.0 / 255.0)

        self.s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

        # collect only the actions that are actually used
        self.real_actions = self.ale.getMinimalActionSet()

    def _setup_display(self):
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            self.ale.setBool('sound', False)
        elif sys.platform.startswith('linux'):
            self.ale.setBool('sound', True)
        self.ale.setBool('display_screen', True)

    def process(self, action):
        # map the agent's index to the actually used action
        # (out of the 18 legal ones)
        real_action = self.real_actions[action]

        self.reward = self.ale.act(real_action)
        # self.reward = self.ale.act(action)
        self.terminal = self.ale.game_over()

        # screen shape is (210, 160, 1)
        self.ale.getScreenGrayscale(self.screen)

        # reshape to (210, 160); height=210, width=160
        reshaped_screen = np.reshape(self.screen, (210, 160))

        # resize to height=110, width=84
        resized_screen = cv2.resize(reshaped_screen, (84, 110))
        x_t1 = resized_screen[18:102, :]
        x_t1 = np.reshape(x_t1, (84, 84, 1))
        x_t1 = x_t1.astype(np.float32)
        x_t1 *= (1.0 / 255.0)

        self.s_t1 = np.append(x_t1, self.s_t[:, :, 0:3], axis=2)

        if self.terminal:
            self.ale.reset_game()

    def update(self):
        self.s_t = self.s_t1
    16,  # 11010 fire down/right
    14,  # 11011 fire up/down/right (invalid)
    11,  # 11100 fire left/right (invalid)
    14,  # 11101 fire left/right/up (invalid)
    16,  # 11110 fire left/right/down (invalid)
    14   # 11111 fire up/down/left/right (invalid)
)

if len(sys.argv) < 3:
    print("Usage ./ale_logger.py <ROM_FILE_NAME> <LOG_OUTPUT_FILE>")
    sys.exit()

ale = ALEInterface()
max_frames_per_episode = ale.getInt("max_num_frames_per_episode")
ale.setInt("random_seed", 123)
random_seed = ale.getInt("random_seed")
print("random_seed: " + str(random_seed))
ale.loadROM(sys.argv[1])
legal_actions = ale.getMinimalActionSet()
print(legal_actions)

(screen_width, screen_height) = ale.getScreenDims()
print("width/height: " + str(screen_width) + "/" + str(screen_height))
(display_width, display_height) = (1024, 420)

# init pygame
pygame.init()
session.run(targetNet.b3.assign(trainNet.b3))
session.run(targetNet.b4.assign(trainNet.b4))

# def printDict(dict):
#     print 'Options:\n'
#     for i in dict.keys():
#         print "  ", i, "=", dict[i]
#     print ''

# initialization
np.random.seed(SEED)
ale = ALEInterface()
if SEED is None:
    ale.setInt('random_seed', 0)
else:
    ale.setInt('random_seed', SEED)
ale.setInt("frame_skip", frameSkip)
ale.setBool('color_averaging', True)
ale.setBool('sound', False)
ale.setBool('display_screen', False)
ale.setFloat("repeat_action_probability", 0.0)
ale.loadROM(romPath)
legal_actions = ale.getMinimalActionSet()
n_actions = len(legal_actions)
opt.n_actions = n_actions

explorationRateDelta = (initialExplorationRate - finalExplorationRate) \
    / (finalExplorationFrame - startLearningFrame)
explorationRate = initialExplorationRate \
    + startLearningFrame * explorationRateDelta
# Reconstructed imports for this module. `get_next_name`, `write_info` and
# `bytes2human` are assumed to be project-local logging helpers.
import gc
import os
import pipes
import subprocess
import sys
import time

import cv2
import numpy as np
import psutil
from ale_python_interface import ALEInterface


class Environment:
    """ALE-based environment that runs training epochs, periodic evaluation,
    logging and screen recording for a DQN-style agent."""

    BUFFER_LEN = 2
    EPISODE_FRAMES = 18000
    EPOCH_COUNT = 200
    EPOCH_STEPS = 250000
    EVAL_EPS = 0.001
    FRAMES_SKIP = 4
    FRAME_HEIGHT = 84
    FRAME_WIDTH = 84
    MAX_NO_OP = 30
    MAX_REWARD = 1

    def __init__(self, rom_name, rng, display_screen=False):
        self.api = ALEInterface()
        self.api.setInt('random_seed', rng.randint(333))
        self.api.setBool('display_screen', display_screen)
        self.api.setFloat('repeat_action_probability', 0.0)
        self.rom_name = rom_name
        self.display_screen = display_screen
        self.rng = rng
        self.repeat = Environment.FRAMES_SKIP
        self.buffer_len = Environment.BUFFER_LEN
        self.height = Environment.FRAME_HEIGHT
        self.width = Environment.FRAME_WIDTH
        self.episode_steps = Environment.EPISODE_FRAMES / Environment.FRAMES_SKIP
        self.merge_id = 0
        self.max_reward = Environment.MAX_REWARD
        self.eval_eps = Environment.EVAL_EPS
        self.log_dir = ''
        self.network_dir = ''
        self.api.loadROM('../rom/' + self.rom_name)
        self.minimal_actions = self.api.getMinimalActionSet()
        original_width, original_height = self.api.getScreenDims()
        # Ring buffer of raw screens; consecutive frames are max-merged to
        # remove sprite flicker before resizing.
        self.merge_frame = np.zeros((self.buffer_len, original_height,
                                     original_width), dtype=np.uint8)

    def get_action_count(self):
        return len(self.minimal_actions)

    def train(self, agent, store_freq, folder=None, start_epoch=0):
        self._open_log_files(agent, folder)
        obs = np.zeros((self.height, self.width), dtype=np.uint8)
        epoch_count = Environment.EPOCH_COUNT
        for epoch in xrange(start_epoch, epoch_count):
            self.need_reset = True
            steps_left = Environment.EPOCH_STEPS
            print "\n" + "=" * 50
            print "Epoch #%d" % (epoch + 1)
            episode = 0
            train_start = time.time()
            while steps_left > 0:
                num_step, _ = self._run_episode(agent, steps_left, obs)
                steps_left -= num_step
                episode += 1
                if steps_left == 0 or episode % 10 == 0:
                    print "Finished episode #%d, steps_left = %d" \
                        % (episode, steps_left)
            train_end = time.time()
            valid_values = agent.get_validate_values()
            eval_values = self.evaluate(agent)
            test_end = time.time()
            train_time = train_end - train_start
            test_time = test_end - train_end
            step_per_sec = Environment.EPOCH_STEPS * 1. / max(1, train_time)
            print "\tFinished epoch #%d, episode trained = %d\n" \
                "\tValidate values = %.3f, evaluate reward = %.3f\n" \
                "\tTrain time = %.0fs, test time = %.0fs, steps/sec = %.4f" \
                % (epoch + 1, episode, valid_values, eval_values,
                   train_time, test_time, step_per_sec)
            self._update_log_files(agent, epoch + 1, episode, valid_values,
                                   eval_values, train_time, test_time,
                                   step_per_sec, store_freq)
            gc.collect()

    def evaluate(self, agent, episodes=30, obs=None):
        print "\n***Start evaluating"
        if obs is None:
            obs = np.zeros((self.height, self.width), dtype=np.uint8)
        sum_reward = 0.0
        sum_step = 0.0
        for episode in xrange(episodes):
            self.need_reset = True
            step, reward = self._run_episode(agent, self.episode_steps, obs,
                                             self.eval_eps, evaluating=True)
            sum_reward += reward
            sum_step += step
            print "Finished episode %d, reward = %d, step = %d" \
                % (episode + 1, reward, step)
        self.need_reset = True
        print "Average reward per episode = %.4f" % (sum_reward / episodes)
        print "Average step per episode = %.4f" % (sum_step / episodes)
        return sum_reward / episodes

    def _prepare_game(self):
        if self.need_reset or self.api.game_over():
            self.api.reset_game()
        self.need_reset = False
        # Start each game with a random number of no-ops so the agent does
        # not memorize a single deterministic opening.
        if Environment.MAX_NO_OP > 0:
            num_no_op = self.rng.randint(Environment.MAX_NO_OP + 1) \
                + self.buffer_len
            for _ in xrange(num_no_op):
                self.api.act(0)
        for _ in xrange(self.buffer_len):
            self._update_buffer()

    def _run_episode(self, agent, steps_left, obs, eps=0.0, evaluating=False):
        self._prepare_game()
        start_lives = self.api.lives()
        step_count = 0
        sum_reward = 0
        is_terminal = False
        while step_count < steps_left and not is_terminal:
            self._get_screen(obs)
            action_id, _ = agent.get_action(obs, eps, evaluating)
            reward = self._repeat_action(self.minimal_actions[action_id])
            reward_clip = reward
            if self.max_reward > 0:
                reward_clip = np.clip(reward, -self.max_reward, self.max_reward)
            # During training a lost life counts as a terminal state.
            life_lost = not evaluating and self.api.lives() < start_lives
            is_terminal = self.api.game_over() or life_lost \
                or step_count + 1 >= steps_left
            agent.add_experience(obs, is_terminal, action_id, reward_clip,
                                 evaluating)
            sum_reward += reward
            step_count += 1
        return step_count, sum_reward

    def _update_buffer(self):
        self.api.getScreenGrayscale(self.merge_frame[self.merge_id, ...])
        self.merge_id = (self.merge_id + 1) % self.buffer_len

    def _repeat_action(self, action):
        reward = 0
        for i in xrange(self.repeat):
            reward += self.api.act(action)
            # Only the last BUFFER_LEN frames of the skip are buffered.
            if i + self.buffer_len >= self.repeat:
                self._update_buffer()
        return reward

    def _get_screen(self, resized_frame):
        self._resize_frame(self.merge_frame.max(axis=0), resized_frame)

    def _resize_frame(self, src_frame, dst_frame):
        cv2.resize(src=src_frame, dst=dst_frame,
                   dsize=(self.width, self.height),
                   interpolation=cv2.INTER_LINEAR)

    def _open_log_files(self, agent, folder):
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        base_rom_name = os.path.splitext(os.path.basename(self.rom_name))[0]
        if folder is not None:
            self.log_dir = folder
            self.network_dir = self.log_dir + '/network'
        else:
            self.log_dir = '../run_results/' + base_rom_name + time_str
            self.network_dir = self.log_dir + '/network'
        info_name = get_next_name(self.log_dir, 'info', 'txt')
        git_name = get_next_name(self.log_dir, 'git-diff', '')
        try:
            os.stat(self.log_dir)
        except OSError:
            os.makedirs(self.log_dir)
        try:
            os.stat(self.network_dir)
        except OSError:
            os.makedirs(self.network_dir)
        with open(os.path.join(self.log_dir, info_name), 'w') as f:
            f.write('Commit: ' + subprocess.check_output(
                ['git', 'rev-parse', 'HEAD']))
            f.write('Run command: ')
            f.write(' '.join(pipes.quote(x) for x in sys.argv))
            f.write('\n\n')
            f.write(agent.get_info())
            write_info(f, Environment)
            write_info(f, agent.__class__)
            write_info(f, agent.network.__class__)
        # From https://github.com/spragunr/deep_q_rl/pull/49/files
        with open(os.path.join(self.log_dir, git_name), 'w') as f:
            f.write(subprocess.check_output(['git', 'diff', 'HEAD']))
        if folder is not None:
            return
        with open(os.path.join(self.log_dir, 'results.csv'), 'w') as f:
            f.write("epoch,episode_train,validate_values,evaluate_reward"
                    ",train_time,test_time,steps_per_second\n")
        mem = psutil.virtual_memory()
        with open(os.path.join(self.log_dir, 'memory.csv'), 'w') as f:
            f.write("epoch,available,free,buffers,cached"
                    ",available_readable,used_percent\n")
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (0, mem.available, mem.free, mem.buffers, mem.cached,
                     bytes2human(mem.available), mem.percent))

    def _update_log_files(self, agent, epoch, episode, valid_values,
                          eval_values, train_time, test_time, step_per_sec,
                          store_freq):
        print "Updating log files"
        with open(self.log_dir + '/results.csv', 'a') as f:
            f.write("%d,%d,%.4f,%.4f,%d,%d,%.4f\n" %
                    (epoch, episode, valid_values, eval_values,
                     train_time, test_time, step_per_sec))
        mem = psutil.virtual_memory()
        with open(self.log_dir + '/memory.csv', 'a') as f:
            f.write("%d,%d,%d,%d,%d,%s,%.1f\n" %
                    (epoch, mem.available, mem.free, mem.buffers, mem.cached,
                     bytes2human(mem.available), mem.percent))
        agent.dump_network(self.network_dir + ('/%03d' % epoch) + '.npz')
        if (store_freq >= 0 and epoch >= Environment.EPOCH_COUNT) or \
                (store_freq > 0 and (epoch % store_freq == 0)):
            agent.dump_exp(self.network_dir + '/exp.npz')

    def _setup_record(self, network_file):
        file_name, _ = os.path.splitext(os.path.basename(network_file))
        time_str = time.strftime("_%m-%d-%H-%M", time.localtime())
        img_dir = os.path.dirname(network_file) + '/images_' \
            + file_name + time_str
        rom_name, _ = os.path.splitext(self.rom_name)
        out_name = os.path.dirname(network_file) + '/' + rom_name + '_' \
            + file_name + time_str + '.mov'
        print out_name
        try:
            os.stat(img_dir)
        except OSError:
            os.makedirs(img_dir)
        self.api.setString('record_screen_dir', img_dir)
        self.api.loadROM('../rom/' + self.rom_name)
        return img_dir, out_name

    def record_run(self, agent, network_file, episode_id=1):
        # Play episode_id - 1 episodes first, snapshot the emulator, then
        # restore the snapshot after recording is set up so that the
        # episode_id-th episode is the one captured.
        if episode_id > 1:
            self.evaluate(agent, episode_id - 1)
            system_state = self.api.cloneSystemState()
        img_dir, out_name = self._setup_record(network_file)
        if episode_id > 1:
            self.api.restoreSystemState(system_state)
        self.evaluate(agent, 1)
        script = """
{
    ffmpeg -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
} || {
    avconv -r 60 -i %s/%%06d.png -f mov -c:v libx264 %s
}
""" % (img_dir, out_name, img_dir, out_name)
        os.system(script)
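# Why _update_buffer keeps BUFFER_LEN = 2 raw screens: many Atari games draw
# sprites only on alternating frames, so any single frame can miss objects.
# Taking the pixel-wise max over the last two screens (as _get_screen does)
# removes that flicker. A standalone sketch with dummy frames to illustrate;
# the frame shapes and sprite positions are made up for the example:
import numpy as np

frame_a = np.zeros((210, 160), dtype=np.uint8)
frame_b = np.zeros((210, 160), dtype=np.uint8)
frame_a[100, 80] = 255   # sprite visible only on the even frame
frame_b[50, 40] = 255    # sprite visible only on the odd frame

merged = np.stack([frame_a, frame_b]).max(axis=0)
assert merged[100, 80] == 255 and merged[50, 40] == 255  # both sprites kept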
#!/usr/bin/env python
# python_example.py
# Author: Ben Goodrich
#
# This is a direct port to python of the shared library example from
# ALE provided in doc/examples/sharedLibraryInterfaceExample.cpp
import sys
import numpy as np
from random import randrange
from ale_python_interface import ALEInterface

ale = ALEInterface()

# Get & set the desired settings
ale.setInt('random_seed', 123)

# Show the game screen to see what is going on
ale.setBool("display_screen", True)
ale.setBool("sound", True)

# Set USE_SDL to true to display the screen. ALE must be compiled
# with SDL enabled for this to work. On OSX, pygame init is used to
# proxy-call SDL_main.
# USE_SDL = False
# if USE_SDL:
#     if sys.platform == 'darwin':
#         import pygame
#         pygame.init()
#         ale.setBool('sound', False)  # Sound doesn't work on OSX
#     elif sys.platform.startswith('linux'):
#         ale.setBool('sound', True)
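# The fragment above ends before a ROM is loaded. A minimal sketch of the
# rest of the ALE shared-library example follows; the ROM path and episode
# count are illustrative, not taken from the original file.
ale.loadROM('breakout.bin')
legal_actions = ale.getMinimalActionSet()

for episode in xrange(10):
    total_reward = 0
    while not ale.game_over():
        # Uniform random policy, as in the C++ example this script ports.
        a = legal_actions[randrange(len(legal_actions))]
        total_reward += ale.act(a)
    print 'Episode', episode, 'ended with score:', total_reward
    ale.reset_game()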
import logging
import os
import sys

import cv2

logger = logging.getLogger(__name__)


class ALEEnvironment(Environment):
    def __init__(self, rom_file, args):
        from ale_python_interface import ALEInterface
        self.ale = ALEInterface()
        if args.display_screen:
            if sys.platform == 'darwin':
                import pygame
                pygame.init()
                self.ale.setBool('sound', False)  # Sound doesn't work on OSX
            elif sys.platform.startswith('linux'):
                self.ale.setBool('sound', True)
            self.ale.setBool('display_screen', True)

        self.ale.setInt('frame_skip', args.frame_skip)
        self.ale.setFloat('repeat_action_probability',
                          args.repeat_action_probability)
        self.ale.setBool('color_averaging', args.color_averaging)

        if args.random_seed:
            self.ale.setInt('random_seed', args.random_seed)

        if args.record_screen_path:
            if not os.path.exists(args.record_screen_path):
                logger.info("Creating folder %s", args.record_screen_path)
                os.makedirs(args.record_screen_path)
            logger.info("Recording screens to %s", args.record_screen_path)
            self.ale.setString('record_screen_dir', args.record_screen_path)

        if args.record_sound_filename:
            logger.info("Recording sound to %s", args.record_sound_filename)
            self.ale.setBool('sound', True)
            self.ale.setString('record_sound_filename',
                               args.record_sound_filename)

        self.ale.loadROM(rom_file)

        if args.minimal_action_set:
            self.actions = self.ale.getMinimalActionSet()
            logger.info("Using minimal action set with size %d",
                        len(self.actions))
        else:
            self.actions = self.ale.getLegalActionSet()
            logger.info("Using full action set with size %d",
                        len(self.actions))
        logger.debug("Actions: %s", str(self.actions))

        self.screen_width = args.screen_width
        self.screen_height = args.screen_height
        self.life_lost = False
        # 'train' or 'test'. The original fragment never initializes this,
        # so it is assumed to be switched externally; default to train mode.
        self.mode = 'train'

    def numActions(self):
        return len(self.actions)

    def restart(self):
        # In test mode, the game is simply reinitialized. In train mode, if
        # the episode ended because of a life loss but the game is not yet
        # over, only the life-loss flag is cleared so the next episode
        # continues from the current state. Otherwise the game is
        # reinitialized.
        if (
            self.mode == 'test'
            or not self.life_lost        # `restart` called mid-episode
            or self.ale.game_over()      # all lives are lost
        ):
            self.ale.reset_game()
        self.life_lost = False

    def act(self, action):
        lives = self.ale.lives()
        reward = self.ale.act(self.actions[action])
        self.life_lost = (lives != self.ale.lives())
        return reward

    def getScreen(self):
        screen = self.ale.getScreenGrayscale()
        resized = cv2.resize(screen, (self.screen_width, self.screen_height))
        return resized

    def isTerminal(self):
        if self.mode == 'train':
            return self.ale.game_over() or self.life_lost
        return self.ale.game_over()
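# A minimal driver sketch showing the train-mode life-loss semantics above.
# The Namespace fields and ROM path are illustrative stand-ins for the
# script's real argument parser, and the step budget is arbitrary.
import random
from argparse import Namespace

args = Namespace(display_screen=False, frame_skip=4,
                 repeat_action_probability=0.0, color_averaging=True,
                 random_seed=0, record_screen_path=None,
                 record_sound_filename=None, minimal_action_set=True,
                 screen_width=84, screen_height=84)
env = ALEEnvironment('breakout.bin', args)
env.mode = 'train'

for step in xrange(1000):
    env.act(random.randrange(env.numActions()))
    if env.isTerminal():
        # A lost life ends the learner's episode, but restart() resumes from
        # the current emulator state; only a real game over fully resets.
        env.restart()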
import cv2
import numpy as np
from ale_python_interface import ALEInterface


class MyEnv(Environment):
    # `Environment` is the abstract base class of the surrounding RL
    # framework, assumed to be importable from there.
    VALIDATION_MODE = 0

    def __init__(self, rng, rom="ale/breakout.bin", frame_skip=4,
                 ale_options=None):
        if ale_options is None:
            # Avoid a mutable default argument; these mirror the original
            # defaults.
            ale_options = [{"key": "random_seed", "value": 0},
                           {"key": "color_averaging", "value": True},
                           {"key": "repeat_action_probability", "value": 0.}]
        self._mode = -1
        self._modeScore = 0.0
        self._modeEpisodeCount = 0
        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._randomState = rng

        self._ale = ALEInterface()
        # Dispatch each option to the ALE setter matching its value's type.
        for option in ale_options:
            t = type(option["value"])
            if t is int:
                self._ale.setInt(option["key"], option["value"])
            elif t is float:
                self._ale.setFloat(option["key"], option["value"])
            elif t is bool:
                self._ale.setBool(option["key"], option["value"])
            else:
                raise ValueError(
                    "Option {} ({}) is not an int, bool or float."
                    .format(option["key"], t))
        self._ale.loadROM(rom)

        w, h = self._ale.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((84, 84), dtype=np.uint8)
        self._actions = self._ale.getMinimalActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._modeScore = 0.0
                self._modeEpisodeCount = 0
            else:
                self._modeEpisodeCount += 1
        elif self._mode != -1:  # and thus mode == -1 (training)
            self._mode = -1

        self._ale.reset_game()
        # Random number of no-ops to randomize the starting state.
        for _ in range(self._randomState.randint(15)):
            self._ale.act(0)
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)
        # Initial observation: a stack of four blank 84x84 frames.
        return [4 * [84 * [84 * [0]]]]

    def act(self, action):
        action = self._actions[action]
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ale.act(action)
            if self.inTerminalState():
                break
        self._ale.getScreenGrayscale(self._screen)
        cv2.resize(self._screen, (84, 84), self._reducedScreen,
                   interpolation=cv2.INTER_NEAREST)
        self._modeScore += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._modeEpisodeCount += 1
        print("== Mean score per episode is {} over {} episodes =="
              .format(self._modeScore / self._modeEpisodeCount,
                      self._modeEpisodeCount))

    def inputDimensions(self):
        return [(4, 84, 84)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ale.game_over()
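# Quick smoke test for MyEnv, using only methods the class itself defines.
# The ROM path matches the default above; the random policy is illustrative.
rng = np.random.RandomState(0)
env = MyEnv(rng)
env.reset(-1)  # -1 selects training mode
while not env.inTerminalState():
    clipped = env.act(rng.randint(env.nActions()))  # returns sign(reward)
last_frame = env.observe()[0]  # most recent 84x84 grayscale screen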
import sys
from ale_python_interface import ALEInterface

initialExplorationRate = 1.0
finalExplorationRate = 0.1
loadModel = -1
loadModelPath = "model/" + "%02d" % loadModel + ".tfmodel"
saveData = False
saveModel = False
gamma = .99
learning_rate = 0.00025
display_screen = False
batchSize = 50
# `frameSkip` was referenced but never defined in the original fragment;
# 4 is assumed here as the conventional DQN value.
frameSkip = 4

ale = ALEInterface()
ale.setInt('random_seed', 123)
ale.setInt("frame_skip", frameSkip)

USE_SDL = True
if USE_SDL:
    if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False)  # Sound doesn't work on OSX
    elif sys.platform.startswith('linux'):
        ale.setBool('sound', False)
    ale.setBool('display_screen', display_screen)

ale.loadROM("rom/Breakout.A26")
legal_actions = ale.getMinimalActionSet()
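# The initial/final exploration rates above imply an epsilon-greedy schedule.
# A minimal sketch of the usual linear annealing follows; the 1M-step horizon
# (`explorationSteps`) is an assumption, not specified in the fragment above.
explorationSteps = 1000000

def explorationRate(step):
    """Linearly anneal epsilon from initial to final over explorationSteps."""
    fraction = min(float(step) / explorationSteps, 1.0)
    return initialExplorationRate \
        + fraction * (finalExplorationRate - initialExplorationRate)

# e.g. explorationRate(0) == 1.0, explorationRate(500000) == 0.55,
# and explorationRate(1000000) == 0.1 (held constant afterwards).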
import os
import random

import numpy as np
from ale_python_interface import ALEInterface
# `State` is a project-local helper that stacks preprocessed screens into the
# network input; a hypothetical sketch of it follows this class.


class AtariEnvironment:
    def __init__(self, args, outputDir):
        self.outputDir = outputDir
        self.screenCaptureFrequency = args.screen_capture_freq

        self.ale = ALEInterface()
        self.ale.setInt(b'random_seed', 123456)
        random.seed(123456)
        # Fix https://groups.google.com/forum/#!topic/deep-q-learning/p4FAIaabwlo
        self.ale.setFloat(b'repeat_action_probability', 0.0)

        # Load the ROM file
        self.ale.loadROM(args.rom)

        self.actionSet = self.ale.getMinimalActionSet()
        self.gameNumber = 0
        self.stepNumber = 0
        self.resetGame()

    def getNumActions(self):
        return len(self.actionSet)

    def getState(self):
        return self.state

    def getGameNumber(self):
        return self.gameNumber

    def getFrameNumber(self):
        return self.ale.getFrameNumber()

    def getEpisodeFrameNumber(self):
        return self.ale.getEpisodeFrameNumber()

    def getEpisodeStepNumber(self):
        return self.episodeStepNumber

    def getStepNumber(self):
        return self.stepNumber

    def getGameScore(self):
        return self.gameScore

    def isGameOver(self):
        return self.ale.game_over()

    def step(self, action):
        previousLives = self.ale.lives()
        reward = 0
        isTerminal = False
        self.stepNumber += 1
        self.episodeStepNumber += 1

        for i in range(4):
            prevScreenRGB = self.ale.getScreenRGB()
            reward += self.ale.act(self.actionSet[action])
            screenRGB = self.ale.getScreenRGB()

            # Treat a lost life as the end of an episode. NOTE (from the
            # original author): this handling may not interact correctly
            # with the overall game loop.
            if self.ale.lives() < previousLives or self.ale.game_over():
                isTerminal = True
                break

        if self.gameNumber % self.screenCaptureFrequency == 0:
            captureDir = self.outputDir + '/screen_cap/game-%06d' \
                % self.gameNumber
            if not os.path.isdir(captureDir):
                os.makedirs(captureDir)
            self.ale.saveScreenPNG(captureDir + '/frame-%06d.png'
                                   % self.getEpisodeFrameNumber())

        # Max over the last two raw screens to remove sprite flicker.
        maxedScreen = np.maximum(screenRGB, prevScreenRGB)
        self.state = self.state.stateByAddingScreen(maxedScreen,
                                                    self.ale.getFrameNumber())
        self.gameScore += reward
        return reward, self.state, isTerminal

    def resetGame(self):
        if self.ale.game_over():
            self.gameNumber += 1
        self.ale.reset_game()
        self.state = State().stateByAddingScreen(self.ale.getScreenRGB(),
                                                 self.ale.getFrameNumber())
        self.gameScore = 0
        # Counts environment steps, not ALE frames; frames are roughly 4x
        # the step count because of the frame skip above.
        self.episodeStepNumber = 0
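# Hypothetical sketch of the `State` helper referenced above, shown only to
# make the fragment self-contained; the real implementation lives elsewhere
# in the project. It keeps the last four preprocessed screens as the network
# input, matching the standard DQN frame-stacking setup.
import cv2
import numpy as np


class State:
    FRAMES = 4

    def __init__(self, screens=None):
        self.screens = screens if screens is not None else []

    def stateByAddingScreen(self, screenRGB, frameNumber):
        # Grayscale + resize to 84x84, as in the DQN preprocessing pipeline.
        gray = cv2.cvtColor(screenRGB, cv2.COLOR_RGB2GRAY)
        small = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_LINEAR)
        screens = (self.screens + [small])[-State.FRAMES:]
        return State(screens)

    def getScreens(self):
        # Pad by repeating the oldest frame until four screens are available.
        padded = [self.screens[0]] * (State.FRAMES - len(self.screens)) \
            + self.screens
        return np.stack(padded, axis=0)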