class emulator:
    """Thin wrapper around one ALE game: steps actions, grabs RGB frames,
    and restarts episodes from pre-recorded checkpoints."""

    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        # Map each ALE action id to its position in the minimal action set.
        self.action_map = {act: idx for idx, act in enumerate(self.legal_actions)}
        self.windowname = windowname
        self.init_frame_number = 0
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: " + str(self.screen_width) + "/" +
              str(self.screen_height))
        self.vis = vis
        if vis:
            # Open a preview window that next() will draw into.
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Return the current screen as an (H, W, 3) uint8 array."""
        raw = np.zeros(self.screen_height * self.screen_width * 3,
                       dtype=np.uint8)
        self.ale.getScreenRGB(raw)
        return np.reshape(raw, (self.screen_height, self.screen_width, 3))

    def newGame(self):
        """Start an episode by restoring a random stored checkpoint.

        Used instead of ale.reset_game(); `checkpoints` is a module-level
        pool indexed 0..99. Returns the first frame of the episode.
        """
        picked = checkpoints[random.randint(0, 99)].astype('uint8')
        self.ale.restoreState(self.ale.decodeState(picked))
        self.init_frame_number = self.ale.getFrameNumber()
        #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
        return self.get_image()

    def next(self, action_indx):
        """Apply one action; return (frame, reward, game_over)."""
        reward = self.ale.act(action_indx)
        frame = self.get_image()
        # scipy.misc.imsave('test.png', frame)
        if self.vis:
            cv2.imshow(self.windowname, frame)
        return frame, reward, self.ale.game_over()

    def get_frame_number(self):
        """Frames elapsed since the last checkpoint restore."""
        return self.ale.getFrameNumber() - self.init_frame_number
# NOTE(review): this file defines `emulator` twice; this second definition
# shadows the one above at import time — confirm which copy is intended.
class emulator:
    """ALE game wrapper: checkpoint-based restarts, frame stepping, and
    optional on-screen preview via OpenCV."""

    def __init__(self, rom_name, vis, windowname='preview'):
        self.ale = ALEInterface()
        self.max_frames_per_episode = self.ale.getInt(
            "max_num_frames_per_episode")
        self.ale.setInt("random_seed", 123)
        self.ale.setInt("frame_skip", 4)
        self.ale.loadROM('roms/' + rom_name)
        self.legal_actions = self.ale.getMinimalActionSet()
        # action id -> index within the minimal action set
        self.action_map = dict()
        for idx, action in enumerate(self.legal_actions):
            self.action_map[action] = idx
        self.windowname = windowname
        self.init_frame_number = 0
        # print(self.legal_actions)
        self.screen_width, self.screen_height = self.ale.getScreenDims()
        print("width/height: {}/{}".format(self.screen_width,
                                           self.screen_height))
        self.vis = vis
        if vis:
            cv2.startWindowThread()
            cv2.namedWindow(self.windowname)

    def get_image(self):
        """Grab the current ALE screen as an (H, W, 3) uint8 image."""
        buf = np.zeros(self.screen_height * self.screen_width * 3,
                       dtype=np.uint8)
        self.ale.getScreenRGB(buf)
        return np.reshape(buf, (self.screen_height, self.screen_width, 3))

    def newGame(self):
        """Begin an episode from one of 100 stored checkpoints.

        Replaces ale.reset_game(); returns the restored frame.
        """
        state = self.ale.decodeState(
            checkpoints[random.randint(0, 99)].astype('uint8'))
        self.ale.restoreState(state)
        self.init_frame_number = self.ale.getFrameNumber()
        #self.ale.restoreState(self.ale.decodeState(np.reshape(checkpoint,(1009,1))))
        return self.get_image()

    def next(self, action_indx):
        """Act once and return (next_frame, reward, terminal)."""
        reward = self.ale.act(action_indx)
        nextstate = self.get_image()
        # scipy.misc.imsave('test.png', nextstate)
        if self.vis:
            cv2.imshow(self.windowname, nextstate)
        return nextstate, reward, self.ale.game_over()

    def get_frame_number(self):
        """Emulator frames since this episode's checkpoint restore."""
        return self.ale.getFrameNumber() - self.init_frame_number
def main():
    """Probe ALE reset determinism.

    For each game, replay one fixed action sequence `bunch` times under the
    reset strategy named by the module-level `test` string ('loadROM',
    'restoreState', 'restoreSystemState' or 'setRAM'), group the runs by
    their final screen, and save matrix visualisations of the per-step
    screens and RAM under ./results/.

    Relies on module-level globals defined elsewhere in this file:
    `test`, `sequence`, `bunch`, `frame_skip`, `process_ram`, `clear_print`.
    """
    # NOTE(review): `result` is built here but never populated or read below.
    result = {
        'name': [],
        'grouped_num': [],
        'distribution': [],
    }
    result_str = ''
    # Alternative batches of games; a '-n' suffix marks games outside the
    # Nature-DQN list (skipped by the check at the top of the loop).
    # all_game_list = ['air_raid-n', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis']
    # all_game_list = ['bank_heist', 'battle_zone', 'beam_rider', 'berzerk-n', 'bowling', 'boxing', 'breakout', 'carnival-n']
    # all_game_list = ['centipede', 'chopper_command', 'crazy_climber', 'demon_attack', 'double_dunk']
    # all_game_list = ['elevator_action-n', 'enduro', 'fishing_derby', 'freeway', 'frostbite', 'gopher', 'gravitar']
    # all_game_list = ['hero', 'ice_hockey', 'jamesbond', 'journey_escape-n', 'kangaroo', 'krull', 'kung_fu_master']
    # all_game_list = ['montezuma_revenge-n', 'ms_pacman', 'name_this_game', 'phoenix-n', 'pitfall-n', 'pong', 'pooyan-n']
    # all_game_list = ['private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'skiing-n']
    # all_game_list = ['solaris-n', 'space_invaders', 'star_gunner', 'tennis', 'time_pilot', 'tutankham', 'up_n_down']
    # all_game_list = ['venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge-n', 'zaxxon']
    # all_game_list = ['pong', 'assault','ms_pacman']
    all_game_list = ['assault']
    for game in all_game_list:
        if '-n' in game:
            '''games that are not in the nature DQN list'''
            continue
        import atari_py
        game_path = atari_py.get_game_path(game)
        game_path = str.encode(game_path)
        # Deterministic emulator setup: no sticky actions, fixed seed.
        env = ALEInterface()
        env.setFloat('repeat_action_probability'.encode('utf-8'), 0.0)
        env.setInt(b'random_seed', 3)
        env.loadROM(game_path)
        env.reset_game()
        # Snapshot whatever the chosen reset strategy needs to restart from.
        if test in ['restoreState']:
            state_after_reset = env.cloneState()
        if test in ['restoreSystemState']:
            state_after_reset = env.cloneSystemState()
        if test in ['setRAM']:
            ram_after_reset = env.getRAM()
            state_after_reset = env.cloneSystemState()
            # Per-game mask of RAM bytes to perturb — precomputed on disk.
            ram_candidate = np.load(
                './stochasticity_ram_mask/{}.npy'.format(game),
            )
        print('=====================================================')
        # Load a cached action sequence for this (length, game) pair, or
        # generate and cache a fresh random one on the first run.
        try:
            action_sequence = np.load(
                './action_sequence/action_sequence_{}_{}.npy'.format(
                    sequence,
                    game,
                ))
            print('action_sequence loaded')
        except Exception as e:
            '''generate a sequence of actions'''
            action_sequence = np.random.randint(
                len(env.getMinimalActionSet()),
                size=sequence,
            )
            np.save(
                './action_sequence/action_sequence_{}_{}.npy'.format(
                    sequence,
                    game,
                ),
                action_sequence,
            )
            print('action_sequence generated')
        print('=====================================================')
        bunch_obs = []       # distinct final screens seen so far
        distribution = []    # how many runs ended on each distinct screen
        episode_length = -1  # last sequence index at which any run terminated
        state_metrix = []    # per-run list of per-step screens
        ram_metrix = []      # per-run list of per-step processed RAM
        for bunch_i in range(bunch):
            # Restart the game according to the strategy under test.
            if test in ['loadROM']:
                # NOTE(review): reseeds with bunch_i, so each run differs by seed.
                env.setInt(b'random_seed', bunch_i)
                env.loadROM(game_path)
                env.reset_game()
            elif test in ['restoreState']:
                env.restoreState(state_after_reset)
            elif test in ['restoreSystemState']:
                env.restoreSystemState(state_after_reset)
            elif test in ['setRAM']:
                env.reset_game()
                env.restoreSystemState(state_after_reset)
                env.setRAM(ram_after_reset)
                # Overwrite the masked RAM bytes with a per-run constant.
                env.setRAM(env.getRAM() * (1 - ram_candidate) +
                           ram_candidate * (bunch_i % 255))
            state_sequence = []
            ram_sequence = []
            has_terminated = False
            # Replay the fixed action sequence, repeating each action
            # `frame_skip` times, and record screen + RAM after each step.
            for sequence_i in range(sequence):
                for frame_skip_i in range(frame_skip):
                    if not has_terminated:
                        env.act(env.getMinimalActionSet()[
                            action_sequence[sequence_i]])
                        if env.game_over():
                            episode_length = sequence_i
                            has_terminated = True
                    if has_terminated:
                        break
                # clear_print is a best-effort progress display; ignore failures.
                try:
                    clear_print('[{}|{}|{}]'.format(bunch_i, sequence_i,
                                                    episode_length))
                except Exception as e:
                    pass
                state_sequence += [env.getScreenRGB()]
                ram_sequence += [process_ram(env.getRAM())]
                if has_terminated:
                    break
            if sequence > 0:
                if episode_length < 0:
                    # raise Exception('Did not terminated')
                    print('# WARNING: Did not terminated')
            # Group this run by its final screen: runs whose final screens
            # match pixel-for-pixel fall into the same bucket.
            # NOTE(review): `obs - bunch_obs[i]` on uint8 screens wraps
            # around rather than going negative; with the `< 1` threshold
            # this still detects exact equality, but verify the dtype.
            obs = env.getScreenRGB()
            state_metrix += [copy.deepcopy(state_sequence)]
            ram_metrix += [copy.deepcopy(ram_sequence)]
            if_has_identical_one = False
            for bunch_obs_i in range(len(bunch_obs)):
                max_value = np.max(np.abs(obs - bunch_obs[bunch_obs_i]))
                if max_value < 1:
                    if_has_identical_one = True
                    distribution[bunch_obs_i] += 1
                    break
            if if_has_identical_one is False:
                bunch_obs += [obs]
                distribution += [1]
        grouped_num = len(bunch_obs)
        result_str = '{}game:{} grouped_num:{} distribution:{} \n'.format(
            result_str,
            game,
            grouped_num,
            distribution,
        )
        # First iteration creates the accumulator lists; later ones append.
        try:
            game_list += [game]
        except Exception as e:
            game_list = [game]
        try:
            grouped_num_list += [grouped_num]
        except Exception as e:
            grouped_num_list = [grouped_num]
        # Pad every run's sequences with zero frames so all rows have the
        # length of the longest run.  (NOTE(review): "lenth" is a typo kept
        # for token fidelity.)
        max_lenth = 0
        for bunch_i in range(len(state_metrix)):
            if len(state_metrix[bunch_i]) > max_lenth:
                max_lenth = len(state_metrix[bunch_i])
        for bunch_i in range(len(state_metrix)):
            state_metrix[bunch_i] += ([
                np.zeros(shape=state_metrix[0][0].shape,
                         dtype=state_metrix[0][0].dtype)
            ] * (max_lenth - len(state_metrix[bunch_i])))
            ram_metrix[bunch_i] += ([
                np.zeros(shape=ram_metrix[0][0].shape,
                         dtype=ram_metrix[0][0].dtype)
            ] * (max_lenth - len(state_metrix[bunch_i])))
        # Deduplicate all screens into state_list and record, per (run, step),
        # the id of the matching unique screen.
        # NOTE(review): this comparison uses max WITHOUT abs (unlike the
        # final-screen grouping above) — confirm that is intentional.
        state_list = []
        state_metrix_id = np.zeros((len(state_metrix), len(state_metrix[0])),
                                   dtype=int)
        for bunch_i in range(len(state_metrix)):
            for sequence_i in range(len(state_metrix[0])):
                found_in_state_list = False
                for state_list_id in range(len(state_list)):
                    if np.max(state_list[state_list_id] -
                              state_metrix[bunch_i][sequence_i]) < 1:
                        state_metrix_id[bunch_i][sequence_i] = state_list_id
                        found_in_state_list = True
                        break
                if not found_in_state_list:
                    state_list += [np.copy(state_metrix[bunch_i][sequence_i])]
                    state_metrix_id[bunch_i][sequence_i] = (len(state_list) - 1)
        # Sort runs by their id-row (descending) so identical trajectories
        # end up adjacent in the plots; keep the unsorted copy for lookup.
        state_metrix_id_unsorted = np.copy(state_metrix_id)
        state_metrix_id = state_metrix_id.tolist()
        state_metrix_id.sort(key=lambda row: row[:], reverse=True)
        state_metrix_id = np.array(state_metrix_id)
        fig, ax = plt.subplots()
        im = ax.imshow(state_metrix_id)
        plt.show()
        plt.savefig(
            './results/{}_state_metrix_id.jpg'.format(game),
            dpi=600,
        )
        # Assemble one big image: runs stacked vertically (with a 10px /
        # 5px gap per run) and steps laid out horizontally.
        state_metrix_figure = np.zeros(
            ((10 + state_metrix[0][0].shape[0]) * len(state_metrix),
             state_metrix[0][0].shape[1] * len(state_metrix[0]),
             state_metrix[0][0].shape[2]),
            dtype=state_metrix[0][0].dtype)
        ram_metrix_figure = np.zeros(
            ((5 + ram_metrix[0][0].shape[0]) * len(state_metrix),
             ram_metrix[0][0].shape[1] * len(state_metrix[0]),
             ram_metrix[0][0].shape[2]),
            dtype=ram_metrix[0][0].dtype)
        # NOTE(review): this overwrites the mask loaded for the setRAM test
        # and appears unused afterwards — confirm before removing.
        ram_candidate = list(range(env.getRAMSize()))
        # Blue header strip above each run's RAM row.
        for bunch_i in range(len(state_metrix)):
            ram_metrix_figure[((bunch_i) * (5 + ram_metrix[0][0].shape[0])):(
                5 + (bunch_i) * (5 + ram_metrix[0][0].shape[0])), :, 2] = 255
        for bunch_i in range(len(state_metrix)):
            for sequence_i in range(len(state_metrix[0])):
                state_metrix_figure[
                    (10 + (bunch_i) *
                     (10 + state_metrix[0][0].shape[0])):(bunch_i + 1) *
                    (10 + state_metrix[0][0].shape[0]),
                    (sequence_i) * state_metrix[0][0].shape[1]:(sequence_i + 1) *
                    state_metrix[0][0].shape[1]] = state_list[
                        state_metrix_id[bunch_i][sequence_i]]
                # Find which unsorted run corresponds to this sorted row so
                # the RAM figure lines up with the sorted screen figure.
                for bunch_ii in range(state_metrix_id.shape[0]):
                    if np.max(state_metrix_id_unsorted[bunch_ii] -
                              state_metrix_id[bunch_i]) < 1:
                        at_unsorted_bunch = bunch_ii
                        break
                ram_metrix_figure[(
                    5 + (bunch_i) *
                    (5 + ram_metrix[0][0].shape[0])):(bunch_i + 1) *
                                  (5 + ram_metrix[0][0].shape[0]),
                                  (sequence_i) *
                                  ram_metrix[0][0].shape[1]:(sequence_i + 1) *
                                  ram_metrix[0][0].shape[1]] = ram_metrix[
                                      at_unsorted_bunch][sequence_i]
        # Paint red markers wherever adjacent (sorted) runs diverge at a step.
        for bunch_i in range(len(state_metrix)):
            for sequence_i in range(len(state_metrix[0])):
                if bunch_i > 0:
                    if state_metrix_id[bunch_i][sequence_i] != state_metrix_id[
                            bunch_i - 1][sequence_i]:
                        # draw a line to separate the bunches
                        previous = ram_metrix_figure[(
                            5 + (bunch_i - 1) *
                            (5 + ram_metrix[0][0].shape[0])):(
                                (bunch_i) * (5 + ram_metrix[0][0].shape[0])),
                                                     sequence_i, 0]
                        later = ram_metrix_figure[(
                            5 + (bunch_i) *
                            (5 + ram_metrix[0][0].shape[0])):(
                                (bunch_i + 1) *
                                (5 + ram_metrix[0][0].shape[0])), sequence_i, 0]
                        # NOTE(review): `delta` is computed but never used.
                        delta = np.abs(previous - later)
                        state_metrix_figure[(
                            (bunch_i) * (10 + state_metrix[0][0].shape[0])):(
                                10 + (bunch_i) *
                                (10 + state_metrix[0][0].shape[0])),
                                            (sequence_i) *
                                            state_metrix[0][0].shape[1]:,
                                            0] = 255
                        ram_metrix_figure[(
                            (bunch_i) * (5 + ram_metrix[0][0].shape[0])):(
                                5 + (bunch_i) *
                                (5 + ram_metrix[0][0].shape[0])),
                                          (sequence_i) *
                                          ram_metrix[0][0].shape[1]:, 0] = 255
                        ram_metrix_figure[(
                            (bunch_i) * (5 + ram_metrix[0][0].shape[0])):(
                                5 + (bunch_i) *
                                (5 + ram_metrix[0][0].shape[0])),
                                          (sequence_i) *
                                          ram_metrix[0][0].shape[1]:, 1:] = 0
        from PIL import Image
        Image.fromarray(state_metrix_figure).save(
            "./results/{}_state_metrix_figure.jpeg".format(game))
        Image.fromarray(ram_metrix_figure.astype(
            state_metrix_figure.dtype)).save(
                "./results/{}_ram_metrix_figure.jpeg".format(game))
    # Summary over all games processed in this run.
    print(result_str)
    print('===============')
    for game_i in range(len(game_list)):
        print(game_list[game_i])
    for grouped_num_i in range(len(grouped_num_list)):
        print(grouped_num_list[grouped_num_i])
class ALEEnvironment(Environment):
    """Environment wrapper holding two ALE instances: `_ale` is the live
    game, `_ale_sampler` is a scratch copy used to score rollouts without
    disturbing the live state."""

    def __init__(self, rom_name, visible=True):
        super().__init__('Arcade Learning Environment')
        frame_skip = 20  # only referenced by the commented-out setInt calls
        self._ale = ALEInterface()
        self._ale_sampler = ALEInterface()
        self._ale.setBool(b'display_screen', visible)
        #self._ale.setInt(b'frame_skip', frame_skip)
        #self._ale_sampler.setBool(b'display_screen', True)
        #self._ale_sampler.setInt(b'frame_skip', frame_skip)
        rom_bytes = rom_name.encode('ascii')
        self._ale.loadROM(rom_bytes)
        self._ale_sampler.loadROM(rom_bytes)
        self._action_space = self._ale.getLegalActionSet()
        self._current_score = 0

    def evaluate_rollout(self, solution, discount_factor=0):
        """Play the action list `solution` on the sampler copy, starting
        from the live game's current state, and return the (optionally
        discounted) total reward plus the change in lives."""
        # Transfer the live state between emulator instances: states cloned
        # from one ALEInterface must go through encode/decode to be usable
        # by another.
        #temp_state = self._ale.cloneState()
        encoded = self._ale.encodeState(self._ale.cloneState())
        snapshot = self._ale_sampler.decodeState(encoded)
        self._ale_sampler.restoreState(snapshot)
        lives_before = self._ale.lives()
        total_reward = 0
        discount = 1
        for action in solution:
            step_reward = self._ale_sampler.act(action)
            # discount_factor=None disables discounting entirely.
            if discount_factor is not None:
                step_reward *= discount
                discount *= discount_factor
            total_reward += step_reward
            if self._ale_sampler.game_over():
                break
        #self._ale.restoreState(temp_state)
        return total_reward + (self._ale_sampler.lives() - lives_before)

    def perform_action(self, action):
        """Act on the live emulator and accumulate the reward."""
        reward = self._ale.act(action)
        self._current_score += reward

    def get_current_score(self):
        """Total reward accumulated through perform_action."""
        return self._current_score

    def get_current_lives(self):
        """Lives remaining in the live emulator."""
        return self._ale.lives()

    def get_random_action(self):
        """Sample one action uniformly from the full legal action set."""
        return np.random.choice(self._action_space)

    def is_game_over(self):
        """Whether the live emulator reports a terminal state."""
        return self._ale.game_over()