import random
import time

from vizdoom import DoomGame


def test_environment():
    game = DoomGame()
    # https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Deep%20Q%20Learning/Doom/basic.cfg
    game.load_config('basic.cfg')
    game.set_doom_scenario_path('basic.wad')
    game.init()

    # One-hot button lists matching basic.cfg's buttons: MOVE_LEFT, MOVE_RIGHT, ATTACK
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    episodes = 10
    for i in range(episodes):
        game.new_episode()
        while not game.is_episode_finished():
            state = game.get_state()
            img = state.screen_buffer
            misc = state.game_variables
            action = random.choice(actions)
            print('Action', action)
            reward = game.make_action(action)
            print('Reward', reward)
            time.sleep(0.02)
        print('Result', game.get_total_reward())
        time.sleep(2)
    game.close()
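# A minimal preprocessing sketch (not part of the snippet above): raw
# `state.screen_buffer` frames arrive channels-first at full resolution and are
# usually grayscaled and downsampled before being fed to a network. Assumes the
# default channels-first RGB screen format and OpenCV; `preprocess_frame` is a
# hypothetical helper name.
import cv2
import numpy as np


def preprocess_frame(screen_buffer, size=(84, 84)):
    """Convert a ViZDoom screen buffer (channels-first RGB) to a small grayscale frame."""
    frame = np.moveaxis(screen_buffer, 0, -1)  # CHW -> HWC for OpenCV
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, size, interpolation=cv2.INTER_AREA)
    return resized.astype(np.float32) / 255.0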
import os
from time import sleep

import numpy as np
import gym
from gym import error, logger, spaces
from gym.utils import seeding
import vizdoom
from vizdoom import (DoomGame, Mode, ScreenResolution,
                     ViZDoomErrorException, ViZDoomUnexpectedExitException)

# Project-local helpers assumed from the surrounding package: Loader, DoomLock,
# NUM_ACTIONS, RANDOMIZE_MAPS, NO_MONSTERS, DOOM_SETTINGS and its column
# indices (CONFIG, SCENARIO, MAP, DIFFICULTY, ACTIONS).


class DoomEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level):
        self.previous_level = -1
        self.level = level
        self.game = DoomGame()
        self.loader = Loader()
        self.doom_dir = os.path.dirname(os.path.abspath(__file__))
        self._mode = 'algo'  # 'algo' or 'human'
        self.no_render = False  # To disable double rendering in human mode
        self.viewer = None
        self.is_initialized = False  # Indicates that reset() has been called
        self.curr_seed = 0
        self.lock = (DoomLock()).get_lock()
        # self.action_space = spaces.Discrete(43)  # used in the old code
        self.action_space = spaces.MultiBinary(NUM_ACTIONS)
        self.allowed_actions = list(range(NUM_ACTIONS))
        self.screen_height = 120
        self.screen_width = 160
        self.screen_resolution = ScreenResolution.RES_160X120
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.screen_height, self.screen_width, 3),
                                            dtype=np.uint8)
        self.seed()
        self._configure()

    def _configure(self, lock=None, **kwargs):
        if 'screen_resolution' in kwargs:
            logger.warn('Deprecated - Screen resolution must now be set using a wrapper. '
                        'See documentation for details.')
        # Multiprocessing lock
        if lock is not None:
            self.lock = lock

    def _load_level(self):
        # Close the current game if it is already initialized
        if self.is_initialized:
            self.is_initialized = False
            self.game.close()
            self.game = DoomGame()

        # Customized level
        if getattr(self, '_customize_game', None) is not None and callable(self._customize_game):
            self.level = -1
            self._customize_game()
        else:
            # Loading paths
            if not self.is_initialized:
                self.game.set_vizdoom_path(self.loader.get_vizdoom_path())
                self.game.set_doom_game_path(self.loader.get_freedoom_path())

            # Common settings
            self.game.load_config(os.path.join(self.doom_dir,
                                               'assets/%s' % DOOM_SETTINGS[self.level][CONFIG]))
            self.game.set_doom_scenario_path(
                self.loader.get_scenario_path(DOOM_SETTINGS[self.level][SCENARIO]))
            if DOOM_SETTINGS[self.level][MAP] != '':
                if RANDOMIZE_MAPS > 0 and 'labyrinth' in DOOM_SETTINGS[self.level][CONFIG].lower():
                    if 'fix' in DOOM_SETTINGS[self.level][SCENARIO].lower():
                        # mapId = 'map%02d' % np.random.randint(1, 23)
                        mapId = 'map%02d' % np.random.randint(4, 8)
                    else:
                        mapId = 'map%02d' % np.random.randint(1, RANDOMIZE_MAPS + 1)
                    print('\t=> Special Config: Randomly Loading Maps. MapID = ' + mapId)
                    self.game.set_doom_map(mapId)
                else:
                    print('\t=> Default map loaded. MapID = ' + DOOM_SETTINGS[self.level][MAP])
                    self.game.set_doom_map(DOOM_SETTINGS[self.level][MAP])
            self.game.set_doom_skill(DOOM_SETTINGS[self.level][DIFFICULTY])
            self.allowed_actions = DOOM_SETTINGS[self.level][ACTIONS]
            self.game.set_screen_resolution(self.screen_resolution)

        self.previous_level = self.level
        self._closed = False

        # Algo mode
        if 'human' != self._mode:
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            self.game.set_window_visible(False)
            self.game.set_mode(Mode.PLAYER)
            self.no_render = False
            try:
                with self.lock:
                    self.game.init()
            except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
                raise error.Error(
                    'VizDoom exited unexpectedly. This is likely caused by a missing multiprocessing lock. '
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
                    'singleton lock in memory.')
            self._start_episode()
            self.is_initialized = True
            return self.game.get_state().screen_buffer.copy()

        # Human mode
        else:
            if NO_MONSTERS:
                print('\t=> Special Config: Monsters Removed.')
                self.game.add_game_args('-nomonsters 1')
            self.game.add_game_args('+freelook 1')
            self.game.set_window_visible(True)
            self.game.set_mode(Mode.SPECTATOR)
            self.no_render = True
            with self.lock:
                self.game.init()
            self._start_episode()
            self.is_initialized = True
            self._play_human_mode()
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def _start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        self.game.new_episode()

    def _play_human_mode(self):
        while not self.game.is_episode_finished():
            self.game.advance_action()
            state = self.game.get_state()
            total_reward = self.game.get_total_reward()
            info = self._get_game_variables(state.game_variables)
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) + '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')

    def old_step(self, action):
        """
        action: a number in the range 0..42.

        Inherited from simontudo and his predecessors; it transforms a numeric
        action from the Discrete(43) space into an indicator (one-hot) array,
        so only one button can be pressed at a time.
        """
        # Convert to a one-hot indicator array
        action_arr = np.zeros(NUM_ACTIONS, dtype=int)
        action_arr[action] = 1
        action = action_arr
        assert self.is_initialized, "Doom env not reset, call .reset()"
        # action is a list of numbers but DoomGame.make_action expects a list of ints
        if len(self.allowed_actions) > 0:
            list_action = [int(action[action_idx]) for action_idx in self.allowed_actions]
        else:
            list_action = [int(x) for x in action]
        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                is_finished = True
                return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), reward, is_finished, info
            else:
                info = self._get_game_variables(state.game_variables)
                info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
                is_finished = False
                return state.screen_buffer.copy(), reward, is_finished, info
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), 0, True, {}

    def step(self, action):
        """
        action: an iterable of length 43 containing indicators of whether each
        button is pressed.
        """
        list_action = [int(x) for x in action]
        try:
            reward = self.game.make_action(list_action)
            state = self.game.get_state()
            if self.game.is_episode_finished():
                info = {"TOTAL_REWARD": round(self.game.get_total_reward(), 4)}
                is_finished = True
                return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), reward, is_finished, info
            else:
                info = self._get_game_variables(state.game_variables)
                info["TOTAL_REWARD"] = round(self.game.get_total_reward(), 4)
                is_finished = False
                return state.screen_buffer.copy(), reward, is_finished, info
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8), 0, True, {}

    def reset(self):
        if self.is_initialized and not self._closed:
            self._start_episode()
            screen_buffer = self.game.get_state().screen_buffer
            if screen_buffer is None:
                raise error.Error(
                    'VizDoom incorrectly initiated. This is likely caused by a missing multiprocessing lock. '
                    'To run VizDoom across multiple processes, you need to pass a lock when you configure the env '
                    '[e.g. env.configure(lock=my_multiprocessing_lock)], or create and close an env '
                    'before starting your processes [e.g. env = gym.make("DoomBasic-v0"); env.close()] to cache a '
                    'singleton lock in memory.')
            return screen_buffer.copy()
        else:
            return self._load_level()

    def render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None  # If we don't None out this reference pyglet becomes unhappy
            return
        try:
            if 'human' == mode and self.no_render:
                return
            state = self.game.get_state()
            img = state.screen_buffer
            # VizDoom returns None if the episode is finished, let's make it
            # an empty image so the recorder doesn't stop
            if img is None:
                img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
            if mode == 'rgb_array':
                return img
            elif mode == 'human':
                from gym.envs.classic_control import rendering
                if self.viewer is None:
                    self.viewer = rendering.SimpleImageViewer()
                self.viewer.imshow(img)
        except vizdoom.vizdoom.ViZDoomIsNotRunningException:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        except AttributeError:
            return np.zeros(shape=self.observation_space.shape, dtype=np.uint8)

    def close(self):
        # Lock required for VizDoom to close processes properly
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    def _get_game_variables(self, state_variables):
        info = {"LEVEL": self.level}
        if state_variables is None:
            return info
        info['KILLCOUNT'] = state_variables[0]
        info['ITEMCOUNT'] = state_variables[1]
        info['SECRETCOUNT'] = state_variables[2]
        info['FRAGCOUNT'] = state_variables[3]
        info['HEALTH'] = state_variables[4]
        info['ARMOR'] = state_variables[5]
        info['DEAD'] = state_variables[6]
        info['ON_GROUND'] = state_variables[7]
        info['ATTACK_READY'] = state_variables[8]
        info['ALTATTACK_READY'] = state_variables[9]
        info['SELECTED_WEAPON'] = state_variables[10]
        info['SELECTED_WEAPON_AMMO'] = state_variables[11]
        info['AMMO1'] = state_variables[12]
        info['AMMO2'] = state_variables[13]
        info['AMMO3'] = state_variables[14]
        info['AMMO4'] = state_variables[15]
        info['AMMO5'] = state_variables[16]
        info['AMMO6'] = state_variables[17]
        info['AMMO7'] = state_variables[18]
        info['AMMO8'] = state_variables[19]
        info['AMMO9'] = state_variables[20]
        info['AMMO0'] = state_variables[21]
        return info
import itertools as it
from time import sleep

import numpy as np
import gym
from gym import error, spaces
from gym.utils import seeding, EzPickle
from gym.envs.classic_control import rendering
import tqdm
from vizdoom import (DoomGame, Mode, ScreenResolution,
                     ViZDoomErrorException, ViZDoomUnexpectedExitException)

# Project-local helpers assumed from the surrounding package: Config, DoomLock,
# action_to_buttons, resolutions, inverted_screen_formats.


class DoomEnv(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level='deathmatch', obs_type='ram'):
        # super(DoomEnv, self).__init__()
        EzPickle.__init__(self, level.split('.')[0], obs_type)
        assert obs_type in ('ram', 'image')
        level = level.split('.')[0]
        Config.init(level)

        self.curr_seed = 0
        self.game = DoomGame()
        self.lock = (DoomLock()).get_lock()

        self.level = level
        self.obs_type = obs_type
        self.tick = 4

        self._mode = 'algo'
        self.is_render_in_human_mode = True
        self.is_game_initialized = False
        self.is_level_loaded = False
        self.viewer = None

        self.set_game(self.level, resolution=None, render=True)

    # todo: add frame skip option by using tick
    def step(self, action):
        reward = 0.0
        if self._mode == 'algo':
            if self.tick:
                reward = self.game.make_action(action, self.tick)
            else:
                reward = self.game.make_action(action)
            # self.game.set_action(action)
            # self.game.advance_action(4)
            # reward = self.game.get_last_reward()
        return self.get_obs(), reward, self.isDone(), self.get_info()

    def reset(self):
        if not self.is_game_initialized:
            self.__load_level()
            self.__init_game()
        self.__start_episode()
        return self.get_obs()

    def render(self, mode='human', **kwargs):
        if 'close' in kwargs and kwargs['close']:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        if mode == 'human' and not self.is_render_in_human_mode:
            return
        img = self.get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    # ================================== GETTERS / SETTERS ==================================

    def set_game(self, level, resolution, render):
        self.__configure()
        self.__load_level(level)
        self.__set_resolution(resolution)
        self.__set_obs_and_ac_space()
        self.__set_player(render)

    def __configure(self, lock=None, **kwargs):
        self.seed()
        if lock is not None:
            self.lock = lock

    def __load_level(self, level=None):
        if level is not None:
            self.level = level.split('.')[0]
            self.is_level_loaded = False
        if self.is_level_loaded:
            return
        if self.is_game_initialized:
            self.is_game_initialized = False
            self.game.close()
            self.game = DoomGame()
        if not self.is_game_initialized:
            self.game.set_vizdoom_path(Config.VIZDOOM_PATH)
            self.game.set_doom_game_path(Config.FREEDOOM_PATH)

        # Common settings
        self.record_file_path = Config.RECORD_FILE_PATH
        self.game.load_config(Config.VIZDOOM_SCENARIO_PATH +
                              Config.DOOM_SETTINGS[self.level][Config.CONFIG])
        self.game.set_doom_scenario_path(Config.VIZDOOM_SCENARIO_PATH +
                                         Config.DOOM_SETTINGS[self.level][Config.SCENARIO])
        if Config.DOOM_SETTINGS[self.level][Config.MAP] != '':
            self.game.set_doom_map(Config.DOOM_SETTINGS[self.level][Config.MAP])
        self.game.set_doom_skill(Config.DOOM_SETTINGS[self.level][Config.DIFFICULTY])
        self.allowed_actions = Config.DOOM_SETTINGS[self.level][Config.ACTIONS]
        self.available_game_variables = Config.DOOM_SETTINGS[self.level][Config.GAME_VARIABLES]
        self.is_level_loaded = True

    def __set_resolution(self, resolution=None):
        if resolution is None:
            resolution = Config.DEFAULT_SCREEN_RESOLUTION
        resolution_l = resolution.lower()
        if resolution_l not in resolutions:
            raise gym.error.Error(
                'Error - The specified resolution "{}" is not supported by Vizdoom.\n'
                'The list of valid resolutions: {}'.format(resolution, resolutions))
        if '_' in resolution_l:
            resolution_l = resolution_l.split('_')[1]
        self.scr_width = int(resolution_l.split("x")[0])
        self.scr_height = int(resolution_l.split("x")[1])
        self.game.set_screen_resolution(
            getattr(ScreenResolution, 'RES_{}X{}'.format(self.scr_width, self.scr_height)))
        self.screen_format = self.game.get_screen_format()
        self.screen_height = self.game.get_screen_height()
        self.screen_width = self.game.get_screen_width()

    def __set_obs_and_ac_space(self):
        if self.obs_type == 'ram':
            self.observation_space = spaces.Box(
                low=0, high=255, dtype=np.uint8,
                shape=(len(self.available_game_variables), ))
        elif self.obs_type == 'image':
            # self.observation_space = self.screen_resized
            self.observation_space = spaces.Box(low=0, high=255,
                                                shape=(self.scr_height, self.scr_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(self.obs_type))

        if self.screen_format in inverted_screen_formats:
            self.dummy_screen = np.zeros(shape=(3, self.scr_height, self.scr_width), dtype=np.uint8)
        else:
            self.dummy_screen = np.zeros(shape=(self.scr_height, self.scr_width, 3), dtype=np.uint8)
        self.dummy_ram = [0] * len(self.available_game_variables)

        # Enumerate every on/off combination of the available buttons.
        self.available_action_codes = [
            list(a) for a in it.product([0, 1], repeat=self.game.get_available_buttons_size())
        ]
        # self.__delete_conflict_actions()
        self.action_space = spaces.MultiDiscrete([len(self.available_action_codes)])

    def __set_player(self, render=True):
        self.game.set_window_visible(render)
        self.game.set_mode(Mode.PLAYER)

    def __init_game(self):
        try:
            with self.lock:
                self.game.init()
                self.is_game_initialized = True
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error('Could not start the game.')

    def __start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        if self.record_file_path:
            self.game.new_episode(self.record_file_path)
        else:
            self.game.new_episode()

    def getState(self):
        return self.game.get_state()

    def getLastAction(self):
        return self.game.get_last_action()

    def getButtonsNames(self, action):
        return action_to_buttons(self.allowed_actions, action)

    def get_info(self):
        info = {
            "LEVEL": self.level,
            "TOTAL_REWARD": round(self.game.get_total_reward(), 4)
        }
        state_variables = self.get_ram()
        for i in range(len(self.available_game_variables)):
            info[self.available_game_variables[i]] = state_variables[i]
        return info

    def get_ram(self):
        if not self.is_game_initialized:
            raise NotImplementedError("The game was not initialized. Run env.reset() first!")
        try:
            ram = self.getState().game_variables
        except AttributeError:
            ram = self.dummy_ram
        return ram

    def get_image(self):
        try:
            screen = self.getState().screen_buffer.copy()
        except AttributeError:
            screen = self.dummy_screen
        return self.invert_screen(screen)

    def get_obs(self):
        if self.obs_type == 'ram':
            return self.get_ram()
        elif self.obs_type == 'image':
            return self.get_image()

    def isDone(self):
        return (self.game.is_episode_finished() or self.game.is_player_dead()
                or self.getState() is None)

    # ========================================================================================

    def invert_screen(self, img):
        if self.screen_format in inverted_screen_formats:
            return np.rollaxis(img, 0, 3)
        else:
            return img

    def __delete_conflict_actions(self):
        if self._mode == 'human':
            return
        action_codes_copy = self.available_action_codes.copy()
        print("Initial actions size: " + str(len(action_codes_copy)))
        for i in tqdm.trange(len(self.available_action_codes)):
            action = self.available_action_codes[i]
            ac_names = action_to_buttons(self.allowed_actions, action)
            # Drop combinations that press opposing buttons at the same time.
            if all(elem in ac_names for elem in ['MOVE_LEFT', 'MOVE_RIGHT']) or \
                    all(elem in ac_names for elem in ['MOVE_BACKWARD', 'MOVE_FORWARD']) or \
                    all(elem in ac_names for elem in ['TURN_RIGHT', 'TURN_LEFT']) or \
                    all(elem in ac_names for elem in ['SELECT_NEXT_WEAPON', 'SELECT_PREV_WEAPON']):
                action_codes_copy.remove(action)
        print("Final actions size: " + str(len(action_codes_copy)))
        self.available_action_codes = action_codes_copy

    def __initHumanPlayer(self):
        self._mode = 'human'
        self.__load_level()
        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.is_render_in_human_mode = False
        self.__init_game()

    def advanceAction(self, tick=0):
        try:
            if tick:
                self.game.advance_action(tick)
            else:
                self.game.advance_action()
            return True
        except ViZDoomUnexpectedExitException:
            return False

    def playHuman(self):
        self.__initHumanPlayer()
        while not self.game.is_episode_finished() and not self.game.is_player_dead():
            self.advanceAction()
            state = self.getState()
            if state is None:
                if self.record_file_path is None:
                    self.game.new_episode()
                else:
                    self.game.new_episode(self.record_file_path)
                state = self.getState()
            total_reward = self.game.get_total_reward()
            info = self.get_info()
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) + '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
import itertools as it

from vizdoom import DoomGame, Mode


class DoomEnvironment:
    def __init__(self, config, visible, skiprate):
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_window_visible(visible)
        self._game.set_mode(Mode.PLAYER)
        self._game.init()

        n_actions = self._game.get_available_buttons_size()
        # Enumerate every on/off combination of the available buttons.
        self._actions = [list(a) for a in it.product([0, 1], repeat=n_actions)]
        self._skiprate = skiprate

    def make_visible(self):
        self._game.close()
        self._game.set_window_visible(True)
        self._game.set_mode(Mode.ASYNC_PLAYER)
        self._game.init()

    def get_n_buttons(self):
        return self._game.get_available_buttons_size()

    def observe(self):
        observation = self._game.get_state()
        screen = observation.screen_buffer
        game_variables = observation.game_variables
        return screen, game_variables

    def step(self, action_id):
        """Takes the id of a single action and performs it for self._skiprate frames.

        :param action_id: index of the action to perform
        :return: reward, is_done
        """
        reward = self._game.make_action(self._actions[action_id], self._skiprate)
        return reward, self._game.is_episode_finished()

    def advance_action_step(self, action_id):
        """Takes the id of a single action, performs it for self._skiprate frames,
        and renders every frame.

        :param action_id: index of the action to perform
        :return: reward, is_done
        """
        reward = 0.0
        for _ in range(self._skiprate):
            reward += self._game.make_action(self._actions[action_id])
            # It is vital to break if done for correct reward shaping
            if self._game.is_episode_finished():
                break
        return reward, self._game.is_episode_finished()

    def reset(self):
        self._game.new_episode()

    def get_episode_reward(self):
        """Careful! Returns the ___non-shaped___ episode reward."""
        return self._game.get_total_reward()
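# A random-rollout sketch against this wrapper (not part of the class above);
# the 'basic.cfg' path is an assumption.
import random

env = DoomEnvironment('basic.cfg', visible=False, skiprate=4)
env.reset()
done = False
while not done:
    action_id = random.randrange(2 ** env.get_n_buttons())
    reward, done = env.step(action_id)
print('episode reward:', env.get_episode_reward())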
        s = game.get_state()
        img = s.image_buffer
        misc = s.game_variables

        # Make a random action and save the reward.
        r = game.make_action(choice(actions))

        # Make a "prolonged" action and skip frames:
        # skiprate = 3
        # r = game.make_action(choice(actions), skiprate)

        # The same could be achieved with:
        # game.set_action(choice(actions))
        # skiprate = 3
        # game.advance_action(skiprate)
        # r = game.get_last_reward()

        print("State #" + str(s.number))
        print("Game Variables:", misc)
        print("Performed action:", game.get_last_action())
        print("Last Reward:", r)
        print("=====================")

        # Sleep some time because processing is too fast to watch.
        if sleep_time > 0:
            sleep(sleep_time)

    print("Episode finished!")
    print("total reward:", game.get_total_reward())
    print("************************")
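# For context, a sketch of the surrounding setup this fragment assumes (the
# config file, action set, and sleep_time value are assumptions based on
# ViZDoom's basic example):
from random import choice
from time import sleep

from vizdoom import DoomGame

game = DoomGame()
game.load_config('basic.cfg')
game.init()

actions = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]  # one-hot button presses
sleep_time = 0.028  # ~35 fps
episodes = 10

for episode in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        pass  # ... loop body as above ...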
import itertools as it

import numpy as np
from tqdm import tqdm
from vizdoom import DoomGame, Mode, ScreenResolution


class DoomScenario:
    """
    DoomScenario class runs instances of Vizdoom according to scenario
    configuration (.cfg) files.

    Scenario configuration files for this project are located in the
    /src/configs/ folder.
    """

    def __init__(self, config_filename):
        '''
        Method initiates Vizdoom with the desired configuration file.
        '''
        self.config_filename = config_filename
        self.game = DoomGame()
        self.game.load_config("configs/" + config_filename)
        self.game.set_window_visible(False)
        self.game.init()

        self.res = (self.game.get_screen_height(), self.game.get_screen_width())
        self.actions = [
            list(a) for a in it.product([0, 1], repeat=self.game.get_available_buttons_size())
        ]
        self.pbar = None
        self.game.new_episode()

    def play(self, action, tics):
        '''
        Method advances the state with the desired action for a number of tics.
        '''
        self.game.set_action(action)
        self.game.advance_action(tics, True)
        if self.pbar:
            self.pbar.update(int(tics))

    def get_processed_state(self, depth_radius, depth_contrast):
        '''
        Method processes the Vizdoom RGB and depth buffer into a composite
        one-channel image that can be used by the Models.

        depth_radius defines how far the depth buffer sees, with 1.0 being as
        far as ViZDoom allows.

        depth_contrast defines how much of the depth buffer is in the final
        processed image as compared to the greyscaled RGB buffer:
        **processed = (1 - depth_contrast) * grey_buffer + depth_contrast * depth_buffer
        '''
        state = self.game.get_state()
        if not self.game.is_episode_finished():
            img = state.screen_buffer  # screen pixels
            screen_buffer = np.array(img).astype('float32') / 255  # shape (3, 120, 160)
        try:
            # Grey scaling
            grey_buffer = np.dot(np.transpose(screen_buffer, (1, 2, 0)),
                                 [0.21, 0.72, 0.07])  # shape (120, 160)

            # Depth radius
            depth_buffer = np.array(state.depth_buffer).astype('float32') / 255
            depth_buffer[(depth_buffer > depth_radius)] = depth_radius  # Effects depth radius
            # Note: depth_buffer_filtered is computed but unused in the blend below.
            depth_buffer_filtered = (depth_buffer - np.amin(depth_buffer)) / (
                np.amax(depth_buffer) - np.amin(depth_buffer))

            # Depth contrast: balance the depth & RGB data
            processed_buffer = ((1 - depth_contrast) * grey_buffer) + \
                               (depth_contrast * (1 - depth_buffer))
            processed_buffer = (processed_buffer - np.amin(processed_buffer)) / (
                np.amax(processed_buffer) - np.amin(processed_buffer))
            processed_buffer = np.round(processed_buffer, 6)
            processed_buffer = processed_buffer.reshape(self.res[-2:])
        except Exception:
            # On a finished episode screen_buffer/depth_buffer are unavailable;
            # fall back to an empty frame.
            processed_buffer = np.zeros(self.res[-2:])
        return processed_buffer

    def run(self, agent, save_replay='', verbose=False, return_data=False):
        '''
        Method runs an instance of DoomScenario.
        '''
        if return_data:
            data_S = []
            data_a = []
        if verbose:
            print("\nRunning Simulation:", self.config_filename)
            self.pbar = tqdm(total=self.game.get_episode_timeout())

        # Initiate new instance
        self.game.close()
        self.game.set_window_visible(False)
        self.game.add_game_args("+vid_forcesurface 1 ")
        self.game.init()
        if save_replay != '':
            self.game.new_episode("../data/replay_data/" + save_replay)
        else:
            self.game.new_episode()

        # Run simulation
        while not self.game.is_episode_finished():
            S = agent.get_state_data(self)
            q = agent.model.online_network.predict(S)
            # 10% of the time, sample from the softmax over Q-values instead of
            # taking the argmax.
            if np.random.random() < 0.1:
                q = np.random.choice(len(q[0]), 1, p=softmax(q[0], 1))[0]
            else:
                q = int(np.argmax(q[0]))
            a = agent.model.predict(self, q)
            if return_data:
                delta = np.zeros((len(self.actions)))
                a_ = np.cast['int'](a)
                delta[a_] = 1
                data_S.append(S.reshape(S.shape[1], S.shape[2], S.shape[3]))
                data_a.append(delta)
            if not self.game.is_episode_finished():
                self.play(a, agent.frame_skips + 1)
            if agent.model.__class__.__name__ == 'HDQNModel' and not self.game.is_episode_finished():
                if q >= len(agent.model.actions):
                    for i in range(agent.model.skill_frame_skip):
                        if not self.game.is_episode_finished():
                            a = agent.model.predict(self, q)
                            self.play(a, agent.frame_skips + 1)
                        else:
                            break

        # Reset agent and return score
        agent.frames = None
        if agent.model.__class__.__name__ == 'HDQNModel':
            agent.model.sub_model_frames = None
        score = self.game.get_total_reward()
        if verbose:
            self.pbar.close()
            print("Total Score:", score)
        if return_data:
            data_S = np.array(data_S)
            data_a = np.array(data_a)
            return [data_S, data_a]
        return score

    def replay(self, filename, verbose=False, doom_like=False):
        '''
        Method runs a replay of the simulations at 800 x 600 resolution.
        '''
        print("\nRunning Replay:", filename)

        # Initiate replay
        self.game.close()
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.add_game_args("+vid_forcesurface 1")
        if doom_like:
            self.game.set_render_hud(True)
            self.game.set_render_minimal_hud(False)
            self.game.set_render_crosshair(False)
            self.game.set_render_weapon(True)
            self.game.set_render_particles(True)
        self.game.init()
        self.game.replay_episode("../data/replay_data/" + filename)

        # Run replay
        while not self.game.is_episode_finished():
            if verbose:
                print("Reward:", self.game.get_last_reward())
            self.game.advance_action()

        # Print score
        score = self.game.get_total_reward()
        print("Total Score:", score)
        self.game.close()

    def apprentice_run(self, test=False):
        '''
        Method runs an apprentice data gathering session.
        '''
        # Initiate new instance
        self.game.close()
        self.game.set_mode(Mode.SPECTATOR)
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.set_ticrate(30)
        self.game.init()
        self.game.new_episode()

        # Run simulation
        while not self.game.is_episode_finished():
            self.game.advance_action()
        self.game.close()
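# run() calls a softmax() that is not defined in this listing; a minimal
# stand-in with the assumed (values, temperature) signature used above:
import numpy as np


def softmax(x, temperature=1.0):
    """Temperature-scaled softmax over a 1-D array."""
    z = np.asarray(x, dtype=np.float64) / temperature
    z = z - z.max()  # subtract the max for numerical stability
    e = np.exp(z)
    return e / e.sum()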
# Every episode, the agent learns from sampled returns
loss = agent.train_model()

# Save the model every 2000 iterations
if t % 2000 == 0:
    print("Save model")
    agent.model.save_weights(model_path, overwrite=True)

state = ""
if t <= agent.observe:
    state = "Observe mode"
else:
    state = "Train mode"

if is_terminated:
    total_reward = game.get_total_reward()
    # Print performance statistics at every episode end
    print("Episode", i, "/ ACTION", action_idx, "/ total reward", total_reward, "/ LOSS", loss)
    with open(REWARDS_FILE, 'a') as fp:
        fp.write('{},{}\n'.format(i, total_reward))

    # Save the agent's performance statistics
    if GAME % agent.stats_window_size == 0 and t > agent.observe:
        print("Update Rolling Statistics")
        agent.mavg_score.append(np.mean(np.array(life_buffer)))
        agent.var_score.append(np.var(np.array(life_buffer)))
        # Reset rolling stats buffer
        life_buffer = []
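# Once training has logged a few episodes, REWARDS_FILE can be read back for a
# learning curve. A small sketch, assuming the two-column 'episode,total_reward'
# CSV format written above; the window size is arbitrary.
import numpy as np

episode_ids, rewards = np.loadtxt(REWARDS_FILE, delimiter=',', unpack=True)
window = 50
moving_avg = np.convolve(rewards, np.ones(window) / window, mode='valid')
print('last smoothed reward:', moving_avg[-1])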
def play(self):
    # Create a DoomGame instance. It will run the game and communicate with you.
    print("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/deepdoomplayer.cfg")
    game.init()
    print("Doom initialized.")

    episodes = 1
    training_steps_per_epoch = 100  # unused in play mode
    sleep_time = 0.100

    train_episodes_finished = 0
    train_rewards = []

    for epoch in range(episodes):
        train_loss = []  # unused in play mode
        game.new_episode()
        while train_episodes_finished < 20:
            sleep(sleep_time)
            if game.is_episode_finished():
                r = game.get_total_reward()
                train_rewards.append(r)
                game.new_episode()
                train_episodes_finished += 1
                self.last_state = None
                self.last_action[1] = 1

            # The first frame must be handled differently.
            if self.last_state is None:
                # last_state will contain the image data from the last self.state_frames frames
                self.last_state = np.stack(
                    tuple(self.convert_image(game.get_state().image_buffer)
                          for _ in range(self.state_frames)),
                    axis=2)
                continue

            reward = game.make_action(
                DeepDoomPlayer.define_keys_to_action_pressed(self.last_action), 7)
            reward *= 0.01

            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                terminal = True
                screen_resized_binary = np.zeros((40, 40))
            else:
                terminal = False
                screen_resized_binary = self.convert_image(imagebuffer)

            # Add a channel dimension and slide the frame window forward.
            screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)
            current_state = np.append(self.last_state[:, :, 1:], screen_resized_binary, axis=2)

            self.last_state = current_state
            self.last_action = self.choose_next_action_only_on_q()

    print(train_episodes_finished, "training episodes played.")
    print("Training results:")
    train_rewards = np.array(train_rewards)
    print("mean:", train_rewards.mean(), "std:", train_rewards.std(),
          "max:", train_rewards.max(), "min:", train_rewards.min())

    # It will be done automatically anyway, but sometimes you need to do it
    # in the middle of the program...
    game.close()
    self.last_state = None
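# convert_image is not shown in this listing; a plausible sketch given the
# 40x40 grayscale frames the method assumes. The grayscale weights and the
# scikit-image dependency are assumptions; it would live on the player class.
import numpy as np
from skimage.transform import resize


def convert_image(image_buffer, size=(40, 40)):
    """Old-API ViZDoom image_buffer (channels-first RGB) to a small grayscale frame."""
    gray = np.dot(np.moveaxis(image_buffer, 0, -1), [0.299, 0.587, 0.114])
    return resize(gray, size, anti_aliasing=True)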
def start(self):
    """
    This will get passed here.
    """
    # Create a DoomGame instance. It will run the game and communicate with you.
    print("Initializing doom...")
    game = DoomGame()
    game.load_config("./examples/config/learningtensorflow.cfg")
    game.init()
    print("Doom initialized.")

    train_rewards = []

    for epoch in range(DeepDoom.episodes):
        print("\nEpoch", epoch)
        train_time = 0
        train_episodes_finished = 0
        train_loss = []

        # Start saving checkpoints after epoch 20
        if epoch > 20:
            if not os.path.exists(DeepDoom.checkpoint_path):
                os.mkdir(DeepDoom.checkpoint_path)
            self.saver.save(self.session, DeepDoom.checkpoint_path, global_step=epoch)

        train_start = time()
        game.new_episode()

        for learning_step in tqdm(range(DeepDoom.training_steps_per_epoch)):
            if game.is_episode_finished():
                r = game.get_total_reward()
                train_rewards.append(r)
                game.new_episode()
                train_episodes_finished += 1
                self.last_state = None

            # The first frame must be handled differently.
            if self.last_state is None:
                # last_state will contain the image data from the last self.state_frames frames
                self.last_state = np.stack(
                    tuple(self.convert_image(game.get_state().image_buffer)
                          for _ in range(self.state_frames)),
                    axis=2)
                continue

            reward = game.make_action(
                DeepDoom.define_keys_to_action_pressed(self.last_action), 7)
            reward *= 0.01

            imagebuffer = game.get_state().image_buffer
            if imagebuffer is None:
                terminal = True
                screen_resized_binary = np.zeros((40, 40))
            else:
                terminal = False
                screen_resized_binary = self.convert_image(imagebuffer)

            # Add a channel dimension and slide the frame window forward.
            screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)
            current_state = np.append(self.last_state[:, :, 1:], screen_resized_binary, axis=2)

            # Store the transition in replay memory, bounded by memory_size.
            self.observations.append(
                (self.last_state, self.last_action, reward, current_state, terminal))
            if len(self.observations) > self.memory_size:
                self.observations.popleft()

            # Only train once done observing
            if len(self.observations) > self.observation_steps:
                self.train()
                self.time += 1

            self.last_state = current_state
            self.last_action = self.choose_next_action()

            # Linearly anneal the exploration rate
            if self.probability_of_random_action > self.final_random_action_prob \
                    and len(self.observations) > self.observation_steps:
                self.probability_of_random_action -= \
                    (self.initial_random_action_prob - self.final_random_action_prob) / self.explore_steps

        print(train_episodes_finished, "training episodes played.")
        print("Training results:")
        train_rewards = np.array(train_rewards)
        train_end = time()
        train_time = train_end - train_start
        mean_loss = np.mean(train_loss)  # note: train_loss is never filled in this loop
        print("mean:", train_rewards.mean(), "std:", train_rewards.std(),
              "max:", train_rewards.max(), "min:", train_rewards.min(),
              "epsilon:", self.probability_of_random_action)
        print("t:", str(round(train_time, 2)) + "s")
        train_rewards = []

    # It will be done automatically anyway, but sometimes you need to do it
    # in the middle of the program...
    game.close()
    self.last_state = None
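# The in-loop epsilon decrement above implements a linear schedule; here is the
# same schedule as a pure function, with parameter names taken from the
# attributes used above (default values are assumptions):
def annealed_epsilon(step, initial_random_action_prob=1.0,
                     final_random_action_prob=0.05, explore_steps=100000):
    """Linearly anneal epsilon from initial to final over explore_steps."""
    fraction = min(step / float(explore_steps), 1.0)
    return initial_random_action_prob - fraction * (
        initial_random_action_prob - final_random_action_prob)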
import os

import numpy as np
import gym
from gym import spaces
from gym.envs.classic_control import rendering
from vizdoom import DoomGame, ScreenResolution

# CONFIGS is assumed defined alongside this class, mapping a level index to
# (config filename, number of buttons).


class VizdoomEnv(gym.Env):
    def __init__(self, level):
        # init game
        self.game = DoomGame()
        self.game.set_screen_resolution(ScreenResolution.RES_640X480)
        scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
        self.game.load_config(os.path.join(scenarios_dir, CONFIGS[level][0]))
        self.game.set_window_visible(False)
        self.game.init()
        self.state = None

        self.action_space = spaces.Discrete(CONFIGS[level][1])
        self.observation_space = spaces.Box(
            0, 255,
            (self.game.get_screen_height(), self.game.get_screen_width(),
             self.game.get_screen_channels()),
            dtype=np.uint8)
        self.viewer = None

    def step(self, action):
        # convert action to the vizdoom action space (one-hot)
        act = np.zeros(self.action_space.n)
        act[action] = 1
        act = np.uint8(act)
        act = act.tolist()

        reward = self.game.make_action(act)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        info = {}
        if not done:
            observation = np.transpose(state.screen_buffer, (1, 2, 0))
        else:
            observation = np.uint8(np.zeros(self.observation_space.shape))
            info = {"episode": {"r": self.game.get_total_reward()}}
        return observation, reward, done, info

    def seed(self, seed):
        self.game.set_seed(seed)

    def close(self):
        self.game.close()

    def reset(self):
        self.game.new_episode()
        self.state = self.game.get_state()
        img = self.state.screen_buffer
        return np.transpose(img, (1, 2, 0))

    def render(self, mode='human'):
        try:
            img = self.game.get_state().screen_buffer
            img = np.transpose(img, [1, 2, 0])
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)
        except AttributeError:
            pass

    @staticmethod
    def get_keys_to_action():
        # you can press only one key at a time!
        keys = {
            (): 2,
            (ord('a'), ): 0,
            (ord('d'), ): 1,
            (ord('w'), ): 3,
            (ord('s'), ): 4,
            (ord('q'), ): 5,
            (ord('e'), ): 6
        }
        return keys
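# A minimal random-agent rollout against this wrapper; the level index 0 is an
# assumption about CONFIGS (e.g. the basic scenario).
env = VizdoomEnv(level=0)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print('episode reward:', info['episode']['r'])
env.close()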