def __init__(self, vizdoom_dir=os.path.expanduser('~/ViZDoom'),
             window_visible=True, scenario='basic', skipcount=10,
             resolution_width=640, sleep=0.0, seed=None):
    self.skipcount = skipcount
    self.sleep = sleep

    sys.path.append(os.path.join(vizdoom_dir, "examples/python"))
    from vizdoom import DoomGame
    from vizdoom import ScreenFormat
    from vizdoom import ScreenResolution

    game = DoomGame()

    if seed is not None:
        assert 0 <= seed < 2 ** 16, \
            "ViZDoom's random seed must fit in an unsigned 16-bit int"
    else:
        # Draw a seed from numpy's random state
        seed = np.random.randint(0, 2 ** 16)
    game.set_seed(seed)

    # Load a config file
    game.load_config(os.path.join(
        vizdoom_dir, 'examples', 'config', scenario + '.cfg'))

    # Replace default relative paths with actual paths
    game.set_vizdoom_path(os.path.join(vizdoom_dir, 'bin/vizdoom'))
    game.set_doom_game_path(
        os.path.join(vizdoom_dir, 'scenarios/freedoom2.wad'))
    game.set_doom_scenario_path(
        os.path.join(vizdoom_dir, 'scenarios', scenario + '.wad'))

    # Set screen settings
    resolutions = {640: ScreenResolution.RES_640X480,
                   320: ScreenResolution.RES_320X240,
                   160: ScreenResolution.RES_160X120}
    game.set_screen_resolution(resolutions[resolution_width])
    game.set_screen_format(ScreenFormat.RGB24)
    game.set_window_visible(window_visible)
    game.set_sound_enabled(window_visible)

    game.init()
    self.game = game

    # Use one-hot actions
    self.n_actions = game.get_available_buttons_size()
    self.actions = []
    for i in range(self.n_actions):
        self.actions.append([i == j for j in range(self.n_actions)])
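# Usage sketch for the constructor above. The enclosing class name is not
# shown in this snippet, so "VizdoomEnv" is a hypothetical stand-in; everything
# else follows the attributes set up in __init__:
env = VizdoomEnv(scenario='basic', window_visible=False)  # hypothetical class name
env.game.new_episode()
while not env.game.is_episode_finished():
    action = env.actions[np.random.randint(env.n_actions)]  # random one-hot action
    reward = env.game.make_action(action, env.skipcount)    # repeat for skipcount tics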
class DoomEnv(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level='deathmatch', obs_type='ram'):
        # super(DoomEnv, self).__init__()
        EzPickle.__init__(self, level.split('.')[0], obs_type)
        assert obs_type in ('ram', 'image')
        level = level.split('.')[0]
        Config.init(level)

        self.curr_seed = 0
        self.game = DoomGame()
        self.lock = (DoomLock()).get_lock()

        self.level = level
        self.obs_type = obs_type
        self.tick = 4
        self._mode = 'algo'

        self.is_render_in_human_mode = True
        self.is_game_initialized = False
        self.is_level_loaded = False

        self.viewer = None

        self.set_game(self.level, resolution=None, render=True)
        print()

    # TODO: add a frame-skip option by using tick
    def step(self, action):
        reward = 0.0
        if self._mode == 'algo':
            if self.tick:
                reward = self.game.make_action(action, self.tick)
            else:
                reward = self.game.make_action(action)
            # Tic-by-tic alternative:
            # self.game.set_action(action)
            # self.game.advance_action(4)
            # reward = self.game.get_last_reward()
        return self.get_obs(), reward, self.isDone(), self.get_info()

    def reset(self):
        if not self.is_game_initialized:
            self.__load_level()
            self.__init_game()
        self.__start_episode()
        return self.get_obs()

    def render(self, mode='human', **kwargs):
        if 'close' in kwargs and kwargs['close']:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        if mode == 'human' and not self.is_render_in_human_mode:
            return
        img = self.get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2 ** 32
        return [self.curr_seed]

    # ================================ GETTERS / SETTERS ================================

    def set_game(self, level, resolution, render):
        self.__configure()
        self.__load_level(level)
        self.__set_resolution(resolution)
        self.__set_obs_and_ac_space()
        self.__set_player(render)

    def __configure(self, lock=None, **kwargs):
        self.seed()
        if lock is not None:
            self.lock = lock

    def __load_level(self, level=None):
        if level is not None:
            self.level = level.split('.')[0]
            self.is_level_loaded = False

        if self.is_level_loaded:
            return
        if self.is_game_initialized:
            self.is_game_initialized = False
            self.game.close()
            self.game = DoomGame()

        if not self.is_game_initialized:
            self.game.set_vizdoom_path(Config.VIZDOOM_PATH)
            self.game.set_doom_game_path(Config.FREEDOOM_PATH)

        # Common settings
        self.record_file_path = Config.RECORD_FILE_PATH
        self.game.load_config(Config.VIZDOOM_SCENARIO_PATH +
                              Config.DOOM_SETTINGS[self.level][Config.CONFIG])
        self.game.set_doom_scenario_path(
            Config.VIZDOOM_SCENARIO_PATH +
            Config.DOOM_SETTINGS[self.level][Config.SCENARIO])

        if Config.DOOM_SETTINGS[self.level][Config.MAP] != '':
            self.game.set_doom_map(
                Config.DOOM_SETTINGS[self.level][Config.MAP])
        self.game.set_doom_skill(
            Config.DOOM_SETTINGS[self.level][Config.DIFFICULTY])

        self.allowed_actions = Config.DOOM_SETTINGS[self.level][Config.ACTIONS]
        self.available_game_variables = \
            Config.DOOM_SETTINGS[self.level][Config.GAME_VARIABLES]

        self.is_level_loaded = True

    def __set_resolution(self, resolution=None):
        if resolution is None:
            resolution = Config.DEFAULT_SCREEN_RESOLUTION
        resolution_l = resolution.lower()
        if resolution_l not in resolutions:
            raise gym.error.Error(
                'Error - The specified resolution "{}" is not supported by ViZDoom.\n'
                'The list of valid resolutions: {}'.format(resolution, resolutions))
        if '_' in resolution_l:
            resolution_l = resolution_l.split('_')[1]
        self.scr_width = int(resolution_l.split("x")[0])
        self.scr_height = int(resolution_l.split("x")[1])
        self.game.set_screen_resolution(
            getattr(ScreenResolution,
                    'RES_{}X{}'.format(self.scr_width, self.scr_height)))

        self.screen_format = self.game.get_screen_format()
        self.screen_height = self.game.get_screen_height()
        self.screen_width = self.game.get_screen_width()

    def __set_obs_and_ac_space(self):
        if self.obs_type == 'ram':
            self.observation_space = spaces.Box(
                low=0, high=255, dtype=np.uint8,
                shape=(len(self.available_game_variables),))
        elif self.obs_type == 'image':
            self.observation_space = spaces.Box(
                low=0, high=255,
                shape=(self.scr_height, self.scr_width, 3), dtype=np.uint8)
        else:
            raise error.Error(
                'Unrecognized observation type: {}'.format(self.obs_type))

        if self.screen_format in inverted_screen_formats:
            self.dummy_screen = np.zeros(
                shape=(3, self.scr_height, self.scr_width), dtype=np.uint8)
        else:
            self.dummy_screen = np.zeros(
                shape=(self.scr_height, self.scr_width, 3), dtype=np.uint8)
        self.dummy_ram = [0] * len(self.available_game_variables)

        self.available_action_codes = [
            list(a) for a in
            it.product([0, 1], repeat=self.game.get_available_buttons_size())
        ]
        # self.__delete_conflict_actions()
        self.action_space = spaces.MultiDiscrete(
            [len(self.available_action_codes)])

    def __set_player(self, render=True):
        self.game.set_window_visible(render)
        self.game.set_mode(Mode.PLAYER)

    def __init_game(self):
        try:
            with self.lock:
                self.game.init()
                self.is_game_initialized = True
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error('Could not start the game.')

    def __start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        if self.record_file_path:
            self.game.new_episode(self.record_file_path)
        else:
            self.game.new_episode()
        return

    def getState(self):
        return self.game.get_state()

    def getLastAction(self):
        return self.game.get_last_action()

    def getButtonsNames(self, action):
        return action_to_buttons(self.allowed_actions, action)

    def get_info(self):
        info = {
            "LEVEL": self.level,
            "TOTAL_REWARD": round(self.game.get_total_reward(), 4)
        }
        state_variables = self.get_ram()
        for i in range(len(self.available_game_variables)):
            info[self.available_game_variables[i]] = state_variables[i]
        return info

    def get_ram(self):
        if not self.is_game_initialized:
            raise RuntimeError(
                "The game was not initialized. Run env.reset() first!")
        try:
            ram = self.getState().game_variables
        except AttributeError:
            ram = self.dummy_ram
        return ram

    def get_image(self):
        try:
            screen = self.getState().screen_buffer.copy()
        except AttributeError:
            screen = self.dummy_screen
        return self.invert_screen(screen)

    def get_obs(self):
        if self.obs_type == 'ram':
            return self.get_ram()
        elif self.obs_type == 'image':
            return self.get_image()

    def isDone(self):
        return (self.game.is_episode_finished()
                or self.game.is_player_dead()
                or self.getState() is None)

    # ==================================================================================

    def invert_screen(self, img):
        if self.screen_format in inverted_screen_formats:
            return np.rollaxis(img, 0, 3)
        else:
            return img

    def __delete_conflict_actions(self):
        if self._mode == 'human':
            return
        action_codes_copy = self.available_action_codes.copy()

        print("Initial actions size: " + str(len(action_codes_copy)))
        for i in tqdm.trange(len(self.available_action_codes)):
            action = self.available_action_codes[i]
            ac_names = action_to_buttons(self.allowed_actions, action)

            # Drop combinations that press opposing buttons at the same time
            if all(elem in ac_names for elem in ['MOVE_LEFT', 'MOVE_RIGHT']) or \
               all(elem in ac_names for elem in ['MOVE_BACKWARD', 'MOVE_FORWARD']) or \
               all(elem in ac_names for elem in ['TURN_RIGHT', 'TURN_LEFT']) or \
               all(elem in ac_names for elem in ['SELECT_NEXT_WEAPON', 'SELECT_PREV_WEAPON']):
                action_codes_copy.remove(action)

        print("Final actions size: " + str(len(action_codes_copy)))
        self.available_action_codes = action_codes_copy

    def __initHumanPlayer(self):
        self._mode = 'human'
        self.__load_level()

        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.is_render_in_human_mode = False

        self.__init_game()

    def advanceAction(self, tick=0):
        try:
            if tick:
                self.game.advance_action(tick)
            else:
                self.game.advance_action()
            return True
        except ViZDoomUnexpectedExitException:
            return False

    def playHuman(self):
        self.__initHumanPlayer()

        while not self.game.is_episode_finished() and not self.game.is_player_dead():
            self.advanceAction()

            state = self.getState()
            if state is None:
                if self.record_file_path is None:
                    self.game.new_episode()
                else:
                    self.game.new_episode(self.record_file_path)
                state = self.getState()

            total_reward = self.game.get_total_reward()
            info = self.get_info()
            info["TOTAL_REWARD"] = round(total_reward, 4)

            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return
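# A minimal usage sketch for DoomEnv, assuming the Config / DoomLock module
# dependencies it relies on are in place. Actions are passed to step() as
# multi-button 0/1 lists, exactly as stored in available_action_codes:
import random

env = DoomEnv(level='basic', obs_type='image')
obs = env.reset()
done = False
while not done:
    action = random.choice(env.available_action_codes)  # one button combination
    obs, reward, done, info = env.step(action)
env.close()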
game.load_config("../../examples/config/basic.cfg") # game.load_config("../../examples/config/deadly_corridor.cfg") # game.load_config("../../examples/config/deathmatch.cfg") # game.load_config("../../examples/config/defend_the_center.cfg") # game.load_config("../../examples/config/defend_the_line.cfg") # game.load_config("../../examples/config/health_gathering.cfg") # game.load_config("../../examples/config/my_way_home.cfg") # game.load_config("../../examples/config/predict_position.cfg") # game.load_config("../../examples/config/take_cover.cfg") # Makes the screen bigger to see more details. game.set_screen_resolution(ScreenResolution.RES_640X480) game.init() # Creates all possible actions depending on how many buttons there are. actions_num = game.get_available_buttons_size() actions = [] for perm in it.product([False, True], repeat=actions_num): actions.append(list(perm)) episodes = 10 sleep_time = 0.028 for i in range(episodes): print("Episode #" + str(i + 1)) # Not needed for the first episdoe but the loop is nicer. game.new_episode() while not game.is_episode_finished(): # Gets the state and possibly to something with it
sess = tf.Session(config=config)
K.set_session(sess)

game = DoomGame()
game.load_config("scenarios/deathmatch.cfg")
game.set_sound_enabled(False)
game.set_screen_resolution(ScreenResolution.RES_640X480)
game.set_window_visible(False)
game.init()

game.new_episode()
game_state = game.get_state()
misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
prev_misc = misc

action_size = game.get_available_buttons_size()

img_rows, img_cols = 64, 64
img_channels = 3  # Color channels
trace_length = 4  # Temporal dimension
state_size = (trace_length, img_rows, img_cols, img_channels)

agent = DoubleDQNAgent(state_size, action_size, trace_length)
agent.model = Networks.drqn(state_size, action_size, agent.learning_rate)
agent.target_model = Networks.drqn(state_size, action_size, agent.learning_rate)

s_t = game_state.screen_buffer  # 480 x 640
s_t = preprocessImg(s_t, size=(img_rows, img_cols))
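# preprocessImg is referenced here and in the training loops below, but its
# definition is not part of this snippet. A minimal sketch, assuming a
# skimage-based resize; the `grayscale` flag is hypothetical (this DRQN setup
# keeps 3 colour channels, while the dueling-DQN loop further below reshapes
# to a single channel, so its variant presumably also converts to grayscale):
from skimage import color, transform

def preprocessImg(img, size, grayscale=False):
    if img.ndim == 3 and img.shape[0] == 3:
        img = np.rollaxis(img, 0, 3)   # channel-first buffer -> channel-last
    if grayscale:
        img = color.rgb2gray(img)      # (rows, cols) single-channel image
    return transform.resize(img, size)  # resize; rescales values to [0, 1]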
class ViZDoom(Environment):
    """
    [ViZDoom](https://github.com/mwydmuch/ViZDoom) environment adapter
    (specification key: `vizdoom`).

    Args:
        level (string): ViZDoom configuration file
            (<span style="color:#C00000"><b>required</b></span>).
        include_variables (bool): Whether to include game variables in the state
            (<span style="color:#00C000"><b>default</b></span>: false).
        factored_action (bool): Whether to use a factored action representation
            (<span style="color:#00C000"><b>default</b></span>: false).
        visualize (bool): Whether to visualize the interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 12).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """

    def __init__(self, level, visualize=False, include_variables=False,
                 factored_action=False, frame_skip=12, seed=None):
        from vizdoom import DoomGame, Mode, ScreenFormat, ScreenResolution

        self.config_file = level
        self.include_variables = include_variables
        self.factored_action = factored_action
        self.visualize = visualize
        self.frame_skip = frame_skip

        self.environment = DoomGame()
        self.environment.load_config(self.config_file)
        if self.visualize:
            self.environment.set_window_visible(True)
            self.environment.set_mode(Mode.ASYNC_PLAYER)
        else:
            self.environment.set_window_visible(False)
            self.environment.set_mode(Mode.PLAYER)
        # e.g. CRCGCB, RGB24, GRAY8
        self.environment.set_screen_format(ScreenFormat.RGB24)
        # e.g. RES_320X240, RES_640X480, RES_1920X1080
        self.environment.set_screen_resolution(ScreenResolution.RES_640X480)
        self.environment.set_depth_buffer_enabled(False)
        self.environment.set_labels_buffer_enabled(False)
        self.environment.set_automap_buffer_enabled(False)
        if seed is not None:
            self.environment.set_seed(seed)  # the Python API method is set_seed
        self.environment.init()

        # RGB24 at RES_640X480 yields (height, width, channels) = (480, 640, 3)
        self.state_shape = (480, 640, 3)
        self.num_variables = self.environment.get_available_game_variables_size()
        self.num_buttons = self.environment.get_available_buttons_size()
        # Renamed from `self.actions` to avoid shadowing the actions() method below.
        self.available_actions = [
            tuple(a) for a in itertools.product([0, 1], repeat=self.num_buttons)
        ]

    def __str__(self):
        return super().__str__() + '({})'.format(self.config_file)

    def states(self):
        if self.include_variables:
            return OrderedDict(
                screen=dict(type='float', shape=self.state_shape),
                variables=dict(type='float', shape=self.num_variables))
        else:
            return dict(type='float', shape=self.state_shape)

    def actions(self):
        if self.factored_action:
            return dict(type='bool', shape=self.num_buttons)
        else:
            return dict(type='int', shape=(),
                        num_values=len(self.available_actions))

    def close(self):
        self.environment.close()
        self.environment = None

    def get_states(self):
        state = self.environment.get_state()
        screen = state.screen_buffer.astype(dtype=np.float32) / 255.0
        if self.include_variables:
            return OrderedDict(screen=screen, variables=state.game_variables)
        else:
            return screen

    def reset(self):
        self.environment.new_episode()
        return self.get_states()

    def execute(self, actions):
        if self.factored_action:
            action = np.where(actions, 1.0, 0.0)
        else:
            action = self.available_actions[actions]
        if self.visualize:
            self.environment.set_action(list(action))
            reward = 0.0
            for _ in range(self.frame_skip):
                self.environment.advance_action()
                reward += self.environment.get_last_reward()
        else:
            reward = self.environment.make_action(list(action), self.frame_skip)
        terminal = self.environment.is_episode_finished()
        states = self.get_states()
        return states, terminal, reward
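# A usage sketch for the adapter above (the .cfg path is a placeholder). This
# Tensorforce-style Environment returns (states, terminal, reward) from
# execute():
env = ViZDoom(level='scenarios/basic.cfg', frame_skip=4)
states = env.reset()
terminal = False
while not terminal:
    action = np.random.randint(len(env.available_actions))  # random discrete action
    states, terminal, reward = env.execute(actions=action)
env.close()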
game.init()

game.new_episode()
game_state = game.get_state()
if not game.is_episode_finished():
    labels = game_state.labels_buffer
# if labels is not None:
#     plt.imshow(labels)
#     plt.show()

misc = game_state.game_variables  # [Health]
prev_misc = misc

action_size = game.get_available_buttons_size()  # [Turn Left, Turn Right, Move Forward]
measurement_size = n_measures  # [Health, Medkit, Poison]
timesteps = [1, 2, 4, 8, 16, 32]
goal_size = measurement_size * len(timesteps)

img_rows, img_cols = 84, 84
# Convert the image into black and white
img_channels = 1  # We stack 1 frame by default
if depth_perception:
    img_channels += 1  # extra channel for the depth map
if mask_perception:
    img_channels += 1  # extra channel for the segmented image

state_size = (img_rows, img_cols, img_channels)
agent = DFPAgent(state_size, measurement_size, action_size, timesteps)
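# goal_size above implies a goal vector with one weight per measurement per
# temporal offset. A sketch of how such a vector is commonly built for DFP;
# the weights here are illustrative, not taken from this code:
goal_weights = np.array([0.5, 0.5, 1.0])  # e.g. one weight each for [Health, Medkit, Poison]
goal = np.tile(goal_weights, len(timesteps))
assert goal.shape[0] == goal_size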
class VizDoomEnv(Env):
    '''
    Wrapper for ViZDoom to use as an OpenAI gym environment.
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, cfg_name, repeat=1):
        super().__init__()
        self.game = DoomGame()
        self.game.load_config(f'./slm_lab/env/vizdoom/cfgs/{cfg_name}.cfg')
        self._viewer = None
        self.repeat = repeat  # was hard-coded to 1, silently ignoring the argument
        # TODO In future, need to update action to handle (continuous) DELTA
        # buttons using gym's Box space
        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_channels(),
                        self.game.get_screen_height(),
                        self.game.get_screen_width())
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=output_shape, dtype='uint8')
        self.game.init()

    def close(self):
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        if seed is not None:  # set_seed does not accept None
            self.game.set_seed(seed)

    def step(self, action):
        reward = self.game.make_action(list(action), self.repeat)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        # info = self._get_game_variables(state.game_variables)
        info = {}
        if state is not None:
            observation = state.screen_buffer
        else:
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
        return observation, reward, done, info

    def reset(self):
        self.game.new_episode()
        return self.game.get_state().screen_buffer

    def render(self, mode='human', close=False):
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return
        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(util.to_opencv_image(img))

    def _get_game_variables(self, state_variables):
        info = {}
        if state_variables is not None:
            info['KILLCOUNT'] = state_variables[0]
            info['ITEMCOUNT'] = state_variables[1]
            info['SECRETCOUNT'] = state_variables[2]
            info['FRAGCOUNT'] = state_variables[3]
            info['HEALTH'] = state_variables[4]
            info['ARMOR'] = state_variables[5]
            info['DEAD'] = state_variables[6]
            info['ON_GROUND'] = state_variables[7]
            info['ATTACK_READY'] = state_variables[8]
            info['ALTATTACK_READY'] = state_variables[9]
            info['SELECTED_WEAPON'] = state_variables[10]
            info['SELECTED_WEAPON_AMMO'] = state_variables[11]
            info['AMMO1'] = state_variables[12]
            info['AMMO2'] = state_variables[13]
            info['AMMO3'] = state_variables[14]
            info['AMMO4'] = state_variables[15]
            info['AMMO5'] = state_variables[16]
            info['AMMO6'] = state_variables[17]
            info['AMMO7'] = state_variables[18]
            info['AMMO8'] = state_variables[19]
            info['AMMO9'] = state_variables[20]
            info['AMMO0'] = state_variables[21]
        return info
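# A usage sketch for VizDoomEnv; 'basic' is a placeholder cfg name. The action
# is a random MultiDiscrete sample, i.e. one 0/1 entry per available button:
env = VizDoomEnv('basic', repeat=4)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()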
def train(conf):
    # To measure total training time
    start_time = time.time()

    # Set the seeds for reproducibility
    random.seed(conf.seed)
    np.random.seed(conf.seed)
    tf.set_random_seed(conf.seed)

    # Keep TensorFlow from grabbing all GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    img_rows, img_cols = 64, 64
    # Convert the image into black and white
    img_channels = 4  # We stack 4 frames

    state_size = (img_rows, img_cols, img_channels)
    agent = DoubleDQNAgent(state_size, action_size, conf)

    agent.model = Networks.dueling_dqn(state_size, action_size,
                                       agent.learning_rate)
    agent.target_model = Networks.dueling_dqn(state_size, action_size,
                                              agent.learning_rate)

    x_t = game_state.screen_buffer  # 480 x 640
    x_t = preprocessImg(x_t, size=(img_rows, img_cols))
    s_t = np.stack(([x_t] * 4), axis=2)  # It becomes 64x64x4
    s_t = np.expand_dims(s_t, axis=0)    # 1x64x64x4

    is_terminated = game.is_episode_finished()

    # Start training
    epsilon = agent.initial_epsilon
    GAME = 0
    t = 0
    max_life = 0  # Maximum episode life (proxy for agent performance)
    life = 0

    # Buffers to compute rolling statistics
    life_buffer, ammo_buffer, kills_buffer = [], [], []
    # Buffers for post-hoc statistics
    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0

    while e < episode:
        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])

        # Epsilon-greedy action selection
        action_idx = agent.get_action(s_t)
        a_t[action_idx] = 1

        a_t = a_t.astype(int)
        r_t = game.make_action(a_t.tolist(), agent.frame_per_action)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        # r_t = game.get_last_reward()
        # Each frame gives a reward of 0.1, so 4 frames give 0.4
        score += r_t
        step += 1

        if is_terminated:
            if life > max_life:
                max_life = life
            GAME += 1
            life_buffer.append(life)
            ammo_buffer.append(misc[1])
            kills_buffer.append(misc[0])
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode Finish ", misc)
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer
            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1

        x_t1 = game_state.screen_buffer
        misc = game_state.game_variables

        x_t1 = preprocessImg(x_t1, size=(img_rows, img_cols))
        x_t1 = np.reshape(x_t1, (1, img_rows, img_cols, 1))
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if is_terminated:
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc

        # Save the sample <s, a, r, s'> to the replay memory and decrease epsilon
        agent.replay_memory(s_t, action_idx, r_t, s_t1, is_terminated, t)

        # Do the training
        if t > agent.observe and t % agent.timestep_per_train == 0:
            Q_max, loss = agent.train_replay()

        s_t = s_t1
        t += 1

        # Print progress info
        if t <= agent.observe:
            state = "observe"
        elif t > agent.observe and agent.epsilon > agent.final_epsilon:
            state = "explore"
        else:
            state = "train"

        if is_terminated:
            print("TIME", t, "/ GAME", GAME, "/ STATE", state,
                  "/ EPSILON", agent.epsilon, "/ ACTION", action_idx,
                  "/ REWARD", r_t, "/ Q_MAX %e" % np.max(Q_max),
                  "/ LIFE", max_life, "/ LOSS", loss)

            # Save the agent's performance statistics
            if GAME % agent.stats_window_size == 0 and t > agent.observe:
                print("Update Rolling Statistics")
                agent.mavg_score.append(np.mean(np.array(life_buffer)))
                agent.var_score.append(np.var(np.array(life_buffer)))
                agent.mavg_ammo_left.append(np.mean(np.array(ammo_buffer)))
                agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))

                # Reset rolling stats buffers
                life_buffer, ammo_buffer, kills_buffer = [], [], []

    total_time = time.time() - start_time
    return steps, scores, total_time, kills, ammos
class VizDoom(gym.Env):
    """
    Wraps a VizDoom environment
    """

    def __init__(self, cfg_path, number_maps, scaled_resolution=(42, 42),
                 action_frame_repeat=4, clip=(-1, 1), seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.

        :param cfg_path: name of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg file
        :param scaled_resolution: resolution (height, width) of the observation
            to be returned with each step
        :param action_frame_repeat: how many game tics an action should be active for
        :param clip: range to which the reward returned on each step is clipped
        :param seed: seed for random, used to determine the order in which the
            doom maps are shown
        :param data_augmentation: whether to use data augmentation (adding
            randomly colored, randomly sized boxes to the observation)
        """
        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)
        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        if seed:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of the VizDoom game and initialise it from a
        # scenario config file
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        # assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(available_actions) -
                    set(not_supported_actions))) == len(available_actions)

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size())

        rows = scaled_resolution[1]
        columns = scaled_resolution[0]
        self.observation_space = gym.spaces.Box(0.0, 255.0,
                                                shape=(columns, rows, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the VizDoom observation (a numpy array) into the desired
        resolution and shape.

        :param shape: desired shape in the format (rows, columns)
        :return: resized and rescaled image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape
        # PIL resize has indexing opposite to numpy array
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0),
                              (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment input image with N randomly colored boxes of dimension x by y,
        where N is randomly sampled between 0 and 6 and x and y are randomly
        sampled from between 0.1 and 0.35.

        :param img: input image to be augmented - format (rows, columns, channels)
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        num_blotches = np.random.randint(0, 6)

        for _ in range(num_blotches):
            # locations in [0, 1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim

            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)
        return img

    @staticmethod
    def _resize(img, shape):
        """Resize the specified image.

        :param img: image to resize
        :param shape: desired shape in the format (rows, columns)
        :return: resized image
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found. Install either '
                             'OpenCV or Pillow to support image processing.')
        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)
        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))
        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.

        If there is more than one maze it will randomly select a new maze.

        :return: initial observation of the environment as an rgb array in the
            format (rows, columns, channels)
        """
        if self.number_maps != 0:
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def get_target_idx(self):
        return int(self.env.get_game_variable(GameVariable.USER5))

    def step(self, action):
        """Perform the specified action for self.action_frame_repeat tics
        within the environment.

        :param action: the index of the action to perform. The actions are
            specified when the cfg is created. The defaults are
            "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, height, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = self.env.make_action(list(one_hot_action),
                                      self.action_frame_repeat)
        done = self.env.is_episode_finished()
        # The state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """Perform the specified action for self.action_frame_repeat tics
        within the environment, recording frames along the way.

        :param action: the index of the action to perform. The actions are
            specified when the cfg is created. The defaults are
            "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the path to save the image of the environment to
        :param record_shape: the shape of the image to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, height, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = 0
        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(list(one_hot_action), 2)
            env_state = self.env.get_state()
            if env_state:
                self._rgb_array = self.env.get_state().screen_buffer
                imageio.imwrite(
                    os.path.join(record_path,
                                 str(datetime.datetime.now()) + ".png"),
                    self._process_image(record_shape))

        done = self.env.is_episode_finished()
        # The state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.

        Used for vectorising the environment, for example as used by
        Stable Baselines.

        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat)
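# A minimal usage sketch for the wrapper above; the cfg path is a placeholder
# and the rollout just samples the discrete action space at random:
env = VizDoom('scenarios/my_maze.cfg', number_maps=1)  # placeholder cfg path
obs = env.reset()
done = False
while not done:
    obs, reward, done, _ = env.step(env.action_space.sample())
env.close()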
def train(conf):
    # To measure total training time
    start_time = time.time()

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()
    agent = RandomAgent(action_size, conf)

    # Start training
    GAME = 0
    t = 0
    max_life = 0  # Maximum episode life (proxy for agent performance)
    life = 0

    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0

    while e < episode:
        r_t = 0
        a_t = np.zeros([action_size])

        action_idx = agent.select_action()
        a_t[action_idx] = 1

        a_t = a_t.astype(int)
        r_t = game.make_action(a_t.tolist(), 4)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        score += r_t
        step += 1

        if is_terminated:
            if life > max_life:
                max_life = life
            GAME += 1
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode Finish ", misc)
            # print(scores)
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer
            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1

        misc = game_state.game_variables
        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if is_terminated:
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc
        t += 1

    total_time = time.time() - start_time
    return steps, scores, total_time, kills, ammos
    # return steps, returns, total_time
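# RandomAgent is used above but defined elsewhere. A minimal sketch matching
# how train() calls it; the no-op shape_reward is an assumption for a random
# baseline, not something this snippet pins down:
class RandomAgent:
    def __init__(self, action_size, conf):
        self.action_size = action_size
        self.conf = conf

    def select_action(self):
        return np.random.randint(self.action_size)  # uniform random action index

    def shape_reward(self, r_t, misc, prev_misc, t):
        return r_t  # no shaping (assumption)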
class VizDoomGym(gym.Env):
    """
    Wraps a VizDoom environment
    """

    def __init__(self):
        raise NotImplementedError

    def _init(self, mission_file: str, scaled_resolution: list):
        """
        :param mission_file: name of the mission (.cfg) to run
        :param scaled_resolution: resolution (height, width) of the video
            frames to run training on
        """
        super(VizDoomGym, self).__init__()
        self.mission_file = mission_file
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.mission_file)

        self.deathmatch = True
        # Distance the agent needs to travel per time-step, otherwise we penalise
        self.distance_threshold = 15

        self.prev_properties = None
        self.properties = None

        self.cum_kills = np.array([0])

        # Create an instance of the VizDoom game and initialise it from a
        # scenario config file
        self.env = DoomGame()
        self.env.load_config(self.mission_file)
        self.env.set_window_visible(False)
        self.env.set_screen_format(ScreenFormat.RGB24)
        if self.deathmatch:
            self.env.add_game_args("-deathmatch")
        self.env.set_doom_skill(4)
        self._action_frame_repeat = 4
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        self.available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(self.available_actions) - set(not_supported_actions))) \
            == len(self.available_actions)

        self.metadata['render_modes'] = ['rgb_array']

        # Allow only one button to be pressed at a given step; the last button
        # (sprint) is excluded here and always held down in sub_step.
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size() - 1)

        self.rows = scaled_resolution[0]
        self.columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(low=0.0, high=1.0,
                                                shape=(self.rows, self.columns, 3),
                                                dtype=np.float32)

        self._rgb_array = None
        self.steps = 0
        self.global_steps = 0
        self.reset()

    def _process_image(self, img):
        # PIL resize has indexing opposite to numpy array
        img = np.array(Image.fromarray(img).resize((self.columns, self.rows)))
        img = img.astype(np.float32)
        img = img / 255.0
        return img

    def update_game_variables(self):
        """
        Check and update game variables.
        """
        # Read game variables
        new_v = {k: self.env.get_game_variable(v)
                 for k, v in game_variables.items()}
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {k: (int(v) if v.is_integer() else float(v))
                 for k, v in new_v.items()}

        health = new_v['health']
        armor = new_v['armor']

        # Check game variables
        assert 0 <= health <= 200 or health < 0 and self.env.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)

        # Update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_reward(self):
        """
        Update reward.
        """
        # We need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        reward = 0

        # Kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.cum_kills += d
            reward += d * default_reward_values['KILL']

        # Death
        if self.env.is_player_dead():
            reward += default_reward_values['DEATH']

        # Suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            reward += default_reward_values['SUICIDE']

        # Found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                reward += default_reward_values['MEDIKIT']
            else:
                reward += default_reward_values['INJURED']

        # Found / lost armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                reward += default_reward_values['ARMOR']

        # Found / lost ammo
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if d != 0:
            if d > 0:
                reward += default_reward_values['AMMO']
            else:
                reward += default_reward_values['USE_AMMO']

        # Distance
        # turn_left = (Button.TURN_LEFT == self.available_actions[action])
        # turn_right = (Button.TURN_RIGHT == self.available_actions[action])
        # if not (turn_left or turn_right):
        diff_x = self.properties['position_x'] - self.prev_properties['position_x']
        diff_y = self.properties['position_y'] - self.prev_properties['position_y']
        distance = np.sqrt(diff_x ** 2 + diff_y ** 2)
        if distance > self.distance_threshold:
            reward += default_reward_values['DISTANCE'] * distance
        else:
            reward += default_reward_values['STANDSTILL']

        # Living
        reward += default_reward_values['LIVING']

        return reward

    # def increase_difficulty(self):
    #     self.curr_skill += 1
    #     self.env.close()
    #     self.env.set_doom_skill(self.curr_skill)
    #     self.env.init()
    #     print('changing skill to', self.curr_skill)

    # def update_map(self):
    #     self.map_level += 1
    #     map_str = 'map0' + str(self.map_level)
    #     # go with initial wad file if there's still maps on it
    #     self.env.close()
    #     self.env.set_doom_map(map_str)
    #     self.env.init()

    def sub_reset(self):
        """Reset environment"""
        self.steps = 0
        self.cum_kills = np.array([0])
        self.prev_properties = None
        self.properties = None
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation

    def reset(self):
        observation = self.sub_reset()
        return observation

    def sub_step(self, action):
        """Take step"""
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        # ALWAYS SPRINTING
        one_hot_action = np.append(one_hot_action, [1])
        assert len(one_hot_action) == len(self.env.get_available_buttons())

        _ = self.env.make_action(list(one_hot_action), self._action_frame_repeat)

        self.update_game_variables()

        if self.steps > 1:
            reward = self.update_reward()
        else:
            reward = 0

        self.steps += 1
        self.global_steps += 1
        done = self.env.is_episode_finished()
        # The state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)

        return observation, reward, done

    def step(self, action):
        observation, reward, done = self.sub_step(action)
        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def seed(self, seed=None):
        """Seed"""
        if seed:
            self.env.set_seed(seed)

    def render(self, mode='human'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError