Example #1
    def __init__(self, vizdoom_dir=os.path.expanduser('~/ViZDoom'),
                 window_visible=True, scenario='basic', skipcount=10,
                 resolution_width=640, sleep=0.0, seed=None):

        self.skipcount = skipcount
        self.sleep = sleep

        sys.path.append(os.path.join(vizdoom_dir, "examples/python"))
        from vizdoom import DoomGame
        from vizdoom import ScreenFormat
        from vizdoom import ScreenResolution

        game = DoomGame()

        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ViZDoom's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        game.set_seed(seed)

        # Load a config file
        game.load_config(os.path.join(
            vizdoom_dir, "examples", 'config', scenario + '.cfg'))

        # Replace default relative paths with actual paths
        game.set_vizdoom_path(os.path.join(vizdoom_dir, "bin/vizdoom"))
        game.set_doom_game_path(
            os.path.join(vizdoom_dir, 'scenarios/freedoom2.wad'))
        game.set_doom_scenario_path(
            os.path.join(vizdoom_dir, 'scenarios', scenario + '.wad'))

        # Set screen settings
        resolutions = {640: ScreenResolution.RES_640X480,
                       320: ScreenResolution.RES_320X240,
                       160: ScreenResolution.RES_160X120}
        game.set_screen_resolution(resolutions[resolution_width])
        game.set_screen_format(ScreenFormat.RGB24)
        game.set_window_visible(window_visible)
        game.set_sound_enabled(window_visible)

        game.init()
        self.game = game

        # Use one-hot actions
        self.n_actions = game.get_available_buttons_size()
        self.actions = []
        for i in range(self.n_actions):
            self.actions.append([i == j for j in range(self.n_actions)])
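
The one-hot loop above builds, for each button index, a boolean list with exactly one True entry. A quick standalone check of that construction (the button count of 3 is hypothetical):

n_actions = 3  # hypothetical button count
actions = [[i == j for j in range(n_actions)] for i in range(n_actions)]
# [[True, False, False], [False, True, False], [False, False, True]]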
Example #2
class DoomEnv(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 35
    }

    def __init__(self, level='deathmatch', obs_type='ram'):
        # super(DoomEnv, self).__init__()
        EzPickle.__init__(self, level.split('.')[0], obs_type)
        assert obs_type in ('ram', 'image')
        level = level.split('.')[0]
        Config.init(level)

        self.curr_seed = 0
        self.game = DoomGame()
        self.lock = (DoomLock()).get_lock()

        self.level = level
        self.obs_type = obs_type
        self.tick = 4

        self._mode = 'algo'

        self.is_render_in_human_mode = True
        self.is_game_initialized = False
        self.is_level_loaded = False

        self.viewer = None

        self.set_game(self.level, resolution=None, render=True)
        print()

    # todo: add frame skip option by using tick
    def step(self, action):
        reward = 0.0
        # self.tick = 4
        if self._mode == 'algo':
            if self.tick:
                reward = self.game.make_action(action, self.tick)
            else:
                reward = self.game.make_action(action)

            # self.game.set_action(action)
            # self.game.advance_action(4)
            # reward = self.game.get_last_reward()

        return self.get_obs(), reward, self.isDone(), self.get_info()

    def reset(self):
        if not self.is_game_initialized:
            self.__load_level()
            self.__init_game()

        self.__start_episode()
        return self.get_obs()

    def render(self, mode='human', **kwargs):
        if 'close' in kwargs and kwargs['close']:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if mode == 'human' and not self.is_render_in_human_mode:
            return
        img = self.get_image()

        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def close(self):
        with self.lock:
            self.game.close()

    def seed(self, seed=None):
        self.curr_seed = seeding.hash_seed(seed) % 2**32
        return [self.curr_seed]

    # ================================== GETTERS SETTERS ===============================================================
    def set_game(self, level, resolution, render):
        self.__configure()
        self.__load_level(level)
        self.__set_resolution(resolution)
        self.__set_obs_and_ac_space()
        self.__set_player(render)

    def __configure(self, lock=None, **kwargs):
        self.seed()
        if lock is not None:
            self.lock = lock

    def __load_level(self, level=None):
        if level is not None:
            self.level = level.split('.')[0]
            self.is_level_loaded = False

        if self.is_level_loaded:
            return
        if self.is_game_initialized:
            self.is_game_initialized = False
            self.game.close()
            self.game = DoomGame()

        if not self.is_game_initialized:
            self.game.set_vizdoom_path(Config.VIZDOOM_PATH)
            self.game.set_doom_game_path(Config.FREEDOOM_PATH)

        # Common settings
        self.record_file_path = Config.RECORD_FILE_PATH
        self.game.load_config(Config.VIZDOOM_SCENARIO_PATH +
                              Config.DOOM_SETTINGS[self.level][Config.CONFIG])
        self.game.set_doom_scenario_path(
            Config.VIZDOOM_SCENARIO_PATH +
            Config.DOOM_SETTINGS[self.level][Config.SCENARIO])

        if Config.DOOM_SETTINGS[self.level][Config.MAP] != '':
            self.game.set_doom_map(
                Config.DOOM_SETTINGS[self.level][Config.MAP])
        self.game.set_doom_skill(
            Config.DOOM_SETTINGS[self.level][Config.DIFFICULTY])

        self.allowed_actions = Config.DOOM_SETTINGS[self.level][Config.ACTIONS]
        self.available_game_variables = Config.DOOM_SETTINGS[self.level][
            Config.GAME_VARIABLES]

        self.is_level_loaded = True

    def __set_resolution(self, resolution=None):
        if resolution is None:
            resolution = Config.DEFAULT_SCREEN_RESOLUTION
        resolution_l = resolution.lower()
        if resolution_l not in resolutions:
            raise gym.error.Error(
                'Error - The specified resolution "{}" is not supported by Vizdoom.\n'
                'The list of valid resolutions: {}'.format(resolution, resolutions))
        if '_' in resolution_l:
            resolution_l = resolution_l.split('_')[1]
        self.scr_width = int(resolution_l.split("x")[0])
        self.scr_height = int(resolution_l.split("x")[1])
        self.game.set_screen_resolution(
            getattr(ScreenResolution,
                    'RES_{}X{}'.format(self.scr_width, self.scr_height)))

        self.screen_format = self.game.get_screen_format()
        self.screen_height = self.game.get_screen_height()
        self.screen_width = self.game.get_screen_width()

    def __set_obs_and_ac_space(self):
        if self.obs_type == 'ram':
            self.observation_space = spaces.Box(
                low=0,
                high=255,
                dtype=np.uint8,
                shape=(len(self.available_game_variables), ))
        elif self.obs_type == 'image':
            # self.observation_space = self.screen_resized
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.scr_height,
                                                       self.scr_width, 3),
                                                dtype=np.uint8)
        else:
            raise error.Error('Unrecognized observation type: {}'.format(
                self.obs_type))

        if self.screen_format in inverted_screen_formats:
            self.dummy_screen = np.zeros(shape=(3, self.scr_height,
                                                self.scr_width),
                                         dtype=np.uint8)
        else:
            self.dummy_screen = np.zeros(shape=(self.scr_height,
                                                self.scr_width, 3),
                                         dtype=np.uint8)

        self.dummy_ram = [0] * len(self.available_game_variables)

        self.available_action_codes = [
            list(a)
            for a in it.product([0, 1],
                                repeat=self.game.get_available_buttons_size())
        ]
        # self.__delete_conflict_actions()
        self.action_space = spaces.MultiDiscrete(
            [len(self.available_action_codes)])

    def __set_player(self, render=True):
        self.game.set_window_visible(render)
        self.game.set_mode(Mode.PLAYER)

    def __init_game(self):
        try:
            with self.lock:
                self.game.init()
                self.is_game_initialized = True
        except (ViZDoomUnexpectedExitException, ViZDoomErrorException):
            raise error.Error('Could not start the game.')

    def __start_episode(self):
        if self.curr_seed > 0:
            self.game.set_seed(self.curr_seed)
            self.curr_seed = 0
        if self.record_file_path:
            self.game.new_episode(self.record_file_path)
        else:
            self.game.new_episode()
        return

    def getState(self):
        return self.game.get_state()

    def getLastAction(self):
        return self.game.get_last_action()

    def getButtonsNames(self, action):
        return action_to_buttons(self.allowed_actions, action)

    def get_info(self):
        info = {
            "LEVEL": self.level,
            "TOTAL_REWARD": round(self.game.get_total_reward(), 4)
        }

        state_variables = self.get_ram()
        for i in range(len(self.available_game_variables)):
            info[self.available_game_variables[i]] = state_variables[i]

        return info

    def get_ram(self):
        if not self.is_game_initialized:
            raise NotImplementedError(
                "The game was not initialized. Run env.reset() first!")
        try:
            ram = self.getState().game_variables
        except AttributeError:
            ram = self.dummy_ram
        return ram

    def get_image(self):
        try:
            screen = self.getState().screen_buffer.copy()
        except AttributeError:
            screen = self.dummy_screen
        return self.invert_screen(screen)

    def get_obs(self):
        if self.obs_type == 'ram':
            return self.get_ram()
        elif self.obs_type == 'image':
            return self.get_image()

    def isDone(self):
        return (self.game.is_episode_finished() or self.game.is_player_dead()
                or self.getState() is None)

    # ==================================================================================================

    def invert_screen(self, img):
        if self.screen_format in inverted_screen_formats:
            return np.rollaxis(img, 0, 3)
        else:
            return img

    def __delete_conflict_actions(self):
        if self._mode == 'human':
            return
        action_codes_copy = self.available_action_codes.copy()

        print("Initial actions size: " + str(len(action_codes_copy)))
        for i in tqdm.trange(len(self.available_action_codes)):
            action = self.available_action_codes[i]
            ac_names = action_to_buttons(self.allowed_actions, action)

            if all(elem in ac_names
                   for elem in ['MOVE_LEFT', 'MOVE_RIGHT']) or all(
                       elem in ac_names
                       for elem in ['MOVE_BACKWARD', 'MOVE_FORWARD']) or all(
                           elem in ac_names
                           for elem in ['TURN_RIGHT', 'TURN_LEFT']) or all(
                               elem in ac_names for elem in
                               ['SELECT_NEXT_WEAPON', 'SELECT_PREV_WEAPON']):
                action_codes_copy.remove(action)

        print("Final actions size: " + str(len(action_codes_copy)))
        self.available_action_codes = action_codes_copy

    def __initHumanPlayer(self):
        self._mode = 'human'
        self.__load_level()

        self.game.add_game_args('+freelook 1')
        self.game.set_window_visible(True)
        self.game.set_mode(Mode.SPECTATOR)
        self.is_render_in_human_mode = False

        self.__init_game()

    def advanceAction(self, tick=0):
        try:
            if tick:
                self.game.advance_action(tick)
            else:
                self.game.advance_action()
            return True
        except ViZDoomUnexpectedExitException:
            return False

    def playHuman(self):
        self.__initHumanPlayer()

        while not self.game.is_episode_finished(
        ) and not self.game.is_player_dead():
            self.advanceAction()

            state = self.getState()
            if state is None:
                if self.record_file_path is None:
                    self.game.new_episode()
                else:
                    self.game.new_episode(self.record_file_path)
                state = self.getState()

            total_reward = self.game.get_total_reward()
            info = self.get_info()
            info["TOTAL_REWARD"] = round(total_reward, 4)
            print('===============================')
            print('State: #' + str(state.number))
            print('Action: \t' + str(self.game.get_last_action()) +
                  '\t (=> only allowed actions)')
            print('Reward: \t' + str(self.game.get_last_reward()))
            print('Total Reward: \t' + str(total_reward))
            print('Variables: \n' + str(info))
            sleep(0.02857)  # 35 fps = 0.02857 sleep between frames
        print('===============================')
        print('Done')
        return
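
A minimal driver for the DoomEnv wrapper above might look like the following sketch (hypothetical usage; it assumes the surrounding module's Config paths resolve and ViZDoom is installed):

env = DoomEnv(level='basic', obs_type='image')
obs = env.reset()
done = False
while not done:
    # step() forwards the action straight to make_action(), which expects a
    # list of button states, so index into the precomputed action codes
    idx = np.random.randint(len(env.available_action_codes))
    obs, reward, done, info = env.step(env.available_action_codes[idx])
env.close()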
Example #3
game.load_config("../../examples/config/basic.cfg")
# game.load_config("../../examples/config/deadly_corridor.cfg")
# game.load_config("../../examples/config/deathmatch.cfg")
# game.load_config("../../examples/config/defend_the_center.cfg")
# game.load_config("../../examples/config/defend_the_line.cfg")
# game.load_config("../../examples/config/health_gathering.cfg")
# game.load_config("../../examples/config/my_way_home.cfg")
# game.load_config("../../examples/config/predict_position.cfg")
# game.load_config("../../examples/config/take_cover.cfg")

# Makes the screen bigger to see more details.
game.set_screen_resolution(ScreenResolution.RES_640X480)
game.init()

# Creates all possible actions depending on how many buttons there are.
actions_num = game.get_available_buttons_size()
actions = []
for perm in it.product([False, True], repeat=actions_num):
    actions.append(list(perm))

episodes = 10
sleep_time = 0.028

for i in range(episodes):
    print("Episode #" + str(i + 1))

    # Not needed for the first episode but the loop is nicer.
    game.new_episode()
    while not game.is_episode_finished():

        # Gets the state and possibly do something with it
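        # (The original snippet breaks off here. A plausible completion,
        # following the standard ViZDoom basic example, follows; the
        # random.choice import is an assumption.)
        state = game.get_state()

        # Makes a random action and gets the reward
        reward = game.make_action(choice(actions))

        print("State #" + str(state.number))
        print("Reward:", reward)

        if sleep_time > 0:
            sleep(sleep_time)

    print("Episode finished. Total reward:", game.get_total_reward())

game.close()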
Example #4
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("scenarios/deathmatch.cfg")
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    img_rows, img_cols = 64, 64
    img_channels = 3  # Color channel
    trace_length = 4  # Temporal Dimension

    state_size = (trace_length, img_rows, img_cols, img_channels)
    agent = DoubleDQNAgent(state_size, action_size, trace_length)

    agent.model = Networks.drqn(state_size, action_size, agent.learning_rate)
    agent.target_model = Networks.drqn(state_size, action_size,
                                       agent.learning_rate)

    s_t = game_state.screen_buffer  # 480 x 640
    s_t = preprocessImg(s_t, size=(img_rows, img_cols))
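
preprocessImg is not defined in this snippet; a plausible sketch, assuming scikit-image and ViZDoom's default channels-first screen buffer, is:

import numpy as np
import skimage.transform

def preprocessImg(img, size):
    # Channels-first (3, 480, 640) -> channels-last (480, 640, 3)
    img = np.rollaxis(img, 0, 3)
    # Resize to the network input resolution, e.g. (64, 64)
    img = skimage.transform.resize(img, size)
    return img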
Example #5
class ViZDoom(Environment):
    """
    [ViZDoom](https://github.com/mwydmuch/ViZDoom) environment adapter (specification key:
    `vizdoom`).

    Args:
        level (string): ViZDoom configuration file
            (<span style="color:#C00000"><b>required</b></span>).
        include_variables (bool): Whether to include game variables to state
            (<span style="color:#00C000"><b>default</b></span>: false).
        factored_action (bool): Whether to use factored action representation
            (<span style="color:#00C000"><b>default</b></span>: false).
        visualize (bool): Whether to visualize interaction
            (<span style="color:#00C000"><b>default</b></span>: false).
        frame_skip (int > 0): Number of times to repeat an action without observing
            (<span style="color:#00C000"><b>default</b></span>: 12).
        seed (int): Random seed
            (<span style="color:#00C000"><b>default</b></span>: none).
    """
    def __init__(self,
                 level,
                 visualize=False,
                 include_variables=False,
                 factored_action=False,
                 frame_skip=12,
                 seed=None):
        from vizdoom import DoomGame, Mode, ScreenFormat, ScreenResolution

        self.config_file = level
        self.include_variables = include_variables
        self.factored_action = factored_action
        self.visualize = visualize
        self.frame_skip = frame_skip

        self.environment = DoomGame()
        self.environment.load_config(self.config_file)
        if self.visualize:
            self.environment.set_window_visible(True)
            self.environment.set_mode(Mode.ASYNC_PLAYER)
        else:
            self.environment.set_window_visible(False)
            self.environment.set_mode(Mode.PLAYER)
        # e.g. CRCGCB, RGB24, GRAY8
        self.environment.set_screen_format(ScreenFormat.RGB24)
        # e.g. RES_320X240, RES_640X480, RES_1920X1080
        self.environment.set_screen_resolution(ScreenResolution.RES_640X480)
        self.environment.set_depth_buffer_enabled(False)
        self.environment.set_labels_buffer_enabled(False)
        self.environment.set_automap_buffer_enabled(False)
        if seed is not None:
            self.environment.set_seed(seed)
        self.environment.init()

        # (height, width, channels) for RES_640X480
        self.state_shape = (480, 640, 3)
        self.num_variables = self.environment.get_available_game_variables_size()
        self.num_buttons = self.environment.get_available_buttons_size()
        # Named available_actions so the list does not shadow the actions() method
        self.available_actions = [
            tuple(a)
            for a in itertools.product([0, 1], repeat=self.num_buttons)
        ]

    def __str__(self):
        return super().__str__() + '({})'.format(self.config_file)

    def states(self):
        if self.include_variables:
            return OrderedDict(screen=dict(type='float',
                                           shape=self.state_shape),
                               variables=dict(type='float',
                                              shape=self.num_variables))
        else:
            return dict(type='float', shape=self.state_shape)

    def actions(self):
        if self.factored_action:
            return dict(type='bool', shape=self.num_buttons)
        else:
            return dict(type='int', shape=(), num_values=len(self.available_actions))

    def close(self):
        self.environment.close()
        self.environment = None

    def get_states(self):
        state = self.environment.get_state()
        screen = state.screen_buffer.astype(dtype=np.float32) / 255.0
        if self.include_variables:
            return OrderedDict(screen=screen, variables=state.game_variables)
        else:
            return screen

    def reset(self):
        self.environment.new_episode()
        return self.get_states()

    def execute(self, actions):
        if self.factored_action:
            action = np.where(actions, 1.0, 0.0)
        else:
            action = self.available_actions[actions]
        if self.visualize:
            self.environment.set_action(action)
            reward = 0.0
            for _ in range(self.frame_skip):
                self.environment.advance_action()
                reward += self.environment.get_last_reward()
        else:
            reward = self.environment.make_action(action, self.frame_skip)
        terminal = self.environment.is_episode_finished()
        states = self.get_states()
        return states, terminal, reward
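
Hypothetical usage of the adapter above (the cfg path is an assumption):

env = ViZDoom(level='scenarios/basic.cfg', visualize=False, frame_skip=4)
states = env.reset()
states, terminal, reward = env.execute(actions=0)  # integer action index
env.close()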
Example #6
    game.init()

    game.new_episode()
    game_state = game.get_state()

    if not game.is_episode_finished():
        labels = game_state.labels_buffer
        # if labels is not None:
        # plt.imshow(labels)
        # plt.show()

    misc = game_state.game_variables  # [Health]
    prev_misc = misc

    action_size = game.get_available_buttons_size()  # [Turn Left, Turn Right, Move Forward]
    measurement_size = n_measures  # [Health, Medkit, Poison]
    timesteps = [1, 2, 4, 8, 16, 32]
    goal_size = measurement_size * len(timesteps)

    img_rows, img_cols = 84, 84
    # Convert image into Black and white
    img_channels = 1
    if depth_perception:
        img_channels += 1  # We stack 1 frame (then we will put 2 other channels: depth map and segmented image)
    if mask_perception:
        img_channels += 1

    state_size = (img_rows, img_cols, img_channels)
    agent = DFPAgent(state_size, measurement_size, action_size, timesteps)
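
The DFP goal vector pairs each of the three measurements with each of the six prediction horizons, hence goal_size = 3 * 6 = 18. One plausible goal, assuming +1 weights for health and medkits and -1 for poison (illustrative values, not the original ones):

import numpy as np

timesteps = [1, 2, 4, 8, 16, 32]
goal = np.array([1.0, 1.0, -1.0] * len(timesteps))  # [Health, Medkit, Poison] per horizon
assert goal.shape[0] == 3 * len(timesteps)  # == goal_size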
Example #7
class VizDoomEnv(Env):
    '''
    Wrapper for vizdoom to use as an OpenAI gym environment.
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, cfg_name, repeat=1):
        super().__init__()
        self.game = DoomGame()
        self.game.load_config(f'./slm_lab/env/vizdoom/cfgs/{cfg_name}.cfg')
        self._viewer = None
        self.repeat = repeat
        # TODO In future, need to update action to handle (continuous) DELTA buttons using gym's Box space
        self.action_space = spaces.MultiDiscrete([2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_channels(), self.game.get_screen_height(), self.game.get_screen_width())
        self.observation_space = spaces.Box(low=0, high=255, shape=output_shape, dtype='uint8')
        self.game.init()

    def close(self):
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        if seed is not None:
            self.game.set_seed(seed)

    def step(self, action):
        reward = self.game.make_action(list(action), self.repeat)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        # info = self._get_game_variables(state.game_variables)
        info = {}
        if state is not None:
            observation = state.screen_buffer
        else:
            observation = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        return observation, reward, done, info

    def reset(self):
        self.game.new_episode()
        return self.game.get_state().screen_buffer

    def render(self, mode='human', close=False):
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return
        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(util.to_opencv_image(img))

    def _get_game_variables(self, state_variables):
        info = {}
        if state_variables is not None:
            info['KILLCOUNT'] = state_variables[0]
            info['ITEMCOUNT'] = state_variables[1]
            info['SECRETCOUNT'] = state_variables[2]
            info['FRAGCOUNT'] = state_variables[3]
            info['HEALTH'] = state_variables[4]
            info['ARMOR'] = state_variables[5]
            info['DEAD'] = state_variables[6]
            info['ON_GROUND'] = state_variables[7]
            info['ATTACK_READY'] = state_variables[8]
            info['ALTATTACK_READY'] = state_variables[9]
            info['SELECTED_WEAPON'] = state_variables[10]
            info['SELECTED_WEAPON_AMMO'] = state_variables[11]
            info['AMMO1'] = state_variables[12]
            info['AMMO2'] = state_variables[13]
            info['AMMO3'] = state_variables[14]
            info['AMMO4'] = state_variables[15]
            info['AMMO5'] = state_variables[16]
            info['AMMO6'] = state_variables[17]
            info['AMMO7'] = state_variables[18]
            info['AMMO8'] = state_variables[19]
            info['AMMO9'] = state_variables[20]
            info['AMMO0'] = state_variables[21]
        return info
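
A hypothetical smoke test for VizDoomEnv, assuming a basic.cfg exists under ./slm_lab/env/vizdoom/cfgs/:

env = VizDoomEnv('basic')
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()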
Example #8
def train(conf):

    # to get total time of training
    start_time = time.time()

    # set the seeds for reproducibility
    random.seed(conf.seed)
    np.random.seed(conf.seed)
    tf.set_random_seed(conf.seed)

    # Avoid Tensorflow eats up GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    img_rows, img_cols = 64, 64
    # Convert image into Black and white
    img_channels = 4  # We stack 4 frames

    state_size = (img_rows, img_cols, img_channels)
    agent = DoubleDQNAgent(state_size, action_size, conf)

    agent.model = Networks.dueling_dqn(state_size, action_size,
                                       agent.learning_rate)
    agent.target_model = Networks.dueling_dqn(state_size, action_size,
                                              agent.learning_rate)

    x_t = game_state.screen_buffer  # 480 x 640
    x_t = preprocessImg(x_t, size=(img_rows, img_cols))
    s_t = np.stack(([x_t] * 4), axis=2)  # It becomes 64x64x4
    s_t = np.expand_dims(s_t, axis=0)  # 1x64x64x4

    is_terminated = game.is_episode_finished()

    # Start training
    epsilon = agent.initial_epsilon
    GAME = 0
    t = 0
    max_life = 0  # Maximum episode life (Proxy for agent performance)
    life = 0

    # Buffer to compute rolling statistics
    life_buffer, ammo_buffer, kills_buffer = [], [], []

    # Buffer for stats a posteriori
    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0

    while e < episode:

        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])

        # Epsilon Greedy
        action_idx = agent.get_action(s_t)
        a_t[action_idx] = 1
        a_t = a_t.astype(int)
        r_t = game.make_action(a_t.tolist(), agent.frame_per_action)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        # r_t = game.get_last_reward()  #each frame we get reward of 0.1, so 4 frames will be 0.4

        score += r_t
        step += 1

        if (is_terminated):
            if (life > max_life):
                max_life = life
            GAME += 1
            life_buffer.append(life)
            ammo_buffer.append(misc[1])
            kills_buffer.append(misc[0])
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode Finish ", misc)
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer

            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1

        x_t1 = game_state.screen_buffer
        misc = game_state.game_variables

        x_t1 = preprocessImg(x_t1, size=(img_rows, img_cols))
        x_t1 = np.reshape(x_t1, (1, img_rows, img_cols, 1))
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if (is_terminated):
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc

        # save the sample <s, a, r, s'> to the replay memory and decrease epsilon
        agent.replay_memory(s_t, action_idx, r_t, s_t1, is_terminated, t)

        # Do the training
        if t > agent.observe and t % agent.timestep_per_train == 0:
            Q_max, loss = agent.train_replay()

        s_t = s_t1
        t += 1

        # print info
        state = ""
        if t <= agent.observe:
            state = "observe"
        elif t > agent.observe and agent.epsilon > agent.final_epsilon:
            state = "explore"
        else:
            state = "train"

        if (is_terminated):
            print("TIME", t, "/ GAME", GAME, "/ STATE", state, \
                  "/ EPSILON", agent.epsilon, "/ ACTION", action_idx, "/ REWARD", r_t, \
                  "/ Q_MAX %e" % np.max(Q_max), "/ LIFE", max_life, "/ LOSS", loss)

            # Save Agent's Performance Statistics
            if GAME % agent.stats_window_size == 0 and t > agent.observe:
                print("Update Rolling Statistics")
                agent.mavg_score.append(np.mean(np.array(life_buffer)))
                agent.var_score.append(np.var(np.array(life_buffer)))
                agent.mavg_ammo_left.append(np.mean(np.array(ammo_buffer)))
                agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))

                # Reset rolling stats buffer
                life_buffer, ammo_buffer, kills_buffer = [], [], []

    total_time = time.time() - start_time

    return steps, scores, total_time, kills, ammos
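
The rolling update s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) keeps the newest frame in channel 0 and drops the oldest. A quick standalone shape check:

import numpy as np

s_t = np.zeros((1, 64, 64, 4))   # current 4-frame stack
x_t1 = np.ones((1, 64, 64, 1))   # newly preprocessed frame
s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)
assert s_t1.shape == (1, 64, 64, 4)
assert (s_t1[..., 0] == 1).all()  # newest frame now sits in channel 0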
Example #9
class VizDoom(gym.Env):
    """
    Wraps a VizDoom environment
    """
    def __init__(self,
                 cfg_path,
                 number_maps,
                 scaled_resolution=(42, 42),
                 action_frame_repeat=4,
                 clip=(-1, 1),
                 seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.
        :param cfg_path: name of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg file
        :param scaled_resolution: resolution (height, width) of the observation to be returned with each step
        :param action_frame_repeat: how many game tics should an action be active
        :param clip: how much the reward returned on each step should be clipped to
        :param seed: seed for random, used to determine the order in which the doom maps are shown.
        :param data_augmentation: bool to determine whether or not to use data augmentation
            (adding randomly colored, randomly sized boxes to observation)
        """

        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)

        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        if seed is not None:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of the VizDoom game, initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        # assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(available_actions) -
                    set(not_supported_actions))) == len(available_actions)

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size())

        rows = scaled_resolution[0]
        columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(0.0,
                                                255.0,
                                                shape=(rows, columns, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the vizdoom environment observation numpy array into the desired resolution and shape
        :param shape: desired shape in the format (rows, columns)
        :return: resized and rescaled image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape
        # PIL resize has indexing opposite to numpy array
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0),
                              (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment input image with N randomly colored boxes of dimension x by y
        where N is randomly sampled between 0 and 5
        and x and y are randomly sampled from between 0.1 and 0.35
        :param img: input image to be augmented - format (rows, columns, channels)
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        num_blotches = np.random.randint(0, 6)

        for _ in range(num_blotches):
            # locations in [0,1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim

            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)
        return img

    @staticmethod
    def _resize(img, shape):
        """Resize the specified image.
        :param img: image to resize
        :param shape: desired shape in the format (rows, columns)
        :return: resized image
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found.'
                             ' Install either '
                             'OpenCV or Pillow to support image processing.')

        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)

        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))

        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.
        If there is more than one maze it will randomly select a new maze.
        :return: initial observation of the environment as an rgb array in the format (rows, columns, channels)
        """
        if self.number_maps != 0:
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def get_target_idx(self):
        return int(self.env.get_game_variable(GameVariable.USER5))

    def step(self, action):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = self.env.make_action(list(one_hot_action),
                                      self.action_frame_repeat)
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the path to save the image of the environment to
        :param record_shape: the shape of the image to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = 0
        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(list(one_hot_action), 2)
            env_state = self.env.get_state()
            if env_state:
                self._rgb_array = self.env.get_state().screen_buffer
                imageio.imwrite(
                    os.path.join(record_path,
                                 str(datetime.datetime.now()) + ".png"),
                    self._process_image(record_shape))

        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array

        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.
        Used for vectorising the environment. For example as used by Stable Baselines
        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat)
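
Hypothetical usage of the wrapper above (the cfg path and map count are assumptions):

env = VizDoom('scenarios/my_maze.cfg', number_maps=1)
obs = env.reset()
obs, reward, done, _ = env.step(env.action_space.sample())
env.close()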
Example #10
def train(conf):
    # to get total time of training
    start_time = time.time()

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    agent = RandomAgent(action_size, conf)

    episode = conf.episode


    # Start training
    GAME = 0
    t = 0
    max_life = 0 # Maximum episode life (Proxy for agent performance)
    life = 0

    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0

    while e < episode:
        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])
        action_idx = agent.select_action()

        a_t[action_idx] = 1
        a_t = a_t.astype(int)

        r_t = game.make_action(a_t.tolist(), 4)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        score += r_t
        step += 1

        if (is_terminated):
            if (life > max_life):
                max_life = life
            GAME += 1
            kills.append(misc[0])
            ammos.append(misc[1])
            print("Episode Finish ", misc)
            # print(scores)
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer

            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1

        misc = game_state.game_variables
        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if (is_terminated):
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc

        t += 1

    total_time = time.time() - start_time

    return steps, scores, total_time, kills, ammos

    # return steps, returns, total_time
Example #11
class VizDoomGym(gym.Env):
    """
    Wraps a VizDoom environment
    """
    def __init__(self):
        raise NotImplementedError

    def _init(self, mission_file: str, scaled_resolution: list):
        """
        :param mission_file: name of the mission (.cfg) to run,
        :param scaled_resolution: resolution (height, width) of the video frames
                                  to run training on
        """
        super(VizDoomGym, self).__init__()
        self.mission_file = mission_file
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)",
                          self.mission_file)

        self.deathmatch = True
        # distance we need the agent to travel per time-step, otherwise we penalise
        self.distance_threshold = 15

        self.prev_properties = None
        self.properties = None

        self.cum_kills = np.array([0])

        # Create an instance of the VizDoom game, initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.mission_file)
        self.env.set_window_visible(False)
        self.env.set_screen_format(ScreenFormat.RGB24)
        if self.deathmatch:
            self.env.add_game_args("-deathmatch")

        self.env.set_doom_skill(4)
        self._action_frame_repeat = 4
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        self.available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        # print(available_actions)
        assert len((set(self.available_actions) - set(not_supported_actions))) \
            == len(self.available_actions)

        self.metadata['render_modes'] = ['rgb_array']

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size() - 1)

        self.rows = scaled_resolution[0]
        self.columns = scaled_resolution[1]
        self.observation_space = gym.spaces.Box(low=0.0,
                                                high=1.0,
                                                shape=(self.rows, self.columns,
                                                       3),
                                                dtype=np.float32)

        self._rgb_array = None
        self.steps = 0
        self.global_steps = 0
        self.reset()

    def _process_image(self, img):
        # PIL resize has indexing opposite to numpy array
        img = np.array(Image.fromarray(img).resize((self.columns, self.rows)))
        img = img.astype(np.float32)
        img = img / 255.0
        return img

    def update_game_variables(self):
        """
        Check and update game variables.
        """
        # read game variables
        new_v = {
            k: self.env.get_game_variable(v)
            for k, v in game_variables.items()
        }
        assert all(v.is_integer() or k[-2:] in ['_x', '_y', '_z']
                   for k, v in new_v.items())
        new_v = {
            k: (int(v) if v.is_integer() else float(v))
            for k, v in new_v.items()
        }
        health = new_v['health']
        armor = new_v['armor']

        # check game variables
        assert 0 <= health <= 200 or health < 0 and self.env.is_player_dead()
        assert 0 <= armor <= 200, (health, armor)

        # update actor properties
        self.prev_properties = self.properties
        self.properties = new_v

    def update_reward(self):
        """
        Update reward.
        """

        # we need to know the current and previous properties
        assert self.prev_properties is not None and self.properties is not None

        reward = 0

        # kill
        d = self.properties['score'] - self.prev_properties['score']
        if d > 0:
            self.cum_kills += d
            reward += d * default_reward_values['KILL']

        # death
        if self.env.is_player_dead():
            reward += default_reward_values['DEATH']

        # suicide
        if self.properties['frag_count'] < self.prev_properties['frag_count']:
            reward += default_reward_values['SUICIDE']

        # found / lost health
        d = self.properties['health'] - self.prev_properties['health']
        if d != 0:
            if d > 0:
                reward += default_reward_values['MEDIKIT']
            else:
                reward += default_reward_values['INJURED']

        # found / lost armor
        d = self.properties['armor'] - self.prev_properties['armor']
        if d != 0:
            if d > 0:
                reward += default_reward_values['ARMOR']

        # found / lost ammo
        d = self.properties['sel_ammo'] - self.prev_properties['sel_ammo']
        if d != 0:
            if d > 0:
                reward += default_reward_values['AMMO']
            else:
                reward += default_reward_values['USE_AMMO']

        # distance
        # turn_left = (Button.TURN_LEFT == self.available_actions[action])
        # turn_right = (Button.TURN_RIGHT == self.available_actions[action])
        # if not (turn_left or turn_right):
        diff_x = self.properties['position_x'] - self.prev_properties[
            'position_x']
        diff_y = self.properties['position_y'] - self.prev_properties[
            'position_y']
        distance = np.sqrt(diff_x**2 + diff_y**2)
        if distance > self.distance_threshold:
            reward += default_reward_values['DISTANCE'] * distance
        else:
            reward += default_reward_values['STANDSTILL']

        # living
        reward += default_reward_values['LIVING']

        return reward

    # def increase_difficulty(self):
    #     self.curr_skill += 1
    #     self.env.close()
    #     self.env.set_doom_skill(self.curr_skill)
    #     self.env.init()
    #     print('changing skill to', self.curr_skill)

    # def update_map(self):
    #     self.map_level += 1
    #     map_str = 'map0' + str(self.map_level)
    #     # go with initial wad file if there's still maps on it
    #     self.env.close()
    #     self.env.set_doom_map(map_str)
    #     self.env.init()

    def sub_reset(self):
        """Reset environment"""
        self.steps = 0
        self.cum_kills = np.array([0])
        self.prev_properties = None
        self.properties = None
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation

    def reset(self):
        observation = self.sub_reset()
        return observation

    def sub_step(self, action):
        """Take step"""
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        # ALWAYS SPRINTING
        one_hot_action = np.append(one_hot_action, [1])
        assert len(one_hot_action) == len(self.env.get_available_buttons())

        _ = self.env.make_action(list(one_hot_action),
                                 self._action_frame_repeat)

        self.update_game_variables()

        if self.steps > 1:
            reward = self.update_reward()
        else:
            reward = 0

        self.steps += 1
        self.global_steps += 1
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image(self._rgb_array)
        return observation, reward, done

    def step(self, action):
        observation, reward, done = self.sub_step(action)
        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def seed(self, seed=None):
        """Seed"""
        if seed:
            self.env.set_seed(seed)

    def render(self, mode='human'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array
        raise NotImplementedError
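
update_game_variables and update_reward reference two module-level dicts that are not shown. A plausible sketch (the key names come from the code above; the values are illustrative, not the original ones):

from vizdoom import GameVariable

game_variables = {
    'score': GameVariable.KILLCOUNT,
    'frag_count': GameVariable.FRAGCOUNT,
    'health': GameVariable.HEALTH,
    'armor': GameVariable.ARMOR,
    'sel_ammo': GameVariable.SELECTED_WEAPON_AMMO,
    'position_x': GameVariable.POSITION_X,
    'position_y': GameVariable.POSITION_Y,
}

default_reward_values = {
    'KILL': 5.0,
    'DEATH': -5.0,
    'SUICIDE': -5.0,
    'MEDIKIT': 1.0,
    'INJURED': -1.0,
    'ARMOR': 0.5,
    'AMMO': 0.5,
    'USE_AMMO': -0.2,
    'DISTANCE': 5e-4,
    'STANDSTILL': -0.05,
    'LIVING': -0.01,
}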