Example #1
def _setup_game(scenario: str, visible: bool):
    game = DoomGame()
    game.load_config('data/vizdoom/%s.cfg' % scenario)
    game.set_doom_scenario_path('data/vizdoom/%s.wad' % scenario)
    game.set_window_visible(visible)
    game.set_sound_enabled(False)
    game.init()
    return game
Example #2
def create_environment():
    game = DoomGame()
    game.load_config("basic.cfg")
    game.set_doom_scenario_path("basic.wad")
    game.init()

    left = [1, 0, 0]
    right = [0, 1, 0]
    shoot = [0, 0, 1]
    possible_actions = [left, right, shoot]
    return game, possible_actions
Example #3
File: deep_q.py Project: bmwant/solenie
import random
import time

from vizdoom import DoomGame


def test_environment():
    game = DoomGame()
    # https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/Deep%20Q%20Learning/Doom/basic.cfg
    game.load_config('basic.cfg')
    game.set_doom_scenario_path('basic.wad')
    game.init()
    shoot = [0, 0, 1]
    left = [1, 0, 0]
    right = [0, 1, 0]
    actions = [shoot, left, right]

    episodes = 10
    for i in range(episodes):
        game.new_episode()
        while not game.is_episode_finished():
            state = game.get_state()
            img = state.screen_buffer
            misc = state.game_variables
            action = random.choice(actions)
            print('Action', action)
            reward = game.make_action(action)
            print('Reward', reward)
            time.sleep(0.02)
        print('Result', game.get_total_reward())
        time.sleep(2)
    game.close()
Example #4
def create_environment():
    game = DoomGame()
    game.load_config('defend_the_center.cfg')
    game.set_doom_scenario_path('defend_the_center.wad')

    game.init()
    possible_actions = np.identity(3, dtype=int).tolist()
    return game, possible_actions
Example #5
File: doom_env.py Project: xjwxjw/async-rl
    def __init__(self,
                 vizdoom_dir=os.path.expanduser('~/ViZDoom'),
                 window_visible=True,
                 scenario='basic',
                 skipcount=10,
                 resolution_width=640,
                 sleep=0.0,
                 seed=None):

        self.skipcount = skipcount
        self.sleep = sleep

        sys.path.append(os.path.join(vizdoom_dir, "examples/python"))
        from vizdoom import DoomGame
        from vizdoom import ScreenFormat
        from vizdoom import ScreenResolution

        game = DoomGame()

        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ViZDoom's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2**16)
        game.set_seed(seed)

        # Load a config file
        game.load_config(
            os.path.join(vizdoom_dir, "examples", 'config', scenario + '.cfg'))

        # Replace default relative paths with actual paths
        game.set_vizdoom_path(os.path.join(vizdoom_dir, "bin/vizdoom"))
        game.set_doom_game_path(
            os.path.join(vizdoom_dir, 'scenarios/freedoom2.wad'))
        game.set_doom_scenario_path(
            os.path.join(vizdoom_dir, 'scenarios', scenario + '.wad'))

        # Set screen settings
        resolutions = {
            640: ScreenResolution.RES_640X480,
            320: ScreenResolution.RES_320X240,
            160: ScreenResolution.RES_160X120
        }
        game.set_screen_resolution(resolutions[resolution_width])
        game.set_screen_format(ScreenFormat.RGB24)
        game.set_window_visible(window_visible)
        game.set_sound_enabled(window_visible)

        game.init()
        self.game = game

        # Use one-hot actions
        self.n_actions = game.get_available_buttons_size()
        self.actions = []
        for i in range(self.n_actions):
            self.actions.append([i == j for j in range(self.n_actions)])
Example #6
File: deep_q.py Project: bmwant/solenie
def create_environment(episode_render=True):
    game = DoomGame()
    game.load_config('basic.cfg')

    game.set_doom_scenario_path('basic.wad')
    game.set_window_visible(episode_render)
    game.init()

    left = [1, 0, 0]
    right = [0, 1, 0]
    shoot = [0, 0, 1]
    possible_actions = [left, right, shoot]

    return game, possible_actions
Example #7
File: doom_env.py Project: BlGene/async-rl
    def __init__(self, vizdoom_dir=os.path.expanduser('~/ViZDoom'),
                 window_visible=True, scenario='basic', skipcount=10,
                 resolution_width=640, sleep=0.0, seed=None):

        self.skipcount = skipcount
        self.sleep = sleep

        sys.path.append(os.path.join(vizdoom_dir, "examples/python"))
        from vizdoom import DoomGame
        from vizdoom import ScreenFormat
        from vizdoom import ScreenResolution

        game = DoomGame()

        if seed is not None:
            assert seed >= 0 and seed < 2 ** 16, \
                "ViZDoom's random seed must be represented by unsigned int"
        else:
            # Use numpy's random state
            seed = np.random.randint(0, 2 ** 16)
        game.set_seed(seed)

        # Load a config file
        game.load_config(os.path.join(
            vizdoom_dir, "examples", 'config', scenario + '.cfg'))

        # Replace default relative paths with actual paths
        game.set_vizdoom_path(os.path.join(vizdoom_dir, "bin/vizdoom"))
        game.set_doom_game_path(
            os.path.join(vizdoom_dir, 'scenarios/freedoom2.wad'))
        game.set_doom_scenario_path(
            os.path.join(vizdoom_dir, 'scenarios', scenario + '.wad'))

        # Set screen settings
        resolutions = {640: ScreenResolution.RES_640X480,
                       320: ScreenResolution.RES_320X240,
                       160: ScreenResolution.RES_160X120}
        game.set_screen_resolution(resolutions[resolution_width])
        game.set_screen_format(ScreenFormat.RGB24)
        game.set_window_visible(window_visible)
        game.set_sound_enabled(window_visible)

        game.init()
        self.game = game

        # Use one-hot actions
        self.n_actions = game.get_available_buttons_size()
        self.actions = []
        for i in range(self.n_actions):
            self.actions.append([i == j for j in range(self.n_actions)])
Example #8
class Scenario:
    """ Extend this abstract scenario class to define new VizDoom scenarios """

    class PerformanceIndicator(Enum):
        FRAMES_ALIVE = auto()
        KILL_COUNT = auto()

    class Task(Enum):
        DEFAULT = auto()

    def __init__(self,
                 name: str,
                 root_dir: str,
                 task: str,
                 trained_task: str,
                 window_visible: bool,
                 n_tasks: int,
                 render_hud: bool,
                 name_addition: str,
                 sound_enabled=False,
                 variable_history_size=5,
                 screen_resolution=ScreenResolution.RES_640X480
                 ):
        self.variable_history_size = variable_history_size

        # Naming
        self.name = name
        self.root_dir = root_dir
        self.name_addition = name_addition

        # Tasks
        self.n_tasks = n_tasks
        self.task = task.lower()
        self.trained_task = trained_task

        # VizDoom
        self.game = DoomGame()
        self.game.load_config(self.config_path)
        self.game.set_doom_scenario_path(self.scenario_path)
        self.game.set_sound_enabled(sound_enabled)
        self.game.set_window_visible(window_visible)
        self.game.set_screen_resolution(screen_resolution)
        self.game.set_render_hud(render_hud)

        # Include the available tasks to the enum
        for task in self.task_list:
            extend_enum(Scenario.Task, task, auto())

    def get_stats_path(self, model_name) -> str:
        if self.trained_task:
            sub_folder = f'test/{self.task}/{model_name}'
        elif self.n_tasks > 1:
            sub_folder = f'multi/{self.name_addition}/{self.task}' if self.name_addition else f'multi/{self.task}'
        else:
            sub_folder = f'train/{self.task}{self.name_addition}'
        return f'{self.root_dir}/statistics/{self.name}/{sub_folder}.json'

    @property
    def config_path(self) -> str:
        """ Path to the configuration file of this scenario """
        return f'{self.root_dir}/scenarios/{self.name}/{self.name}.cfg'

    @property
    def scenario_path(self) -> str:
        """ Path to the IWAD file of the designated task for this scenario """
        return f'{self.root_dir}/scenarios/{self.name}/{self.task}.wad'

    @property
    def statistics_fields(self) -> List[str]:
        """ Default metrics that will be included in the rolling statistics """
        return ['frames_alive', 'duration', 'reward']

    @property
    def task_list(self) -> List[str]:
        """ List of the available tasks for the given scenario """
        raise NotImplementedError

    @property
    def n_spawn_points(self) -> int:
        """ Number of points the agents can spawn at for the given scenario """
        raise NotImplementedError

    def shape_reward(self, reward: float, game_vars: deque) -> float:
        """
        Override this method to include scenario specific reward shaping
        :param reward: The reward from the previous iteration of the game
        :param game_vars: Game variables of the last [variable_history_size] episodes
        """
        return reward

    def additional_stats(self, game_vars: deque) -> Dict:
        """
        Implement this method to provide extra scenario specific statistics
        :param game_vars: Game variables of the last [variable_history_size] episodes
        :return: Dictionary of additional statistics
        """
        return {}

    def get_measurements(self, game_variables: deque, terminated: bool) -> np.ndarray:
        """
        Retrieve the measurement after transition for the direct future prediction algorithm
        :param game_variables: Game variables of the last [variable_history_size] iterations
        :param terminated: Indicator of whether the episode has terminated this iteration
        :return: The relevant measurements of the corresponding scenario
        """
        raise NotImplementedError

    def get_performance_indicator(self) -> PerformanceIndicator:
        """
        Every scenario has a key performance indicator (KPI) to determine the running performance
        of the agent. This is used for dynamic task prioritization, where the loss is scaled
        proportional to the task difficulty. This indicator determines the complexity of the task.
        :return: The type of the performance indicator of the implementing scenario
        """
        raise NotImplementedError
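A minimal sketch of how the abstract class above might be extended. The subclass name, task list, spawn count, and measurement layout are all hypothetical, not taken from the project; it only illustrates which members a concrete scenario has to fill in:

class DefendTheCenterScenario(Scenario):
    """ Hypothetical sketch of a concrete scenario; names and values are illustrative """

    @property
    def task_list(self) -> List[str]:
        return ['DEFAULT']

    @property
    def n_spawn_points(self) -> int:
        return 1

    def get_measurements(self, game_variables: deque, terminated: bool) -> np.ndarray:
        # Use the most recent game-variable reading as the measurement vector
        return np.asarray(game_variables[-1], dtype=np.float32)

    def get_performance_indicator(self) -> Scenario.PerformanceIndicator:
        return Scenario.PerformanceIndicator.KILL_COUNT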
Example #9
    def _vizdoom_setup(self, wad):
        game = DoomGame()
        game.load_config(DEFAULT_CONFIG)
        game.set_doom_scenario_path(wad)
        game.init()
        self.game = game
Example #10
def doom_game():

  game = DoomGame()
  #game.load_config("../scenarios/basic.cfg") 
  game.load_config("../scenarios/defend_the_center.cfg")
  #game.set_doom_map("map01")
  game.set_screen_resolution(ScreenResolution.RES_320X240)
  #game.set_screen_resolution(ScreenResolution.RES_640X480)
  game.set_render_hud(False)
  game.set_render_crosshair(False)
  game.set_render_weapon(True)
  game.set_render_decals(False)
  game.set_render_particles(False)
  #game.add_available_button(Button.MOVE_LEFT)
  #game.add_available_button(Button.MOVE_RIGHT)
  game.add_available_button(Button.TURN_LEFT)
  game.add_available_button(Button.TURN_RIGHT)
  game.add_available_button(Button.ATTACK)
  game.set_episode_timeout(2100)
  game.set_episode_start_time(10)
  game.set_window_visible(True)  # set to False to hide the window
  game.set_sound_enabled(False)
  game.set_living_reward(0.2) # -1 for basic
  game.set_mode(Mode.PLAYER)
  game.init()
  return game
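A brief usage sketch for the factory above (not from the source); the three one-hot actions correspond to the TURN_LEFT, TURN_RIGHT, and ATTACK buttons added in the config:

import random

game = doom_game()
actions = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]  # TURN_LEFT, TURN_RIGHT, ATTACK
game.new_episode()
while not game.is_episode_finished():
    game.make_action(random.choice(actions))
print('Total reward:', game.get_total_reward())
game.close()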
Example #11
    def __init__(self,
                 state_size=(64, 64, 4),
                 scenario='defend_the_center.cfg',
                 record_episode=False):
        game = DoomGame()
        path_to_scenario = os.path.join(current_dir, scenario)
        game.load_config(path_to_scenario)
        game.set_sound_enabled(True)
        game.set_screen_resolution(ScreenResolution.RES_640X480)
        game.set_window_visible(False)
        game.set_available_game_variables(
            [GameVariable.KILLCOUNT, GameVariable.AMMO2, GameVariable.HEALTH])
        game.init()
        self.game = game

        self.skiprate = 4

        self.state = None
        self.state_size = state_size
        self.action_size = self.game.get_available_buttons_size()

        self.steps = 0
        self.life = deque(maxlen=30)
        self.kills = deque(maxlen=30)

        self.record_episode = record_episode
        self.game_rec = []
Example #12
def configure_doom(config_name, episode_timeout):
    game = DoomGame()
    game.load_config(config_name)
    game.set_window_visible(False)
    game.set_labels_buffer_enabled(True)
    game.set_episode_timeout(episode_timeout)
    game.init()
    return game
Example #13
class DoomEnvironment:
    def __init__(self, config, visible, skiprate):
        self._game = DoomGame()
        self._game.load_config(config)
        self._game.set_window_visible(visible)
        self._game.set_mode(Mode.PLAYER)
        self._game.init()

        n_actions = self._game.get_available_buttons_size()
        self._actions = [list(a) for a in it.product([0, 1], repeat=n_actions)]
        self._skiprate = skiprate

    def make_visible(self):
        self._game.close()
        self._game.set_window_visible(True)
        self._game.set_mode(Mode.ASYNC_PLAYER)
        self._game.init()

    def get_n_buttons(self):
        return self._game.get_available_buttons_size()

    def observe(self):
        observation = self._game.get_state()
        screen = observation.screen_buffer
        game_variables = observation.game_variables
        return screen, game_variables

    def step(self, action_id):
        """Takes id of single action and performs it for self.skiprate frames

        :param action_id: index of action to perform
        :return: reward, is_done
        """
        reward = self._game.make_action(self._actions[action_id],
                                        self._skiprate)
        return reward, self._game.is_episode_finished()

    def advance_action_step(self, action_id):
        """Takes id of single action and performs it for self.skiprate frames
        and renders every frame

        :param action_id: index of action to perform
        :return: reward, is_done
        """
        reward = 0.0
        for _ in range(self._skiprate):
            reward += self._game.make_action(self._actions[action_id])
            # it is vital to break if done for correct reward shaping
            if self._game.is_episode_finished():
                break
        return reward, self._game.is_episode_finished()

    def reset(self):
        self._game.new_episode()

    def get_episode_reward(self):
        """Careful! Returns ___non-shaped___ episode reward"""
        return self._game.get_total_reward()
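A short usage sketch for the wrapper above; the config path is a placeholder:

env = DoomEnvironment('basic.cfg', visible=False, skiprate=4)
env.reset()
done = False
while not done:
    screen, game_variables = env.observe()
    reward, done = env.step(0)  # replace 0 with a policy's action index
print('Episode reward:', env.get_episode_reward())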
Example #14
def initialize_vizdoom(config):
    game = DoomGame()
    game.load_config(config)
    game.set_window_visible(False)
    game.set_mode(Mode.PLAYER)
    game.set_screen_format(ScreenFormat.GRAY8)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.init()
    return game
Example #15
def initGameWithParams(configFilePath):
    game = DoomGame()
    game.load_config(configFilePath)
    game.set_window_visible(False)
    game.set_mode(Mode.PLAYER)
    game.set_screen_format(ScreenFormat.CRCGCB)
    game.set_screen_resolution(ScreenResolution.RES_400X225)
    game.init()
    return game
Example #16
 def __init__(self, cfg, name='Player1', color='0', map='map01'):
     game = DoomGame()
     game_args = ""
     game_args += " -name %s" % name
     game_args += " -colorset %s" % color
     game.add_game_args(game_args)
     game.load_config(cfg)
     game.set_death_penalty(1)
     game.set_doom_map(map)
     self.env = game
     self.observation_space = spaces.Box(0, 255, game.get_screen_size())
     self.action_space = spaces.Discrete(game.get_available_buttons_size())
     self.reward_range = None
Example #17
def initialize_vizdoom(CONFIG_FILE_PATH, seed):
    print("Initializing doom...")
    game = DoomGame()
    game.load_config(CONFIG_FILE_PATH)
    game.set_window_visible(True)
    game.set_mode(Mode.PLAYER)
    game.set_screen_format(ScreenFormat.GRAY8)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_labels_buffer_enabled(True)
    game.set_seed(seed)
    game.init()
    print("Doom initialized.")
    return game
Example #18
 def __init__(self,
              cfg,
              name='Player1',
              color='0',
              host=True,
              map='map01',
              dm=True,
              port=None,
              num_players=7):
     game = DoomGame()
     game.load_config(cfg)
     game_args = ""
     if host:
         # This machine will function as a host for a multiplayer game with this many
         # players (including this machine). It will wait for other machines to connect using
         # the -join parameter and then start the game when everyone is connected.
         game_args += "-host %s " % num_players
         # The game (episode) will end after this many minutes have elapsed.
         game_args += "+timelimit 10.0 "
         # Players will respawn automatically after they die.
         game_args += "+sv_forcerespawn 1 "
         # Autoaim is disabled for all players.
         game_args += "+sv_noautoaim 1 "
         # Players will be invulnerable for two second after spawning.
         game_args += "+sv_respawnprotect 1 "
         # Players will be spawned as far as possible from any other players.
         game_args += "+sv_spawnfarthest 1 "
         # Disables crouching.
         game_args += "+sv_nocrouch 1 "
          # Sets the delay between respawns (in seconds).
         game_args += "+viz_respawn_delay 10 "
         game_args += "+viz_nocheat 1"
         if dm:
             # Deathmatch rules are used for the game.
             game_args += " -deathmatch"
         if port is not None:
             game_args += " -port %s" % port
     else:
         game_args += " -join 127.0.0.1"
         if port is not None:
             game_args += ":%s" % port
     game_args += " -name %s" % name
     game_args += " -colorset %s" % color
     game.add_game_args(game_args)
     game.set_death_penalty(1)
     game.set_doom_map(map)
     self.env = game
     self.observation_space = spaces.Box(0, 255, game.get_screen_size())
     self.action_space = spaces.Discrete(game.get_available_buttons_size())
     self.reward_range = None
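A hedged instantiation sketch for the wrapper above; the class name MultiplayerDoomEnv and the cig.cfg path are hypothetical, since the excerpt shows only __init__:

# Hypothetical class name; one host and one joining client on the same machine.
host_env = MultiplayerDoomEnv('cig.cfg', name='Host', host=True, port=5029, num_players=2)
join_env = MultiplayerDoomEnv('cig.cfg', name='Bot1', host=False, port=5029)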
Example #19
 def __init__(self, configuration):
     configuration = CONFIGURATIONS[configuration]
     game = DoomGame()
     game.load_config(
         os.path.join(CONFIGURATIONS_DIR, configuration + ".cfg"))
     game.set_screen_resolution(ScreenResolution.RES_160X120)
     game.set_window_visible(False)
     game.init()
     action_dim = game.get_available_buttons_size()
     action_space = AttrDict()
     action_space.low = [0 for i in range(action_dim)]
     action_space.high = [1 for i in range(action_dim)]
     self.action_space = action_space
     self.game = game
Example #20
    def __init__(self,
                 emulator_id,
                 game,
                 resource_folder,
                 gray=False,
                 reward_coef=1 / 100,
                 action_repeat=6,
                 history_window=1,
                 screen_size=(60, 90),
                 verbose=0,
                 visualize=False,
                 **unknown):
        if verbose >= 2:
            logging.debug('Initializing Vizdoom.{}. emulator_id={}'.format(
                game, emulator_id))
            logging.debug('Emulator#{} received unknown args: {}'.format(
                emulator_id, unknown))
        doom_game = DoomGame()
        config_file_path = join_path(resource_folder, game + '.cfg')
        doom_game.load_config(config_file_path)
        doom_game.set_window_visible(visualize)
        doom_game.set_screen_resolution(self.SCREEN_RESOLUTION)
        doom_game.set_screen_format(
            ScreenFormat.GRAY8 if gray else ScreenFormat.BGR24)
        doom_game.set_mode(self.MODE)
        if self.MODE == Mode.SPECTATOR:
            doom_game.add_game_args("+freelook 1")

        # with a fixed seed all episodes in this environment will be identical
        #doom_game.set_seed(args.random_seed)
        # doom_game.add_available_game_variable(vizdoom.GameVariable.AMMO2)
        doom_game.init()
        self.game = doom_game
        self.legal_actions, self.noop = self._define_actions(self.game)
        self._preprocess = cv2_resize
        self.screen_size = screen_size
        self.reward_coef = reward_coef
        self.action_repeat = action_repeat
        self.history_window = history_window

        num_channels = doom_game.get_screen_channels()
        self.observation_shape = (self.history_window *
                                  num_channels, ) + self.screen_size

        self.history = create_history_observation(self.history_window)
        # If episode is done WorkerProcess drops last returned state and
        #  returns the initial state of a new episode.
        # Therefore it doesn't really matter that terminal_screen is None
        self.terminal_obs = None
Example #21
class VizDoomEnv(gym.Env):

  def __init__(self, config='my_way_home.cfg', repeat_action=1, render=False):
    self._game = DoomGame()
    self._game.load_config(config)
    self._game.set_mode(Mode.PLAYER)
    self._game.set_screen_format(ScreenFormat.GRAY8)
    self._game.set_screen_resolution(ScreenResolution.RES_640X480)
    self._game.set_window_visible(render)
    self._game.init()
    self._actions = self._get_actions()
    self._repeat_action = repeat_action
    self._is_rendered = False

  def _get_actions(self):
    num_actions = self._game.get_available_buttons_size()
    actions = []
    for perm in itertools.product([False, True], repeat=num_actions):
      actions.append(list(perm))
    return actions

  def _get_observation(self):
    state = self._game.get_state()
    if state is not None:
      return state.screen_buffer
    return None

  def _get_terminal(self):
    return self._game.is_episode_finished()

  def reset(self):
    self._game.new_episode()
    return self._get_observation()

  def step(self, action):
    action_ = self._actions[action]
    reward = self._game.make_action(action_, self._repeat_action)
    return self._get_observation(), reward, self._get_terminal(), {}

  def render(self, mode='human'):
    self._game.set_window_visible(True)

  def close(self):
    self._game.close()
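A minimal usage sketch for this gym wrapper, assuming my_way_home.cfg sits next to the script:

env = VizDoomEnv(config='my_way_home.cfg', repeat_action=4)
obs = env.reset()
obs, reward, done, info = env.step(0)  # index into the button-combination list
env.close()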
Example #22
File: dfp.py Project: kaiolae/dfp_banana
    # save the model which is under training
    def save_model(self, name):
        self.model.save_weights(name)


if __name__ == "__main__":

    # Keep TensorFlow from eating up all the GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("../../scenarios/health_gathering.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [Health]
    prev_misc = misc

    action_size = game.get_available_buttons_size() # [Turn Left, Turn Right, Move Forward]
    measurement_size = 3 # [Health, Medkit, Poison]
    timesteps = [1,2,4,8,16,32]
    goal_size = measurement_size * len(timesteps)
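    # A hedged sketch (not from the source) of the DFP goal vector these sizes
    # imply: equal weight on each of the 3 measurements at every horizon.
    # Assumes numpy is imported as np.
    goal = np.array([1.0, 1.0, 1.0] * len(timesteps))  # [Health, Medkit, Poison] weights
    assert goal.shape == (goal_size,)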
Example #23
    def load_model(self, name):
        self.actor.load_weights(name + "_actor.h5", overwrite=True)
        self.critic.load_weights(name + "_critic.h5", overwrite=True)


if __name__ == "__main__":

    # Keep TensorFlow from eating up all the GPU memory
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("../../scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()

    # Maximum number of episodes
    max_episodes = 1000000

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()
Example #24
File: scenarios.py Project: Marqt/ViZDoom
from __future__ import print_function

import itertools as it
from random import choice
from time import sleep
from vizdoom import DoomGame, ScreenResolution

game = DoomGame()

# Choose the scenario config file you wish to watch.
# Don't load two configs, because the second will overwrite the first one.
# Multiple config files are OK, but combining these ones doesn't make much sense.

# game.load_config("../../scenarios/basic.cfg")
# game.load_config("../../scenarios/simpler_basic.cfg")
game.load_config("../../scenarios/rocket_basic.cfg")
# game.load_config("../../scenarios/deadly_corridor.cfg")
# game.load_config("../../scenarios/deathmatch.cfg")
# game.load_config("../../scenarios/defend_the_center.cfg")
# game.load_config("../../scenarios/defend_the_line.cfg")
# game.load_config("../../scenarios/health_gathering.cfg")
# game.load_config("../../scenarios/my_way_home.cfg")
# game.load_config("../../scenarios/predict_position.cfg")
# game.load_config("../../scenarios/take_cover.cfg")

# Makes the screen bigger to see more details.
game.set_screen_resolution(ScreenResolution.RES_640X480)
game.set_window_visible(True)
game.init()

# Creates all possible actions depending on how many buttons there are.
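The snippet is cut off here; what follows is a hedged completion in the spirit of ViZDoom's bundled scenarios.py, enumerating every on/off combination of the available buttons and playing random episodes (it, choice, and sleep are imported at the top of this example):

actions_num = game.get_available_buttons_size()
actions = [list(a) for a in it.product([0, 1], repeat=actions_num)]

episodes = 10
for _ in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        game.make_action(choice(actions))
        sleep(0.028)  # roughly one tic at 35 fps, just to keep the episode watchable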
Example #25
#parser = argparse.ArgumentParser()
#parser.add_argument('title', type=str, help="title statistics file")

if __name__ == '__main__':

    title = sys.argv[1]
    #args = parser.parse_args(sys.argv[1:])

    # Keep TensorFlow from eating up all the GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("vizdoom/scenarios/health_gathering.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [Health]
    prev_misc = misc

    action_size = game.get_available_buttons_size() # [Turn Left, Turn Right, Move Forward]
    measurement_size = 3 # [Health, Medkit, Poison]
    timesteps = [1,2,4,8,16,32]
    goal_size = measurement_size * len(timesteps)
Example #26
class VizDoom(gym.Env):
    """
    Wraps a VizDoom environment
    """
    def __init__(self,
                 cfg_path,
                 number_maps,
                 scaled_resolution=(42, 42),
                 action_frame_repeat=4,
                 clip=(-1, 1),
                 seed=None,
                 data_augmentation=False):
        """
        Gym environment for training reinforcement learning agents.

        :param cfg_path: name of the mission (.cfg) to run
        :param number_maps: number of maps which are contained within the cfg file
        :param scaled_resolution: resolution (height, width) of the observation to be returned with each step
        :param action_frame_repeat: how many game tics an action should stay active for
        :param clip: how much the reward returned on each step should be clipped to
        :param seed: seed for random, used to determine the order in which the doom maps are shown.
        :param data_augmentation: bool to determine whether or not to use data augmentation
            (adding randomly colored, randomly sized boxes to observation)
        """

        self.cfg_path = str(cfg_path)
        if not os.path.exists(self.cfg_path):
            raise ValueError("Cfg file not found", cfg_path)

        if not self.cfg_path.endswith('.cfg'):
            raise ValueError("cfg_path must end with .cfg")

        self.number_maps = number_maps
        self.scaled_resolution = scaled_resolution
        self.action_frame_repeat = action_frame_repeat
        self.clip = clip
        self.data_augmentation = data_augmentation

        if seed:
            random.seed(seed)

        super(VizDoom, self).__init__()
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating environment: VizDoom (%s)", self.cfg_path)

        # Create an instance of the VizDoom game, initialise it from a scenario config file
        self.env = DoomGame()
        self.env.load_config(self.cfg_path)
        self.env.init()

        # Perform config validation:
        # Only RGB format with a separate channel per colour is supported
        # assert self.env.get_screen_format() == ScreenFormat.RGB24
        # Only discrete actions are supported (no delta actions)
        available_actions = self.env.get_available_buttons()
        not_supported_actions = [
            Button.LOOK_UP_DOWN_DELTA, Button.TURN_LEFT_RIGHT_DELTA,
            Button.MOVE_LEFT_RIGHT_DELTA, Button.MOVE_UP_DOWN_DELTA,
            Button.MOVE_FORWARD_BACKWARD_DELTA
        ]
        assert len((set(available_actions) -
                    set(not_supported_actions))) == len(available_actions)

        # Allow only one button to be pressed at a given step
        self.action_space = gym.spaces.Discrete(
            self.env.get_available_buttons_size())

        rows = scaled_resolution[1]
        columns = scaled_resolution[0]
        self.observation_space = gym.spaces.Box(0.0,
                                                255.0,
                                                shape=(columns, rows, 3),
                                                dtype=np.float32)
        self._rgb_array = None
        self.reset()

    def _process_image(self, shape=None):
        """
        Convert the vizdoom environment observation numpy are into the desired resolution and shape
        :param shape: desired shape in the format (rows, columns)
        :return: resized and rescaled image in the format (rows, columns, channels)
        """
        if shape is None:
            rows, columns, _ = self.observation_space.shape
        else:
            rows, columns = shape
        # PIL resize has indexing opposite to numpy array
        img = VizDoom._resize(self._rgb_array.transpose(1, 2, 0),
                              (columns, rows))
        return img

    @staticmethod
    def _augment_data(img):
        """
        Augment input image with N randomly colored boxes of dimension x by y
        where N is randomly sampled between 0 and 6
        and x and y are randomly sampled from between 0.1 and 0.35
        :param img: input image to be augmented - format (rows, columns, channels)
        :return img: augmented image - format (rows, columns, channels)
        """
        dimx = img.shape[0]
        dimy = img.shape[1]
        max_rand_dim = .25
        min_rand_dim = .1
        num_blotches = np.random.randint(0, 6)

        for _ in range(num_blotches):
            # locations in [0,1]
            rand = np.random.rand
            rx = rand()
            ry = rand()
            rdx = rand() * max_rand_dim + min_rand_dim
            rdy = rand() * max_rand_dim + min_rand_dim

            rx, rdx = [round(r * dimx) for r in (rx, rdx)]
            ry, rdy = [round(r * dimy) for r in (ry, rdy)]
            for c in range(3):
                img[rx:rx + rdx, ry:ry + rdy, c] = np.random.randint(0, 255)
        return img

    @staticmethod
    def _resize(img, shape):
        """Resize the specified image.

        :param img: image to resize
        :param shape: desired shape in the format (rows, columns)
        :return: resized image
        """
        if not (OPENCV_AVAILABLE or PILLOW_AVAILABLE):
            raise ValueError('No image library backend found.'
                             ' Install either '
                             'OpenCV or Pillow to support image processing.')

        if OPENCV_AVAILABLE:
            return cv2.resize(img, shape, interpolation=cv2.INTER_AREA)

        if PILLOW_AVAILABLE:
            return np.array(PIL.Image.fromarray(img).resize(shape))

        raise NotImplementedError

    def reset(self):
        """
        Resets environment to start a new mission.

        If there is more than one maze it will randomly select a new maze.

        :return: initial observation of the environment as an rgb array in the format (rows, columns, channels)
        """
        if self.number_maps != 0:
            self.doom_map = random.choice(
                ["map" + str(i).zfill(2) for i in range(self.number_maps)])
            self.env.set_doom_map(self.doom_map)
        self.env.new_episode()
        self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()
        return observation

    def step(self, action):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = self.env.make_action(list(one_hot_action),
                                      self.action_frame_repeat)
        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.data_augmentation:
            observation = VizDoom._augment_data(observation)

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def step_record(self, action, record_path, record_shape=(120, 140)):
        """Perform the specified action for the self.action_frame_repeat ticks within the environment.
        :param action: the index of the action to perform. The actions are specified when the cfg is created. The
        defaults are "MOVE_FORWARD TURN_LEFT TURN_RIGHT"
        :param record_path: the path to save the image of the environment to
        :param record_shape: the shape of the image to save
        :return: tuple following the gym interface, containing:
            - observation as a numpy array of shape (rows, columns, channels)
            - scalar clipped reward
            - boolean which is true when the environment is done
            - {}
        """
        one_hot_action = np.zeros(self.action_space.n, dtype=int)
        one_hot_action[action] = 1

        reward = 0
        for _ in range(self.action_frame_repeat // 2):
            reward += self.env.make_action(list(one_hot_action), 2)
            env_state = self.env.get_state()
            if env_state:
                self._rgb_array = self.env.get_state().screen_buffer
                imageio.imwrite(
                    os.path.join(record_path,
                                 str(datetime.datetime.now()) + ".png"),
                    self._process_image(record_shape))

        done = self.env.is_episode_finished()
        # state is available only if the episode is still running
        if not done:
            self._rgb_array = self.env.get_state().screen_buffer
        observation = self._process_image()

        if self.clip:
            reward = np.clip(reward, self.clip[0], self.clip[1])

        return observation, reward, done, {}

    def close(self):
        """Close environment"""
        self.env.close()

    def render(self, mode='rgb_array'):
        """Render frame"""
        if mode == 'rgb_array':
            return self._rgb_array

        raise NotImplementedError

    def create_env(self):
        """
        Returns a function to create an environment with the generated mazes.

        Used for vectorising the environment. For example as used by Stable Baselines

        :return: a function to create an environment with the generated mazes
        """
        return lambda: VizDoom(self.cfg_path,
                               number_maps=self.number_maps,
                               scaled_resolution=self.scaled_resolution,
                               action_frame_repeat=self.action_frame_repeat)
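A short usage sketch for the wrapper above; the cfg path and map count are placeholders:

env = VizDoom('scenarios/health_gathering.cfg', number_maps=1)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()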
Example #27
class DoomScenario:
    """
    DoomScenario class runs instances of Vizdoom according to scenario
    configuration (.cfg) files.

    Scenario Configuration files for this project are located in
    the /src/configs/ folder.

    """
    def __init__(self, config_filename):
        '''
        Method initiates Vizdoom with desired configuration file.

        '''
        self.config_filename = config_filename
        self.game = DoomGame()
        self.game.load_config("configs/" + config_filename)
        self.game.set_window_visible(False)
        self.game.init()

        self.res = (self.game.get_screen_height(),
                    self.game.get_screen_width())
        self.actions = [
            list(a)
            for a in it.product([0, 1],
                                repeat=self.game.get_available_buttons_size())
        ]

        self.pbar = None
        self.game.new_episode()

    def play(self, action, tics):
        '''
        Method advances state with desired action for a number of tics.

        '''
        self.game.set_action(action)
        self.game.advance_action(tics, True)
        if self.pbar: self.pbar.update(int(tics))

    def get_processed_state(self, depth_radius, depth_contrast):
        '''
        Method processes the Vizdoom RGB and depth buffer into
        a composite one channel image that can be used by the Models.

        depth_radius defines how far the depth buffer sees with 1.0 being
        as far as ViZDoom allows.

        depth_contrast defines how much of the depth buffer is in the final
        processed image as compared to the greyscaled RGB buffer.
        processed = (1 - depth_contrast) * grey_buffer + depth_contrast * depth_buffer

        '''
        state = self.game.get_state()
        if not self.game.is_episode_finished():
            img = state.screen_buffer  # screen pixels
            # print(img)
            screen_buffer = np.array(img).astype('float32') / 255
            # print(screen_buffer.shape)    # (3, 120, 160)
        try:
            # Grey Scaling
            grey_buffer = np.dot(np.transpose(screen_buffer, (1, 2, 0)),
                                 [0.21, 0.72, 0.07])
            # print(grey_buffer.shape)     # (120, 160)

            # Depth Radius
            depth_buffer = np.array(state.depth_buffer).astype('float32') / 255
            depth_buffer[(depth_buffer >
                          depth_radius)] = depth_radius  # Affects depth radius
            depth_buffer_filtered = (depth_buffer - np.amin(depth_buffer)) / (
                np.amax(depth_buffer) - np.amin(depth_buffer))

            # Depth Contrast
            processed_buffer = (
                (1 - depth_contrast) * grey_buffer) + (depth_contrast *
                                                       (1 - depth_buffer))
            processed_buffer = (processed_buffer - np.amin(processed_buffer)
                                ) / (np.amax(processed_buffer) -
                                     np.amin(processed_buffer))
            processed_buffer = np.round(processed_buffer, 6)
            processed_buffer = processed_buffer.reshape(self.res[-2:])
        except Exception:
            # Fall back to a blank frame if the buffers are unavailable
            processed_buffer = np.zeros(self.res[-2:])
        return processed_buffer  # balance the depth & RGB data

    def run(self, agent, save_replay='', verbose=False, return_data=False):
        '''
        Method runs an instance of DoomScenario.

        '''
        if return_data:
            data_S = []
            data_a = []
        if verbose:
            print("\nRunning Simulation:", self.config_filename)
            self.pbar = tqdm(total=self.game.get_episode_timeout())

        # Initiate New Instance
        self.game.close()
        self.game.set_window_visible(False)
        self.game.add_game_args("+vid_forcesurface 1 ")
        self.game.init()
        if save_replay != '':
            self.game.new_episode("../data/replay_data/" + save_replay)
        else:
            self.game.new_episode()

        # Run Simulation
        while not self.game.is_episode_finished():
            S = agent.get_state_data(self)
            q = agent.model.online_network.predict(S)
            if np.random.random() < 0.1:
                q = np.random.choice(len(q[0]), 1, p=softmax(q[0], 1))[0]
            else:
                q = int(np.argmax(q[0]))
            a = agent.model.predict(self, q)
            if return_data:
                delta = np.zeros((len(self.actions)))
                a_ = np.cast['int'](a)
                delta[a_] = 1
                data_S.append(S.reshape(S.shape[1], S.shape[2], S.shape[3]))
                data_a.append(delta)
            if not self.game.is_episode_finished():
                self.play(a, agent.frame_skips + 1)
            if agent.model.__class__.__name__ == 'HDQNModel' and not self.game.is_episode_finished(
            ):
                if q >= len(agent.model.actions):
                    for i in range(agent.model.skill_frame_skip):
                        if not self.game.is_episode_finished():
                            a = agent.model.predict(self, q)
                            self.play(a, agent.frame_skips + 1)
                        else:
                            break

        # Reset Agent and Return Score
        agent.frames = None
        if agent.model.__class__.__name__ == 'HDQNModel':
            agent.model.sub_model_frames = None
        score = self.game.get_total_reward()
        if verbose:
            self.pbar.close()
            print("Total Score:", score)
        if return_data:
            data_S = np.array(data_S)
            data_a = np.array(data_a)
            return [data_S, data_a]
        return score

    def replay(self, filename, verbose=False, doom_like=False):
        '''
        Method runs a replay of the simulations at 800 x 600 resolution.

        '''
        print("\nRunning Replay:", filename)

        # Initiate Replay
        self.game.close()
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.add_game_args("+vid_forcesurface 1")
        if doom_like:
            self.game.set_render_hud(True)
            self.game.set_render_minimal_hud(False)
            self.game.set_render_crosshair(False)
            self.game.set_render_weapon(True)
            self.game.set_render_particles(True)
        self.game.init()
        self.game.replay_episode("../data/replay_data/" + filename)

        # Run Replay
        while not self.game.is_episode_finished():
            if verbose: print("Reward:", self.game.get_last_reward())
            self.game.advance_action()

        # Print Score
        score = self.game.get_total_reward()
        print("Total Score:", score)
        self.game.close()

    def apprentice_run(self, test=False):
        '''
        Method runs an apprentice data gathering.

        '''
        # Initiate New Instance
        self.game.close()
        self.game.set_mode(Mode.SPECTATOR)
        self.game.set_screen_resolution(ScreenResolution.RES_800X600)
        self.game.set_window_visible(True)
        self.game.set_ticrate(30)
        self.game.init()
        self.game.new_episode()

        # Run Simulation
        while not self.game.is_episode_finished():
            self.game.advance_action()
        self.game.close()
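A brief usage sketch for DoomScenario's frame preprocessing; the config name is a placeholder (the scenario's .cfg must enable the depth buffer for this to work):

scenario = DoomScenario('basic.cfg')
frame = scenario.get_processed_state(depth_radius=1.0, depth_contrast=0.5)
print(frame.shape)  # one-channel composite of greyscale and depth buffers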
Example #28
    def play(self):
        
        # Create DoomGame instance. It will run the game and communicate with you.
        print ("Initializing doom...")
        game = DoomGame()

        game.load_config("./examples/config/deepdoomplayer.cfg")
        game.init()
        print ("Doom initialized.")
 
        episodes = 1
        training_steps_per_epoch = 100

        sleep_time = 0.100

        train_episodes_finished = 0
        train_rewards = []
        
        for epoch in range(episodes):
           
            train_loss = []
            
            game.new_episode()
        
            while train_episodes_finished < 20:
        
                sleep(sleep_time)   

                if game.is_episode_finished():
                    
                    r = game.get_total_reward()
                    train_rewards.append(r)
                    game.new_episode()
                    train_episodes_finished += 1
                    self.last_state = None
                    self.last_action[1] = 1

                # first frame must be handled differently
                if self.last_state is None:
                    # the last_state will contain the image data from the last self.state_frames frames
                    self.last_state = np.stack(tuple(self.convert_image(game.get_state().image_buffer) for _ in range(self.state_frames)), axis=2)
                    continue

                
                reward = game.make_action(DeepDoomPlayer.define_keys_to_action_pressed(self.last_action), 7)
           
                reward *= 0.01

                imagebuffer = game.get_state().image_buffer

                if imagebuffer is None:
                    terminal = True
                    screen_resized_binary =  np.zeros((40,40))
                    
                imagebufferlast = imagebuffer 
                    
                if imagebuffer is not None: 
                    terminal = False
                    screen_resized_binary = self.convert_image(imagebuffer)
                
                # add dimension
                screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)

                current_state = np.append(self.last_state[:, :, 1:], screen_resized_binary, axis=2)

                self.last_state = current_state

                self.last_action = self.choose_next_action_only_on_q()

            print (train_episodes_finished, "training episodes played.")
            print ("Training results:")
            
            train_rewards = np.array(train_rewards)
  
            print ("mean:", train_rewards.mean(), 
                   "std:", train_rewards.std(), 
                   "max:", train_rewards.max(), 
                   "min:", train_rewards.min())
           
            
        # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
        game.close()
        self.last_state = None
Example #29
    # save the model which is under training
    def save_model(self, name):
        self.model.save_weights(name)


if __name__ == "__main__":

    # Keep TensorFlow from eating up all the GPU memory
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)
    tf.compat.v1.keras.backend.set_session(sess)

    game = DoomGame()
    game.load_config("/content/VizDoom-Keras-RL/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()

    game.new_episode()
    game_state = game.get_state()

    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    img_rows, img_cols = 64, 64
    img_channels = 3  # Color channel
Example #30
def train(conf):

    # to get the total time of training
    start_time = time.time()

    # set the seeds for reproducibility
    random.seed(conf.seed)
    np.random.seed(conf.seed)
    tf.set_random_seed(conf.seed)

    # Keep TensorFlow from eating up all the GPU memory
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("VizDoom/scenarios/defend_the_center.cfg")
    game.set_sound_enabled(True)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.set_living_reward(0.1)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
    prev_misc = misc

    action_size = game.get_available_buttons_size()

    img_rows , img_cols = 64, 64
    # Convert image into Black and white
    img_channels = 4 # We stack 4 frames

    state_size = (img_rows, img_cols, img_channels)
    agent = DoubleDQNAgent(state_size, action_size, conf)

    agent.model = Networks.dqn(state_size, action_size, agent.learning_rate)
    agent.target_model = Networks.dqn(state_size, action_size, agent.learning_rate)

    x_t = game_state.screen_buffer # 480 x 640
    x_t = preprocessImg(x_t, size=(img_rows, img_cols))
    s_t = np.stack(([x_t]*4), axis=2) # It becomes 64x64x4
    s_t = np.expand_dims(s_t, axis=0) # 1x64x64x4

    is_terminated = game.is_episode_finished()

    # Start training
    epsilon = agent.initial_epsilon
    GAME = 0
    t = 0
    max_life = 0 # Maximum episode life (Proxy for agent performance)
    life = 0

    # Buffer to compute rolling statistics 
    life_buffer, ammo_buffer, kills_buffer = [], [], [] 

    scores, episodes, steps, kills, ammos = [], [], [], [], []
    step = 0
    episode = conf.episode
    e = 0
    score = 0
    while e < episode:
        loss = 0
        Q_max = 0
        r_t = 0
        a_t = np.zeros([action_size])

        # Epsilon Greedy
        action_idx  = agent.get_action(s_t)
        a_t[action_idx] = 1
        a_t = a_t.astype(int)

        r_t = game.make_action(a_t.tolist(), agent.frame_per_action)

        game_state = game.get_state()  # Observe again after we take the action
        is_terminated = game.is_episode_finished()

        # print(r_t)
        score += r_t
        step += 1

        if (is_terminated):
            if (life > max_life):
                max_life = life
            GAME += 1
            life_buffer.append(life)
            ammo_buffer.append(misc[1])
            kills_buffer.append(misc[0])
            kills.append(misc[0])
            ammos.append(misc[1])
            print ("Episode Finish ", misc)
            # print(scores)
            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables
            x_t1 = game_state.screen_buffer

            scores.append(score)
            score = 0
            steps.append(step)
            episodes.append(e)
            e += 1
            

        x_t1 = game_state.screen_buffer
        misc = game_state.game_variables

        x_t1 = preprocessImg(x_t1, size=(img_rows, img_cols))
        x_t1 = np.reshape(x_t1, (1, img_rows, img_cols, 1))
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        r_t = agent.shape_reward(r_t, misc, prev_misc, t)

        if (is_terminated):
            life = 0
        else:
            life += 1

        # Update the cache
        prev_misc = misc

        # save the sample <s, a, r, s'> to the replay memory and decrease epsilon
        agent.replay_memory(s_t, action_idx, r_t, s_t1, is_terminated, t)

        # Do the training
        if t > agent.observe and t % agent.timestep_per_train == 0:
            Q_max, loss = agent.train_replay()
            
        s_t = s_t1
        t += 1

        # print info
        state = ""
        if t <= agent.observe:
            state = "observe"
        elif t > agent.observe and agent.epsilon > agent.final_epsilon:
            state = "explore"
        else:
            state = "train"

        if (is_terminated):
            print("TIME", t, "/ GAME", GAME, "/ STATE", state, \
                  "/ EPSILON", agent.epsilon, "/ ACTION", action_idx, "/ REWARD", score, \
                  "/ Q_MAX %e" % np.max(Q_max), "/ LIFE", max_life, "/ LOSS", loss)

            # Save Agent's Performance Statistics
            if GAME % agent.stats_window_size == 0 and t > agent.observe: 
                print("Update Rolling Statistics")
                agent.mavg_score.append(np.mean(np.array(life_buffer)))
                agent.var_score.append(np.var(np.array(life_buffer)))
                agent.mavg_ammo_left.append(np.mean(np.array(ammo_buffer)))
                agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))

                # Reset rolling stats buffer
                life_buffer, ammo_buffer, kills_buffer = [], [], [] 

    total_time = time.time() - start_time

    return steps, scores, total_time, kills, ammos
Example #31
File: scenarios.py Project: CV-IP/ViZDoom
from __future__ import print_function
from vizdoom import DoomGame, ScreenResolution
from random import choice
import itertools as it
from time import sleep
import cv2

game = DoomGame()

# Choose the scenario config file you wish to watch.
# Don't load two configs, because the second will overwrite the first one.
# Multiple config files are OK, but combining these ones doesn't make much sense.

game.load_config("../config/basic.cfg")
#game.load_config("../config/deadly_corridor.cfg")
#game.load_config("../config/deathmatch.cfg")
#game.load_config("../config/defend_the_center.cfg")
#game.load_config("../config/defend_the_line.cfg")
#game.load_config("../config/health_gathering.cfg")
#game.load_config("../config/my_way_home.cfg")
#game.load_config("../config/predict_position.cfg")
#game.load_config("../config/take_cover.cfg")

# Makes the screen bigger to see more details.
game.set_screen_resolution(ScreenResolution.RES_640X480)
game.init()

# Creates all possible actions depending on how many buttons there are.
actions_num = game.get_available_buttons_size()
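The snippet ends here; a hedged completion of the action construction and a random rollout, matching the itertools, choice, and sleep imports at the top of this example:

actions = [list(a) for a in it.product([0, 1], repeat=actions_num)]

episodes = 10
for _ in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        game.make_action(choice(actions))
        sleep(0.028)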
Example #32
class VizDoomEnv(Env):
    '''
    Wrapper for vizdoom to use as an OpenAI gym environment.
    '''
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, cfg_name, repeat=1):
        super(VizDoomEnv, self).__init__()
        self.game = DoomGame()
        self.game.load_config('./slm_lab/env/vizdoom/cfgs/' + cfg_name +
                              '.cfg')
        self._viewer = None
        self.repeat = repeat
        # TODO In future, need to update action to handle (continuous) DELTA buttons using gym's Box space
        self.action_space = spaces.MultiDiscrete(
            [2] * self.game.get_available_buttons_size())
        self.action_space.dtype = 'uint8'
        output_shape = (self.game.get_screen_height(),
                        self.game.get_screen_width(),
                        self.game.get_screen_channels())
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=output_shape,
                                            dtype='uint8')
        self.game.init()

    def close(self):
        self.game.close()
        if self._viewer is not None:
            self._viewer.close()
            self._viewer = None

    def seed(self, seed=None):
        self.game.set_seed(seed)

    def step(self, action):
        reward = self.game.make_action(list(action), self.repeat)
        state = self.game.get_state()
        done = self.game.is_episode_finished()
        # info = self._get_game_variables(state.game_variables)
        info = {}
        if state is not None:
            observation = state.screen_buffer.transpose(1, 2, 0)
        else:
            observation = np.zeros(shape=self.observation_space.shape,
                                   dtype=np.uint8)
        return observation, reward, done, info

    def reset(self):
        # self.seed(seed)
        self.game.new_episode()
        return self.game.get_state().screen_buffer.transpose(1, 2, 0)

    def render(self, mode='human', close=False):
        if close:
            if self._viewer is not None:
                self._viewer.close()
                self._viewer = None
            return
        img = None
        state = self.game.get_state()
        if state is not None:
            img = state.screen_buffer
        if img is None:
            # at the end of the episode
            img = np.zeros(shape=self.observation_space.shape, dtype=np.uint8)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            if self._viewer is None:
                self._viewer = rendering.SimpleImageViewer()
            self._viewer.imshow(img.transpose(1, 2, 0))

    def _get_game_variables(self, state_variables):
        info = {}
        if state_variables is not None:
            info['KILLCOUNT'] = state_variables[0]
            info['ITEMCOUNT'] = state_variables[1]
            info['SECRETCOUNT'] = state_variables[2]
            info['FRAGCOUNT'] = state_variables[3]
            info['HEALTH'] = state_variables[4]
            info['ARMOR'] = state_variables[5]
            info['DEAD'] = state_variables[6]
            info['ON_GROUND'] = state_variables[7]
            info['ATTACK_READY'] = state_variables[8]
            info['ALTATTACK_READY'] = state_variables[9]
            info['SELECTED_WEAPON'] = state_variables[10]
            info['SELECTED_WEAPON_AMMO'] = state_variables[11]
            info['AMMO1'] = state_variables[12]
            info['AMMO2'] = state_variables[13]
            info['AMMO3'] = state_variables[14]
            info['AMMO4'] = state_variables[15]
            info['AMMO5'] = state_variables[16]
            info['AMMO6'] = state_variables[17]
            info['AMMO7'] = state_variables[18]
            info['AMMO8'] = state_variables[19]
            info['AMMO9'] = state_variables[20]
            info['AMMO0'] = state_variables[21]
        return info
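
A short driver for the wrapper above, as a sketch only; the 'basic' config name is an assumption about what lives in the cfgs directory:

# Hypothetical usage: run one random episode through the Gym interface.
env = VizDoomEnv('basic', repeat=4)  # 'basic' is an assumed cfg name
obs = env.reset()
done, total = False, 0.0
while not done:
    action = env.action_space.sample()          # random MultiDiscrete action
    obs, reward, done, info = env.step(action)
    total += reward
print('episode reward:', total)
env.close()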
Example #33
0
    def start(self):
        """
        Run the DQN training loop: play episodes, collect transitions,
        and train the network once enough observations have been gathered.
        """
        # Create DoomGame instance. It will run the game and communicate with you.
        print("Initializing doom...")
        game = DoomGame()
        game.load_config("./examples/config/learningtensorflow.cfg")
        game.init()
        print("Doom initialized.")
        train_rewards = []

        for epoch in range(DeepDoom.episodes):
            print ("\nEpoch", epoch)
            train_time = 0
            train_episodes_finished = 0
            train_loss = []
            
            
                        
            #start saving after 20 epoch
            if epoch > 20:
                 if not os.path.exists(DeepDoom.checkpoint_path):
                    os.mkdir(DeepDoom.checkpoint_path)
                 self.saver.save(self.session, DeepDoom.checkpoint_path, global_step=epoch )
   
                 

            train_start = time()

            game.new_episode()

            for learning_step in tqdm(range(DeepDoom.training_steps_per_epoch)):

                # Episode over: record its reward and start a fresh one.
                if game.is_episode_finished():
                    r = game.get_total_reward()
                    train_rewards.append(r)
                    game.new_episode()
                    train_episodes_finished += 1
                    self.last_state = None

                # The first frame must be handled differently: last_state
                # stacks the same initial frame self.state_frames times.
                if self.last_state is None:
                    frame = self.convert_image(game.get_state().image_buffer)
                    self.last_state = np.stack([frame] * self.state_frames, axis=2)
                    continue
 
                # Repeat the chosen action for 7 tics and scale the reward.
                reward = game.make_action(
                    DeepDoom.define_keys_to_action_pressed(self.last_action), 7)
                reward *= 0.01

                imagebuffer = game.get_state().image_buffer

                if imagebuffer is None:
                    # Episode ended mid-step: no frame available, use a blank screen.
                    terminal = True
                    screen_resized_binary = np.zeros((40, 40))
                else:
                    terminal = False
                    screen_resized_binary = self.convert_image(imagebuffer)

                # Add a channel dimension and shift the frame stack by one.
                screen_resized_binary = np.expand_dims(screen_resized_binary, axis=2)
                current_state = np.append(
                    self.last_state[:, :, 1:], screen_resized_binary, axis=2)

                self.observations.append(
                    (self.last_state, self.last_action, reward, current_state, terminal))


                # Cap the replay memory at memory_size transitions.
                if len(self.observations) > self.memory_size:
                    self.observations.popleft()

                # Only train once the initial observation phase is done.
                if len(self.observations) > self.observation_steps:
                    self.train()
                    self.time += 1

                self.last_state = current_state
                self.last_action = self.choose_next_action()

                # Anneal epsilon linearly from its initial to its final value.
                if self.probability_of_random_action > self.final_random_action_prob \
                        and len(self.observations) > self.observation_steps:
                    self.probability_of_random_action -= \
                        (self.initial_random_action_prob - self.final_random_action_prob) / self.explore_steps
                        

            print(train_episodes_finished, "training episodes played.")
            print("Training results:")

            train_rewards = np.array(train_rewards)

            train_end = time()
            train_time = train_end - train_start
            mean_loss = np.mean(train_loss)

            print("mean:", train_rewards.mean(), "std:", train_rewards.std(),
                  "max:", train_rewards.max(), "min:", train_rewards.min(),
                  "epsilon:", self.probability_of_random_action)
            print("t:", str(round(train_time, 2)) + "s")
            train_rewards = []
            
            
        
        # It will be done automatically anyway but sometimes you need to do it in the middle of the program...
        game.close()
        self.last_state = None
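
    # The loop above depends on a convert_image helper that the excerpt does
    # not show. The sketch below is a plausible stand-in only: the
    # channels-first buffer layout, cv2, and the 40x40 grayscale shape
    # (matching the blank terminal frame) are assumptions, not the project's code.
    def convert_image(self, image_buffer):
        img = np.mean(image_buffer, axis=0).astype(np.float32)  # CHW -> HW grayscale
        img = cv2.resize(img, (40, 40))                         # downsample
        return img / 255.0                                      # normalize to [0, 1]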
Example #34
0
from __future__ import print_function

import itertools as it
from random import choice
from time import sleep
from vizdoom import DoomGame, ScreenResolution

game = DoomGame()

# Choose scenario config file you wish to watch.
# Don't load two configs, because the second will overwrite the first one.
# Loading multiple config files is fine, but combining these particular scenarios doesn't make much sense.

game.load_config("../../examples/config/basic.cfg")
# game.load_config("../../examples/config/deadly_corridor.cfg")
# game.load_config("../../examples/config/deathmatch.cfg")
# game.load_config("../../examples/config/defend_the_center.cfg")
# game.load_config("../../examples/config/defend_the_line.cfg")
# game.load_config("../../examples/config/health_gathering.cfg")
# game.load_config("../../examples/config/my_way_home.cfg")
# game.load_config("../../examples/config/predict_position.cfg")
# game.load_config("../../examples/config/take_cover.cfg")

# Makes the screen bigger to see more details.
game.set_screen_resolution(ScreenResolution.RES_640X480)
game.init()

# Creates all possible actions depending on how many buttons there are.
actions_num = game.get_available_buttons_size()
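
As in the earlier copy of this script, the listing stops right after the button count. A hedged sketch of the spectator loop such scripts typically continue with:

# Sketch: play one episode with random actions, printing per-step rewards.
actions = [list(a) for a in it.product([0, 1], repeat=actions_num)]
game.new_episode()
while not game.is_episode_finished():
    state = game.get_state()
    reward = game.make_action(choice(actions))
    print("State #{}: reward {}".format(state.number, reward))
    sleep(0.03)
print("Total reward:", game.get_total_reward())
game.close()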
Example #35
0
import argparse
import sys

import tensorflow as tf
from keras import backend as K
from vizdoom import DoomGame, ScreenResolution

if __name__ == '__main__':

    title = sys.argv[1]
    n_measures = int(sys.argv[2])  # number of measurements

    # Prevent TensorFlow from grabbing all GPU memory up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    game = DoomGame()
    game.load_config("../vizdoom/scenarios/health_gathering_supreme.cfg")

    # TODO : Change amo/frags values when dealing with D3
    amo = 0
    frags = 0
    game.set_sound_enabled(False)
    game.set_screen_resolution(ScreenResolution.RES_640X480)
    game.set_window_visible(False)
    game.init()

    game.new_episode()
    game_state = game.get_state()
    misc = game_state.game_variables  # [Health]
    prev_misc = misc

    action_size = game.get_available_buttons_size() # [Turn Left, Turn Right, Move Forward]
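
    # The excerpt ends after the setup. What follows is a hypothetical sketch
    # of how the n_measures measurements might be driven; the random one-hot
    # actions, numpy usage, and HEALTH being game_variables[0] are
    # assumptions, not the project's code.
    import numpy as np

    health_traces = []
    for _ in range(n_measures):
        game.new_episode()
        trace = []
        while not game.is_episode_finished():
            state = game.get_state()
            trace.append(state.game_variables[0])       # HEALTH
            action = [0] * action_size
            action[np.random.randint(action_size)] = 1  # random one-hot action
            game.make_action(action)
        health_traces.append(trace)
    print(title, ":", len(health_traces), "episodes measured")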