Пример #1
0
def measure(name, iters=5000, **settings):
    """Benchmark ``VizdoomWrapper`` against a bare ``vzd.DoomGame``.

    Both environments are stepped ``iters`` times with random actions and the
    resulting throughput (steps/s and seconds per 1000 steps) is printed.

    Args:
        name: Label printed as the header of the report.
        iters: Number of steps performed on each environment.
        **settings: Forwarded unchanged to ``VizdoomWrapper``. The vanilla
            run additionally reads ``settings["config_file"]`` and
            ``settings["frame_skip"]`` (both required) and the optional
            ``settings["scenarios_path"]``.

    Raises:
        KeyError: If ``config_file`` or ``frame_skip`` is missing from
            ``settings``.
    """
    print(name)
    for k, v in settings.items():
        print("\t{}: {}".format(k, v))

    # Vizdoom wrapper
    doom_wrapper = VizdoomWrapper(**settings)
    start = time()
    for _ in trange(iters, leave=False):
        # The state is fetched only because it is part of the measured cost;
        # its value is not needed.
        doom_wrapper.get_current_state()
        action_index = randint(0, doom_wrapper.actions_num - 1)
        doom_wrapper.make_action(action_index)

        if doom_wrapper.is_terminal():
            doom_wrapper.reset()
    end = time()
    wrapper_t = (end - start)

    # Vanilla vizdoom:
    if "scenarios_path" not in settings:
        scenarios_path = vzd.__path__[0] + "/scenarios"
    else:
        scenarios_path = settings["scenarios_path"]
    config_file = scenarios_path + "/" + settings["config_file"]
    # Looked up before the engine starts so that a missing key can neither
    # leak a running game nor be counted in the timed region.
    frame_skip = settings["frame_skip"]

    doom = vzd.DoomGame()
    doom.load_config(config_file)
    doom.set_window_visible(False)
    doom.set_screen_format(vzd.ScreenFormat.GRAY8)
    doom.set_screen_resolution(vzd.ScreenResolution.RES_160X120)
    doom.init()
    try:
        # An action is one on/off flag per available *button*; game variables
        # are observations and do not determine the action length.
        actions = [
            list(a)
            for a in it.product([0, 1],
                                repeat=len(doom.get_available_buttons()))
        ]
        start = time()
        for _ in trange(iters, leave=False):
            if doom.is_episode_finished():
                doom.new_episode()
            doom.make_action(choice(actions), frame_skip)
        end = time()
    finally:
        # Always shut the engine down, even if a step raised.
        doom.close()

    vanilla_t = end - start
    print(green("\twrapper: {:0.2f} steps/s".format(iters / wrapper_t)))
    print(
        green("\twrapper: {:0.2f} s/1000 steps".format(wrapper_t / iters *
                                                       1000)))
    print(blue("\tvanilla: {:0.2f} steps/s".format(iters / vanilla_t)))
    print(
        blue("\tvanilla: {:0.2f} s/1000 steps\n".format(vanilla_t / iters *
                                                        1000)))
def train_async(q_learning, settings):
    """Build the shared graph, spawn one learner per thread and train.

    Args:
        q_learning: When truthy, ``ADQNLearner`` instances are created
            together with a global target network; otherwise ``A3CLearner``
            instances are created.
        settings: Dict of configuration values. It is forwarded as keyword
            arguments to ``VizdoomWrapper``, the network class and every
            learner. This function itself reads ``initial_learning_rate``,
            ``final_learning_rate``, ``learning_rate_decay_steps``,
            ``rmsprop``, ``network_type`` and ``threads_num``.
    """
    # Prototype wrapper, used only to read the environment dimensions.
    # NOTE(review): ``noinit=True`` presumably skips launching the game;
    # confirm against VizdoomWrapper.
    proto_vizdoom = VizdoomWrapper(noinit=True, **settings)
    actions_num = proto_vizdoom.actions_num
    misc_len = proto_vizdoom.misc_len
    img_shape = proto_vizdoom.img_shape
    del proto_vizdoom

    # TODO target global network
    # This global step counts gradient applications not performed actions.
    global_train_step = tf.Variable(0, trainable=False, name="global_step")
    # NOTE(review): the op name below is misspelled ("larning_rate"). It is
    # kept as-is because renaming it would change graph node names.
    global_learning_rate = tf.train.polynomial_decay(
        name="larning_rate",
        learning_rate=settings["initial_learning_rate"],
        end_learning_rate=settings["final_learning_rate"],
        decay_steps=settings["learning_rate_decay_steps"],
        global_step=global_train_step)
    optimizer = ClippingRMSPropOptimizer(learning_rate=global_learning_rate,
                                         **settings["rmsprop"])

    learners = []
    # SECURITY NOTE: eval() executes whatever string the configuration
    # provides. Only use this with trusted settings; a lookup table or
    # getattr() on the networks module would be safer.
    network_class = eval(settings["network_type"])

    global_network = network_class(actions_num=actions_num,
                                   misc_len=misc_len,
                                   img_shape=img_shape,
                                   **settings)

    global_steps_counter = ThreadsafeCounter()
    threads_num = settings["threads_num"]
    if q_learning:
        global_target_network = network_class(thread="global_target",
                                              actions_num=actions_num,
                                              misc_len=misc_len,
                                              img_shape=img_shape,
                                              **settings)
        global_network.prepare_unfreeze_op(global_target_network)
        # Index of the single learner flagged with unfreeze_thread=True:
        # 1 when there are several threads, 0 when there is only one.
        unfreeze_thread = min(1, threads_num - 1)
        for i in range(threads_num):
            learner = ADQNLearner(thread_index=i,
                                  global_network=global_network,
                                  unfreeze_thread=i == unfreeze_thread,
                                  global_target_network=global_target_network,
                                  optimizer=optimizer,
                                  learning_rate=global_learning_rate,
                                  global_steps_counter=global_steps_counter,
                                  **settings)
            learners.append(learner)
    else:
        for i in range(threads_num):
            learner = A3CLearner(thread_index=i,
                                 global_network=global_network,
                                 optimizer=optimizer,
                                 learning_rate=global_learning_rate,
                                 global_steps_counter=global_steps_counter,
                                 **settings)
            learners.append(learner)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)

    log("Initializing variables...")
    session.run(tf.global_variables_initializer())
    log("Initialization finished.\n")

    if q_learning:
        session.run(global_network.ops.unfreeze)

    log(green("Starting training.\n"))

    for learner in learners:
        learner.run_training(session)
    for learner in learners:
        learner.join()

    # Every learner has been joined, so nothing uses the session any more;
    # release its resources instead of waiting for garbage collection.
    session.close()
Пример #3
0
    def __init__(self,
                 thread_index=0,
                 game=None,
                 model_savefile=None,
                 network_class="ACLstmNet",
                 global_steps_counter=None,
                 scenario_tag=None,
                 run_id_string=None,
                 session=None,
                 tf_logdir=None,
                 global_network=None,
                 optimizer=None,
                 learning_rate=None,
                 test_only=False,
                 test_interval=1,
                 write_summaries=True,
                 enable_progress_bar=True,
                 deterministic_testing=True,
                 save_interval=1,
                 writer_max_queue=10,
                 writer_flush_secs=120,
                 gamma_compensation=False,
                 figar_gamma=False,
                 gamma=0.99,
                 show_heatmaps=True,
                 **settings):
        """Create one actor-learner: its environment, local network and ops.

        Args:
            thread_index: Index of this learner. Index 0 additionally owns
                the model save file and the summary writers.
            game: Ready environment wrapper to use; when ``None`` a
                ``VizdoomWrapper(**settings)`` is created.
            model_savefile: Save path stored by learner 0 when training.
            network_class: Name of the class looked up on ``networks``.
            global_steps_counter: Counter object stored on the learner.
            scenario_tag: Prefix of the summary tags.
            run_id_string: Sub-directory name of the summary writers.
            session: TensorFlow session stored on the learner.
            tf_logdir: Root directory of the summaries; required when
                learner 0 writes summaries.
            global_network: Network whose parameters receive the gradients.
            optimizer: Optimizer used to compute and apply gradients.
            learning_rate: Learning-rate tensor, logged as a scalar summary.
            test_only: When true, no training ops or writers are created.
            gamma_compensation, figar_gamma: Mutually exclusive switches
                selecting how ``scale_gamma`` depends on the frameskip.
            gamma: Discount factor.
            **settings: Remaining configuration. Must contain
                ``local_steps_per_epoch``, ``test_episodes_per_epoch``,
                ``run_tests``, ``epochs`` and ``max_remembered_steps`` (and
                ``models_path`` when learner 0 writes summaries). It is also
                forwarded to the environment and the network.
        """
        super(A3CLearner, self).__init__()

        log("Creating actor-learner #{}.".format(thread_index))
        self.thread_index = thread_index

        self._global_steps_counter = global_steps_counter
        self.write_summaries = write_summaries
        self.save_interval = save_interval
        self.enable_progress_bar = enable_progress_bar
        self._model_savefile = None
        self._train_writer = None
        self._test_writer = None
        self._summaries = None
        self._session = session
        self.deterministic_testing = deterministic_testing
        self.local_steps = 0
        # TODO epoch as tf variable?
        self._epoch = 1
        self.train_scores = []
        self.train_actions = []
        self.train_frameskips = []
        self.show_heatmaps = show_heatmaps
        self.test_interval = test_interval

        self.local_steps_per_epoch = settings["local_steps_per_epoch"]
        self._run_tests = settings["test_episodes_per_epoch"] > 0 and settings["run_tests"]
        self.test_episodes_per_epoch = settings["test_episodes_per_epoch"]
        self._epochs = np.float32(settings["epochs"])
        self.max_remembered_steps = settings["max_remembered_steps"]

        assert not (gamma_compensation and figar_gamma)

        gamma = np.float32(gamma)

        # scale_gamma(frameskip) returns a pair built from gamma and the
        # frameskip. NOTE(review): the first element looks like a reward
        # scale and the second a discount; confirm against the callers.
        if gamma_compensation:
            def _compensated_gamma(fskip):
                discount = gamma ** fskip
                if gamma == 1:
                    # (1 - g**k) / (1 - g) is 0/0 here. Its limit, the sum
                    # of k ones, is simply k.
                    return np.float32(fskip), discount
                return (1 - discount) / (1 - gamma), discount

            self.scale_gamma = _compensated_gamma
        elif figar_gamma:
            self.scale_gamma = lambda fskip: (1.0, gamma ** fskip)
        else:
            self.scale_gamma = lambda _: (1.0, gamma)

        if self.write_summaries and thread_index == 0 and not test_only:
            assert tf_logdir is not None
            self.run_id_string = run_id_string
            self.tf_models_path = settings["models_path"]
            create_directory(tf_logdir)

            if self.tf_models_path is not None:
                create_directory(self.tf_models_path)

        if game is None:
            self.doom_wrapper = VizdoomWrapper(**settings)
        else:
            self.doom_wrapper = game
        misc_len = self.doom_wrapper.misc_len
        img_shape = self.doom_wrapper.img_shape
        self.use_misc = self.doom_wrapper.use_misc

        self.actions_num = self.doom_wrapper.actions_num
        self.local_network = getattr(networks, network_class)(actions_num=self.actions_num, img_shape=img_shape,
                                                              misc_len=misc_len,
                                                              thread=thread_index, **settings)

        if not test_only:
            self.learning_rate = learning_rate
            # TODO check gate_gradients != Optimizer.GATE_OP
            grads_and_vars = optimizer.compute_gradients(self.local_network.ops.loss,
                                                         var_list=self.local_network.get_params())
            # Only the gradients are kept: they are computed on the local
            # parameters but applied to the global network's parameters.
            grads, _ = zip(*grads_and_vars)

            grads_and_global_vars = zip(grads, global_network.get_params())
            self.train_op = optimizer.apply_gradients(grads_and_global_vars, global_step=tf.train.get_global_step())

            self.global_network = global_network
            self.local_network.prepare_sync_op(global_network)

        if self.thread_index == 0 and not test_only:
            self._model_savefile = model_savefile
            if self.write_summaries:
                self.actions_placeholder = tf.placeholder(tf.int32, None)
                self.frameskips_placeholder = tf.placeholder(tf.int32, None)
                self.scores_placeholder, summaries = setup_vector_summaries(scenario_tag + "/scores")

                # TODO remove scenario_tag from histograms
                a_histogram = tf.summary.histogram(scenario_tag + "/actions", self.actions_placeholder)
                fs_histogram = tf.summary.histogram(scenario_tag + "/frameskips", self.frameskips_placeholder)
                score_histogram = tf.summary.histogram(scenario_tag + "/scores", self.scores_placeholder)
                lr_summary = tf.summary.scalar(scenario_tag + "/learning_rate", self.learning_rate)
                summaries.append(lr_summary)
                summaries.append(a_histogram)
                summaries.append(fs_histogram)
                summaries.append(score_histogram)
                self._summaries = tf.summary.merge(summaries)
                self._train_writer = tf.summary.FileWriter("{}/{}/{}".format(tf_logdir, self.run_id_string, "train"),
                                                           flush_secs=writer_flush_secs, max_queue=writer_max_queue)
                self._test_writer = tf.summary.FileWriter("{}/{}/{}".format(tf_logdir, self.run_id_string, "test"),
                                                          flush_secs=writer_flush_secs, max_queue=writer_max_queue)
Пример #4
0
    def __init__(self,
                 scenario_tag=None,
                 run_id_string=None,
                 network_type="networks.DQNNet",
                 write_summaries=True,
                 tf_logdir="tensorboard_logs",
                 epochs=100,
                 train_steps_per_epoch=1000000,
                 test_episodes_per_epoch=100,
                 run_tests=True,
                 initial_epsilon=1.0,
                 final_epsilon=0.0000,
                 epsilon_decay_steps=10e07,
                 epsilon_decay_start_step=2e05,
                 frozen_steps=5000,
                 batchsize=32,
                 memory_capacity=10000,
                 update_pattern=(4, 4),
                 prioritized_memory=False,
                 enable_progress_bar=True,
                 save_interval=1,
                 writer_max_queue=10,
                 writer_flush_secs=120,
                 dynamic_frameskips=None,
                 **settings):
        """Set up the environment, replay memory, network and summaries.

        Args:
            scenario_tag: Prefix of the summary tags.
            run_id_string: Name appended to ``settings["models_path"]`` to
                form the model save file, and used as the summary
                sub-directory.
            network_type: Expression evaluated to obtain the network class.
            write_summaries: Whether summary writers are created.
            tf_logdir: Root directory of the summaries; created if missing.
            initial_epsilon, final_epsilon, epsilon_decay_steps: Define the
                per-step decay ``(initial - final) / epsilon_decay_steps``.
            batchsize, memory_capacity: Passed to ``ReplayMemory``.
            prioritized_memory: Not implemented; any truthy value raises.
            dynamic_frameskips: ``None``/falsy for a single ``None``
                frameskip, a list/tuple of frameskips, or an integer ``n``
                meaning frameskips ``1..n``.
            **settings: Remaining configuration. Must contain
                ``models_path``. It is also forwarded to ``VizdoomWrapper``
                and the network.

        Raises:
            NotImplementedError: If ``prioritized_memory`` is truthy.
            TypeError: If ``dynamic_frameskips`` is truthy but is neither a
                list, a tuple nor an integer.
        """
        if prioritized_memory:
            # TODO maybe some day ...
            raise NotImplementedError("Prioritized memory not implemented. Maybe some day.")

        if dynamic_frameskips:
            if isinstance(dynamic_frameskips, (list, tuple)):
                self.frameskips = list(dynamic_frameskips)
            elif isinstance(dynamic_frameskips, (int, np.integer)):
                self.frameskips = list(range(1, dynamic_frameskips + 1))
            else:
                # Fail here instead of leaving self.frameskips unset, which
                # used to surface later as an AttributeError after the game
                # had already been created.
                raise TypeError(
                    "dynamic_frameskips must be a list, a tuple or an int, got {}".format(
                        type(dynamic_frameskips).__name__))
        else:
            self.frameskips = [None]

        self.update_pattern = update_pattern
        self.write_summaries = write_summaries
        self._settings = settings
        self.run_id_string = run_id_string
        self.train_steps_per_epoch = train_steps_per_epoch
        self._run_tests = test_episodes_per_epoch > 0 and run_tests
        self.test_episodes_per_epoch = test_episodes_per_epoch
        self._epochs = np.float32(epochs)

        self.doom_wrapper = VizdoomWrapper(**settings)
        misc_len = self.doom_wrapper.misc_len
        img_shape = self.doom_wrapper.img_shape
        self.use_misc = self.doom_wrapper.use_misc
        self.actions_num = self.doom_wrapper.actions_num
        self.replay_memory = ReplayMemory(img_shape, misc_len, batch_size=batchsize, capacity=memory_capacity)
        # The network gets one output per (action, frameskip) combination.
        # SECURITY NOTE: eval() executes whatever string is configured as
        # network_type; only use it with trusted configuration.
        self.network = eval(network_type)(actions_num=self.actions_num * len(self.frameskips), img_shape=img_shape,
                                          misc_len=misc_len,
                                          **settings)

        self.batchsize = batchsize
        self.frozen_steps = frozen_steps

        self.save_interval = save_interval

        self._model_savefile = settings["models_path"] + "/" + self.run_id_string
        ## TODO move summaries somewhere so they are consistent between dqn and asyncs
        if self.write_summaries:
            assert tf_logdir is not None
            if not os.path.isdir(tf_logdir):
                os.makedirs(tf_logdir)

            self.scores_placeholder, summaries = setup_vector_summaries(scenario_tag + "/scores")
            self._summaries = tf.summary.merge(summaries)
            self._train_writer = tf.summary.FileWriter("{}/{}/{}".format(tf_logdir, self.run_id_string, "train"),
                                                       flush_secs=writer_flush_secs, max_queue=writer_max_queue)
            self._test_writer = tf.summary.FileWriter("{}/{}/{}".format(tf_logdir, self.run_id_string, "test"),
                                                      flush_secs=writer_flush_secs, max_queue=writer_max_queue)
        else:
            self._train_writer = None
            self._test_writer = None
            self._summaries = None
        self.steps = 0
        # TODO epoch as tf variable?
        self._epoch = 1

        # Epsilon
        self.epsilon_decay_rate = (initial_epsilon - final_epsilon) / epsilon_decay_steps
        self.epsilon_decay_start_step = epsilon_decay_start_step
        self.initial_epsilon = initial_epsilon
        self.final_epsilon = final_epsilon

        self.enable_progress_bar = enable_progress_bar
Пример #5
0
# Script fragment: builds a ViZDoom environment and a policy-gradient agent,
# optionally loads a saved model and optionally trains.
# NOTE(review): this excerpt appears truncated. `load_pretrained_network` is
# not defined in the visible lines, `show_results` is not used in them, and
# the training loop body below only prints the episode number.

# Run-mode switches.
train_network = False
show_results = True

# Scenario, model file and environment configuration.
config_path = 'scenarios/health_gathering.cfg'
model_path = 'models/health.hd5'
reward_table = OrderedDict({})  # empty table, passed to the wrapper as-is
resolution = (84, 84)

# Training hyper-parameters.
episodes = 500
gamma = 0.99
learning_rate = 0.0002

print('Initialising Doom...')
doom = VizdoomWrapper(config_path=config_path,
                      reward_table=reward_table,
                      frame_resolution=resolution,
                      show_mode=False,
                      frame_stack=4)

# The agent is sized from the state and action sizes the wrapper reports.
doomguy = PolicyGradientAgent(doom.get_state_size(),
                              doom.get_action_size(),
                              learning_rate,
                              gamma,
                              save_path=model_path)

# NOTE(review): `load_pretrained_network` must be defined before this point;
# presumably it is set in a part of the script that is not visible here.
if load_pretrained_network:
    doomguy.load_model()

if train_network:
    for episode in range(episodes):
        print('Episode', episode)
    def __init__(self,
                 thread_index,
                 network_type,
                 global_steps_counter,
                 scenario_tag=None,
                 run_id_string=None,
                 session=None,
                 tf_logdir=None,
                 global_network=None,
                 optimizer=None,
                 learning_rate=None,
                 test_only=False,
                 write_summaries=True,
                 enable_progress_bar=True,
                 deterministic_testing=True,
                 save_interval=1,
                 writer_max_queue=10,
                 writer_flush_secs=120,
                 **settings):
        """Create one actor-learner: its environment, local network and ops.

        Args:
            thread_index: Index of this learner. Index 0 additionally owns
                the model save file and the summary writers.
            network_type: Expression evaluated to obtain the network class.
            global_steps_counter: Counter object stored on the learner.
            scenario_tag: Prefix of the summary tags.
            run_id_string: Name appended to ``settings["models_path"]`` to
                form the model save file, and used as the summary
                sub-directory.
            session: TensorFlow session stored on the learner.
            tf_logdir: Root directory of the summaries; required when
                learner 0 writes summaries.
            global_network: Network whose parameters receive the gradients.
            optimizer: Optimizer used to compute and apply gradients.
            learning_rate: Learning-rate tensor, logged as a scalar summary.
            test_only: When true, no training ops or writers are created.
            **settings: Remaining configuration. Must contain
                ``local_steps_per_epoch``, ``test_episodes_per_epoch``,
                ``run_tests``, ``epochs``, ``max_remembered_steps`` and
                ``gamma`` (and ``models_path`` for learner 0 when training).
                It is also forwarded to ``VizdoomWrapper`` and the network.
        """
        super(A3CLearner, self).__init__()

        log("Creating actor-learner #{}.".format(thread_index))
        self.thread_index = thread_index

        self._global_steps_counter = global_steps_counter
        self.write_summaries = write_summaries
        self.save_interval = save_interval
        self.enable_progress_bar = enable_progress_bar
        self._model_savefile = None
        self._train_writer = None
        self._test_writer = None
        self._summaries = None
        self._session = session
        self.deterministic_testing = deterministic_testing
        self.local_steps = 0
        # TODO epoch as tf variable?
        self._epoch = 1
        self.train_scores = []
        # Stored unconditionally: the model save file below is built from it
        # even when summaries are disabled, which used to raise
        # AttributeError for thread 0 with write_summaries=False.
        self.run_id_string = run_id_string

        self.local_steps_per_epoch = settings["local_steps_per_epoch"]
        self._run_tests = settings["test_episodes_per_epoch"] > 0 and settings[
            "run_tests"]
        self.test_episodes_per_epoch = settings["test_episodes_per_epoch"]
        self._epochs = np.float32(settings["epochs"])
        self.max_remembered_steps = settings["max_remembered_steps"]
        self.gamma = np.float32(settings["gamma"])

        if self.write_summaries and thread_index == 0 and not test_only:
            assert tf_logdir is not None
            self.tf_models_path = settings["models_path"]
            if not os.path.isdir(tf_logdir):
                os.makedirs(tf_logdir)

            if self.tf_models_path is not None:
                if not os.path.isdir(settings["models_path"]):
                    os.makedirs(settings["models_path"])

        self.doom_wrapper = VizdoomWrapper(**settings)
        misc_len = self.doom_wrapper.misc_len
        img_shape = self.doom_wrapper.img_shape
        self.use_misc = self.doom_wrapper.use_misc

        self.actions_num = self.doom_wrapper.actions_num
        # TODO add debug log
        # SECURITY NOTE: eval() executes whatever string is configured as
        # network_type; only use it with trusted configuration.
        self.local_network = eval(network_type)(actions_num=self.actions_num,
                                                img_shape=img_shape,
                                                misc_len=misc_len,
                                                thread=thread_index,
                                                **settings)

        if not test_only:
            self.learning_rate = learning_rate
            # TODO check gate_gradients != Optimizer.GATE_OP
            grads_and_vars = optimizer.compute_gradients(
                self.local_network.ops.loss,
                var_list=self.local_network.get_params())
            # Only the gradients are kept: they are computed on the local
            # parameters but applied to the global network's parameters.
            grads, _ = zip(*grads_and_vars)

            grads_and_global_vars = zip(grads, global_network.get_params())
            self.train_op = optimizer.apply_gradients(
                grads_and_global_vars, global_step=tf.train.get_global_step())

            self.global_network = global_network
            self.local_network.prepare_sync_op(global_network)

        if self.thread_index == 0 and not test_only:
            self._model_savefile = settings[
                "models_path"] + "/" + self.run_id_string

            if self.write_summaries:
                self.scores_placeholder, summaries = setup_vector_summaries(
                    scenario_tag + "/scores")
                lr_summary = tf.summary.scalar(scenario_tag + "/learning_rate",
                                               self.learning_rate)
                summaries.append(lr_summary)
                self._summaries = tf.summary.merge(summaries)
                self._train_writer = tf.summary.FileWriter(
                    "{}/{}/{}".format(tf_logdir, self.run_id_string, "train"),
                    flush_secs=writer_flush_secs,
                    max_queue=writer_max_queue)
                self._test_writer = tf.summary.FileWriter(
                    "{}/{}/{}".format(tf_logdir, self.run_id_string, "test"),
                    flush_secs=writer_flush_secs,
                    max_queue=writer_max_queue)