예제 #1
0
    def _print_train_log(self, scores, overall_start_time, last_log_time, steps):
        """Log aggregate training scores plus local/global step throughput.

        Args:
            scores: sequence of episode scores accumulated since the last log.
            overall_start_time: wall-clock time when training started.
            last_log_time: wall-clock time of the previous log call.
            steps: local steps taken since the previous log call.
        """
        now = time.time()
        elapsed = time.time() - overall_start_time
        total_steps = self._global_steps_counter.get()

        # Local speed covers only the window since the last log; global speed
        # is averaged over the whole run.
        local_speed = steps / (now - last_log_time)
        global_speed = total_steps / elapsed
        mil_steps_hourly = global_speed * 3600 / 1000000.0

        message = (
            "TRAIN: {}(GlobalSteps), {} episodes, mean: {}, min: {}, max: {}, "
            "\nLocalSpd: {:.0f} STEPS/s GlobalSpd: "
            "{} STEPS/s, {:.2f}M STEPS/hour, total elapsed time: {}"
        )
        log(message.format(
            total_steps,
            len(scores),
            green("{:0.3f}±{:0.2f}".format(np.mean(scores), np.std(scores))),
            red("{:0.3f}".format(np.min(scores))),
            blue("{:0.3f}".format(np.max(scores))),
            local_speed,
            blue("{:.0f}".format(global_speed)),
            mil_steps_hourly,
            sec_to_str(elapsed),
        ))
예제 #2
0
    def test(self, episodes_num=None, deterministic=True):
        """Run evaluation episodes and log score statistics.

        Args:
            episodes_num: number of episodes to run; defaults to
                ``self.test_episodes_per_epoch`` when None.
            deterministic: forwarded to ``run_episode``.

        Returns:
            Tuple of (per-episode rewards, flat list of actions,
            flat list of frameskips).
        """
        if episodes_num is None:
            episodes_num = self.test_episodes_per_epoch

        started_at = time.time()
        rewards = []
        all_actions = []
        all_frameskips = []
        progress = trange(episodes_num, desc="Testing", file=sys.stdout,
                          leave=False, disable=not self.enable_progress_bar)
        for _ in progress:
            episode_reward, episode_actions, episode_frameskips, _ = self.run_episode(
                deterministic=deterministic, return_stats=True)
            rewards.append(episode_reward)
            all_actions.extend(episode_actions)
            all_frameskips.extend(episode_frameskips)

        # Leave the environment (and any recurrent network state) clean for
        # whatever runs next.
        self.doom_wrapper.reset()
        if self.local_network.has_state():
            self.local_network.reset_state()

        duration = time.time() - started_at
        log(
            "TEST: mean: {}, min: {}, max: {}, test time: {}".format(
                green("{:0.3f}±{:0.2f}".format(np.mean(rewards), np.std(rewards))),
                red("{:0.3f}".format(np.min(rewards))),
                blue("{:0.3f}".format(np.max(rewards))),
                sec_to_str(duration)))
        return rewards, all_actions, all_frameskips
예제 #3
0
 def isReady(self, state):
     """Return True once the current temperature has reached the set point.

     On the first True result it also announces completion with the
     (presumed seconds-based) timestamp from ``state['time']``.
     """
     ready = self.cur_temp >= self.set_temp
     if ready:
         print("Finish " + self.name + " at " + sec_to_str(state['time']))
     return ready
예제 #4
0
    def print_epoch_log(prefix, scores, steps, epoch_time):
        """Log one epoch's score statistics and step throughput.

        Args:
            prefix: label for the log line (e.g. "TRAIN" or "TEST").
            scores: sequence of per-episode scores for the epoch.
            steps: total environment steps taken during the epoch.
            epoch_time: epoch duration in seconds.
        """
        speed = steps / epoch_time
        mil_per_hour = speed * 3600 / 1000000.0
        score_summary = green(
            "{:0.3f}±{:0.2f}".format(np.mean(scores), np.std(scores)))
        message = ("{}: Episodes: {}, mean: {}, min: {}, max: {}, "
                   " Speed: {:.0f} STEPS/s, {:.2f}M STEPS/hour, time: {}")
        log(message.format(
            prefix, len(scores), score_summary,
            red("{:0.3f}".format(np.min(scores))),
            blue("{:0.3f}".format(np.max(scores))),
            speed, mil_per_hour, sec_to_str(epoch_time)))
예제 #5
0
 def check(self, s):
     """Advance this sequence matcher with sample *s*; return 0 (or isMet's
     result once the whole sequence has already been matched).

     *s* is expected to be a dict with at least 'act_truth' and 'time' keys
     (seconds, presumably — matches sec_to_str usage). NOTE(review): the
     method returns 0 on every path except the first branch; callers seem to
     treat the return as informational only — confirm.
     """
     # Whole sequence already matched earlier: delegate to the final check.
     if self.seqIdx == len(self.sequence):
         return self.isMet(s)
     # If the current sample still matches the PREVIOUS step of the sequence,
     # refresh the timestamp so repeats of that step don't count as a timeout.
     if self.seqIdx > 0:
         if (s['act_truth'] in [
                 ac['act_truth'] for ac in self.sequence[self.seqIdx - 1]
         ]):
             self.last = s['time']
     # Too long since the last matching step: abandon the partial match.
     if s['time'] - self.last > self.seqWaitTime:
         self.reset()
         return 0
     # Sample matches the NEXT expected step: advance, and announce when the
     # final step of the sequence has just been reached.
     if (s['act_truth']
             in [ac['act_truth'] for ac in self.sequence[self.seqIdx]]):
         self.seqIdx += 1
         if self.seqIdx == len(self.sequence):
             print(self.name +
                   ' met at time:{}'.format(sec_to_str(s['time'])))
         self.last = s['time']
     return 0
예제 #6
0
    def train(self, session):
        """Run the full training loop: prefill replay memory, then alternate
        training and testing epochs, logging stats and periodically saving
        the model.

        Args:
            session: TensorFlow session used for all network operations.
        """

        # Prefill replay memory with transitions from uniformly random
        # action/frameskip choices so the first training batches are valid:
        for _ in trange(self.replay_memory.capacity, desc="Filling replay memory",
                        leave=False, disable=not self.enable_progress_bar, file=sys.stdout):
            if self.doom_wrapper.is_terminal():
                self.doom_wrapper.reset()
            s1 = self.doom_wrapper.get_current_state()
            # One flat index encodes both the action and the frameskip choice.
            action_frameskip_index = randint(0, self.actions_num * len(self.frameskips) - 1)
            action_index, frameskip = self.get_action_and_frameskip(action_frameskip_index)
            reward = self.doom_wrapper.make_action(action_index, frameskip)
            terminal = self.doom_wrapper.is_terminal()
            s2 = self.doom_wrapper.get_current_state()
            self.replay_memory.add_transition(s1, action_frameskip_index, s2, reward, terminal)

        overall_start_time = time()
        # Sync the target network with the online network before training.
        self.network.update_target_network(session)

        log(green("Starting training.\n"))
        while self._epoch <= self._epochs:
            self.doom_wrapper.reset()
            train_scores = []
            test_scores = []
            train_start_time = time()

            # TRAINING: epsilon-greedy interaction + batched updates.
            for _ in trange(self.train_steps_per_epoch, desc="Training, epoch {}".format(self._epoch),
                            leave=False, disable=not self.enable_progress_bar, file=sys.stdout):
                self.steps += 1
                s1 = self.doom_wrapper.get_current_state()

                # Explore with probability epsilon, otherwise act greedily.
                if random() <= self.get_current_epsilon():
                    action_frameskip_index = randint(0, self.actions_num*len(self.frameskips) - 1)
                    action_index, frameskip = self.get_action_and_frameskip(action_frameskip_index)
                else:
                    action_frameskip_index = self.network.get_action(session, s1)
                    action_index, frameskip = self.get_action_and_frameskip(action_frameskip_index)

                reward = self.doom_wrapper.make_action(action_index, frameskip)
                terminal = self.doom_wrapper.is_terminal()
                s2 = self.doom_wrapper.get_current_state()
                self.replay_memory.add_transition(s1, action_frameskip_index, s2, reward, terminal)

                # update_pattern = (interval, batches-per-update), presumably
                # — every update_pattern[0] steps run update_pattern[1] batches.
                if self.steps % self.update_pattern[0] == 0:
                    for _ in range(self.update_pattern[1]):
                        self.network.train_batch(session, self.replay_memory.get_sample())

                if terminal:
                    train_scores.append(self.doom_wrapper.get_total_reward())
                    self.doom_wrapper.reset()
                # Periodically refresh the frozen target network.
                if self.steps % self.frozen_steps == 0:
                    self.network.update_target_network(session)

            train_time = time() - train_start_time

            log("Epoch {}".format(self._epoch))
            log("Training steps: {}, epsilon: {}".format(self.steps, self.get_current_epsilon()))
            self.print_epoch_log("TRAIN", train_scores, self.train_steps_per_epoch, train_time)
            test_start_time = time()
            test_steps = 0
            # TESTING: greedy policy only (no exploration), full episodes.
            for _ in trange(self.test_episodes_per_epoch, desc="Testing, epoch {}".format(self._epoch),
                            leave=False, disable=not self.enable_progress_bar, file=sys.stdout):
                self.doom_wrapper.reset()
                while not self.doom_wrapper.is_terminal():
                    test_steps += 1
                    state = self.doom_wrapper.get_current_state()
                    action_frameskip_index = self.network.get_action(session, state)
                    action_index, frameskip = self.get_action_and_frameskip(action_frameskip_index)
                    self.doom_wrapper.make_action(action_index, frameskip)

                test_scores.append(self.doom_wrapper.get_total_reward())

            test_time = time() - test_start_time

            self.print_epoch_log("TEST", test_scores, test_steps, test_time)

            # TensorBoard summaries, keyed by the cumulative step count.
            if self.write_summaries:
                log("Writing summaries.")
                train_summary = session.run(self._summaries, {self.scores_placeholder: train_scores})
                self._train_writer.add_summary(train_summary, self.steps)
                if self._run_tests:
                    test_summary = session.run(self._summaries, {self.scores_placeholder: test_scores})
                    self._test_writer.add_summary(test_summary, self.steps)

            # Save model every save_interval epochs, creating the target
            # directory on first use.
            if self._epoch % self.save_interval == 0:
                savedir = os.path.dirname(self._model_savefile)
                if not os.path.exists(savedir):
                    log("Creating directory: {}".format(savedir))
                    os.makedirs(savedir)
                log("Saving model to: {}".format(self._model_savefile))
                saver = tf.train.Saver()
                saver.save(session, self._model_savefile)

            overall_time = time() - overall_start_time
            log("Total elapsed time: {}\n".format(sec_to_str(overall_time)))
            self._epoch += 1