Example no. 1
 def write_stats(self, category: str, key: str, value: float,
                 step: int) -> None:
     # Write a single scalar to the Tensorboard writer for this category,
     # flushing immediately so the point is visible without delay.
     self._maybe_create_summary_writer(category)
     summary = tf.Summary()
     summary.value.add(tag=key, simple_value=value)
     self.summary_writers[category].add_summary(summary, step)
     self.summary_writers[category].flush()
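
This method relies on a lazy-initialization helper that is not shown above. A minimal sketch of what _maybe_create_summary_writer might look like, assuming the TF 1.x API (tf.Summary, tf.summary.FileWriter) used here and a hypothetical base_dir attribute holding the log root:

 import os
 import tensorflow as tf  # TF 1.x API: tf.Summary, tf.summary.FileWriter

 def _maybe_create_summary_writer(self, category: str) -> None:
     # Lazily create one FileWriter per category so each category's
     # events land in their own subdirectory under the log root.
     if category not in self.summary_writers:
         log_dir = os.path.join(self.base_dir, category)  # base_dir: assumed attribute
         os.makedirs(log_dir, exist_ok=True)
         self.summary_writers[category] = tf.summary.FileWriter(log_dir)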
Example no. 2
 def write_stats(self, category: str, values: Dict[str, StatsSummary],
                 step: int) -> None:
     # Write the mean of each tracked statistic as its own scalar summary.
     self._maybe_create_summary_writer(category)
     for key, value in values.items():
         summary = tf.Summary()
         summary.value.add(tag=key, simple_value=value.mean)
         self.summary_writers[category].add_summary(summary, step)
     # Flush once after the loop instead of once per summary.
     self.summary_writers[category].flush()
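
Example no. 2 consumes StatsSummary aggregates rather than raw floats. Its definition is not shown; a plausible shape, assuming a simple NamedTuple of windowed statistics (the std and num fields are illustrative), along with a hypothetical call:

 from typing import NamedTuple

 class StatsSummary(NamedTuple):
     mean: float  # mean of the values collected since the last write
     std: float   # standard deviation over the same window (illustrative)
     num: int     # number of raw values aggregated (illustrative)

 # Hypothetical usage: write one mean statistic under the "Environment" category.
 writer.write_stats(
     "Environment",
     {"Cumulative Reward": StatsSummary(mean=1.5, std=0.3, num=10)},
     step=1000,
 )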
Example no. 3
 def write_summary(
     self, global_step: int, delta_train_start: float, lesson_num: int = 0
 ) -> None:
     """
     Saves training statistics to Tensorboard.
     :param delta_train_start:  Time elapsed since training started.
     :param lesson_num: Current lesson number in curriculum.
     :param global_step: The number of steps the simulation has been going for
     """
     if (
         global_step % self.trainer_parameters["summary_freq"] == 0
         and global_step != 0
     ):
         is_training = (
             "Training."
             if self.is_training and self.get_step <= self.get_max_steps
             else "Not Training."
         )
         step = min(self.get_step, self.get_max_steps)
         if len(self.stats["Environment/Cumulative Reward"]) > 0:
             mean_reward = np.mean(self.stats["Environment/Cumulative Reward"])
             LOGGER.info(
                 "{}: {}: Step: {}. Time Elapsed: {:0.3f} s. "
                 "Mean Reward: {:0.3f}. Std of Reward: {:0.3f}. {}".format(
                     self.run_id,
                     self.brain_name,
                     step,
                     delta_train_start,
                     mean_reward,
                     np.std(self.stats["Environment/Cumulative Reward"]),
                     is_training,
                 )
             )
             set_gauge(f"{self.brain_name}.mean_reward", mean_reward)
         else:
             LOGGER.info(
                 " {}: {}: Step: {}. No episode was completed since last summary. {}".format(
                     self.run_id, self.brain_name, step, is_training
                 )
             )
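         # Write the running mean of every tracked stat as a scalar summary,
         # clearing each buffer so the next reporting window starts fresh.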
         summary = tf.Summary()
         for key in self.stats:
             if len(self.stats[key]) > 0:
                 stat_mean = float(np.mean(self.stats[key]))
                 summary.value.add(tag="{}".format(key), simple_value=stat_mean)
                 self.stats[key] = []
         summary.value.add(tag="Environment/Lesson", simple_value=lesson_num)
         self.summary_writer.add_summary(summary, step)
         self.summary_writer.flush()
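
All three examples use the TF 1.x summary API (tf.Summary protos written via FileWriter.add_summary). That API was removed in TF 2.x; a rough equivalent of the per-stat loop in Example no. 3, assuming eager mode and the same stats dict of lists, would be:

 import numpy as np
 import tensorflow as tf  # TF 2.x

 writer = tf.summary.create_file_writer("summaries/run_id")  # path is illustrative
 with writer.as_default():
     for key, values in stats.items():  # stats: Dict[str, List[float]]
         if values:
             tf.summary.scalar(key, float(np.mean(values)), step=step)
     tf.summary.scalar("Environment/Lesson", lesson_num, step=step)
 writer.flush()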