Example #1
 # Assumed imports (module paths follow ReAgent's layout and may vary by
 # version):
 #     from collections import OrderedDict
 #     from typing import List, Optional
 #     import torch
 #     from reagent.core import aggregators as agg
 #     from reagent.core.observers import (
 #         EpochEndObserver, IntervalAggregatingObserver, ValueListObserver,
 #     )
 def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     # Raw value observer: records every CPE (counterfactual policy
     # evaluation) result as it arrives, without aggregation.
     self.value_list_observers = {
         "cpe_results": ValueListObserver("cpe_details")
     }
     # Interval aggregators: each buffers values under its key and flushes
     # a summary every `report_interval` batches.
     self.aggregating_observers = OrderedDict(
         (name, IntervalAggregatingObserver(report_interval, aggregator))
         for name, aggregator in [
             ("td_loss", agg.MeanAggregator("td_loss")),
             ("reward_loss", agg.MeanAggregator("reward_loss")),
             (
                 "model_values",
                 agg.FunctionsByActionAggregator("model_values", actions, {
                     "mean": torch.mean,
                     "std": torch.std
                 }),
             ),
             ("logged_action",
              agg.ActionCountAggregator("logged_actions", actions)),
             (
                 "model_action",
                 agg.ActionCountAggregator("model_action_idxs", actions),
             ),
             ("recent_rewards",
              agg.RecentValuesAggregator("logged_rewards")),
         ] + [(f"{key}_tb",
               agg.TensorBoardActionCountAggregator(key, title, actions))
              for key, title in [
                  ("logged_actions", "logged"),
                  ("model_action_idxs", "model"),
              ]] +
         [(f"{key}_tb",
           agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
          for key, log_key in [
              ("td_loss", "td_loss"),
              ("reward_loss", "reward_loss"),
              ("logged_propensities", "propensities/logged"),
              ("logged_rewards", "reward/logged"),
          ]] + [(
              f"{key}_tb",
              agg.TensorBoardActionHistogramAndMeanAggregator(
                  key, category, title, actions),
          ) for key, category, title in [
              ("model_propensities", "propensities", "model"),
              ("model_rewards", "reward", "model"),
              ("model_values", "value", "model"),
          ]])
     # Bookkeeping for detecting epoch boundaries and the number of data
     # points seen per epoch.
     self.last_epoch_end_num_batches = 0
     self.num_data_points_per_epoch = None
     # Fires self._epoch_end_callback at the end of each epoch.
     epoch_end_observer = EpochEndObserver(self._epoch_end_callback)
     # Register all observers with the composite base class as one flat list.
     super().__init__(
         list(self.value_list_observers.values()) +
         list(self.aggregating_observers.values()) + [epoch_end_observer])
     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size
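
The constructor above leans entirely on ReAgent's observer framework. As a rough, self-contained illustration of the pattern it assumes (the classes below are simplified stand-ins, not ReAgent's actual API), an interval-aggregating observer buffers values per key and emits a summary every N updates:

# Minimal sketch of the interval-aggregation pattern assumed above.
# MeanAggregator and IntervalAggregatingObserver here are simplified
# stand-ins, not ReAgent's actual classes.
from typing import Dict, List

class MeanAggregator:
    def __init__(self, key: str):
        self.key = key
        self.values: List[float] = []

    def update(self, values: List[float]) -> None:
        self.values.extend(values)

    def flush(self) -> None:
        # Emit a summary of everything buffered since the last flush.
        if self.values:
            print(f"{self.key}: mean={sum(self.values) / len(self.values):.4f}")
        self.values = []

class IntervalAggregatingObserver:
    # Buffers one key's values and flushes a summary every `interval` updates.
    def __init__(self, interval: int, aggregator: MeanAggregator):
        self.interval = interval
        self.aggregator = aggregator
        self.num_updates = 0

    def observe(self, batch: Dict[str, List[float]]) -> None:
        if self.aggregator.key in batch:
            self.aggregator.update(batch[self.aggregator.key])
        self.num_updates += 1
        if self.num_updates % self.interval == 0:
            self.aggregator.flush()

observer = IntervalAggregatingObserver(2, MeanAggregator("td_loss"))
observer.observe({"td_loss": [0.9, 1.1]})
observer.observe({"td_loss": [0.5]})  # prints: td_loss: mean=0.8333

The real framework also routes values to TensorBoard writers and supports per-action breakdowns; the sketch only shows the buffer-and-flush cadence that report_interval controls.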
Example #2
 # Builds the observer map lazily (typically exposed as a property on the
 # reporter). Imports as in Example #1, plus `import itertools`.
 def aggregating_observers(self):
     # itertools.chain stitches together three groups of (name, aggregator)
     # pairs: plain interval means, TensorBoard histogram/mean loggers, and
     # TensorBoard per-action loggers.
     return {
         name: IntervalAggregatingObserver(self.report_interval, aggregator)
         for name, aggregator in itertools.chain(
             [
                 ("mse_loss_per_batch", agg.MeanAggregator("mse_loss")),
                 (
                     "step_entropy_loss_per_batch",
                     agg.MeanAggregator("step_entropy_loss"),
                 ),
                 (
                     "q_values_per_batch",
                     agg.FunctionsByActionAggregator(
                         "q_values", self.action_names,
                         {"mean": torch.mean}),
                 ),
                 ("eval_mse_loss_per_batch",
                  agg.MeanAggregator("eval_mse_loss")),
                 (
                     "eval_step_entropy_loss_per_batch",
                     agg.MeanAggregator("eval_step_entropy_loss"),
                 ),
                 (
                     "eval_q_values_per_batch",
                     agg.FunctionsByActionAggregator(
                         "eval_q_values", self.action_names,
                         {"mean": torch.mean}),
                 ),
                 (
                     "eval_action_distribution_per_batch",
                     agg.FunctionsByActionAggregator(
                         "eval_action_distribution",
                         self.action_names,
                         {"mean": torch.mean},
                     ),
                 ),
             ],
             [(
                 f"{key}_tb",
                 agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
             ) for key, log_key in [
                 ("mse_loss", "mse_loss"),
                 ("step_entropy_loss", "step_entropy_loss"),
                 ("eval_mse_loss", "eval_mse_loss"),
                 ("eval_step_entropy_loss", "eval_step_entropy_loss"),
             ]],
             [(
                 f"{key}_tb",
                 agg.TensorBoardActionHistogramAndMeanAggregator(
                     key, category, title, self.action_names),
             ) for key, category, title in [
                 ("q_values", "q_values", "training"),
                 ("eval_q_values", "q_values", "eval"),
                 ("eval_action_distribution", "action_distribution",
                  "eval"),
             ]],
         )
     }
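
The property's structure is easiest to see in isolation: itertools.chain concatenates hand-written (name, aggregator) pairs with comprehension-generated "_tb" variants before one dict comprehension consumes them all. A tiny standalone sketch of the same idiom, with made-up keys and string stand-ins for the aggregators:

# Standalone illustration of the itertools.chain idiom above: hand-written
# (name, value) pairs are concatenated with generated "_tb" variants before
# one dict comprehension consumes them. Keys and values are made up.
import itertools

observers = {
    name: f"IntervalAggregatingObserver({spec})"
    for name, spec in itertools.chain(
        [("mse_loss", "mean"), ("q_values", "per-action mean")],
        [(f"{key}_tb", "tensorboard") for key in ("mse_loss", "q_values")],
    )
}
# dicts preserve insertion order (Python 3.7+), so the hand-written entries
# precede the generated *_tb entries:
print(list(observers))  # ['mse_loss', 'q_values', 'mse_loss_tb', 'q_values_tb']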
Example #3
 # Imports as in Example #1, plus `import itertools`.
 def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     # No raw value-list observers here: CPE details go through an interval
     # aggregator below instead of a ValueListObserver.
     self.value_list_observers = {}
     # The dict merge places the cpe_results entry (flushed every batch,
     # since its interval is 1) ahead of the report_interval aggregators.
     self.aggregating_observers = {
         **{
             "cpe_results":
             IntervalAggregatingObserver(1, agg.ListAggregator("cpe_details")),
         },
         **{
             name: IntervalAggregatingObserver(report_interval, aggregator)
             for name, aggregator in itertools.chain(
                 [
                     ("td_loss", agg.MeanAggregator("td_loss")),
                     ("reward_loss", agg.MeanAggregator("reward_loss")),
                     (
                         "model_values",
                         agg.FunctionsByActionAggregator(
                             "model_values",
                             actions,
                             {
                                 "mean": torch.mean,
                                 "std": torch.std
                             },
                         ),
                     ),
                     (
                         "logged_action",
                         agg.ActionCountAggregator("logged_actions", actions),
                     ),
                     (
                         "model_action",
                         agg.ActionCountAggregator("model_action_idxs", actions),
                     ),
                     (
                         "recent_rewards",
                         agg.RecentValuesAggregator("logged_rewards"),
                     ),
                 ],
                 [(
                     f"{key}_tb",
                     agg.TensorBoardActionCountAggregator(
                         key, title, actions),
                 ) for key, title in [
                      ("logged_actions", "logged"),
                      ("model_action_idxs", "model"),
                  ]],
                 [(
                     f"{key}_tb",
                     agg.TensorBoardHistogramAndMeanAggregator(
                         key, log_key),
                 ) for key, log_key in [
                      ("td_loss", "td_loss"),
                      ("reward_loss", "reward_loss"),
                      ("logged_propensities", "propensities/logged"),
                      ("logged_rewards", "reward/logged"),
                  ]],
                 [(
                     f"{key}_tb",
                     agg.TensorBoardActionHistogramAndMeanAggregator(
                         key, category, title, actions),
                 ) for key, category, title in [
                      ("model_propensities", "propensities", "model"),
                      ("model_rewards", "reward", "model"),
                      ("model_values", "value", "model"),
                  ]],
             )
         },
     }
     # Unlike Example #1, this base class receives the two observer maps
     # directly rather than one flat list of observers.
     super().__init__(self.value_list_observers, self.aggregating_observers)
     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size
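
Example #3 differs from Example #1 mainly in where CPE results live: instead of a separate ValueListObserver, they become an interval-1 aggregator merged in front of the generated entries. The {**a, **b} merge keeps that ordering, which a minimal sketch (string stand-ins, not the real observers) makes visible:

# Sketch of the {**fixed, **generated} merge in Example #3: the fixed entry
# lands ahead of the generated ones, and insertion order is preserved.
# Values are string stand-ins for the real observers.
fixed = {"cpe_results": "IntervalAggregatingObserver(1, ListAggregator(...))"}
generated = {name: "IntervalAggregatingObserver(report_interval, ...)"
             for name in ("td_loss", "reward_loss")}
merged = {**fixed, **generated}
print(list(merged))  # ['cpe_results', 'td_loss', 'reward_loss']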