def _save_snapshot(self, policy: TFPolicy) -> None:
    """
    Saves a snapshot of the policy's current weights and its ELO rating,
    overwriting the oldest snapshot once the window is full.
    :param policy: Policy whose weights should be snapshotted.
    """
    weights = policy.get_weights()
    try:
        self.policy_snapshots[self.snapshot_counter] = weights
    except IndexError:
        # Window not yet full: grow the snapshot list instead of overwriting.
        self.policy_snapshots.append(weights)
    self.policy_elos[self.snapshot_counter] = self.current_elo
    self.snapshot_counter = (self.snapshot_counter + 1) % self.window
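For context, the bookkeeping above is a fixed-size circular window: snapshots are appended until `self.window` slots exist, after which the counter wraps around and the oldest snapshot is overwritten. A minimal standalone sketch of that pattern (the window size and the integer stand-ins for weights are illustrative values, not anything taken from the trainer):

# Minimal sketch of the circular snapshot window (illustrative values only).
window = 3
policy_snapshots = []          # grows until it holds `window` entries
policy_elos = [0.0] * window   # ELO recorded alongside each snapshot
snapshot_counter = 0

for step, weights in enumerate([10, 20, 30, 40, 50]):
    try:
        policy_snapshots[snapshot_counter] = weights   # overwrite oldest slot
    except IndexError:
        policy_snapshots.append(weights)               # window not yet full
    policy_elos[snapshot_counter] = 1200.0 + step
    snapshot_counter = (snapshot_counter + 1) % window

print(policy_snapshots)  # [40, 50, 30]: slots 0 and 1 were overwritten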
def add_policy(self, name_behavior_id: str, policy: TFPolicy) -> None:
    # for saving/swapping snapshots
    policy.init_load_weights()
    self.policies[name_behavior_id] = policy
    # First policy encountered
    if not self.learning_behavior_name:
        weights = policy.get_weights()
        self.current_policy_snapshot = weights
        self._save_snapshot(policy)
        self.trainer.add_policy(name_behavior_id, policy)
        self.learning_behavior_name = name_behavior_id
def add_policy(self, name_behavior_id: str, policy: TFPolicy) -> None:
    """
    Adds policy to trainer. The first policy added is registered with the
    wrapped trainer, and the learning behavior name is set to name_behavior_id.
    :param name_behavior_id: Behavior ID that the policy should belong to.
    :param policy: Policy to associate with name_behavior_id.
    """
    self.policies[name_behavior_id] = policy
    policy.create_tf_graph()
    # First policy encountered
    if not self.learning_behavior_name:
        weights = policy.get_weights()
        self.current_policy_snapshot = weights
        self.trainer.add_policy(name_behavior_id, policy)
        self._save_snapshot(policy)  # Need to save after trainer initializes policy
        self.learning_behavior_name = name_behavior_id
    else:
        # for saving/swapping snapshots
        policy.init_load_weights()
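The key difference between the two versions is ordering: the revised add_policy builds the TF graph and registers the policy with the wrapped trainer before taking the first snapshot, since the trainer is what initializes the policy's weights, and it defers init_load_weights() to the non-learning policies. Both versions share the "first behavior registered becomes the learning behavior" gate; a minimal sketch of just that gating (the class and names below are illustrative stand-ins, not the trainer's API):

from typing import Dict, Optional

class FirstPolicyGate:
    """Illustrative stand-in: the first behavior registered is treated as
    the learning behavior; later behaviors are only tracked for swapping."""

    def __init__(self) -> None:
        self.policies: Dict[str, object] = {}
        self.learning_behavior_name: Optional[str] = None

    def add_policy(self, name_behavior_id: str, policy: object) -> None:
        self.policies[name_behavior_id] = policy
        if not self.learning_behavior_name:
            # First policy encountered: mark this behavior as the learner.
            self.learning_behavior_name = name_behavior_id

gate = FirstPolicyGate()
gate.add_policy("behavior_a", object())
gate.add_policy("behavior_b", object())
print(gate.learning_behavior_name)  # behavior_a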