Example #1
    def evaluate(
        self,
        evaluator: Evaluator,
        logged_actions: Optional[np.ndarray],
        logged_propensities: Optional[np.ndarray],
        logged_rewards: Optional[np.ndarray],
        logged_values: Optional[np.ndarray],
    ):
        self.model_propensities, model_values_on_logged_actions, maxq_action_idxs = (
            None,
            None,
            None,
        )
        if self.all_action_scores is not None:
            self.all_action_scores = self.all_action_scores.cpu().numpy()
            # Softmax with temperature turns the Q-values into action propensities.
            self.model_propensities = Evaluator.softmax(
                self.all_action_scores, self.rl_temperature
            )
            # Index of the greedy (max-Q) action for each row.
            maxq_action_idxs = self.all_action_scores.argmax(axis=1)
            if logged_actions is not None:
                # logged_actions is one-hot, so the elementwise product keeps only
                # the model's score for the action that was actually taken.
                model_values_on_logged_actions = np.sum(
                    (logged_actions * self.all_action_scores), axis=1, keepdims=True
                )

        evaluator.report(
            self.loss.cpu().numpy(),
            logged_actions,
            logged_propensities,
            logged_rewards,
            logged_values,
            self.model_propensities,
            self.all_action_scores,
            model_values_on_logged_actions,
            maxq_action_idxs,
        )
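Both the NumPy and the Torch variants rely on Evaluator.softmax(scores, temperature) to turn raw action scores into model propensities. As a point of reference, here is a minimal sketch of a temperature-scaled softmax, assuming the usual convention of dividing scores by the temperature; the function below is an illustrative stand-in, not the actual Evaluator implementation.

    import numpy as np

    def softmax_with_temperature(scores: np.ndarray, temperature: float) -> np.ndarray:
        """Row-wise softmax over action scores, scaled by a temperature.

        Higher temperature flattens the distribution; lower temperature
        sharpens it toward the greedy (max-Q) action.
        """
        scaled = scores / temperature
        # Subtract the row max before exponentiating for numerical stability.
        scaled = scaled - scaled.max(axis=1, keepdims=True)
        exp = np.exp(scaled)
        return exp / exp.sum(axis=1, keepdims=True)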
Example #2
    def evaluate(
        self,
        evaluator: Evaluator,
        logged_actions: Optional[np.ndarray],
        logged_propensities: Optional[np.ndarray],
        logged_values: Optional[np.ndarray],
    ):
        # Run the Caffe2 scoring net and pull its outputs out of the workspace.
        workspace.RunNet(self.all_q_score_model.net)
        all_action_scores = workspace.FetchBlob(self.all_q_score_output)
        maxq_action_idxs = workspace.FetchBlob(self.maxq_action_idxs)
        # logged_actions is one-hot, so this keeps only the model's score for
        # the action that was actually taken.
        model_values_on_logged_actions = np.sum(
            (logged_actions * all_action_scores), axis=1, keepdims=True
        )
        model_propensities = Evaluator.softmax(all_action_scores, self.rl_temperature)
        logged_rewards = workspace.FetchBlob("rewards")

        evaluator.report(
            workspace.FetchBlob(self.loss_blob),
            logged_actions,
            logged_propensities,
            logged_rewards,
            logged_values,
            model_propensities,
            all_action_scores,
            model_values_on_logged_actions,
            maxq_action_idxs,
        )
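Examples #1, #2, and #6 all compute model_values_on_logged_actions by multiplying the logged actions with all_action_scores and summing along the action axis. This works because the logged actions are assumed to be one-hot vectors, so the elementwise product zeroes out every column except the one for the action that was actually taken. A small worked example with made-up data:

    import numpy as np

    # Two transitions, three discrete actions; each row of logged_actions is one-hot.
    logged_actions = np.array([[0.0, 1.0, 0.0],
                               [1.0, 0.0, 0.0]])
    all_action_scores = np.array([[0.2, 0.7, 0.1],
                                  [0.5, 0.3, 0.9]])

    # The product keeps only the logged action's score, so the row sum is
    # exactly the model's value for that action.
    model_values_on_logged_actions = np.sum(
        logged_actions * all_action_scores, axis=1, keepdims=True
    )
    print(model_values_on_logged_actions)  # [[0.7], [0.5]]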
Example #3

    def evaluate(self, evaluator: Evaluator, logged_value: Optional[torch.Tensor]):
        evaluator.report(
            self.loss.cpu().numpy(),
            None,
            None,
            None,
            logged_value.cpu().numpy() if logged_value is not None else None,
            None,
            None,
            None,
            self.all_action_scores.cpu().numpy(),
            None,
        )
Example #4
    def evaluate(self, evaluator: Evaluator):
        # FIXME
        evaluator.report(
            self.loss.cpu().numpy(),
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            self.all_action_scores.cpu().numpy(),
            None,
        )
Example #5
    def evaluate(
        self,
        evaluator: Evaluator,
        logged_actions: Optional[np.ndarray],
        logged_propensities: Optional[np.ndarray],
        logged_values: Optional[np.ndarray],
    ):
        evaluator.report(
            self.loss.cpu().numpy(),
            None,
            None,
            None,
            logged_values,
            None,
            None,
            self.all_action_scores.cpu().numpy(),
            None,
        )
Example #6
    def evaluate(
        self,
        evaluator: Evaluator,
        logged_actions: torch.Tensor,
        logged_propensities: Optional[torch.Tensor],
        logged_rewards: torch.Tensor,
        logged_values: Optional[torch.Tensor],
    ):
        self.model_propensities, model_values_on_logged_actions, maxq_action_idxs = (
            None,
            None,
            None,
        )
        if self.all_action_scores is not None:
            # Softmax with temperature turns the Q-values into action propensities.
            self.model_propensities = Evaluator.softmax(
                self.all_action_scores.cpu().numpy(), self.rl_temperature
            )
            # Greedy (max-Q) action index per row, converted to NumPy like the
            # other values handed to the evaluator.
            maxq_action_idxs = (
                self.all_action_scores.argmax(dim=1, keepdim=True).cpu().numpy()
            )
            if logged_actions is not None:
                # logged_actions is one-hot, so the elementwise product keeps only
                # the model's score for the action that was actually taken.
                model_values_on_logged_actions = (
                    torch.sum(
                        (logged_actions * self.all_action_scores), dim=1, keepdim=True
                    )
                    .cpu()
                    .numpy()
                )

        evaluator.report(
            self.loss.cpu().numpy(),
            logged_actions.cpu().numpy(),
            logged_propensities.cpu().numpy()
            if logged_propensities is not None
            else None,
            logged_rewards.cpu().numpy(),
            logged_values.cpu().numpy() if logged_values is not None else None,
            self.model_propensities,
            self.reward_estimates.cpu().numpy(),
            self.all_action_scores.cpu().numpy(),
            model_values_on_logged_actions,
            maxq_action_idxs,
        )
Example #7
    def evaluate(
        self,
        evaluator: Evaluator,
        logged_actions: Optional[np.ndarray],
        logged_propensities: Optional[np.ndarray],
        logged_values: Optional[np.ndarray],
    ):
        # Run the Caffe2 q-score net and read back the model's value for the
        # logged action.
        workspace.RunNet(self.q_score_model.net)
        model_values_on_logged_actions = workspace.FetchBlob(self.q_score_output)

        evaluator.report(
            workspace.FetchBlob(self.loss_blob),
            None,
            None,
            None,
            logged_values,
            None,
            None,
            model_values_on_logged_actions,
        )
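Examples #2 and #7 use the Caffe2 workspace API rather than PyTorch tensors: the scoring net is executed with workspace.RunNet, and its outputs are read back as NumPy arrays with workspace.FetchBlob. A minimal sketch of that pattern, with made-up net and blob names purely for illustration:

    import numpy as np
    from caffe2.python import core, workspace

    # Feed a batch of action scores into the workspace under a blob name.
    workspace.FeedBlob("q_values", np.random.randn(4, 3).astype(np.float32))

    # A tiny net that turns the scores into propensities with a softmax op.
    net = core.Net("toy_score_net")
    net.Softmax("q_values", "propensities")

    # Execute the net once and fetch the result back as a NumPy array.
    workspace.RunNetOnce(net)
    propensities = workspace.FetchBlob("propensities")
    print(propensities.sum(axis=1))  # each row sums to 1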