Example #1
    def score_cpe(self, metric_name, edp: EvaluationDataPage,
                  cpe_estimate_set: CpeEstimateSet):
        # The one-step doubly robust estimator returns the direct method,
        # inverse propensity, and doubly robust estimates in a single pass.
        cpe_estimate_set.direct_method, cpe_estimate_set.inverse_propensity, cpe_estimate_set.doubly_robust = self.doubly_robust_estimator.estimate(
            edp)
        # Sequential doubly robust estimate.
        cpe_estimate_set.sequential_doubly_robust = self.sequential_doubly_robust_estimator.estimate(
            edp)
        # Weighted sequential doubly robust estimate with a single j-step.
        cpe_estimate_set.weighted_doubly_robust = self.weighted_sequential_doubly_robust_estimator.estimate(
            edp, num_j_steps=1, whether_self_normalize_importance_weights=True)
        # MAGIC estimate: the same weighted estimator run over multiple j-steps.
        cpe_estimate_set.magic = self.weighted_sequential_doubly_robust_estimator.estimate(
            edp,
            num_j_steps=Evaluator.NUM_J_STEPS_FOR_MAGIC_ESTIMATOR,
            whether_self_normalize_importance_weights=True,
        )
Example #2
    def score_cpe(
        self, metric_name, edp: EvaluationDataPage, cpe_estimate_set: CpeEstimateSet
    ):
        cpe_estimate_set.direct_method, cpe_estimate_set.inverse_propensity, cpe_estimate_set.doubly_robust = self.doubly_robust_estimator.estimate(
            edp
        )
        cpe_estimate_set.sequential_doubly_robust = self.sequential_doubly_robust_estimator.estimate(
            edp
        )
        cpe_estimate_set.weighted_doubly_robust = self.weighted_sequential_doubly_robust_estimator.estimate(
            edp, num_j_steps=1, whether_self_normalize_importance_weights=True
        )
        cpe_estimate_set.magic = self.weighted_sequential_doubly_robust_estimator.estimate(
            edp,
            num_j_steps=Evaluator.NUM_J_STEPS_FOR_MAGIC_ESTIMATOR,
            whether_self_normalize_importance_weights=True,
        )
Example #3
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        self.score_cpe("Reward", edp, cpe_details.reward_estimates)

        if (
            self.metrics_to_score is not None
            and edp.logged_metrics is not None
            and self.action_names is not None
        ):
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(metric)
                )

                metric_reward_edp = edp.set_metric_as_reward(i, len(self.action_names))

                cpe_details.metric_estimates[metric] = CpeEstimateSet()
                self.score_cpe(
                    metric, metric_reward_edp, cpe_details.metric_estimates[metric]
                )

        if self.action_names is not None:
            value_means = edp.model_values.mean(dim=0)
            cpe_details.q_value_means = {
                action: float(value_means[i])
                for i, action in enumerate(self.action_names)
            }
            value_stds = edp.model_values.std(dim=0)
            cpe_details.q_value_stds = {
                action: float(value_stds[i])
                for i, action in enumerate(self.action_names)
            }
            max_q_idxs = edp.model_values.argmax(dim=1)
            cpe_details.action_distribution = {
                action: float((max_q_idxs == i).sum()) / max_q_idxs.shape[0]
                for i, action in enumerate(self.action_names)
            }

        # Compute MC Loss on Aggregate Reward
        cpe_details.mc_loss = float(
            torch.mean(torch.abs(edp.logged_values - edp.model_values))
        )

        return cpe_details
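
For context, a minimal sketch of how the CpeDetails object built above might be read. The attribute names (reward_estimates, metric_estimates, q_value_means, q_value_stds, action_distribution, mc_loss) come from the example itself; the evaluator and edp objects are assumed to have been constructed elsewhere in the pipeline, since that setup is not part of these snippets.

# Hypothetical inspection of the returned CpeDetails; `evaluator` and `edp`
# are assumed to already exist, since their construction is not shown above.
cpe_details = evaluator.evaluate_post_training(edp)

# Aggregate-reward estimates and the MC loss computed at the end.
print("Reward estimates:", cpe_details.reward_estimates)
print("MC loss:", cpe_details.mc_loss)

# Per-action Q-value statistics and greedy-action distribution
# (populated only when action_names is available).
print("Mean Q-values:", cpe_details.q_value_means)
print("Action distribution:", cpe_details.action_distribution)

# Per-metric CPE estimates.
for metric, estimate_set in cpe_details.metric_estimates.items():
    print(metric, estimate_set)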
Example #4
    def score_cpe(self, metric_name, edp: EvaluationDataPage):
        direct_method, inverse_propensity, doubly_robust = self.doubly_robust_estimator.estimate(
            edp)
        sequential_doubly_robust = self.sequential_doubly_robust_estimator.estimate(
            edp)
        weighted_doubly_robust = self.weighted_sequential_doubly_robust_estimator.estimate(
            edp, num_j_steps=1, whether_self_normalize_importance_weights=True)
        magic = self.weighted_sequential_doubly_robust_estimator.estimate(
            edp,
            num_j_steps=Evaluator.NUM_J_STEPS_FOR_MAGIC_ESTIMATOR,
            whether_self_normalize_importance_weights=True,
        )
        # Unlike Examples #1 and #2, this variant packages the estimates into a
        # new CpeEstimateSet and returns it instead of mutating a caller-provided one.
        return CpeEstimateSet(
            direct_method=direct_method,
            inverse_propensity=inverse_propensity,
            doubly_robust=doubly_robust,
            sequential_doubly_robust=sequential_doubly_robust,
            weighted_doubly_robust=weighted_doubly_robust,
            magic=magic,
        )
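
To make the difference between the two score_cpe signatures concrete, here is a hypothetical pair of call sites. Only the call shapes are taken from the examples (Example #3 shows the three-argument form); evaluator and edp are assumed to exist, so this is a sketch rather than code from the repository.

# Hypothetical call sites; `evaluator` and `edp` are assumed to exist already.

# Mutating variant (Examples #1 and #2): the caller allocates the CpeEstimateSet,
# mirroring the calls made inside evaluate_post_training in Examples #3 and #5.
estimates = CpeEstimateSet()
evaluator.score_cpe("Reward", edp, estimates)

# Returning variant (Example #4): score_cpe builds and returns the set itself.
estimates = evaluator.score_cpe("Reward", edp)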
Example #5
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        self.score_cpe("Reward", edp, cpe_details.reward_estimates)

        if self.metrics_to_score is not None:
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(
                        metric))

                metric_reward_edp = edp.set_metric_as_reward(
                    i, len(self.action_names))

                cpe_details.metric_estimates[metric] = CpeEstimateSet()
                self.score_cpe(metric, metric_reward_edp,
                               cpe_details.metric_estimates[metric])

        # Compute MC Loss on Aggregate Reward
        cpe_details.mc_loss = float(
            torch.mean(torch.abs(edp.logged_values - edp.model_values)))

        return cpe_details