Example #1
    def set_evaluation_feedback(self, feedbacks):
        """Inform optimizer of outcome of a rollout with current weights."""
        return_ = check_feedback(feedbacks, compute_sum=True)
        # Transform reward (e.g. to a log-scale)
        return_ = self.value_transform(return_)

        self.bayes_opt.update(self.parameters, return_)
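
For orientation, this is a minimal sketch of the episodic loop in which set_evaluation_feedback is typically called. Only set_evaluation_feedback appears in the examples; the method names init, get_next_parameters, and get_best_parameters, as well as run_episodes, opt, and objective, are assumed placeholder names for illustration.

import numpy as np

def run_episodes(opt, objective, n_params, n_episodes=100):
    # Assumed interface: the optimizer proposes parameters, the rollout
    # returns per-step feedbacks, and the summed return updates the model.
    opt.init(n_params)
    params = np.empty(n_params)
    for _ in range(n_episodes):
        opt.get_next_parameters(params)
        feedbacks = objective(params)
        opt.set_evaluation_feedback(feedbacks)
    return opt.get_best_parameters()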
Example #2
    def set_evaluation_feedback(self, feedbacks):
        """Inform optimizer of outcome of a rollout with current weights."""
        return_ = check_feedback(feedbacks, compute_sum=True)
        # Transform reward (e.g., to a log-scale)
        return_ = self.value_transform(return_)

        self.bayes_opt.update(np.hstack((self.context, self.parameters)),
                              return_)

        if self.policy is not None:
            # Policy derived from internal model is no longer valid as the data
            # has changed
            self.policy_fitted = False
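
The contextual variant above conditions the surrogate model on both the task context and the current policy parameters, so the two vectors are concatenated into a single query point before the Bayesian optimizer is updated. A toy illustration with made-up values:

import numpy as np

context = np.array([0.3, 0.7])           # hypothetical task context
parameters = np.array([1.5, -0.2, 0.8])  # hypothetical policy weights
x = np.hstack((context, parameters))     # single input vector, shape (5,)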
Example #3
    def set_evaluation_feedback(self, feedbacks):
        """Inform optimizer of outcome of a rollout with current weights."""
        self.reward = check_feedback(feedbacks, compute_sum=True)

        self.history_theta.append(self.params)
        self.history_R.append(self.reward)

        self.it += 1

        if self.it % self.train_freq == 0:
            theta = np.asarray(self.history_theta)
            R = np.asarray(self.history_R)
            d = solve_dual_reps(R, self.epsilon, self.min_eta)[0]

            self.policy_.fit(None, theta, d)

        self.logger.info("Reward %.6f" % self.reward)

        if self.reward > self.max_return:
            self.max_return = self.reward
            self.best_params = self.params
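
This example delegates the reweighting of past rollouts to solve_dual_reps and then refits the upper-level policy with those weights. As a rough reference, the episodic REPS weighting can be sketched as follows; this is a generic reimplementation for illustration, not the library's solve_dual_reps, and reps_weights is a hypothetical name.

import numpy as np
from scipy.optimize import minimize

def reps_weights(R, epsilon, min_eta):
    # Episodic REPS: choose a temperature eta that bounds the KL divergence
    # between old and new sample distributions by epsilon, then weight each
    # rollout by exp(R / eta).
    R = np.asarray(R, dtype=float)
    R_shifted = R - R.max()  # shift for numerical stability

    def dual(x):
        eta = max(x[0], min_eta)
        return eta * epsilon + eta * np.log(np.mean(np.exp(R_shifted / eta)))

    eta = max(minimize(dual, x0=[1.0], method="Nelder-Mead").x[0], min_eta)
    d = np.exp(R_shifted / eta)
    return d / d.sum(), eta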
Example #4
    def set_evaluation_feedback(self, feedback):
        """Set feedbacks for the parameter vector.

        Parameters
        ----------
        feedback : list of float
            feedbacks for each step or for the episode, depends on the problem
        """
        k = self.it % self.n_samples_per_update
        self.fitness[k] = check_feedback(feedback, compute_sum=True)
        if self.maximize:
            self.fitness[k] *= -1

        if self.fitness[k] <= self.best_fitness:
            self.best_fitness = self.fitness[k]
            self.best_fitness_it = self.it
            self.best_params[:] = self.samples[k]

        self.it += 1

        if (self.it - self.initial_it) % self.n_samples_per_update == 0:
            self._update(self.samples, self.fitness)
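
The sign flip and the <= comparison above follow the convention that the underlying update rule minimizes fitness, so returns are negated when maximize is set. A toy check of that bookkeeping with made-up numbers:

import numpy as np

returns = np.array([2.0, 5.0, 3.0])
fitness = -returns                # maximize=True: flip sign for a minimizer
best_k = int(np.argmin(fitness))  # lowest fitness == highest return
assert returns[best_k] == returns.max()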
Example #5
def test_check_feedback_inf():
    feedbacks = [0, 1, np.inf]
    assert_true(
        np.isinf(check_feedback(feedbacks, compute_sum=True, check_inf=False)))
    assert_raises(ValueError, check_feedback, feedbacks)
Example #6
def test_check_feedback_nan():
    feedbacks = [0, 1, np.nan]
    assert_true(
        np.isnan(check_feedback(feedbacks, compute_sum=True, check_nan=False)))
    assert_raises(ValueError, check_feedback, feedbacks)
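
The two tests pin down the contract of check_feedback: it aggregates the per-step feedbacks when compute_sum=True and rejects non-finite values unless the corresponding check is disabled. A minimal sketch consistent with that behavior follows; the default argument values are assumptions, and this is not the library's actual implementation.

import numpy as np

def check_feedback(feedbacks, compute_sum=False, check_inf=True,
                   check_nan=True):
    # Convert to a float array, optionally reject non-finite values, and
    # either return the summed return or the raw feedback array.
    feedbacks = np.asarray(feedbacks, dtype=float)
    if check_inf and np.isinf(feedbacks).any():
        raise ValueError("Invalid feedback: infinite value")
    if check_nan and np.isnan(feedbacks).any():
        raise ValueError("Invalid feedback: NaN value")
    return feedbacks.sum() if compute_sum else feedbacks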