Example #1
    def optimize_ei(self, bb_alpha, grid, lower, upper, incumbent,
                    bb_alpha_samples):

        X = T.matrix('X', dtype=theano.config.floatX)
        log_ei = self.sparse_gp.compute_log_ei(X, incumbent)
        pred_log_probs = LogSumExp(bb_alpha.network.output(X), 0) + T.log(
            1.0 / bb_alpha_samples)
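
        # The compiled objectives below are the negative of log EI plus the
        # log probability that the constraint is satisfied, so minimizing them
        # maximizes the expected improvement weighted by the probability of
        # feasibility.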

        function_grid = theano.function(
            [X],
            -log_ei - T.reshape(pred_log_probs[:, :, 1], [T.shape(X)[0], 1]),
            allow_input_downcast=True)
        function_scalar = theano.function(
            [X],
            -log_ei[0, 0] -
            T.reshape(pred_log_probs[:, :, 1], [T.shape(X)[0], 1])[0, 0],
            allow_input_downcast=True)
        function_scalar_gradient = theano.function(
            [X],
            -T.grad(
                log_ei[0, 0] + T.reshape(pred_log_probs[:, :, 1],
                                         [T.shape(X)[0], 1])[0, 0], X),
            allow_input_downcast=True)

        return global_optimization(grid, lower, upper, function_grid,
                                   function_scalar,
                                   function_scalar_gradient)[0]
    def get_incumbent(self, bb_alpha, grid, bb_alpha_samples):

        self.sparse_gp.compute_output()
        m, v = self.sparse_gp.getPredictedValues()

        X = T.matrix('X', dtype=theano.config.floatX)
        pred_probs = T.exp(
            LogSumExp(bb_alpha.network.output(X), 0) +
            T.log(1.0 / bb_alpha_samples))
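
        # The givens below evaluate the GP predictive mean at deterministic
        # inputs: the input means are set to X and the input variances to zero.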

        function_grid = theano.function([X],
                                        m,
                                        givens={
                                            self.input_means: X,
                                            self.input_vars: 0 * X
                                        })
        function_grid_prob = theano.function([X],
                                             T.reshape(pred_probs[:, :, 1],
                                                       [T.shape(X)[0], 1]),
                                             givens={
                                                 self.input_means: X,
                                                 self.input_vars: 0 * X
                                             })

        m_on_grid = function_grid(grid)
        p_on_grid = function_grid_prob(grid)

        # Obtain the row of grid with the largest predicted mean m_on_grid
        # subject to p_on_grid >= 0.95; if no grid point reaches p_on_grid >=
        # 0.95, fall back to the row with the largest p_on_grid.

        if np.max(p_on_grid) < 0.95:
            grid_row_val = grid[np.argmax(p_on_grid)]

        else:
            feasible_point_indices = [
                i for i in range(len(p_on_grid)) if p_on_grid[i] >= 0.95
            ]
            grid_row_val = grid[max(feasible_point_indices,
                                    key=lambda i: m_on_grid[i, 0])]

        # Return function_grid evaluated at the selected row of grid: an array
        # of shape [1, 1] holding the incumbent value.

        return function_grid(grid_row_val.reshape(1, 2))
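
The selection rule above can be illustrated on toy arrays; the numbers below are hypothetical and only the 0.95 feasibility threshold comes from the code.

    # Minimal sketch of the incumbent selection rule, assuming m_on_grid and
    # p_on_grid are (n, 1) arrays as returned by the compiled Theano functions.
    import numpy as np

    m_on_grid = np.array([[0.2], [0.9], [0.5]])     # hypothetical predicted means
    p_on_grid = np.array([[0.99], [0.40], [0.97]])  # hypothetical P(constraint satisfied)

    if np.max(p_on_grid) < 0.95:
        row = np.argmax(p_on_grid)  # nothing feasible: most promising constraint probability
    else:
        feasible = np.where(p_on_grid[:, 0] >= 0.95)[0]
        row = feasible[np.argmax(m_on_grid[feasible, 0])]  # best mean among feasible rows
    # row == 2 here: index 1 has the larger mean but is predicted infeasible.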
Example #3
    def get_incumbent(self, bb_alpha, grid, bb_alpha_samples):

        self.sparse_gp.compute_output()
        m, v = self.sparse_gp.getPredictedValues()

        X = T.matrix('X', dtype=theano.config.floatX)
        pred_probs = T.exp(
            LogSumExp(bb_alpha.network.output(X), 0) +
            T.log(1.0 / bb_alpha_samples))

        function_grid = theano.function([X],
                                        m,
                                        givens={
                                            self.input_means: X,
                                            self.input_vars: 0 * X
                                        },
                                        allow_input_downcast=True)
        function_grid_prob = theano.function([X],
                                             T.reshape(pred_probs[:, :, 1],
                                                       [T.shape(X)[0], 1]),
                                             givens={
                                                 self.input_means: X,
                                                 self.input_vars: 0 * X
                                             },
                                             allow_input_downcast=True)

        m_on_grid = function_grid(grid)
        p_on_grid = function_grid_prob(grid)

        # Obtain the row of grid with the largest predicted mean m_on_grid
        # subject to p_on_grid >= 0.95; if no grid point reaches p_on_grid >=
        # 0.95, fall back to the row with the largest p_on_grid.

        if np.max(p_on_grid) < 0.95:
            grid_row_val = grid[np.argmax(p_on_grid)]

        else:
            feasible_point_indices = np.where(p_on_grid >= 0.95)[0]
            max_feasible_index = np.where(m_on_grid == np.max(
                m_on_grid.take(feasible_point_indices)))[0][0]
            grid_row_val = grid[np.int64(max_feasible_index)]

        # Return function_grid evaluated at the selected row of grid: an array
        # of shape [1, 1] holding the incumbent value.
        # The number of input features (56) is hard-coded below.

        return function_grid(grid_row_val.reshape(1, 56))
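
Note that the np.where lookup above recovers the index by matching on the value of m_on_grid, so a tie with an infeasible grid point could select the wrong row. A minimal equivalent that indexes the feasible set directly (same variable names, assuming m_on_grid has shape (n, 1)):

    feasible_point_indices = np.where(p_on_grid >= 0.95)[0]
    best_feasible_index = feasible_point_indices[
        np.argmax(m_on_grid[feasible_point_indices, 0])]
    grid_row_val = grid[best_feasible_index]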
Example #4
    def batched_greedy_ei(self,
                          bb_alpha,
                          q,
                          lower,
                          upper,
                          bb_alpha_samples,
                          n_samples=1):
        """
        Subroutine to select data points subject to constraint

        bb_alpha: instance of BB_Alpha class

        """

        self.setForPrediction()

        grid_size = 10000
        grid = casting(lower + np.random.rand(grid_size, self.d_input) *
                       (upper - lower))
        grid = np.concatenate([
            grid, self.input_means_numpy
        ], 0)  # append the training inputs to the randomly sampled grid

        incumbent = self.get_incumbent(bb_alpha, grid, bb_alpha_samples)
        X_numpy = self.optimize_ei(bb_alpha, grid, lower, upper, incumbent,
                                   bb_alpha_samples)
        randomness_numpy = casting(0 * np.random.randn(
            X_numpy.shape[0], n_samples).astype(theano.config.floatX))

        randomness = theano.shared(value=randomness_numpy.astype(
            theano.config.floatX),
                                   name='randomness',
                                   borrow=True)
        X = theano.shared(value=X_numpy.astype(theano.config.floatX),
                          name='X',
                          borrow=True)
        x = T.matrix('x', dtype=theano.config.floatX)

        log_ei = self.sparse_gp.compute_log_averaged_ei(x, X, incumbent)
        pred_log_probs = LogSumExp(bb_alpha.network.output(x), 0) + T.log(
            1.0 / bb_alpha_samples)
        # One entry per candidate point; the last index gives the log
        # probability of the constraint being unsatisfied (0) or satisfied (1).

        function_grid = theano.function(
            [x],
            -log_ei -
            T.reshape(pred_log_probs[:, :, 1], [T.shape(x)[0], 1])[:, 0],
            allow_input_downcast=True
        )  # index 1 of pred_log_probs gives the log probability of the constraint being satisfied
        function_scalar = theano.function(
            [x],
            -log_ei[0] -
            T.reshape(pred_log_probs[:, :, 1], [T.shape(x)[0], 1])[0, 0],
            allow_input_downcast=True)
        function_scalar_gradient = theano.function(
            [x],
            -T.grad(
                log_ei[0] + T.reshape(pred_log_probs[:, :, 1],
                                      [T.shape(x)[0], 1])[0, 0], x),
            allow_input_downcast=True)

        # We optimize the EI greedily, selecting one new point per iteration

        for i in range(1, q):

            new_point = global_optimization(grid, lower, upper, function_grid,
                                            function_scalar,
                                            function_scalar_gradient)[0]
            X_numpy = casting(np.concatenate([X_numpy, new_point], 0))
            randomness_numpy = casting(0 * np.random.randn(
                X_numpy.shape[0], n_samples).astype(theano.config.floatX))
            X.set_value(X_numpy)
            randomness.set_value(randomness_numpy)
            print(i, X_numpy)

        m, v = self.predict(X_numpy, 0 * X_numpy)

        print("Predictive mean at selected points:\n", m)

        return X_numpy
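
    # Hypothetical usage (illustrative only; model, bb_alpha and the bounds
    # below are placeholders, not defined in this listing):
    #
    #     lower = np.zeros(model.d_input, dtype=theano.config.floatX)
    #     upper = np.ones(model.d_input, dtype=theano.config.floatX)
    #     batch = model.batched_greedy_ei(bb_alpha, q=5, lower=lower,
    #                                     upper=upper, bb_alpha_samples=50)
    #     # batch has shape (q, model.d_input): inputs chosen greedily by
    #     # constraint-weighted EI.
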
    def __init__(self, layer_sizes, n_samples, alpha, learning_rate, v_prior,
                 batch_size, X_train, y_train, N_train, X_val, y_val, N_val):

        self.batch_size = batch_size
        self.N_train = N_train
        self.X_train = X_train
        self.y_train = y_train

        self.N_val = N_val
        self.X_val = X_val
        self.y_val = y_val

        # We create the network

        self.network = network.Network(layer_sizes, n_samples, v_prior,
                                       N_train)

        # index to a batch

        index = T.lscalar()

        # We create the input and output variables. The input will be a minibatch replicated n_samples times

        self.x = T.matrix('x')
        self.y = T.vector('y', dtype='int32')

        # The logarithm of the values for the likelihood factors

        ll = self.network.log_likelihood_values(self.x, self.y)

        # The energy function for black-box alpha
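        # (the expression below is a Monte Carlo estimate of the BB-alpha
        #  energy with K = n_samples posterior samples and minibatch size B:
        #  E = log Z_prior - log Z_q
        #      - N / (B * alpha) * sum_b log[(1/K) * sum_k (p(y_b | W_k) / f_hat(W_k))^alpha])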

        self.estimate_marginal_ll = -1.0 * N_train / (self.x.shape[0] * alpha) * \
            T.sum(LogSumExp(alpha * (ll - self.network.log_f_hat()), 0) +
                  T.log(1.0 / n_samples)) - self.network.log_normalizer_q() + \
            self.network.log_Z_prior()

        # We create a theano function for updating q

        self.process_minibatch = theano.function(
            [index],
            self.estimate_marginal_ll,
            updates=adam(self.estimate_marginal_ll, self.network.params,
                         learning_rate),
            givens={
                self.x:
                self.X_train[index * batch_size:(index + 1) * batch_size],
                self.y:
                self.y_train[index * batch_size:(index + 1) * batch_size]
            })

        # We create a theano function for making predictions

        self.error_minibatch_train = theano.function(
            [index],
            T.mean(
                T.neq(
                    T.argmax((LogSumExp(self.network.output(self.x), 0) +
                              T.log(1.0 / n_samples))[0, :, :],
                             axis=1), self.y)),
            givens={
                self.x:
                self.X_train[index * batch_size:(index + 1) * batch_size],
                self.y:
                self.y_train[index * batch_size:(index + 1) * batch_size]
            })

        self.error_minibatch_val = theano.function(
            [index],
            T.mean(
                T.neq(
                    T.argmax((LogSumExp(self.network.output(self.x), 0) +
                              T.log(1.0 / n_samples))[0, :, :],
                             axis=1), self.y)),
            givens={
                self.x:
                self.X_val[index * batch_size:(index + 1) * batch_size],
                self.y: self.y_val[index * batch_size:(index + 1) * batch_size]
            })

        self.ll_minibatch_val = theano.function(
            [index],
            T.mean(LogSumExp(ll, 0) + T.log(1.0 / n_samples)),
            givens={
                self.x:
                self.X_val[index * batch_size:(index + 1) * batch_size],
                self.y: self.y_val[index * batch_size:(index + 1) * batch_size]
            })

        # We create a theano function for outputting prediction probabilities

        X = T.matrix('X', dtype=theano.config.floatX)
        self.prediction_probs = theano.function(
            [X],
            T.exp(
                LogSumExp(self.network.output(X), 0) + T.log(1.0 / n_samples)))

        # We create a theano function for outputting prediction log probabilities

        self.pred_log_probs = theano.function(
            [X],
            LogSumExp(self.network.output(X), 0) + T.log(1.0 / n_samples))

        self.network.update_randomness()
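
A hypothetical training and prediction loop over the compiled functions above; the epoch budget, the instance name bb_alpha, and X_new are assumptions rather than part of this listing.

    import numpy as np

    n_train_batches = int(np.ceil(bb_alpha.N_train / float(bb_alpha.batch_size)))
    n_val_batches = int(np.ceil(bb_alpha.N_val / float(bb_alpha.batch_size)))

    for epoch in range(100):                          # assumed epoch budget
        bb_alpha.network.update_randomness()          # resample the MC weights
        for idx in np.random.permutation(n_train_batches):
            energy = bb_alpha.process_minibatch(idx)  # one Adam step on the BB-alpha energy

    val_error = np.mean([bb_alpha.error_minibatch_val(i) for i in range(n_val_batches)])
    probs = bb_alpha.prediction_probs(X_new)          # class probabilities for new inputs X_new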