Exemplo n.º 1
0
class ScalarMeanFunction(MeanFunction):
    """
    Constant mean function: a single scalar which is learned along with the
    other parameters while the marginal likelihood is optimized.

    :param initial_mean_value: Scalar the mean value is initialized with

    """
    def __init__(self, initial_mean_value=INITIAL_MEAN_VALUE, **kwargs):
        super(ScalarMeanFunction, self).__init__(**kwargs)

        # The mean value itself is not transformed. It still goes through an
        # encoding so that box constraints are handled in the same way as for
        # the other Gluon parameters (bandwidths, residual noise variance)
        prior = Normal(0.0, 1.0)
        self.encoding = IdentityScalarEncoding(
            init_val=initial_mean_value, regularizer=prior)
        with self.name_scope():
            self.mean_value_internal = register_parameter(
                self.params, 'mean_value', self.encoding)

    def hybrid_forward(self, F, X, mean_value_internal):
        """
        Evaluate the scalar mean function.
        The result is mean_value * vector_of_ones, where the vector of ones
        takes its dimensions from the first column of X.

        :param F: mx.sym or mx.nd
        :param X: input data of size (n,d) at which the mean is evaluated
            (only used to obtain the right dimension)
        :param mean_value_internal: internal representation of the mean
            value, decoded through self.encoding
        :return: the decoded mean value broadcast against a ones array
            shaped like the first column of X

        """
        mean_value = self.encoding.get(F, mean_value_internal)
        # X contributes its shape only, so gradients through it are blocked
        first_column = F.slice_axis(F.BlockGrad(X), axis=1, begin=0, end=1)
        ones = F.ones_like(first_column)
        return F.broadcast_mul(ones, mean_value)

    def param_encoding_pairs(self):
        """
        :return: list with the single (parameter, encoding) pair of this
            mean function
        """
        pair = (self.mean_value_internal, self.encoding)
        return [pair]

    def get_mean_value(self):
        """
        :return: current mean value, extracted as a scalar via mx.nd
        """
        mean_value = encode_unwrap_parameter(
            mx.nd, self.mean_value_internal, self.encoding)
        return mean_value.asscalar()

    def set_mean_value(self, mean_value):
        """
        Write a new mean value into the parameter through the encoding.

        :param mean_value: new value of the mean
        """
        self.encoding.set(self.mean_value_internal, mean_value)

    def get_params(self):
        """
        :return: dict with the single entry 'mean_value'
        """
        current_value = self.get_mean_value()
        return {'mean_value': current_value}

    def set_params(self, param_dict):
        """
        :param param_dict: dict which must contain the key 'mean_value'
        """
        new_value = param_dict['mean_value']
        self.set_mean_value(new_value)
Exemplo n.º 2
0
class FabolasKernelFunction(KernelFunction):
    """
    The kernel function proposed in:

        Klein, A., Falkner, S., Bartels, S., Hennig, P., & Hutter, F. (2016).
        Fast Bayesian Optimization of Machine Learning Hyperparameters
        on Large Datasets, in AISTATS 2017.
        ArXiv:1605.07079 [Cs, Stat]. Retrieved from http://arxiv.org/abs/1605.07079

    Please note this is only one of the components of the factorized kernel
    proposed in the paper. This is the finite-rank ("degenerate") kernel for
    modelling data subset fraction sizes. Defined as:

        k(x, y) = (U phi(x))^T (U phi(y)),  x, y in [0, 1],
        phi(x) = [1, (1 - x)^2]^T,  U = [[u1, u3], [0, u2]] upper triangular,
        u1, u2 > 0.

    :param encoding_type: encoding type passed to create_encoding for the
        parameters u1, u2
    :param u1_init: initial value for the shared encoding of u1 and u2
    :param u3_init: initial value of u3
    """

    def __init__(self, encoding_type=DEFAULT_ENCODING,
                 u1_init=1.0, u3_init=0.0, **kwargs):

        super(FabolasKernelFunction, self).__init__(dimension=1, **kwargs)

        # u1 and u2 are registered with the same encoding object, so both
        # are initialized from u1_init
        self.encoding_u12 = create_encoding(
            encoding_type, u1_init, COVARIANCE_SCALE_LOWER_BOUND,
            COVARIANCE_SCALE_UPPER_BOUND, 1, None)
        # This is not really needed, but param_encoding_pairs needs an encoding
        # for each parameter
        self.encoding_u3 = IdentityScalarEncoding(init_val=u3_init)
        with self.name_scope():
            self.u1_internal = register_parameter(
                self.params, 'u1', self.encoding_u12)
            self.u2_internal = register_parameter(
                self.params, 'u2', self.encoding_u12)
            self.u3_internal = register_parameter(
                self.params, 'u3', self.encoding_u3)

    @staticmethod
    def _compute_factor(F, x, u1, u2, u3):
        """
        Compute the feature matrix whose rows are (U phi(x_i))^T, i.e.
        [u1 + u3 * (1 - x_i)^2, u2 * (1 - x_i)^2].

        :param F: mx.sym or mx.nd
        :param x: inputs; the two feature columns are concatenated along
            axis 1
        :param u1: decoded parameter u1
        :param u2: decoded parameter u2
        :param u3: decoded parameter u3
        :return: feature matrix with the two columns described above
        """
        tvec = (1.0 - x) ** 2
        return F.concat(
            F.broadcast_add(F.broadcast_mul(tvec, u3), u1),
            F.broadcast_mul(tvec, u2), dim=1)

    def hybrid_forward(self, F, X1, X2, u1_internal, u2_internal, u3_internal):
        """
        Compute the kernel matrix between X1 and X2 as the product of their
        feature matrices. If X2 is the very same object as X1, the symmetric
        product is computed from a single feature matrix.

        :param F: mx.sym or mx.nd
        :param X1: first input
        :param X2: second input; pass the same object as X1 for the
            symmetric case
        :param u1_internal: internal representation of u1
        :param u2_internal: internal representation of u2
        :param u3_internal: internal representation of u3
        :return: kernel matrix
        """
        # The identity test must look at the arguments as they were passed.
        # _check_input_shape may return a new object, and comparing against
        # the rebound X1 would then never select the symmetric branch
        same_inputs = X2 is X1
        X1 = self._check_input_shape(F, X1)

        u1 = self.encoding_u12.get(F, u1_internal)
        u2 = self.encoding_u12.get(F, u2_internal)
        u3 = self.encoding_u3.get(F, u3_internal)

        mat1 = self._compute_factor(F, X1, u1, u2, u3)
        if same_inputs:
            # mat1 * mat1^T
            return F.linalg.syrk(mat1, transpose=False)
        X2 = self._check_input_shape(F, X2)
        mat2 = self._compute_factor(F, X2, u1, u2, u3)
        # mat1 * mat2^T
        return F.dot(mat1, mat2, transpose_a=False, transpose_b=True)

    def _get_pars(self, F, X):
        """
        Unwrap and decode the three parameters.

        :param F: mx.sym or mx.nd
        :param X: passed through to encode_unwrap_parameter (get_params
            passes None here)
        :return: tuple (u1, u2, u3)
        """
        u1 = encode_unwrap_parameter(F, self.u1_internal, self.encoding_u12, X)
        u2 = encode_unwrap_parameter(F, self.u2_internal, self.encoding_u12, X)
        u3 = encode_unwrap_parameter(F, self.u3_internal, self.encoding_u3, X)
        return (u1, u2, u3)

    def diagonal(self, F, X):
        """
        Compute the diagonal of the kernel matrix for X, as the squared row
        norms of the feature matrix.

        :param F: mx.sym or mx.nd
        :param X: input
        :return: diagonal k(x_i, x_i)
        """
        X = self._check_input_shape(F, X)
        u1, u2, u3 = self._get_pars(F, X)
        mat = self._compute_factor(F, X, u1, u2, u3)
        return F.sum(mat ** 2, axis=1)

    def diagonal_depends_on_X(self):
        """
        :return: True, since the diagonal is computed from the values in X
        """
        return True

    def param_encoding_pairs(self):
        """
        :return: list of (parameter, encoding) pairs for u1, u2, u3
        """
        return [
            (self.u1_internal, self.encoding_u12),
            (self.u2_internal, self.encoding_u12),
            (self.u3_internal, self.encoding_u3)
        ]

    def get_params(self):
        """
        :return: dict with keys 'u1', 'u2', 'u3' and scalar values
        """
        keys = ('u1', 'u2', 'u3')
        values = self._get_pars(mx.nd, None)
        return {k: v.reshape((1,)).asscalar() for k, v in zip(keys, values)}

    def set_params(self, param_dict):
        """
        :param param_dict: dict which must contain the keys 'u1', 'u2', 'u3'
        """
        self.encoding_u12.set(self.u1_internal, param_dict['u1'])
        self.encoding_u12.set(self.u2_internal, param_dict['u2'])
        self.encoding_u3.set(self.u3_internal, param_dict['u3'])
Exemplo n.º 3
0
class Coregionalization(KernelFunction):
    """
    Kernel over indices: k(i, j) = K_{ij}, where K = W W^T + diag(rho).

    W is stored as a flat parameter and reshaped to
    (num_outputs, num_factors); rho is a single value repeated along the
    diagonal.

    :param num_outputs: number of rows of W (and size of K)
    :param num_factors: number of columns of W
    :param rho_init: initial value for rho
    :param encoding_type: encoding type passed to create_encoding for rho
    """
    def __init__(self, num_outputs, num_factors=16,
                 rho_init=INITIAL_NOISE_VARIANCE,
                 encoding_type=DEFAULT_ENCODING, **kwargs):

        super(Coregionalization, self).__init__(dimension=1, **kwargs)

        self.num_outputs = num_outputs
        self.num_factors = num_factors

        # Number of entries of W when stored as a flat vector
        w_size = num_outputs * num_factors
        self.encoding_W_flat = IdentityScalarEncoding(dimension=w_size)
        self.encoding_rho = create_encoding(
            encoding_type, rho_init, NOISE_VARIANCE_LOWER_BOUND,
            NOISE_VARIANCE_UPPER_BOUND, dimension=1)

        with self.name_scope():
            self.W_flat_internal = self.params.get(
                "W_internal", shape=(w_size,),
                init=mx.init.Normal(),  # TODO: Use Xavier initialization here
                dtype=DATA_TYPE)
            self.rho_internal = self.params.get(
                "rho_internal", shape=(1,),
                init=mx.init.Constant(self.encoding_rho.init_val_int),
                dtype=DATA_TYPE)

    @staticmethod
    def _meshgrid(F, a, b):
        """
        Build coordinate matrices from two coordinate vectors.

        Intended to mirror numpy.meshgrid
        (https://docs.scipy.org/doc/numpy/reference/generated/numpy.meshgrid.html)
        with Cartesian indexing, restricted to two input vectors.

        :param a: 1-D array with the coordinates of a grid (length n)
        :param b: 1-D array with the coordinates of a grid (length m)
        :return: both coordinate matrices stacked along a new leading axis;
            for 1-D inputs this is meant to be a 3-D array of shape (2, m, n)
        """
        ones_a = F.ones_like(F.expand_dims(a, axis=-1))
        b_tiled = F.broadcast_mul(ones_a, b)
        ones_b = F.ones_like(F.expand_dims(b, axis=-1))
        a_tiled = F.broadcast_mul(ones_b, a)
        return F.stack(a_tiled, F.transpose(b_tiled), axis=0)

    def _compute_gram_matrix(self, F, W_flat, rho):
        """
        Assemble K = W W^T + diag(rho).

        :param F: mx.sym or mx.nd
        :param W_flat: decoded W as a flat array with
            num_outputs * num_factors entries
        :param rho: decoded rho
        :return: the matrix K
        """
        W = F.reshape(W_flat, shape=(self.num_outputs, self.num_factors))
        ones = F.ones(self.num_outputs, dtype=DATA_TYPE)
        rho_vec = F.broadcast_mul(rho, ones)
        low_rank = F.linalg.syrk(W)
        return low_rank + F.diag(rho_vec)

    def hybrid_forward(self, F, ind1, ind2, W_flat_internal, rho_internal):
        """
        Look up the entries of K addressed by all pairs of indices from
        ind1 and ind2.

        :param F: mx.sym or mx.nd
        :param ind1: first index input
        :param ind2: second index input (may be the same object as ind1)
        :param W_flat_internal: internal representation of the flat W
        :param rho_internal: internal representation of rho
        :return: entries of K gathered at the index grid, squeezed and
            transposed
        """
        W_flat = self.encoding_W_flat.get(F, W_flat_internal)
        rho = self.encoding_rho.get(F, rho_internal)
        gram = self._compute_gram_matrix(F, W_flat, rho)
        ind1 = self._check_input_shape(F, ind1)
        if ind2 is not ind1:
            ind2 = self._check_input_shape(F, ind2)
        grid = self._meshgrid(F, ind1, ind2)
        entries = F.gather_nd(gram, grid)
        return F.transpose(F.squeeze(entries))

    def diagonal(self, F, ind):
        """
        Pick the diagonal entries of K addressed by ind.

        :param F: mx.sym or mx.nd
        :param ind: index input
        :return: diag(K) taken at ind
        """
        ind = self._check_input_shape(F, ind)
        W_raw = unwrap_parameter(F, self.W_flat_internal, ind)
        W_flat = self.encoding_W_flat.get(F, W_raw)
        rho_raw = unwrap_parameter(F, self.rho_internal, ind)
        rho = self.encoding_rho.get(F, rho_raw)
        gram = self._compute_gram_matrix(F, W_flat, rho)
        gram_diag = F.diag(gram)
        return F.take(gram_diag, ind)

    def diagonal_depends_on_X(self):
        """
        :return: True, since the diagonal is looked up via the input indices
        """
        return True

    def param_encoding_pairs(self):
        """
        :return: list of (parameter, encoding) pairs for W and rho
        """
        w_pair = (self.W_flat_internal, self.encoding_W_flat)
        rho_pair = (self.rho_internal, self.encoding_rho)
        return [w_pair, rho_pair]