def loss(self, n_samples, regularization_strength, mix, mu, sigma):
    """Negative log-likelihood of the target under a Gaussian mixture.

    Sums, over mixture components (axis 0), the weighted Gaussian density of
    ``self.target_vector`` under each component, then returns
    ``-sum(log(...))``.

    NOTE(review): regularization is currently disabled, so the ``n_samples``
    and ``regularization_strength`` arguments are unused.
    """
    # Per-component normal density evaluated at the target vector.
    norm_const = tensor.inv(np.sqrt(2 * np.pi) * sigma)
    exponent = tensor.neg(tensor.sqr(mu - self.target_vector)) * tensor.inv(2 * tensor.sqr(sigma))
    component_density = mix * norm_const * tensor.exp(exponent)

    # Mixture likelihood per output, then negative log-likelihood overall.
    log_sum_loss = -tensor.sum(tensor.log(tensor.sum(component_density, axis=0)))

    return log_sum_loss
示例#2
0
    def get_output_for(self, input, deterministic=False,
                       batch_norm_use_averages=None,
                       batch_norm_update_averages=None, **kwargs):
        """Forward pass of batch normalization (lasagne-style layer).

        By default, mini-batch statistics are used during training and the
        stored running averages when ``deterministic`` is True; the two
        keyword flags independently override whether the running averages
        are *used* and whether they are *updated*.
        """

        # Anneal the moving-average coefficient as more batches are seen.
        self.count = self.count + 1
        self.alpha = 5.0 / (10 + self.count)
        # self.alpha = 1.0 / (self.count^2)

        # Mini-batch statistics over the normalization axes.
        input_mean = input.mean(self.axes)
        input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        if batch_norm_use_averages is None:
            batch_norm_use_averages = deterministic
        use_averages = batch_norm_use_averages

        if use_averages:
            mean = self.mean
            inv_std = self.inv_std
        else:
            mean = input_mean
            inv_std = input_inv_std

        # Decide whether to update the stored averages
        if batch_norm_update_averages is None:
            batch_norm_update_averages = not deterministic
        update_averages = batch_norm_update_averages

        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            inv_std += 0 * running_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
def normal_log_likelihood_per_component(x, mu, sigma, mixing):
    """Per-component log-likelihood of ``x`` under a Gaussian mixture.

    Returns log(mixing) + log N(x | mu, sigma), elementwise over components.
    ``MINUS_HALF_LOG_2PI`` is the module-level constant -0.5*log(2*pi).
    """
    quadratic = 0.5 * T.inv(sigma ** 2) * (x - mu) ** 2
    return MINUS_HALF_LOG_2PI - T.log(sigma) - quadratic + T.log(mixing)
def __init():
    """Compile the pairwise cosine-affinity function.

    Side effect: registers the compiled function under the module-global
    name ``__affinty_fun``; it maps a (samples x features) matrix to the
    pairwise cosine-similarity matrix rescaled into [0, 1].
    """
    samples = T.matrix("dataset", dtype=config.globalFloatType())

    # Gram matrix of raw dot products between all sample pairs.
    gram = T.dot(samples, T.transpose(samples))

    # Reciprocal row norms, as a 1 x n row vector.
    norms = T.sqrt(T.sum(T.square(samples), axis=1))
    inv_norm_row = T.inv(norms).dimshuffle(['x', 0])
    inv_norm_col = T.transpose(inv_norm_row)

    # Outer product gives 1 / (||x_i|| * ||x_j||) for every pair.
    inv_norm_outer = T.dot(inv_norm_col, inv_norm_row)

    # Cosine similarity mapped from [-1, 1] into [0, 1].
    affinity = (T.mul(gram, inv_norm_outer) + 1) / 2

    globals()['__affinty_fun'] = theano.function(
        [samples],
        [affinity],
        allow_input_downcast=True
    )
示例#5
0
def set_generator_update_function(generator_rnn_model,
                                  generator_mean_model,
                                  generator_std_model,
                                  generator_optimizer,
                                  grad_clipping):
    """Compile the Theano training function for the Gaussian generator.

    Builds a Gaussian log-density cost from the RNN's mean/std output heads,
    derives parameter updates via ``generator_optimizer``, and returns a
    compiled function mapping (source_data, target_data) to
    [per-element cost, global gradient L2 norm].
    """
    # input data (time length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data',
                                 dtype=floatX)

    target_data = tensor.tensor3(name='target_data',
                                 dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data,]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]

    # get generator output data
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    # Gaussian log-density of the target given the predicted mean/std.
    # NOTE(review): the 2.0 factor inside sqr/log matches the sibling
    # implementations in this file — confirm the intended variance convention.
    generator_cost  = -0.5*tensor.inv(2.0*tensor.sqr(output_std_data))*tensor.sqr(output_mean_data-target_data)
    generator_cost += -0.5*tensor.log(2.0*tensor.sqr(output_std_data)*numpy.pi)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model+generator_mean_model+generator_std_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    gradient_dict  = get_model_gradients(generator_rnn_model+generator_mean_model+generator_std_model, generator_updates_cost)

    # Global L2 norm over all parameter gradients.
    gradient_norm  = 0.
    for grad in gradient_dict:
        gradient_norm += tensor.sum(grad**2)
    # BUG FIX: take the square root once, AFTER accumulating all squared
    # sums. The original applied sqrt inside the loop, so each iteration
    # added a squared sum onto an already-rooted value, producing a nonsense
    # "norm" (the corrected sibling implementation in this file applies
    # sqrt after the loop).
    gradient_norm  = tensor.sqrt(gradient_norm)

    # set generator update inputs
    generator_updates_inputs  = [source_data,
                                 target_data,]

    # set generator update outputs
    generator_updates_outputs = [generator_cost, gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')

    return generator_updates_function
示例#6
0
 def energy_function(feature_data, is_train=True):
     """Energy of ``feature_data`` under the feature-space energy model.

     Uses the closure variables ``feature_std``, ``feature_mean``,
     ``linear_w0`` and ``linear_b0``. Returns a per-sample energy vector:
     a negative softplus hidden term plus a diagonal-Gaussian prior term.
     NOTE(review): ``is_train`` is currently unused — confirm intended.
     """
     # feature-wise std (softplus keeps it positive; 1e-10 avoids 1/0)
     feature_std_inv = T.inv(T.nnet.softplus(feature_std)+1e-10)
     # energy hidden-feature
     e = softplus(T.dot(feature_data*feature_std_inv, linear_w0)+linear_b0)
     e = T.sum(-e, axis=1)
     # energy feature prior
     e += 0.5*T.sum(T.sqr(feature_std_inv)*T.sqr(feature_data-feature_mean), axis=1)
     return e
示例#7
0
def set_generator_update_function(
    generator_rnn_model, generator_mean_model, generator_std_model, generator_optimizer, grad_clipping
):
    """Compile the generator training function (fixed-std variant).

    Unlike the sibling implementation, the output std is pinned to the
    constant 0.22 and only the RNN + mean models are optimized; the
    ``generator_std_model`` argument is accepted but unused here.
    """

    # input data (time length * num_samples * input_dims)
    source_data = tensor.tensor3(name="source_data", dtype=floatX)

    target_data = tensor.tensor3(name="target_data", dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]
    # Reorder then merge trailing axes into a 3-D tensor.
    # NOTE(review): exact axis semantics depend on the RNN's 4-D output
    # layout — confirm against its forward() implementation.
    hidden_data = hidden_data.dimshuffle(0, 2, 1, 3).flatten(3)

    # get generator output data
    output_mean_data = get_tensor_output(input=hidden_data, layers=generator_mean_model, is_training=True)
    # output_std_data = get_tensor_output(input=hidden_data,
    #                                     layers=generator_std_model,
    #                                     is_training=True)
    output_std_data = 0.22
    # get generator cost (time_length x num_samples x hidden_size)
    generator_cost = 0.5 * tensor.inv(2.0 * tensor.sqr(output_std_data)) * tensor.sqr(output_mean_data - target_data)
    generator_cost += tensor.log(output_std_data) + 0.5 * tensor.log(2.0 * numpy.pi)
    generator_cost = tensor.sum(generator_cost, axis=2)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(
        layers=generator_rnn_model + generator_mean_model,
        cost=generator_updates_cost,
        optimizer=generator_optimizer,
        use_grad_clip=grad_clipping,
    )

    # Global gradient L2 norm (sqrt applied once, after the full sum).
    gradient_dict = get_model_gradients(generator_rnn_model + generator_mean_model, generator_updates_cost)
    gradient_norm = 0.0
    for grad in gradient_dict:
        gradient_norm += tensor.sum(grad ** 2)
    gradient_norm = tensor.sqrt(gradient_norm)

    # set generator update inputs
    generator_updates_inputs = [source_data, target_data]

    # set generator update outputs
    generator_updates_outputs = [generator_cost, gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(
        inputs=generator_updates_inputs,
        outputs=generator_updates_outputs,
        updates=generator_updates_dict,
        on_unused_input="ignore",
    )

    return generator_updates_function
示例#8
0
    def logsum_loss(self, n_samples, l1_regularization_strength, l2_regularization_strength):
        """Mixture-density negative log-likelihood plus elastic-net penalty.

        Combines -sum(log(sum_k mix_k * N(target | mu_k, sigma_k))) with L1
        and L2 penalties on every layer's weight matrix, each scaled by
        strength / (2 * n_samples).
        """
        log_sum_loss = -tensor.sum(tensor.log(
                            tensor.sum(self.mix * tensor.inv(np.sqrt(2 * np.pi) * self.sigma) *
                                       tensor.exp(tensor.neg(tensor.sqr(self.mu - self.target_vector)) *
                                                  tensor.inv(2 * tensor.sqr(self.sigma))), axis=0)
        ))

        # BUG FIX: dict.values() is a non-indexable view in Python 3;
        # materialize it once before indexing/slicing.
        layers = list(self.layers.values())

        l1_reg_loss = tensor.sum(np.abs(layers[0].W))
        for layer in layers[1:]:
            l1_reg_loss += tensor.sum(np.abs(layer.W))

        l2_reg_loss = tensor.sum(tensor.sqr(layers[0].W))
        for layer in layers[1:]:
            l2_reg_loss += tensor.sum(tensor.sqr(layer.W))

        # BUG FIX: use a float literal so the 1/n_samples factor cannot
        # silently truncate to zero under Python 2 integer division.
        l1_regularization = 1.0 / n_samples * l1_regularization_strength / 2 * l1_reg_loss

        l2_regularization = 1.0 / n_samples * l2_regularization_strength / 2 * l2_reg_loss

        return log_sum_loss + l1_regularization + l2_regularization
示例#9
0
	def __spectral_matrix(self, covariance):
		"""Return covariance^(-1/2) via spectral (eigen-) decomposition.

		Computes V * diag(1/sqrt(lambda)) * V^{-1}; eigenvalues equal to
		zero are clamped to 0.001 to avoid division by zero.
		"""
		egvalues, egmatrix = T.nlinalg.eig(covariance)
		egmatrix_inv = T.nlinalg.matrix_inverse(egmatrix)
		# Diagonal matrix of 1/sqrt(eigenvalue), with zero eigenvalues clamped.
		diag_sqr_inv = T.nlinalg.alloc_diag(
			T.inv(
				T.sqrt(
					T.switch(T.eq(egvalues,0), 0.001, egvalues)
				)
			)
		)
		return egmatrix.dot(diag_sqr_inv).dot(egmatrix_inv)
示例#10
0
def standardize(layer, offset, scale, shared_axes):
    """
    Convenience function for standardizing inputs by applying a fixed offset
    and scale.  This is usually useful when you want the input to your network
    to, say, have zero mean and unit standard deviation over the feature
    dimensions.  This layer allows you to include the appropriate statistics to
    achieve this normalization as part of your network, and applies them to its
    input.  The statistics are supplied as the `offset` and `scale` parameters,
    which are applied to the input by subtracting `offset` and dividing by
    `scale`, sharing dimensions as specified by the `shared_axes` argument.

    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    offset : Theano shared variable, expression, or numpy array
        The offset to apply (via subtraction) to the axis/axes being
        standardized.
    scale : Theano shared variable, expression or numpy array
        The scale to apply (via division) to the axis/axes being standardized.
    shared_axes : 'auto', int or tuple of int
        The axis or axes to share the offset and scale over. If ``'auto'`` (the
        default), share over all axes except for the second: this will share
        scales over the minibatch dimension for dense layers, and additionally
        over all spatial dimensions for convolutional layers.

    Examples
    --------
    Assuming your training data exists in a 2D numpy ndarray called
    ``training_data``, you can use this function to scale input features to the
    [0, 1] range based on the training set statistics like so:

    >>> import lasagne
    >>> import numpy as np
    >>> training_data = np.random.standard_normal((100, 20))
    >>> input_shape = (None, training_data.shape[1])
    >>> l_in = lasagne.layers.InputLayer(input_shape)
    >>> offset = training_data.min(axis=0)
    >>> scale = training_data.max(axis=0) - training_data.min(axis=0)
    >>> l_std = standardize(l_in, offset, scale, shared_axes=0)

    Alternatively, to z-score your inputs based on training set statistics, you
    could set ``offset = training_data.mean(axis=0)`` and
    ``scale = training_data.std(axis=0)`` instead.
    """
    # Subtract the offset
    layer = BiasLayer(layer, -offset, shared_axes)
    # Do not optimize the offset parameter
    layer.params[layer.b].remove('trainable')
    # Divide by the scale
    # (T.inv is the elementwise reciprocal, so scaling by 1/scale divides.)
    layer = ScaleLayer(layer, T.inv(scale), shared_axes)
    # Do not optimize the scales parameter
    layer.params[layer.scales].remove('trainable')
    return layer
示例#11
0
文件: test_dnn.py 项目: nke001/Theano
def test_dnn_batchnorm_train():
    """Check dnn_batch_normalization_train against a pure-Theano reference.

    For both normalization modes and several input ranks, the cuDNN forward
    outputs (out, mean, invstd) and the gradients w.r.t. x/scale/bias must
    match a hand-built reference graph evaluated on random data.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
                                grads + grads2, mode=mode_with_gpu)
            # run
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3])  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=3e-3)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias
示例#12
0
    def get_symbolic_thermal_hmm_params(log_prior_c: types.TheanoVector,
                                        log_trans_tcc: types.TheanoTensor3,
                                        log_emission_tc: types.TheanoMatrix,
                                        temperature: tt.scalar):
        """Temper HMM parameters by scaling log-probabilities with 1/temperature.

        The prior and transition terms are renormalized (log-sum-exp) after
        scaling; the emission terms are scaled but left unnormalized.
        """
        beta = tt.inv(temperature)

        # Scale and renormalize the prior over hidden states.
        scaled_prior_c = beta * log_prior_c
        scaled_prior_c = scaled_prior_c - pm.math.logsumexp(scaled_prior_c)

        # Scale and renormalize each row of the transition tensor.
        scaled_trans_tcc = beta * log_trans_tcc
        scaled_trans_tcc = scaled_trans_tcc - pm.math.logsumexp(scaled_trans_tcc, axis=-1)

        # Emission log-likelihoods are only scaled.
        scaled_emission_tc = beta * log_emission_tc

        return scaled_prior_c, scaled_trans_tcc, scaled_emission_tc
示例#13
0
    def predict(self, X1, y1, X2):
        """Gaussian-process posterior prediction.

        Given training inputs/targets (X1, y1) and test inputs X2, returns
        the predictive mean and the diagonal of the predictive covariance:
        mu = K*^T (K + sigma_n^2 I)^{-1} y, Sigma = K** - K*^T (K+...)^{-1} K*.
        """
        cov_train = self.compute_cov_s(X1, self.N)
        cov_test  = self.compute_cov_s(X2, self.M)
        cov_te_tr = self.compute_cov(X1, X2, self.N, self.M)
        cov_tr_te = cov_te_tr.T

        # BUG FIX: T.inv is the *elementwise* reciprocal; the GP predictive
        # equations require the matrix inverse of (K + sigma_n^2 I).
        arg0  = T.nlinalg.matrix_inverse(cov_train + self.noise**2 * T.identity_like(cov_train))
        arg1  = T.dot(cov_te_tr, arg0)
        mu    = T.dot(arg1, y1)
        sigma = cov_test - T.dot(arg1, cov_tr_te)

        return mu, T.diag(sigma)
示例#14
0
def logp(X):
    '''
    Log-probability (up to a constant) of many 2-D Gaussian residuals.

    Uses the module-level ``mu`` (means) and ``cov`` (2x2 covariance).
    Residuals are reshaped to (n*m, 2) and the summed quadratic form
    err_i^T cov^{-1} err_i is returned with a factor of -1/2.
    '''
    err = T.reshape(X, (-1,2)) - T.reshape(mu, (-1,2))  # shaped as (n*m,2)

    # BUG FIX: T.inv is the elementwise reciprocal; the quadratic form below
    # requires the matrix inverse of the covariance (cf. np.linalg.inv).
    S = T.nlinalg.matrix_inverse(cov)

    # Sum of quadratic forms via broadcasting: (err err^T) * S, summed.
    E = (T.reshape(err, (-1, 2, 1)) *
         S *
         T.reshape(err, (-1, 1, 2))
         ).sum()

    return - E / 2
示例#15
0
    def _whiten_input(self, n):
        """Build a compiled ZCA-whitening function for (samples x features) input.

        ``n`` is the sample count used in the covariance normalization. The
        returned theano.function maps X to M X^T, whose columns are the
        whitened samples.
        """
        X = T.matrix('X', dtype=theano.config.floatX)

        # Sample covariance of the input matrix.
        covariance = T.dot(X.T, X) / (n - 1)
        eigvals, eigvecs = T.nlinalg.eig(covariance)

        # D^{-1/2}, regularized with e_zca to avoid division by ~zero.
        inv_sqrt_diag = T.nlinalg.alloc_diag(T.inv(T.sqrt(eigvals + self.e_zca)))

        # ZCA whitening matrix M = V D^{-1/2} V^T.
        whitening = T.dot(eigvecs, T.dot(inv_sqrt_diag, eigvecs.T))

        # now the input has been rotated: each column is a sample
        return theano.function(inputs=[X], outputs=T.dot(whitening, X.T))
示例#16
0
文件: compact_cnn.py 项目: tyhu/PyAI
def addFullBNLayerTrain(x,gamma,beta, mean=None, var=None):
    """Training-mode batch normalization over axis 0 with EMA bookkeeping.

    Returns (output, mean, var, updates): the normalized output (computed
    from the *batch* statistics) plus the running-average shared variables
    and their momentum update pairs for a theano.function.
    """
    fsize = gamma.get_value().shape[0]
    ep = 1e-5
    momentum = 0.9
    # Create running statistics when the caller supplies none.
    # NOTE(review): if only ``mean`` is passed, ``var`` stays None and the
    # update below will fail — confirm both are always passed together.
    if mean is None:
        mean = theano.shared(np.zeros((fsize,)))
        var = theano.shared(np.ones((fsize,)))
    input_mean = T.mean(x, axis=0)
    input_var = T.var(x, axis=0)
    inv_std = T.inv(T.sqrt(input_var + ep))
    
    # EMA updates; the running variance uses Bessel's correction n/(n-1).
    updates = []
    updates.append((mean, momentum*mean+(1-momentum)*input_mean))
    updates.append((var,momentum*var+(1-momentum)*(x.shape[0]/(x.shape[0]-1)*input_var)))

    # Normalize with the batch statistics and apply the affine transform.
    o = (x-input_mean) * gamma * inv_std + beta
    return o, mean, var, updates
示例#17
0
def set_generator_evaluation_function(generator_rnn_model,
                                      generator_mean_model,
                                      generator_std_model):
    """Compile the generator evaluation function (last-slice variant).

    Returns a compiled function mapping (source_data, target_data) to the
    per-step Gaussian negative log-likelihood, summed over the feature axis.
    """
    # input data (time length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data',
                                 dtype=floatX)

    target_data = tensor.tensor3(name='target_data',
                                 dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data,]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]
    # Reorder axes, keep only the last slice of the third axis, then merge
    # trailing axes. NOTE(review): exact axis semantics depend on the RNN's
    # 4-D output layout — confirm against its forward() implementation.
    hidden_data = hidden_data.dimshuffle(0, 2, 1, 3)
    hidden_data = hidden_data[:,:,-1,:].flatten(3)

    # get generator output data
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)
    # output_std_data = 0.22
    # get generator cost (time_length x num_samples x hidden_size)
    generator_cost  = 0.5*tensor.inv(2.0*tensor.sqr(output_std_data))*tensor.sqr(output_mean_data-target_data)
    generator_cost += tensor.log(output_std_data) + 0.5*tensor.log(2.0*numpy.pi)
    generator_cost  = tensor.sum(generator_cost, axis=2)

    # set generator evaluate inputs
    generator_evaluate_inputs  = [source_data,
                                  target_data,]

    # set generator evaluate outputs
    generator_evaluate_outputs = [generator_cost,]

    # set generator evaluate function
    generator_evaluate_function = theano.function(inputs=generator_evaluate_inputs,
                                                  outputs=generator_evaluate_outputs,
                                                  on_unused_input='ignore')

    return generator_evaluate_function
示例#18
0
    def fprop(X, test):
        """Batch-normalization forward pass with running-average updates.

        ``test`` > 0 selects the stored running statistics; otherwise the
        mini-batch statistics are used and the running averages are updated
        via ``default_update`` on memory-aliased clones. Uses the closure
        variables ``means``, ``inv_stds``, ``gammas``, ``alpha``,
        ``epsilon``, ``ds`` and ``lerp``.
        """
        btest = tensor.lt(0, test)

        # Mini-batch statistics over batch and spatial axes.
        X_means = X.mean([0, 2, 3])
        # NOTE(review): epsilon is added *outside* the sqrt here, unlike the
        # sqrt(var + eps) form used elsewhere in this file — confirm intended.
        X_inv_stds = tensor.inv(tensor.sqrt(X.var([0, 2, 3])) + epsilon)

        # Memory-aliased clones carry a default_update so the running
        # averages are refreshed whenever the graph runs in training mode.
        means_clone = theano.clone(means, share_inputs = False)
        inv_stds_clone = theano.clone(inv_stds, share_inputs = False)

        means_clone.default_update = ifelse(btest, means, lerp(means, X_means, alpha))
        inv_stds_clone.default_update = ifelse(btest, inv_stds, lerp(inv_stds, X_inv_stds, alpha))

        # Keep the clones in the graph without affecting the computation.
        X_means += 0 * means_clone
        X_inv_stds += 0 * inv_stds_clone

        # At test time, substitute the stored statistics.
        X_means = ifelse(btest, means, X_means)
        X_inv_stds = ifelse(btest, inv_stds, X_inv_stds)

        return (X - ds(X_means)) * ds(X_inv_stds) * ds(gammas)
示例#19
0
    def _build(self, input_tensor):
        """Build the batch-normalization graph for ``input_tensor``.

        In learning mode, the batch statistics are folded into the running
        mean/var accumulators (decay-weighted EMA), the corresponding update
        ops are queued in ``self._update_operations``, and the *new*
        accumulators are used for normalization. Returns a wrapped Tensor.
        """
        self._instantiate_parameters(
            input_tensor.shape, input_tensor.dtype)

        input_tensor_ = input_tensor.unwrap()

        # Stored accumulators and learnable affine parameters.
        mean_acc = self.get_parameter_variable('mean').unwrap()
        var_acc = self.get_parameter_variable('var').unwrap()
        scale = self.get_parameter_variable('scale').unwrap()
        offset = self.get_parameter_variable('offset').unwrap()

        if self.args['learn']:
            decay = self.args['decay']
            mean_in = input_tensor_.mean(axis=self._axes)
            var_in = input_tensor_.var(self._axes)

            # Exponential moving averages of the batch statistics.
            new_mean_acc = decay * mean_acc + (1 - decay) * mean_in
            new_var_acc = decay * var_acc + (1 - decay) * var_in

            self._update_operations.append(
                wrapper.Operation(
                    op={mean_acc: new_mean_acc},
                    name='update_mean',
                )
            )
            self._update_operations.append(
                wrapper.Operation(
                    op={var_acc: new_var_acc},
                    name='update_var',
                )
            )

            # Normalize with the freshly-computed accumulators.
            mean_acc = new_mean_acc
            var_acc = new_var_acc

        # Broadcast parameters across the normalized axes.
        mean_acc = mean_acc.dimshuffle(self._pattern)
        var_acc = var_acc.dimshuffle(self._pattern)
        scale = scale.dimshuffle(self._pattern)
        offset = offset.dimshuffle(self._pattern)

        # y = scale * (x - mean) / sqrt(var + eps) + offset
        stdi = T.inv(T.sqrt(var_acc + self.args['epsilon']))
        output = scale * (input_tensor_ - mean_acc) * stdi + offset
        return wrapper.Tensor(output, shape=input_tensor.shape, name='output')
示例#20
0
    def __call__(self, x):
        """Batch-normalize ``x`` over all axes except ``self.axis``.

        While training, mini-batch statistics are used and (when
        ``self.collect``) EMA update pairs for the stored statistics are
        placed in ``self.updates``; otherwise the collected running
        statistics are used when available.
        """
        # BUG FIX: range() objects have no .remove() under Python 3;
        # materialize the axis list before mutating it.
        axes = list(range(x.ndim))
        axes.remove(self.axis)
        axes = tuple(axes)

        # Mini-batch statistics over the normalization axes.
        input_mean = x.mean(axes)
        input_inv_std = T.inv(T.sqrt(x.var(axes) + self.epsilon))

        if self.train:
            mean = input_mean
            inv_std = input_inv_std
        else:
            if self.collect:
                mean = self.mean
                inv_std = self.inv_std
            else:
                mean = input_mean
                inv_std = input_inv_std

        # Schedule exponential-moving-average updates of the stored stats.
        self.updates = {}
        if self.train:
            if self.collect:
                self.updates[self.mean] = (
                    1 - self.alpha) * self.mean + self.alpha * input_mean
                self.updates[self.inv_std] = (
                    1 - self.alpha) * self.inv_std + self.alpha * input_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(x.ndim - len(axes)))
        pattern = [
            'x' if input_axis in axes else next(param_axes)
            for input_axis in range(x.ndim)
        ]

        # apply dimshuffle pattern to all parameters
        beta = self.beta.dimshuffle(pattern)
        gamma = self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (x - mean) * (gamma * inv_std) + beta
        return normalized
示例#21
0
文件: utils.py 项目: Seleucia/v3d
def nll(mu, sigma, mixing, y):
    """Computes the mean of negative log likelihood for P(y|x)

    y = T.matrix('y') # (minibatch_size, output_size)
    mu = T.tensor3('mu') # (minibatch_size, output_size, n_components)
    sigma = T.matrix('sigma') # (minibatch_size, n_components)
    mixing = T.matrix('mixing') # (minibatch_size, n_components)

    """

    # multivariate Gaussian
    exponent = -0.5 * T.inv(sigma) * T.sum((y.dimshuffle(0,1,'x') - mu)**2, axis=1)
    normalizer = (2 * np.pi * sigma)
    exponent = exponent + T.log(mixing) - (y.shape[1]*.5)*T.log(normalizer)
    # Numerically stable log-sum-exp over mixture components.
    max_exponent = T.max(exponent ,axis=1, keepdims=True)
    mod_exponent = exponent - max_exponent
    # SHAPE/PERF FIX: keepdims keeps the component sum as (m, 1); previously
    # the (m,) sum broadcast against the (m, 1) max into an (m, m) matrix.
    # The final mean was mathematically unchanged (mean of a_i + b_j over all
    # i, j equals mean(a) + mean(b)), but an m x m intermediate was built.
    gauss_mix = T.sum(T.exp(mod_exponent), axis=1, keepdims=True)
    log_gauss = max_exponent + T.log(gauss_mix)
    res = -T.mean(log_gauss)
    return res
def NLL(sigma, mixing, y):
    """Mean negative log-likelihood of ``y`` under a zero-mean Gaussian mixture.

    Shapes (as documented by the sibling implementations in this file):
        y:      (minibatch_size, output_size)
        sigma:  (minibatch_size, n_components)
        mixing: (minibatch_size, n_components)
    """
    # Zero-mean Gaussian: the exponent uses the squared norm of y directly.
    exponent = -0.5 * T.inv(sigma) * T.sum(y ** 2, axis=1)
    exponent = exponent + T.log(mixing) - (y.shape[1] * 0.5) * T.log(2 * np.pi * sigma)

    # Numerically stable log-sum-exp over mixture components.
    peak = T.max(exponent, axis=1)
    summed = T.sum(T.exp(exponent - peak[:, None]), axis=1)
    return -T.mean(peak + T.log(summed))
示例#23
0
def set_generator_evaluation_function(generator_rnn_model,
                                      generator_mean_model,
                                      generator_std_model):
    """Compile the generator evaluation function (Gaussian log-density variant).

    NOTE(review): the returned cost is the per-element *log-density*
    (negative quadratic plus negative log-normalizer), not an NLL — confirm
    the sign convention expected by callers.
    """
    # input data (time length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data',
                                 dtype=floatX)

    target_data = tensor.tensor3(name='target_data',
                                 dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data,]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]

    # get generator output data
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    # Gaussian log-density of the target given the predicted mean/std.
    generator_cost  = -0.5*tensor.inv(2.0*tensor.sqr(output_std_data))*tensor.sqr(output_mean_data-target_data)
    generator_cost += -0.5*tensor.log(2.0*tensor.sqr(output_std_data)*numpy.pi)

    # set generator evaluate inputs
    generator_evaluate_inputs  = [source_data,
                                  target_data,]

    # set generator evaluate outputs
    generator_evaluate_outputs = [generator_cost, ]

    # set generator evaluate function
    generator_evaluate_function = theano.function(inputs=generator_evaluate_inputs,
                                                  outputs=generator_evaluate_outputs,
                                                  on_unused_input='ignore')

    return generator_evaluate_function
示例#24
0
def NLL(mu, sigma, mixing, y):
    """Computes the mean of negative log likelihood for P(y|x)
    
    y = T.matrix('y') # (minibatch_size, output_size)
    mu = T.tensor3('mu') # (minibatch_size, output_size, n_components)
    sigma = T.matrix('sigma') # (minibatch_size, n_components)
    mixing = T.matrix('mixing') # (minibatch_size, n_components)

    """

    # multivariate Gaussian
    exponent = -0.5 * T.inv(sigma) * T.sum(
        (y.dimshuffle(0, 1, 'x') - mu)**2, axis=1)
    normalizer = (2 * np.pi * sigma)
    exponent = exponent + T.log(mixing) - (y.shape[1] * .5) * T.log(normalizer)
    # Numerically stable log-sum-exp over mixture components.
    max_exponent = T.max(exponent, axis=1, keepdims=True)
    mod_exponent = exponent - max_exponent
    # SHAPE/PERF FIX: keepdims keeps the component sum as (m, 1); previously
    # the (m,) sum broadcast against the (m, 1) max into an (m, m) matrix.
    # The final mean was mathematically unchanged (mean of a_i + b_j over all
    # i, j equals mean(a) + mean(b)), but an m x m intermediate was built.
    gauss_mix = T.sum(T.exp(mod_exponent), axis=1, keepdims=True)
    log_gauss = max_exponent + T.log(gauss_mix)
    res = -T.mean(log_gauss)
    return res
示例#25
0
    def output(self, input_value):
        """Batch-normalize ``input_value`` (NeuPy-style layer output).

        Mini-batch statistics are used while ``self.training_state`` is True,
        otherwise the stored running averages are used.
        """
        epsilon = asfloat(self.epsilon)
        alpha = asfloat(self.alpha)
        gamma, beta = self.gamma, self.beta

        ndim = input_value.ndim
        axes = self.axes

        running_mean = self.running_mean
        running_inv_std = self.running_inv_std

        # Mini-batch statistics over the normalization axes.
        input_mean = input_value.mean(axes)
        input_var = input_value.var(axes)
        input_inv_std = T.inv(T.sqrt(input_var + epsilon))

        # EMA update pairs for the running statistics.
        # NOTE(review): these are (re)assigned even outside the training
        # state — confirm callers only apply them while training.
        self.updates = [(
            running_inv_std,
            asfloat(1 - alpha) * running_inv_std + alpha * input_inv_std
        ), (
            running_mean,
            asfloat(1 - alpha) * running_mean + alpha * input_mean
        )]

        if not self.training_state:
            mean = running_mean
            inv_std = running_inv_std

        else:
            mean = input_mean
            inv_std = input_inv_std

        # Broadcast parameters over the axes that were reduced away.
        opposite_axes = find_opposite_axes(axes, ndim)

        beta = dimshuffle(beta, ndim, opposite_axes)
        gamma = dimshuffle(gamma, ndim, opposite_axes)
        mean = dimshuffle(mean, ndim, opposite_axes)
        inv_std = dimshuffle(inv_std, ndim, opposite_axes)

        normalized_value = (input_value - mean) * inv_std
        return gamma * normalized_value + beta
示例#26
0
    def get_output_for(self, input, deterministic=False, **kwargs):
        """Batch-normalize ``input`` using mini-batch statistics only.

        This stateless variant always normalizes with the current batch's
        mean and inverse std (no running averages); ``deterministic`` is
        accepted for interface compatibility but has no effect.
        """
        # Mini-batch statistics over the normalization axes.
        mean = input.mean(self.axes)
        inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Dimshuffle pattern: broadcast over normalized axes, keep the rest.
        remaining = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if axis in self.axes else next(remaining)
                   for axis in range(input.ndim)]

        # Affine parameters default to identity when absent.
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)

        # Normalize and apply the affine transform.
        mean_b = mean.dimshuffle(pattern)
        inv_std_b = inv_std.dimshuffle(pattern)
        return (input - mean_b) * (gamma * inv_std_b) + beta
def generate_functions(A, y, gamma):
    """Compile the Theano functions used by an L1-regularized (LASSO-style)
    coordinate solver.

    Parameters
    ----------
    A : array-like matrix of regressors, fixed into the graphs via ``givens``.
    y : array-like target vector, fixed into the graphs via ``givens``.
    gamma : scalar L1 penalty weight, baked into the compiled graphs.

    Returns
    -------
    dict of compiled functions:
      - "select_entering": index (and gradient value) of the coordinate with
        the largest absolute error gradient.
      - "qp_optimum": closed-form optimum (A^T A)^-1 (A^T y - gamma/2 * theta)
        for a fixed sign pattern ``theta``.
      - "txs": line-search candidates interpolated between two points.
      - "select_candidate": candidate with the smallest loss.
      - "optimal_nz" / "optimal_z": optimality-condition residuals for
        nonzero / zero coordinates.
    """
    # Matrix inverse of the Gram matrix.  NOTE: T.inv is Theano's
    # *elementwise* reciprocal and must not be used for this.
    from theano.tensor import nlinalg

    tA = T.matrix('A')
    ty = T.vector('y')
    tx = T.vector('x')
    ttheta = T.vector('theta')

    tx0 = T.vector('x0')
    tx1 = T.vector('x1')
    tbetas = T.vector('betas')

    error = lambda x: T.sum((T.dot(tA, x) - ty)**2)
    derror = lambda x: T.grad(error(x), x)
    penalty = lambda x: x.norm(1)
    # NOTE(review): loss omits the gamma weight on the penalty term
    # (error + gamma * penalty would be the usual LASSO objective) — kept
    # as-is pending confirmation against the callers.
    loss = lambda x: error(x) + penalty(x)

    entering_index = T.argmax(abs(derror(tx)))
    # Interpolate between x0 and x1 at every step size in tbetas.
    txs, _ = theano.map(lambda b, x0, x1: (1-b)*x0 + b*x1,
                        [tbetas], [tx0, tx1])

    return {
        "select_entering": theano.function([tx],
                                           [entering_index, derror(tx)[entering_index]],
                                           givens = {tA: A, ty: y}),
        # BUG FIX: the original used T.inv (elementwise reciprocal) where the
        # matrix inverse of A^T A is required for the QP optimum.
        "qp_optimum": theano.function([tA, ttheta],
                                      T.dot(nlinalg.matrix_inverse(T.dot(tA.T, tA)),
                                            T.dot(tA.T, ty) - gamma/2*ttheta),
                                      givens = {ty: y}),
        "txs": theano.function([tbetas, tx0, tx1], txs),
        "select_candidate": theano.function([tA, tbetas, tx0, tx1],
                                            txs[T.argmin(theano.map(loss, [txs])[0])],
                                            givens = {ty: y}),
        "optimal_nz": theano.function([tA, tx],
                                      derror(tx) + gamma*T.sgn(tx),
                                      givens = {ty: y}),
        "optimal_z": theano.function([tA, tx],
                                     abs(derror(tx)),
                                     givens = {ty: y}),
        }
示例#28
0
def normalize_batch_in_training(x, gamma, beta,
                                reduction_axes, epsilon=0.0001):
    '''Compute mean and std for batch then apply batch_normalization on batch.
    '''
    device = theano.config.device
    on_gpu = device.startswith('cuda') or device.startswith('gpu')
    use_cudnn = ndim(x) < 5 and reduction_axes == [0, 2, 3] and on_gpu
    if use_cudnn:
        broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x')
        broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x')
        try:
            normed, mean, stdinv = theano.sandbox.cuda.dnn.dnn_batch_normalization_train(
                x, broadcast_gamma, broadcast_beta, 'spatial', epsilon)
            # cuDNN returns the inverse standard deviation; recover the
            # variance from it for the return value.
            var = T.inv(stdinv ** 2)
            return normed, T.flatten(mean), T.flatten(var)
        except AttributeError:
            # cuDNN batch norm not available in this Theano build;
            # fall through to the generic implementation below.
            pass

    mean = x.mean(reduction_axes)
    var = x.var(reduction_axes)

    # Shape with 1 on every reduced axis so the statistics and parameters
    # broadcast back over the batch.
    target_shape = T.stack(*[1 if axis in reduction_axes else x.shape[axis]
                             for axis in range(ndim(x))])

    normed = batch_normalization(x,
                                 T.reshape(mean, target_shape),
                                 T.reshape(var, target_shape),
                                 T.reshape(beta, target_shape),
                                 T.reshape(gamma, target_shape),
                                 epsilon)
    return normed, mean, var
示例#29
0
def test_batch_normalization_train():
    """Check the fused batch-norm training Op against a reference graph.

    For several axis specifications and tensor ranks, builds both the fused
    ``bn.batch_normalization_train`` graph and an equivalent graph from
    elementary ops, then compares forward outputs, running-average updates,
    and gradients numerically.
    """
    utt.seed_rng()

    for axes in ('per-activation', 'spatial', (1, 2, 3, 4)):
        for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
            x, scale, bias, running_mean, running_var = (vartype(n)
                                                         for n in ('x', 'scale', 'bias',
                                                                   'running_mean',
                                                                   'running_var'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used
            running_average_factor = 0.3

            # remove non-existing axes
            if isinstance(axes, tuple):
                axes = tuple(i for i in axes if i < ndim)
            if len(axes) == 0:
                continue

            # forward pass
            out, x_mean, x_invstd, out_running_mean, out_running_var = \
                bn.batch_normalization_train(
                    x, scale, bias, axes, eps,
                    running_average_factor, running_mean, running_var)
            # reference forward pass
            if axes == 'per-activation':
                axes2 = (0,)
            elif axes == 'spatial':
                axes2 = (0,) + tuple(range(2, ndim))
            else:
                axes2 = axes
            x_mean2 = x.mean(axis=axes2, keepdims=True)
            x_var2 = x.var(axis=axes2, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x_var2 + eps))
            scale2 = T.addbroadcast(scale, *axes2)
            bias2 = T.addbroadcast(bias, *axes2)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # m = number of elements averaged per statistic; used for the
            # unbiased (m / (m - 1)) correction of the running variance.
            m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
            out_running_mean2 = running_mean * (1 - running_average_factor) + \
                x_mean2 * running_average_factor
            out_running_var2 = running_var * (1 - running_average_factor) + \
                (m / (m - 1)) * x_var2 * running_average_factor
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile
            f = theano.function([x, scale, bias, running_mean, running_var, dy],
                                [out, x_mean, x_invstd, out_running_mean, out_running_var,
                                 out2, x_mean2, x_invstd2, out_running_mean2, out_running_var2] +
                                grads + grads2)
            # check if the abstract Ops have been replaced
            assert not any([isinstance(n.op, (bn.AbstractBatchNormTrain,
                                              bn.AbstractBatchNormInference,
                                              bn.AbstractBatchNormTrainGrad))
                            for n in f.maker.fgraph.toposort()])
            # run
            for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (2, 3, 5, 5, 5)):
                data_shape = data_shape[:ndim]
                # parameters are broadcast over the normalized axes
                param_shape = tuple(1 if d in axes2 else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
                Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Running_mean = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                Running_var = numpy.random.randn(*param_shape).astype(theano.config.floatX)
                outputs = f(X, Scale, Bias, Running_mean, Running_var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 5])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 5])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # invstd
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # running_mean
                # running_var can contain NaN when m == 1 (division by m - 1)
                utt.assert_allclose(numpy.nan_to_num(outputs[4]),
                                    numpy.nan_to_num(outputs[4 + 5]))  # running_var
                # compare gradients
                utt.assert_allclose(outputs[10], outputs[10 + 3], atol=1e-4)  # dx
                utt.assert_allclose(outputs[11], outputs[11 + 3], rtol=2e-4, atol=1e-4)  # dscale
                utt.assert_allclose(outputs[12], outputs[12 + 3])  # dbias
示例#30
0
 def logp(self, value):
     """Return the log-density of ``value``, bounded to the valid domain.

     ``bound`` masks the result (to -inf) unless ``value``, ``u`` and ``k``
     are all strictly positive.
     """
     scale = self.u
     shape = self.k
     log_density = (
         -tt.pow(value / scale, shape)
         - tt.log(scale)
         - gammaln(1 + tt.inv(shape))
     )
     return bound(log_density, value > 0, scale > 0, shape > 0)
示例#31
0
def f1_score_theano(X, W, b=None):
    """Symbolic pairwise score between rows of ``X`` and rows of ``W``.

    Returns the elementwise sum of the reciprocals of XW/|X| and XW/|W|
    (an inverse-precision + inverse-recall style quantity).  ``b`` is
    accepted for interface compatibility but unused.
    """
    overlap = T.dot(X, W.T)
    # L1 mass of each row, shaped for broadcasting against `overlap`.
    row_mass = T.abs_(X).sum(axis=1).reshape((-1, 1))
    col_mass = T.abs_(W).sum(axis=1).reshape((1, -1))
    return T.inv(overlap / row_mass) + T.inv(overlap / col_mass)
        theano.config.floatX),
    name='mc',
    borrow=True,
    broadcastable=(True, False, False),)
# Mixture weights, stored in log space.  Assumes `model_weights` is a
# 1-D (or row) array of positive initial weights — TODO confirm shape.
mw = theano.shared(
    value=numpy.log(model_weights.copy().astype(
        theano.config.floatX)),
    name='mw',
    borrow=True,)

# Per-component precision projection weights: one (sigma_in,) map per
# output dimension and mixture component.
Wc = theano.shared(
    value=numpy.zeros((n_out, sigma_in, n_components,), dtype=theano.config.floatX),
    name='Wc',
    borrow=True,)

# Inverse variances conditioned on x: softplus keeps them positive, the
# maximum() floor guards against division blow-up when softplus ~ 0.
invsigma_given_x = tensor.inv(tensor.maximum(tensor.nnet.softplus(theano.dot(x, Wc) + mc), 1e-8))
# NOTE(review): `f` is rebound below; this compilation appears to exist
# only for debugging/inspection.
f = theano.function(
        inputs=[x,],
        outputs=invsigma_given_x,
    )
# Log mixture weights, renormalized with EPS smoothing for stability.
p_mix_given_x = tensor.nnet.softmax(mw) 
p_mix_given_x = tensor.log(p_mix_given_x / (tensor.sum(p_mix_given_x, axis=1)[:, None] + 10 * EPS) + EPS)
# Mahalanobis-style term: sum over output dims of y^2 * inverse variance.
# Presumably assumes zero-mean components — verify against the model.
log_exponent = tensor.sum((y**2)[:, :, None] * invsigma_given_x, axis=1)
f = theano.function(
        inputs=[x, y],
        outputs=log_exponent,
    )

# Per-component log responsibilities: Gaussian normalizer + log prior
# + 0.5 * (log|Sigma^-1| - quadratic term).
dim_constant = - 0.5 * WINSIZE * tensor.log(2 * numpy.pi) + p_mix_given_x
lpr = dim_constant + 0.5 * (
            tensor.sum(tensor.log(invsigma_given_x), axis=1) - log_exponent)
示例#33
0
    def __init__(self, numpy_rng, n_ins=784, n_outs=24, l1_reg = None, l2_reg = None,
                 hidden_layers_sizes=[500, 500],
                 hidden_activation='tanh', output_activation='linear', var_floor=0.01,
                 n_component=1, beta_opt=False, use_rprop=0, rprop_init_update=0.001,
                 eff_sample_size=0.8, mean_log_det=-100.0):
        """Build a feed-forward network with a mixture-density output layer.

        Stacks ``len(hidden_layers_sizes)`` tanh hidden layers on input
        ``self.x`` and a ``MixtureDensityOutputLayer`` on top, then builds
        the training cost ``self.finetune_cost``: either a beta-divergence
        objective (``beta_opt=True``, single component only) or the standard
        negative log-likelihood of the Gaussian mixture (log-sum-exp form).

        NOTE(review):
        - ``l1_reg`` is stored but never applied in this constructor.
        - ``hidden_activation`` is accepted but ignored: T.tanh is hard-coded.
        - ``hidden_layers_sizes`` uses a mutable default argument.
        """
        logger = logging.getLogger("Multi-stream DNN initialization")

        self.sigmoid_layers = []
        self.params = []
        self.delta_params   = []

        self.final_layers = []

        self.n_outs = n_outs

        self.n_layers = len(hidden_layers_sizes)

        self.output_activation = output_activation
        self.var_floor = var_floor

        self.use_rprop = use_rprop
        self.rprop_init_update = rprop_init_update

        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.beta_opt = beta_opt
        self.eff_sample_size = eff_sample_size
        self.mean_log_det = mean_log_det

        assert self.n_layers > 0

        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.matrix('y')

        # Stack the hidden layers; each layer feeds the previous one's output.
        for i in range(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.tanh)  ##T.nnet.sigmoid)  #
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        hidden_output_size = hidden_layers_sizes[-1]

        # Output layer producing mixture weights, means and variances.
        self.final_layer = MixtureDensityOutputLayer(rng = numpy_rng,
                                                input = sigmoid_layer.output,
                                                n_in = hidden_output_size,
                                                n_out = self.n_outs,
                                                n_component = n_component,
                                                var_floor = self.var_floor)
        self.params.extend(self.final_layer.params)
        self.delta_params.extend(self.final_layer.delta_params)

        ### Maximum likelihood
        self.finetune_cost = 0.0

        self.errors = 0.0

        # Beta chosen so that the beta-divergence objective keeps an
        # effective sample size of `eff_sample_size` for n_outs dimensions.
        epsd = self.eff_sample_size**(-2.0/(n_outs + 2.0))
        beta = (epsd - 1.0) + math.sqrt(epsd*(epsd - 1.0))

        if self.beta_opt:
            assert n_component == 1, "beta optimisation only implemented for single-component MDNs"
            for i in range(n_component):  #n_component
                # Slice out this component's diagonal variances and means.
                sigma = self.final_layer.sigma[:, i*n_outs:(i+1)*n_outs]
                mu    = self.final_layer.mu[:, i*n_outs:(i+1)*n_outs]
                mix_weight = self.final_layer.mix[:, i]

                # Beta-scaled Mahalanobis term (diagonal covariance).
                xEx = -0.5 * beta * T.sum(((self.y - mu)**2) * T.inv(sigma), axis=1)
                exponent = (0.5 * (n_outs + 2.0) * T.log(1 + beta)) + xEx
                point_fit = T.exp(exponent) - beta

                log_det_mult = -0.5 * beta * T.sum(T.log(sigma), axis=1)

                log_det_mult += (0.5 * beta * self.mean_log_det) # normalise by mean_log_det

                beta_obj = (mix_weight**2) * point_fit * T.exp(log_det_mult)

                self.finetune_cost += -T.mean(beta_obj)

            # lines to compute debugging information for later printing
            #self.errors = T.min(T.min(T.log(sigma), axis=1))
            #self.errors = T.mean(T.sum(T.log(sigma), axis=1)) # computes mean_log_det
            #self.errors = -xEx # (vector quantity) should be about 0.5 * beta * n_outs
            #self.errors = point_fit  # (vector quantity) should be about one
            #self.errors = T.mean(T.exp(exponent)) / T.exp(T.max(exponent)) # fraction of the data used, should be about efficiency
            #self.errors = T.mean(point_fit) # should be about one
            #self.errors = log_det_mult # (vector quantity) about zero, or always less if using Rprop
            #self.errors = beta_obj # (vector quantity) objective function terms
            #self.errors = self.finetune_cost # disable this line below when debugging
        else:

            # Standard MDN negative log-likelihood, computed per component
            # in log space and combined with a log-sum-exp for stability.
            all_mix_prob = []

            print(n_component)
            for i in range(n_component):  #n_component
                sigma = self.final_layer.sigma[:, i*n_outs:(i+1)*n_outs]
                mu    = self.final_layer.mu[:, i*n_outs:(i+1)*n_outs]
                mix_weight = self.final_layer.mix[:, i]

                # log N(y | mu, diag(sigma)) + log mix_weight
                xEx = -0.5 * T.sum(((self.y - mu)**2) * T.inv(sigma), axis=1)
                normaliser = 0.5 * ( n_outs * T.log(2 * numpy.pi) + T.sum(T.log(sigma), axis=1))
                exponent = xEx + T.log(mix_weight) - normaliser
                all_mix_prob.append(exponent)

            # log-sum-exp trick: subtract the per-sample max before exp().
            max_exponent = T.max(all_mix_prob, axis=0, keepdims=True)
            mod_exponent = T.as_tensor_variable(all_mix_prob) - max_exponent

            self.finetune_cost = - T.mean(max_exponent + T.log(T.sum(T.exp(mod_exponent), axis=0)))

            #self.errors = self.finetune_cost


        if self.l2_reg is not None:
            # NOTE(review): range(self.n_layers-1) skips the last hidden
            # layer's W, and the i*2 indexing assumes params are laid out as
            # [W, b] pairs — confirm both against HiddenLayer.
            for i in range(self.n_layers-1):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
            self.finetune_cost += self.l2_reg * T.sqr(self.final_layer.W_mu).sum()
            self.finetune_cost += self.l2_reg * T.sqr(self.final_layer.W_sigma).sum()
            self.finetune_cost += self.l2_reg * T.sqr(self.final_layer.W_mix).sum()

        self.errors = self.finetune_cost # disable this line if debugging beta_opt