def build_model_2():
    trng = RandomStreams(SEED)
    rng = np.random.RandomState(1234)
    use_noise = theano.shared(numpy_floatX(0.))  # dropout switch: 1 = train, 0 = eval
    x = T.tensor3('x', dtype=config.floatX)  # input sequences (time-major)
    mask = T.matrix('mask', dtype=config.floatX)  # per-timestep validity mask
    y = T.vector('y', dtype='int32')  # class labels
    lr = T.scalar('lr')
    params = net_params()
    ################   build model ##########################
    # Front end: project each 64-dim input frame down to 32 dims.
    L1 = HiddenLayer(rng, x, 64, 32)
    params.add(L1)
    # Temporal convolution (11-frame window, 32 feature maps).
    L6 = conv_layer(rng,
                    L1.output,
                    filters_shape=(32, 1, 11, 32),
                    input_shape=(None, 1, None, 32))
    mask_conv = L6.get_mask(mask)
    params.add(L6)
    L5 = dropout_layer(L6.output, use_noise, trng)
    # LSTM over the convolved sequence, mean-pooled over time.
    L2 = lstm(rng, L5.output, mask_conv, 32, 32, pooling_type="mean_pooling")
    params.add(L2)
    L3 = dropout_layer(L2.mean_pool_out, use_noise, trng)
    # Softmax classifier over the 4 classes.
    L4 = LogisticRegression(L3.output, rng, 32, 4)
    params.add(L4)
    ########################################################
    cost = L4.negative_log_likelihood(y)
    errors = L4.errors(y)
    grads = T.grad(cost, wrt=list(params.params.values()))
    f_grad_shared, f_update = adadelta(lr, params.params, grads, x, mask, y,
                                       cost)
    f_error = theano.function([x, mask, y], outputs=errors, name='f_error')
    return f_grad_shared, f_update, f_error, params, use_noise
def build_model_3():
    trng = RandomStreams(SEED)
    rng = np.random.RandomState(1234)
    use_noise = theano.shared(numpy_floatX(0.))
    x = T.tensor3('x', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    y = T.vector('y', dtype='int32')
    lr = T.scalar('lr')
    params = net_params()
    ################   build model ##########################
    # Alternative front end: wavenet_layer(x, rng, 13, 64, dilation_levels=4)
    L1 = HiddenLayer(rng, x, 64, 32)
    params.add(L1)
    L6 = conv_layer(rng,
                    L1.output,
                    filters_shape=(32, 1, 11, 32),
                    input_shape=(None, 1, None, 32))
    params.add(L6)
    mask_conv = L6.get_mask(mask)
    # Attention branch: per-timestep scores from a second convolution, turned
    # into a masked softmax over the time axis. Note that `attention` is
    # computed here but never used downstream.
    L8 = conv_layer(rng,
                    L1.output,
                    filters_shape=(14, 1, 11, 32),
                    input_shape=(None, 1, None, 32))
    weight = L8.output.sum(axis=2)
    e_x = T.exp(weight - weight.max(axis=0, keepdims=True))
    e_x_mask = e_x * mask_conv
    attention = e_x_mask / e_x_mask.sum(axis=0, keepdims=True)

    L5 = dropout_layer(L6.output, use_noise, trng)
    # Bidirectional LSTM: one forward and one backward pass over the sequence.
    L2 = lstm(rng, L5.output, mask_conv, 32, 32)
    L7 = lstm(rng, L5.output, mask_conv, 32, 32, go_backwards=True)
    params.add(L2)
    params.add(L7)
    # Concatenate the mean-pooled directions: 32 + 32 = 64 features.
    LSTM_out = T.concatenate([L2.mean_pool_out, L7.mean_pool_out], axis=1)
    L3 = dropout_layer(LSTM_out, use_noise, trng)
    L4 = LogisticRegression(L3.output, rng, 64, 4)
    params.add(L4)
    ########################################################
    cost = L4.negative_log_likelihood(y)
    errors = L4.errors(y)
    grads = T.grad(cost, wrt=list(params.params.values()))
    f_grad_shared, f_update = adadelta(lr, params.params, grads, x, mask, y,
                                       cost)
    f_error = theano.function([x, mask, y], outputs=errors, name='f_error')
    return f_grad_shared, f_update, f_error, params, use_noise
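
# A minimal training-loop sketch for the handles returned above, assuming the
# Theano-LSTM-tutorial convention that f_grad_shared computes the cost and
# caches gradients while f_update applies the adadelta step; max_epochs,
# minibatches, prepare_data, lrate and the valid_* arrays are hypothetical.
f_grad_shared, f_update, f_error, params, use_noise = build_model_2()
for epoch in range(max_epochs):
    use_noise.set_value(1.)                   # enable dropout while training
    for minibatch in minibatches:             # hypothetical iterator
        x, mask, y = prepare_data(minibatch)  # hypothetical helper
        cost = f_grad_shared(x, mask, y)      # forward pass + cached gradients
        f_update(lrate)                       # adadelta parameter update
    use_noise.set_value(0.)                   # disable dropout for evaluation
    err = f_error(valid_x, valid_mask, valid_y)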
Example #3
# Imports assumed to mirror Example #7's module layout (nn, optimization).
from nn import nnet
from optimization import adadelta, CE
from theano.tensor.nnet import relu, softmax, hard_sigmoid
from keras.datasets import cifar100

input_shape = (3, 32, 32)  # CIFAR-100: 32x32 RGB images, channels-first
output_size = 100          # CIFAR-100 has 100 fine-grained classes


(X_train, y_train), (X_test, y_test) = cifar100.load_data()

y_train = y_train.flatten()
y_test = y_test.flatten()
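
# Note (assumption): newer Keras versions return CIFAR batches channels-last,
# i.e. (N, 32, 32, 3), while the (3, 32, 32) input shape below is channels-first;
# in that case the arrays would first need transposing:
#     X_train = X_train.transpose(0, 3, 1, 2)
#     X_test = X_test.transpose(0, 3, 1, 2)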

model = nnet(input_shape)

model.addConvolutionLayer((32, 4, 4))
model.addActivation(relu)
model.addConvolutionLayer((32, 4, 4))
model.addMaxPooling((2, 2))
model.addActivation(relu)

model.addConvolutionLayer((32, 4, 4))
model.addActivation(relu)
model.addConvolutionLayer((16, 4, 4))
model.addActivation(relu)

model.addFullyConnectedLayer(200)
model.addActivation(hard_sigmoid)
model.addFullyConnectedLayer(output_size)
model.addActivation(softmax)

# Two-stage schedule: train first with a larger adadelta step, then fine-tune
# with a smaller one.
model.train(X_train, y_train, X_test, y_test, 15, 100, adadelta(1e-1), CE(0.005))
model.train(X_train, y_train, X_test, y_test, 50, 100, adadelta(1e-2), CE(0.005))
Example #4
    def __init__(self,
                 num_motif,
                 max_motif_len,
                 embed,
                 L1_reg=0.00,
                 L2_reg=0.00,
                 gradient_clip=1,
                 param_clip=1,
                 optimizer="sgd",
                 batch_size=32):
        # num_motif arrives as "X,Y,Z": motif count, max-pooling window, hidden size.
        num_motif = num_motif.split(",")
        num_motif = [int(each) for each in num_motif]
        if len(num_motif) != 3:
            sys.stderr.write(
                "TFImputeModelRNN requires the num_motif parameter to be in "
                "<#Motif>,<MaxpoolingWindow>,<HiddenSize> format.\n")
            sys.exit(1)

        n_hidden = num_motif[2]
        window_size = num_motif[1]
        num_motif = num_motif[0]

        # Note that the embed length is ignored; sigmoid(embedding) is used
        # directly as the gate.

        embed_cnt = embed[0][0]  # Cellline
        target_cnt = embed[1][0]  # TFs
        embedding = util.createRandomShareAdaDelta((embed_cnt, num_motif),
                                                   'embedding')

        # The shape of the tensor is: [number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]
        motif_filter_size = (num_motif, 4, max_motif_len, 1)
        motif_filter = util.createRandomShareAdaDelta(motif_filter_size,
                                                      'motif_filter',
                                                      mean=0,
                                                      std=0.01)

        rec_bias = util.createRandomShareAdaDelta((num_motif,),
                                                  'rec_bias',
                                                  const=-4)

        # First half is memory and second half is the traditional hidden
        hidden_states = util.createZeroShare((batch_size, n_hidden),
                                             'hidden_states')
        bias = util.createRandomShareAdaDelta((n_hidden * 3,), 'bias')
        W_i2h = util.createRandomShareAdaDelta((num_motif, n_hidden * 3),
                                               'W_i2h')
        W_h2h = util.createRandomShareAdaDelta((n_hidden, n_hidden * 3),
                                               'W_h2h')
        W_h2o = util.createRandomShareAdaDelta((n_hidden, target_cnt), 'W_h2o')
        b_o = util.createRandomShareAdaDelta((target_cnt,), 'b_o')

        self.parameters = [
            embedding, motif_filter, rec_bias, bias, W_h2h, W_i2h, W_h2o, b_o
        ]
        [embedding, motif_filter, rec_bias, bias, W_h2h, W_i2h, W_h2o,
         b_o] = [each[0] for each in self.parameters]

        # The shape of the tensor is as follows: [mini-batch size, number of input feature maps, image height, image width].
        # number of input feature maps is 4, height should be sequence length and width should always be 1
        dnaseq = T.tensor4(name='dnaseq', dtype=theano.config.floatX)
        idxes = T.matrix(name='idxes', dtype='int32')
        target = T.matrix(name='target', dtype='int32')
        mask = T.matrix(name='mask', dtype='int32')
        l_r = T.scalar('l_r')
        mom = T.scalar('mom')

        predict = conv.conv2d(dnaseq, motif_filter)

        predict = max_pool_2d(predict, (window_size, 1), ignore_border=True)
        predict = T.transpose(predict, (2, 0, 1, 3))

        # Now a [seq, mini-batch size, num_motif] matrix
        predict = predict.reshape(
            (predict.shape[0], predict.shape[1], predict.shape[2]))
        predict = T.maximum(0, predict - rec_bias)
        embed = T.nnet.sigmoid(embedding[idxes[:, 0]])
        embed = embed.reshape((1, embed.shape[0], embed.shape[1]))
        predict = embed * predict

        def step(x_t, h_tm1):
            # GRU-style recurrence: reset gate g_r and update gate g_u share
            # one matrix multiply over the gate halves of the weight matrices.
            gate = T.dot(x_t, W_i2h[:, n_hidden:]) + T.dot(
                h_tm1, W_h2h[:, n_hidden:]) + bias[n_hidden:]
            g_r = T.nnet.sigmoid(gate[:, :n_hidden])
            g_u = T.nnet.sigmoid(gate[:, n_hidden:])
            # Candidate state; the reset gate scales the recurrent input.
            h_t = T.tanh(
                T.dot(x_t, W_i2h[:, :n_hidden]) +
                T.dot(h_tm1 * g_r, W_h2h[:, :n_hidden]))
            # Interpolate between the previous state and the candidate.
            h_t = (1 - g_u) * h_tm1 + g_u * h_t
            return h_t

        hidden_states = hidden_states[:predict.shape[1]]
        predict, _ = theano.scan(step,
                                 sequences=predict,
                                 outputs_info=[hidden_states])
        predict = T.nnet.sigmoid(T.dot(predict[-1], W_h2o) + b_o)

        # Consecutive pairs are forward and backward sequences.
        predict = predict.reshape((predict.shape[0] // 2, 2, -1))
        predict = T.max(predict, axis=1)
        loss = -mask * target * T.log(predict + 1e-5) - mask * (
            1 - target) * T.log(1 - predict + 1e-5)
        loss = T.sum(loss) / T.sum(mask)

        L1 = 0
        L2 = 0
        for param in self.parameters:
            L1 += abs(param[0].sum())
            L2 += (param[0]**2).sum()
        cost = loss + L1_reg * L1 + L2_reg * L2

        if (optimizer == "sgd"):
            parameters = [[each[0], each[1]] for each in self.parameters]
            updates = optimization.sgd(cost, parameters, mom, l_r,
                                       gradient_clip, param_clip)
            self.train_func = theano.function(
                inputs=[idxes, dnaseq, target, mask, l_r, mom],
                outputs=[cost, predict],
                updates=updates,
                mode=mode)
        elif (optimizer == "adadelta"):
            updates = optimization.adadelta(cost, self.parameters, param_clip,
                                            l_r)
            self.train_func = theano.function(
                inputs=[idxes, dnaseq, target, mask, l_r],
                outputs=[cost, predict],
                updates=updates,
                mode=mode)

        self.error_func = theano.function(inputs=[idxes, dnaseq, target, mask],
                                          outputs=[loss, predict],
                                          mode=mode)

        self.predict_func = theano.function(inputs=[idxes, dnaseq],
                                            outputs=predict,
                                            mode=mode)

        self.optimizer = optimizer
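
# A hedged usage sketch for TFImputeModelRNN, following the shapes documented
# in the comments above: `dnaseq` is one-hot DNA of shape (batch, 4, seq_len, 1),
# `idxes` holds cell-line indices, and consecutive batch rows are forward and
# backward sequences. All concrete numbers and batch arrays are hypothetical.
model = TFImputeModelRNN("2000,5,128", max_motif_len=20,
                         embed=[(91, 1), (622, 1)], optimizer="adadelta")
cost, pred = model.train_func(idxes, dnaseq, target, mask, 1.0)
loss, pred = model.error_func(idxes, dnaseq, target, mask)
scores = model.predict_func(idxes, dnaseq)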
Example #5
    def __init__(self,
                 num_motif,
                 max_motif_len,
                 embed,
                 L1_reg=0.00,
                 L2_reg=0.00,
                 gradient_clip=1,
                 param_clip=1,
                 optimizer="sgd"):
        # The shape of the tensor is: [number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]
        motif_filter_size = (num_motif, 4, max_motif_len, 1)
        motif_filter = util.createRandomShareAdaDelta(motif_filter_size,
                                                      'motif_filter',
                                                      mean=0,
                                                      std=0.01)

        rec_bias = util.createRandomShareAdaDelta((num_motif,),
                                                  'rec_bias',
                                                  const=-4)
        W_h = util.createRandomShareAdaDelta((num_motif, 32),
                                             'W_h',
                                             mean=0,
                                             std=0.01)
        b_h = util.createRandomShareAdaDelta((32,), 'b_h', mean=0, std=0.01)

        W_out = util.createRandomShareAdaDelta((32, 1),
                                               'W_out',
                                               mean=0,
                                               std=0.01)
        b_out = util.createRandomShareAdaDelta((1,), 'b_out', mean=0, std=0.01)

        self.parameters = [motif_filter, rec_bias, W_h, b_h, W_out, b_out]
        [motif_filter, rec_bias, W_h, b_h, W_out,
         b_out] = [each[0] for each in self.parameters]

        # The shape of the tensor is as follows: [mini-batch size, number of input feature maps, image height, image width].
        # number of input feature maps is 4, height should be sequence length and width should always be 1
        dnaseq = T.tensor4(name='dnaseq', dtype=theano.config.floatX)
        target = T.vector(name='target', dtype=theano.config.floatX)
        target_weight = T.vector(name='target_weight',
                                 dtype=theano.config.floatX)
        l_r = T.scalar('l_r')
        mom = T.scalar('mom')

        predict = conv.conv2d(dnaseq, motif_filter)
        predict = T.transpose(predict, (0, 2, 1, 3))
        predict = predict.reshape(
            (predict.shape[0], predict.shape[1], predict.shape[2]))
        predict = T.maximum(0, predict - rec_bias)
        predict = T.max(predict,
                        axis=1)  # Now a [mini-batch size, num_motif] matrix

        # For debugging: the strongest motif filter per sequence and its activation.
        saved1 = T.argmax(predict, axis=1)
        saved2 = predict[T.arange(saved1.shape[0]), saved1]
        saved1 = saved1.reshape([-1, 1])
        saved2 = saved2.reshape([-1, 1])
        saved = T.concatenate([saved1, saved2], axis=1)

        predict = T.maximum(0, T.dot(predict, W_h) + b_h)
        predict = T.nnet.sigmoid(T.dot(predict, W_out) + b_out)
        # Consecutive pairs are forward and backward sequences.
        predict = predict.reshape((-1, 2))
        predict = T.max(predict, axis=1)
        loss = -target * T.log(predict +
                               1e-5) - (1 - target) * T.log(1 - predict + 1e-5)
        loss = loss * target_weight
        loss = T.mean(loss)

        L1 = 0
        L2 = 0
        for param in self.parameters:
            L1 += abs(param[0].sum())
            L2 += (param[0]**2).sum()
        cost = loss + L1_reg * L1 + L2_reg * L2

        if (optimizer == "sgd"):
            parameters = [[each[0], each[1]] for each in self.parameters]
            updates = optimization.sgd(cost, parameters, mom, l_r,
                                       gradient_clip, param_clip)
            self.train_func = theano.function(
                inputs=[dnaseq, target, target_weight, l_r, mom],
                outputs=[cost, predict],
                updates=updates,
                mode=mode)
        elif (optimizer == "adadelta"):
            updates = optimization.adadelta(cost, self.parameters, param_clip,
                                            l_r)
            self.train_func = theano.function(
                inputs=[dnaseq, target, target_weight, l_r],
                outputs=[cost, predict],
                updates=updates,
                mode=mode)

        self.error_func = theano.function(
            inputs=[dnaseq, target, target_weight],
            outputs=[cost, predict],
            mode=mode)

        self.debug_func = theano.function(inputs=[dnaseq],
                                          outputs=saved,
                                          mode=mode)

        self.predict_func = theano.function(inputs=[dnaseq],
                                            outputs=predict,
                                            mode=mode)

        self.optimizer = optimizer
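
# After construction, debug_func returns for each input sequence the index of
# the strongest motif filter and its activation, useful for motif inspection;
# `model` and `dnaseq_batch` are hypothetical here.
saved = model.debug_func(dnaseq_batch)  # shape (batch, 2): [motif index, activation]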
Example #6
    def __init__(self,
                 num_motif,
                 max_motif_len,
                 embed,
                 L1_reg=0.00,
                 L2_reg=0.00,
                 gradient_clip=1,
                 param_clip=1,
                 optimizer="sgd",
                 seq_len=300,
                 tfcell_comb_cnt=0):
        num_motif = num_motif.split(",")
        num_motif = [int(each) for each in num_motif]
        if len(num_motif) != 3:
            sys.stderr.write(
                "TFImputeModel requires the num_motif parameter to be in "
                "X,Y,Z format.\n")
            sys.exit(1)
        hidden_size = num_motif[2]
        max_window = num_motif[1]
        num_motif = num_motif[0]
        window_cnt = seq_len // max_window

        full_product_count = 1
        embed_len = 0
        embed_sum = 0
        startidx = [0]
        for each in embed:
            embed_len += each[1]
            embed_sum += each[0] * each[1]
            startidx.append(embed_sum)
            full_product_count *= each[0]

        if (tfcell_comb_cnt == 0):
            tfcell_comb_cnt = full_product_count

        self.tfcell_comb_cnt = tfcell_comb_cnt

        self.embed = [tuple(each) for each in embed]
        self.startidx = startidx

        embedding = util.createRandomShareAdaDelta((embed_sum,), 'tf_embedding')

        # The shape of the tensor is: [number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]
        motif_filter_size = (num_motif, 4, max_motif_len, 1)
        motif_filter = util.createRandomShareAdaDelta(motif_filter_size,
                                                      'motif_filter',
                                                      mean=0,
                                                      std=0.01)

        W_g = util.createRandomShareAdaDelta((embed_len, num_motif),
                                             'W_g',
                                             mean=0,
                                             std=0.01)
        b_g = util.createRandomShareAdaDelta((num_motif,),
                                             'b_g',
                                             mean=0,
                                             std=0.01)

        rec_bias = util.createRandomShareAdaDelta((num_motif,),
                                                  'rec_bias',
                                                  const=-4)

        W_h = util.createRandomShareAdaDelta(
            (window_cnt * num_motif, hidden_size), 'W_h', mean=0, std=0.01)
        b_h = util.createRandomShareAdaDelta((hidden_size,),
                                             'b_h',
                                             mean=0,
                                             std=0.01)

        # This shared variable is special. It will be useful only after prepare_batch_predict is called
        # In prepare_batch_predict, the values will be reset
        self.precompute_gate = util.createZeroShare(size=(tfcell_comb_cnt,
                                                          num_motif),
                                                    name='precompute_gate')

        self.parameters = [
            embedding, motif_filter, rec_bias, W_g, b_g, W_h, b_h
        ]
        [embedding, motif_filter, rec_bias, W_g, b_g, W_h,
         b_h] = [each[0] for each in self.parameters]

        # The shape of the tensor is as follows: [mini-batch size, number of input feature maps, image height, image width].
        # number of input feature maps is 4, height should be sequence length and width should always be 1
        dnaseq = T.tensor4(name='dnaseq', dtype=theano.config.floatX)
        idxes = T.matrix(name='idxes', dtype='int32')
        target = T.vector(name='target', dtype=theano.config.floatX)
        target_weight = T.vector(name='target_weight',
                                 dtype=theano.config.floatX)
        l_r = T.scalar('l_r')
        mom = T.scalar('mom')

        predict = conv.conv2d(dnaseq, motif_filter)
        rec_bias = rec_bias.reshape((1, num_motif, 1, 1))
        predict = T.maximum(0, predict - rec_bias)

        predict = max_pool_2d(predict, (max_window, 1), ignore_border=True)
        predict = T.transpose(predict, (0, 2, 1, 3))
        predict = predict.reshape(
            (predict.shape[0], predict.shape[1], predict.shape[2]))
        # Now [batch_size, window_cnt, num_motif]
        seqfeature = predict

        em = []
        for i in range(len(embed)):
            sidx = startidx[i]
            eidx = startidx[i + 1]
            curr = embedding[sidx:eidx]
            curr = curr.reshape(embed[i])
            em.append(curr[idxes[:, i]])

        embed = T.concatenate(em, axis=1)
        embed = embed.reshape((embed.shape[0], 1, embed.shape[1]))
        gate = T.nnet.sigmoid(T.dot(embed, W_g) + b_g)
        predict = seqfeature * gate
        predict = predict.reshape((predict.shape[0], -1))
        predict = T.dot(predict, W_h) + b_h
        predict = T.max(predict, axis=1)
        predict = T.nnet.sigmoid(predict)
        # Consecutive pairs are forward and backward sequences.
        predict = predict.reshape((-1, 2))
        predict = T.max(predict, axis=1)
        loss = -target * T.log(predict +
                               1e-5) - (1 - target) * T.log(1 - predict + 1e-5)
        loss = loss * target_weight
        loss = T.mean(loss)

        # The precompute_gate shape is: (tfcell_comb_cnt, num_motif)
        # The seqfeature shape is: (batch_size, window_cnt, num_motif)
        # The precompute_gate shape could be: (12376, 2000)
        # The seqfeature shape could be: (16, 3, 2000)
        precompute_gate = T.extra_ops.repeat(self.precompute_gate,
                                             seqfeature.shape[0] * window_cnt,
                                             axis=0)
        bat_pred = seqfeature.reshape((-1, num_motif))
        bat_pred = T.tile(bat_pred, (tfcell_comb_cnt, 1))
        bat_pred = bat_pred * precompute_gate  # shape: [tfcell_comb_cnt * batch_size * window_cnt, num_motif]
        bat_pred = bat_pred.reshape(
            (tfcell_comb_cnt, seqfeature.shape[0] // 2, 2, -1))
        bat_pred = T.dot(bat_pred, W_h) + b_h
        bat_pred = T.max(bat_pred, axis=3)
        bat_pred = T.nnet.sigmoid(bat_pred)
        bat_pred = T.max(bat_pred, axis=2)
        bat_pred = bat_pred.transpose([1, 0])

        L1 = 0
        L2 = 0
        # Only the gate weights W_g are regularized in this model
        # (using means rather than sums).
        for param in [W_g]:
            L1 += T.mean(abs(param))
            L2 += T.mean(param**2)
        cost = loss + L1_reg * L1 + L2_reg * L2

        if (optimizer == "sgd"):
            parameters = [[each[0], each[1]] for each in self.parameters]
            updates = optimization.sgd(cost, parameters, mom, l_r,
                                       gradient_clip, param_clip)
            self.train_func = theano.function(
                inputs=[idxes, dnaseq, target, target_weight, l_r, mom],
                outputs=[cost, predict],
                updates=updates,
                mode=mode)
        elif (optimizer == "adadelta"):
            updates = optimization.adadelta(cost, self.parameters, param_clip,
                                            l_r)
            self.train_func = theano.function(
                inputs=[idxes, dnaseq, target, target_weight, l_r],
                outputs=[cost, predict],
                updates=updates,
                mode=mode)

        self.error_func = theano.function(
            inputs=[idxes, dnaseq, target, target_weight],
            outputs=[loss, predict],
            mode=mode)

        self.predict_func = theano.function(inputs=[idxes, dnaseq],
                                            outputs=predict,
                                            mode=mode)

        self.batch_predict_func = theano.function(inputs=[dnaseq],
                                                  outputs=bat_pred,
                                                  mode=mode)

        # After the model is trained,
        # given the DNA sequence, output the learned sequence feature.
        # The calculation is independent of TF and cell type
        seqfeature = seqfeature.reshape([seqfeature.shape[0], -1])
        self.seqfeature_func = theano.function(inputs=[dnaseq],
                                               outputs=seqfeature,
                                               mode=mode)

        # After the model is trained,
        # call this function to precalculate the gates
        gate = gate.reshape((gate.shape[0], -1))
        self.gate_func = theano.function(inputs=[idxes],
                                         outputs=gate,
                                         mode=mode)

        self.optimizer = optimizer
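
# A hedged sketch of how the precomputed gates appear intended to be used for
# batch prediction (per the comment above, precompute_gate only becomes
# meaningful once a prepare_batch_predict step resets it); `model`,
# `all_comb_idxes` (one row per TF/cell-type combination) and `dnaseq_batch`
# are hypothetical.
gates = model.gate_func(all_comb_idxes)          # (tfcell_comb_cnt, num_motif)
model.precompute_gate.set_value(gates)           # reset the shared variable
scores = model.batch_predict_func(dnaseq_batch)  # (batch/2, tfcell_comb_cnt)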
Example #7
from theano import function
from nn import nnet
from optimization import adadelta, CE
from theano.tensor.nnet import relu, softmax, sigmoid, hard_sigmoid, conv
from keras.datasets import mnist

input_size = 28*28
output_size = 10


(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape((X_train.shape[0], input_size))
y_train = y_train.flatten()

X_test = X_test.reshape((X_test.shape[0], input_size))
y_test = y_test.flatten()

model = nnet((input_size,))

model.addFullyConnectedLayer(400)
model.addActivation(hard_sigmoid)
model.addFullyConnectedLayer(200)
model.addActivation(hard_sigmoid)
model.addFullyConnectedLayer(output_size)
model.addActivation(softmax)

model.train(X_train, y_train, X_test, y_test, 10, 100, adadelta(1e-1), CE(0.005), random_order=False)
#model.train(X_train, y_train, X_test, y_test, 10, 100, adadelta(1e-2), CE(0.005))