Пример #1
0
def advance(u, u_1, u_2, f_a, Cx2, Cy2, dt2, V=None, step1=False):
    """Compile a Theano function over the step inputs and evaluate it once.

    Symbolic float32 matrices are declared for ``u``, ``u_1``, ``u_2``,
    ``f_a`` and ``V``, float32 scalars for ``Cx2``, ``Cy2`` and ``dt2``, and
    an int64 scalar for ``step1``.  When ``V`` is None it is replaced by a
    zero array shaped like ``f_a``.  Returns whatever the compiled function
    yields for the numeric arguments.

    NOTE(review): the declared output ``u_out`` is a fresh symbolic matrix
    that is not built from any of the inputs, and the module-level name
    ``step`` is passed in the ``mode`` position of ``theano.function`` --
    presumably the output was meant to be computed through ``step``; confirm.
    """
    # Symbolic placeholders, one per numeric argument.
    u_sym, u_1_sym, u_2_sym = T.fmatrices('u_in', 'u_1_in', 'u_2_in')
    f_a_sym, V_sym = T.fmatrices('f_in', 'V_in')
    step1_sym = T.lscalar('step1_in')
    Cx2_sym, Cy2_sym, dt2_sym = T.fscalars('Cx2_in', 'Cy2_in', 'dt2_in')

    out_sym = T.fmatrix('u_out')

    # A missing V means "no initial velocity": use zeros shaped like f_a.
    if V is None:
        V = np.zeros_like(f_a)

    symbolic_inputs = [
        u_sym, u_1_sym, u_2_sym,
        f_a_sym,
        Cx2_sym, Cy2_sym, dt2_sym,
        V_sym,
        step1_sym,
    ]
    compiled_step = theano.function(symbolic_inputs, out_sym, step,
                                    on_unused_input='ignore')

    return compiled_step(u, u_1, u_2, f_a, Cx2, Cy2, dt2, V, step1)
Пример #2
0
    def get_samples():  # get samples from the model
        """Draw samples from the model, save them to 'samples' and return them.

        The top hidden layer is sampled from a normal distribution whose mean
        is the mean of H2 over the first 50000 training rows and whose std is
        four times the std of H2; the sample is decoded with ``G1(G2(.))`` and
        then refined by three rounds of ``iteration`` followed by ``G1``.
        """
        X, Y = T.fmatrices(2)
        train_givens = {X: train_x[0:50000], Y: train_y[0:50000]}

        # Only the second value returned by iteration() is used for the prior.
        _unused_h1, H2 = iteration(X, 15, 0.1)

        # Prior statistics of H2, taken along axis 0.
        prior_mean = T.mean(H2, axis=0)
        prior_std = T.std(H2, axis=0)

        # Sample h2 from the (widened) prior.
        h2_sample = RNG.normal(
            (10000, 100),
            avg=prior_mean,
            std=4 * prior_std,
            ndim=None,
            dtype=H2.dtype,
            nstreams=None,
        )

        # Decode, then iteratively refine the decoded samples.
        x_sample = G1(G2(h2_sample))
        for _round in range(3):
            h1_sample, h2_sample = iteration(x_sample, 15, 0.1, 3)
            x_sample = G1(h1_sample)

        sampling = theano.function(
            [],
            x_sample,
            on_unused_input='ignore',
            givens=train_givens,
        )
        samples = sampling()
        np.save('samples', samples)
        return samples
Пример #3
0
    def __init__(self,
                 steps      = 1,
                 num_layers = 2,
                 num_units  = 32,
                 eps        = 1e-2):
        """Build the symbolic filter graph and compile its two Theano functions.

        The state transition is an ``AutoRegressiveModel`` network applied to
        the state vector; its Jacobian is used to propagate the covariance.
        The equations below have the form of an extended Kalman filter
        predict/update pair.

        Args:
            steps: length of the state vector (also passed to the
                autoregressive model).
            num_layers: forwarded to ``AutoRegressiveModel``.
            num_units: forwarded to ``AutoRegressiveModel``.
            eps: forwarded to ``AutoRegressiveModel``.

        Attributes created:
            prediction: compiled function ``(X, P, Q, dt) -> [X_, P_]``.
            update: compiled function ``(X, Z, P, Q, R, dt) -> [X__, P__]``.
        """

        # Symbolic inputs: float32 vectors X, Z; float32 matrices P, Q, R;
        # and a scalar dt.
        self.X, self.Z         = T.fvectors('X','Z')
        self.P, self.Q, self.R = T.fmatrices('P','Q','R')
        self.dt                = T.scalar('dt')

        self.matrix_inv = T.nlinalg.MatrixInverse()

        self.ar = AutoRegressiveModel(steps      = steps,
                                      num_layers = num_layers,
                                      num_units  = num_units,
                                      eps        = eps)

        # Feed X through the autoregressive network: (steps,) -> (1, steps)
        # -> network -> (1,).
        l = InputLayer(input_var = self.X,
                       shape     = (steps,))
        l = ReshapeLayer(l, shape = (1,steps,))
        l = self.ar.network(l)
        l = ReshapeLayer(l, shape=(1,))

        self.l_ = l
        self.f_ = get_output(self.l_)

        # Predicted state: the network output followed by the first steps-1
        # entries of X (eye(steps)[:-1] drops the last row of the identity).
        self.X_  = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.X)], axis=0)
        # Jacobian of the predicted state with respect to the current state.
        self.fX_ = G.jacobian(self.X_.flatten(), self.X)
        # Predicted covariance: fX_ P fX_^T plus dt*Q injected through the
        # first column / first row of the identity.
        self.P_  = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + \
                    T.dot(T.dot(T.eye(steps)[:,0:1], self.dt * self.Q), T.eye(steps)[0:1,:])

        # Measurement model: the first component of the predicted state.
        self.h = T.dot(T.eye(steps)[0:1], self.X_)
        # Residual between the measurement Z and the predicted measurement.
        self.y = self.Z - self.h

        # Jacobian of the measurement with respect to the predicted state.
        self.hX_ = G.jacobian(self.h, self.X_)

        # S = hX_ P_ hX_^T + R and K = P_ hX_^T S^-1.
        self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
        self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

        # Corrected state and covariance.
        self.X__ = self.X_ + T.dot(self.K, self.y)
        self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)


        # Prediction step only: needs no measurement Z and no R.
        self.prediction = theano.function(inputs  = [self.X,
                                                     self.P,
                                                     self.Q,
                                                     self.dt],
                                          outputs = [self.X_,
                                                     self.P_],
                                          allow_input_downcast = True)

        # Prediction followed by correction with the measurement Z.
        self.update = theano.function(inputs  = [self.X,
                                                 self.Z,
                                                 self.P,
                                                 self.Q,
                                                 self.R,
                                                 self.dt],
                                      outputs = [self.X__,
                                                 self.P__],
                                      allow_input_downcast = True)
Пример #4
0
def SGD(eta, n_epochs, valid_steps, momentum, low, high, init, random_init='gaussian'):
    t0 = time.time()
    index = T.iscalar('index')
    x, y, z, alpha = T.fmatrices('x', 'y', 'z', 'alpha')
    n_minibatch = max_minibatch - 2
    model = Model(n_tree, n_nodes, low, high, init, random_init)
    model_op, auto_upd = model.op(x)
    valid_op, valid_upd = model.valid_op(z, valid_steps)

    loss = model.loss(y, model_op)
    valid_loss = model.loss(alpha, valid_op)

    print "Updation to be compiled yet"

    params = model.params
    train_upd = gradient_updates_momentum(loss, params, eta, momentum) + auto_upd
    train_output = [model_op, loss]
    valid_output = [valid_op, valid_loss]

    print "Train function to be compiled"
    train_fn = theano.function([index], train_output, updates=train_upd,
                               givens={x: train_x[:, n_in * index: n_in * (index + 1)],
                                       y: train_x[:, (n_in * index + n_tree): (n_in * (index + 1) + 1)]}, name='train_fn')

    valid_fn = theano.function([index], valid_output, updates=valid_upd,
                               givens={z: train_x[:, n_tree * index: n_tree * (index + 1)],
                                       alpha: train_x[:, (n_in * index + n_tree): (n_in * index + n_tree + valid_steps)]}, name='valid_fn')

    print "Train function compiled"


    # Compilation over
    #################
    ## TRAIN MODEL ##
    #################
    print 'The compilation time is', time.time() - t0
    loss_list = []
    for i in range(n_epochs):
        epoch_loss = 0

        t1 = time.time()
        for idx in range(n_minibatch):
            print 'The current idx is ', idx,' and the epoch number is  ', i
            output, loss_ = train_fn(idx)[:-1], train_fn(idx)[-1]
            if idx%500 == 0:
                v_output, v_loss = valid_fn(idx/500)[:-1][0], valid_fn(idx/500)[-1]
                print 'v_pred is', ' '.join([mappings_words[prediction(abc)] for abc in v_output])
                print 'v_loss is', np.array(v_loss)
            print 'The loss is', loss_
            epoch_loss +=loss_
            loss_list.append(loss_)

            print '=='*20
        print 'The mean loss for the epoch was', epoch_loss/float(n_minibatch)
        print 'Time taken by this epoch is', time.time()-t1
        print '-'*50
    pyplot.plot(loss_list)
    pyplot.show()
Пример #5
0
    def get_cost_updates(self, corruption_level, learning_rate, sample_method, enc_function):
        """Return ``(cost, updates)`` for one training step of the dA.

        The input ``self.x`` is corrupted, encoded and reconstructed.  When
        ``sample_method`` is -1 the loss compares the corrupted input with the
        reconstruction directly; otherwise both are first passed through
        ``self.get_sampled``.  ``self.error_type`` selects the loss: 1 for
        cross-entropy, 0 for squared error.  ``updates`` is a list of
        ``(param, param - learning_rate * gradient)`` pairs.
        """
        corrupted = self.get_corrupted_input(self.x, corruption_level)
        hidden = self.get_hidden_values(corrupted, enc_function)
        reconstruction = self.get_reconstructed_input(hidden, enc_function)

        # Placeholder value; replaced below whenever error_type is 0 or 1.
        per_example = T.fmatrices()

        # Choose the pair of tensors the loss is computed on.
        if sample_method == -1:
            # encoding only, no sampling
            target, estimate = corrupted, reconstruction
        else:
            # sampled version
            target = self.get_sampled(corrupted)
            estimate = self.get_sampled(reconstruction)

        if self.error_type == 1:
            # cross-entropy, summed over axis 1
            per_example = - T.sum(
                target * T.log(estimate) + (1 - target) * T.log(1 - estimate),
                axis=1)

        if self.error_type == 0:
            # squared error, summed over axis 1
            per_example = T.sum((target - estimate) ** 2, axis=1)

        # per_example holds one loss value per minibatch row; the minibatch
        # cost is their mean.
        cost = T.mean(per_example)

        # Plain gradient-descent step for every parameter.
        grads = T.grad(cost, self.params)
        updates = []
        for param, grad in zip(self.params, grads):
            updates.append((param, param - learning_rate * grad))

        return (cost, updates)
Пример #6
0
def test_advinc_subtensor1():
    """Second case of the local_gpu_advanced_incsubtensor1 optimisation.

    Increments rows 0 and 2 of a GPU shared 3x3 matrix and checks that the
    compiled graph holds exactly one GpuAdvancedIncSubtensor1 node and that
    the numerical result is correct.
    """
    shared = cuda.shared_constructor
    # shared = tensor.shared
    base = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="float32")
    increment = numpy.asarray([[10, 10, 10], [10, 10, 10]], dtype="float32")
    x = shared(base, name="x")
    y = T.fmatrices("y")
    expr = T.advanced_inc_subtensor1(x, y, [0, 2])
    f = theano.function([y], expr, mode=mode_with_gpu)

    gpu_nodes = [node for node in f.maker.env.toposort()
                 if isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)]
    assert len(gpu_nodes) == 1

    expected = [[11.0, 12.0, 13.0], [4.0, 5.0, 6.0], [17.0, 18.0, 19.0]]
    assert numpy.allclose(f(increment), expected)
Пример #7
0
def test_set_subtensor():
    """set_subtensor on a column slice must run on the GPU and give the right values.

    Checks that the compiled graph contains exactly one GpuSubtensor node and
    exactly one GpuIncSubtensor node in "set" mode, and that columns 1:3 of
    ``x`` are replaced by the matching columns of ``y``.  The numerical result
    is asserted rather than printed so a wrong value fails the test.
    """
    x, y = T.fmatrices('x', 'y')
    xval = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                         dtype='float32')
    yval = numpy.asarray([[10, 10, 10], [10, 10, 10], [10, 10, 10]],
                         dtype='float32')
    expr = T.set_subtensor(x[:, 1:3], y[:, 1:3])
    f = theano.function([x, y], expr, mode=mode_with_gpu)

    topo = f.maker.env.toposort()
    assert sum([isinstance(node.op, cuda.GpuSubtensor)
                for node in topo]) == 1
    assert sum([isinstance(node.op, cuda.GpuIncSubtensor) and
                node.op.set_instead_of_inc == True
                for node in topo]) == 1
    # Column 0 is untouched; columns 1 and 2 are overwritten with y's values.
    assert numpy.allclose(f(xval, yval), [[1., 10., 10.],
                                          [4., 10., 10.], [7., 10., 10.]])
Пример #8
0
def test_advinc_subtensor1():
    """Exercise the second case of the opt local_gpu_advanced_incsubtensor1.

    Rows 0 and 2 of a shared GPU matrix are incremented by the rows of ``y``;
    the graph must contain exactly one GpuAdvancedIncSubtensor1 node.
    """
    shared = cuda.shared_constructor
    #shared = tensor.shared
    x_init = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype='float32')
    y_rows = numpy.asarray([[10, 10, 10], [10, 10, 10]], dtype='float32')
    x = shared(x_init, name='x')
    y = T.fmatrices('y')
    expr = T.advanced_inc_subtensor1(x, y, [0, 2])
    f = theano.function([y], expr, mode=mode_with_gpu)

    n_gpu_ops = 0
    for node in f.maker.env.toposort():
        if isinstance(node.op, cuda.GpuAdvancedIncSubtensor1):
            n_gpu_ops += 1
    assert n_gpu_ops == 1

    assert numpy.allclose(f(y_rows),
                          [[11., 12., 13.], [4., 5., 6.], [17., 18., 19.]])
Пример #9
0
def test_abs_cost():
    """absoluteError is ~0 for equal inputs and ~0.1/3 when one of three entries is off by 0.1."""
    ySym, yhatSym = T.fmatrices('y', 'yhat')
    ac = theano.function([yhatSym, ySym],
                         outputs=absoluteError(yhatSym, ySym))

    float_type = theano.config.floatX
    prediction = np.asarray([[1], [2], [3]], dtype=float_type)
    target = np.asarray([[1], [2], [3]], dtype=float_type)

    # Identical prediction and target: the cost must vanish.
    exact_cost = ac(prediction, target)
    assert np.abs(exact_cost) < 1e-5

    # Perturb a single entry by 0.1.
    prediction = np.asarray([[1], [2.1], [3]], dtype=float_type)
    perturbed_cost = ac(prediction, target)
    assert np.abs(perturbed_cost - 0.1/3) < 1e-5
Пример #10
0
def test_squared_error_cost():
    """squaredError is ~0 for equal inputs and ~0.01/3 when one of three entries is off by 0.1."""
    ySym, yhatSym = T.fmatrices('y', 'yhat')
    sqerr = theano.function([yhatSym, ySym],
                            outputs=squaredError(yhatSym, ySym))

    float_type = theano.config.floatX
    prediction = np.asarray([[1], [2], [3]], dtype=float_type)
    target = np.asarray([[1], [2], [3]], dtype=float_type)

    # Identical prediction and target: the cost must vanish.
    exact_cost = sqerr(prediction, target)
    assert np.abs(exact_cost) < 1e-5

    # Perturb a single entry by 0.1.
    prediction = np.asarray([[1], [2.1], [3]], dtype=float_type)
    perturbed_cost = sqerr(prediction, target)
    assert np.abs(perturbed_cost - 0.01/3) < 1e-5
Пример #11
0
def test_inc_subtensor():
    """inc_subtensor on a column slice must run on the GPU and add y into x.

    The compiled graph must contain exactly one GpuSubtensor node and exactly
    one GpuIncSubtensor node in "increment" mode.
    """
    shared = cuda.shared_constructor
    #shared = tensor.shared
    x, y = T.fmatrices('x', 'y')
    base = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype='float32')
    delta = numpy.asarray([[10, 10, 10], [10, 10, 10], [10, 10, 10]],
                          dtype='float32')
    expr = T.inc_subtensor(x[:, 1:3], y[:, 1:3])
    f = theano.function([x, y], expr, mode=mode_with_gpu)

    subtensor_nodes = [node for node in f.maker.fgraph.toposort()
                       if isinstance(node.op, cuda.GpuSubtensor)]
    assert len(subtensor_nodes) == 1

    inc_nodes = [node for node in f.maker.fgraph.toposort()
                 if isinstance(node.op, cuda.GpuIncSubtensor) and
                 node.op.set_instead_of_inc == False]
    assert len(inc_nodes) == 1

    # Column 0 is unchanged; columns 1 and 2 are increased by 10.
    expected = [[1., 12., 13.], [4., 15., 16.], [7., 18., 19.]]
    assert numpy.allclose(f(base, delta), expected)
Пример #12
0
def test_inc_subtensor():
    """Check GPU lowering and numerical result of inc_subtensor on columns 1:3."""
    shared = cuda.shared_constructor
    #shared = tensor.shared
    x, y = T.fmatrices('x', 'y')
    x_data = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype='float32')
    y_data = numpy.asarray([[10, 10, 10], [10, 10, 10], [10, 10, 10]],
                           dtype='float32')
    expr = T.inc_subtensor(x[:, 1:3], y[:, 1:3])
    f = theano.function([x, y], expr, mode=mode_with_gpu)

    # Exactly one GPU slice node ...
    n_subtensor = 0
    for node in f.maker.fgraph.toposort():
        if isinstance(node.op, cuda.GpuSubtensor):
            n_subtensor += 1
    assert n_subtensor == 1

    # ... and exactly one GPU inc node that increments rather than sets.
    n_inc = 0
    for node in f.maker.fgraph.toposort():
        if (isinstance(node.op, cuda.GpuIncSubtensor) and
                node.op.set_instead_of_inc == False):
            n_inc += 1
    assert n_inc == 1

    result = f(x_data, y_data)
    assert numpy.allclose(result, [[1., 12., 13.],
                                   [4., 15., 16.], [7., 18., 19.]])
Пример #13
0
from theano import *
import theano.tensor as T
import numpy as np

# Logistic function: element-wise 1 / (1 + exp(-x)) on a float32 matrix.
x = T.matrix('x', 'float32')
op = 1 / (1 + T.exp(-x))

# Compile the symbolic expression into a callable.
logistic = function([x], op)

mat1 = [[0, 1], [-1, -2]]
print(logistic(mat1))

# Multiple outputs: one compiled function returning three expressions
# (difference, absolute difference, squared difference) of two matrices.
a, b = T.fmatrices('a', 'b')
diff = a - b
absDiff = abs(diff)
sqrDiff = diff**2

f = function([a, b], [diff, absDiff, sqrDiff])

mat2 = [[10, 5], [5, 10]]
mat3 = [[5, 10], [10, 5]]

print(f(mat2, mat3))

# Default values

# NOTE: this rebinds the name x (previously the float32 matrix above) to a
# float32 scalar.
x, y = T.fscalars('x', 'y')
z = x + y
	def __init__(self, We_initial, char_embedd_table_initial, params):
		"""Build the inference-network graph and compile ``train_fn`` / ``eval_fn``.

		Two networks are constructed:

		* a word+char BiLSTM "local energy" network (Lasagne layers) whose
		  parameters, together with the label-transition matrix ``Wyy``, are
		  loaded from a pickle file and are not added to ``self.params``;
		* an encoder/decoder inference network (``LSTM`` layers) whose
		  parameters are collected in ``self.params`` and trained so that its
		  predictions maximise the energy defined by the first network.

		Args:
			We_initial: initial word-embedding matrix; ``shape[0]`` is used as
				the vocabulary size and ``shape[1]`` as the embedding size.
			char_embedd_table_initial: initial character-embedding table.
			params: configuration object; the attributes read here are
				``hidden``, ``hidden_inf``, ``de_hidden_size``,
				``char_embedd_dim``, ``char_dic``, ``emb``, ``num_filters``
				and ``eta``.

		Indentation in this method is tabs only.  The previous mix of tabs
		and spaces only parsed with tabs treated as 8 columns and is a
		TabError on Python 3.
		"""

		We = theano.shared(We_initial)

		# initial embedding for the InfNet
		We_inf = theano.shared(We_initial)
		embsize = We_initial.shape[1]
		hidden = params.hidden
		self.en_hidden_size = params.hidden_inf
		self.num_labels = 17
		self.de_hidden_size = params.de_hidden_size

		char_embedd_dim = params.char_embedd_dim
		char_dic_size = len(params.char_dic)
		char_embedd_table = theano.shared(char_embedd_table_initial)
		char_embedd_table_inf = theano.shared(char_embedd_table_initial)

		input_var = T.imatrix(name='inputs')
		target_var = T.imatrix(name='targets')
		target_var_in = T.imatrix(name='targets')
		mask_var = T.fmatrix(name='masks')
		mask_var1 = T.fmatrix(name='masks1')
		char_input_var = T.itensor3(name='char-inputs')

		length = T.iscalar()
		length0 = T.iscalar()
		t_t = T.fscalar()
		t_t0 = T.fscalar()

		use_dropout = T.fscalar()
		use_dropout0 = T.fscalar()

		# Label-transition matrix with one extra row/column (index -1) used
		# below for the start and end terms.
		Wyy0 = np.random.uniform(-0.02, 0.02, (self.num_labels + 1, self.num_labels + 1)).astype('float32')
		Wyy = theano.shared(Wyy0)

		# ---- local-energy network (word embedding + char CNN + BiLSTM) ----
		l_in_word = lasagne.layers.InputLayer((None, None))
		l_mask_word = lasagne.layers.InputLayer(shape=(None, None))

		if params.emb == 1:
			l_emb_word = lasagne.layers.EmbeddingLayer(l_in_word, input_size=We_initial.shape[0], output_size=embsize, W=We, name='word_embedding')
		else:
			l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We)

		layer_char_input = lasagne.layers.InputLayer(shape=(None, None, Max_Char_Length),
			input_var=char_input_var, name='char-input')

		layer_char = lasagne.layers.reshape(layer_char_input, (-1, [2]))
		layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char, input_size=char_dic_size,
			output_size=char_embedd_dim, W=char_embedd_table,
			name='char_embedding')

		layer_char = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1))

		# first get some necessary dimensions or parameters
		conv_window = 3
		num_filters = params.num_filters

		# construct convolution layer
		cnn_layer = lasagne.layers.Conv1DLayer(layer_char, num_filters=num_filters, filter_size=conv_window, pad='full',
			nonlinearity=lasagne.nonlinearities.tanh, name='cnn')
		# infer the pool size for pooling (pool size should go through all time step of cnn)
		_, _, pool_size = cnn_layer.output_shape

		# construct max pool layer
		pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size)
		# reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
		output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, length, [1]))

		# finally, concatenate the two incoming layers together.
		incoming = lasagne.layers.concat([output_cnn_layer, l_emb_word], axis=2)

		l_lstm_wordf = lasagne.layers.LSTMLayer(incoming, hidden, mask_input=l_mask_word)
		l_lstm_wordb = lasagne.layers.LSTMLayer(incoming, hidden, mask_input=l_mask_word, backwards=True)

		concat = lasagne.layers.concat([l_lstm_wordf, l_lstm_wordb], axis=2)

		l_reshape_concat = lasagne.layers.ReshapeLayer(concat, (-1, 2 * hidden))

		l_local = lasagne.layers.DenseLayer(l_reshape_concat, num_units=self.num_labels + 1, nonlinearity=lasagne.nonlinearities.linear)

		network_params = lasagne.layers.get_all_params(l_local, trainable=True)
		network_params.append(Wyy)

		print(len(network_params))
		# Load the pretrained parameter values.  The file is opened in binary
		# mode, as the pickle documentation requires, and closed even if
		# loading fails.
		# NOTE(security): pickle.load executes arbitrary code from the file;
		# only use it on a checkpoint from a trusted source.
		with open('NER_BiLSTM_CNN_CRF_.Batchsize_10_dropout_1_LearningRate_0.005_0.0_50_hidden_200.pickle', 'rb') as f:
			data = pickle.load(f)

		for idx, p in enumerate(network_params):
			p.set_value(data[idx])

		# ---- inference network (encoder/decoder) ----
		self.params = []
		self.hos = []
		self.Cos = []
		self.encoder_lstm_layers = []
		self.decoder_lstm_layers = []
		self.lstm_layers_num = 1

		ei, di, dt = T.imatrices(3)    #place holders
		decoderInputs0, em, em1, dm, tf, di0 = T.fmatrices(6)
		ci = T.itensor3()

		#### the last one is for the start symbol
		self.de_lookuptable = theano.shared(name="Decoder LookUpTable", value=init_xavier_uniform(self.num_labels + 1, self.de_hidden_size), borrow=True)

		self.linear = theano.shared(name="Linear", value=init_xavier_uniform(self.de_hidden_size + 2 * self.en_hidden_size, self.num_labels), borrow=True)
		self.linear_bias = theano.shared(name="Hidden to Bias", value=np.asarray(np.random.randn(self.num_labels, ) * 0., dtype=theano.config.floatX), borrow=True)

		# Switch the symbolic inputs to a time-major layout.
		input_var_shuffle = input_var.dimshuffle(1, 0)
		mask_var_shuffle = mask_var.dimshuffle(1, 0)
		target_var_in_shuffle = target_var_in.dimshuffle(1, 0)
		target_var_shuffle = target_var.dimshuffle(1, 0)

		self.params += [We_inf, self.linear, self.de_lookuptable, self.linear_bias]

		###### [sent_length, batch, embsize]
		state_below = We_inf[input_var_shuffle.flatten()].reshape((input_var_shuffle.shape[0], input_var_shuffle.shape[1], embsize))

		###### character word embedding
		layer_char_input_inf = lasagne.layers.InputLayer(shape=(None, None, Max_Char_Length),
			input_var=char_input_var, name='char-input')
		layer_char_inf = lasagne.layers.reshape(layer_char_input_inf, (-1, [2]))
		layer_char_embedding_inf = lasagne.layers.EmbeddingLayer(layer_char_inf, input_size=char_dic_size,
			output_size=char_embedd_dim, W=char_embedd_table_inf,
			name='char_embedding_inf')

		layer_char_inf = lasagne.layers.DimshuffleLayer(layer_char_embedding_inf, pattern=(0, 2, 1))

		cnn_layer_inf = lasagne.layers.Conv1DLayer(layer_char_inf, num_filters=num_filters, filter_size=conv_window, pad='full',
			nonlinearity=lasagne.nonlinearities.tanh, name='cnn_inf')

		# pool_size is the value inferred from the first char CNN above.
		pool_layer_inf = lasagne.layers.MaxPool1DLayer(cnn_layer_inf, pool_size=pool_size)
		output_cnn_layer_inf = lasagne.layers.reshape(pool_layer_inf, (-1, length, [1]))
		char_params = lasagne.layers.get_all_params(output_cnn_layer_inf, trainable=True)
		self.params += char_params

		###### [batch, sent_length, num_filters]
		char_state_below = lasagne.layers.get_output(output_cnn_layer_inf)

		char_state_below = dropout_layer(char_state_below, use_dropout, trng)

		char_state_shuff = char_state_below.dimshuffle(1, 0, 2)
		state_below = T.concatenate([state_below, char_state_shuff], axis=2)

		state_below = dropout_layer(state_below, use_dropout, trng)

		enclstm_f = LSTM(embsize + num_filters, self.en_hidden_size)
		enclstm_b = LSTM(embsize + num_filters, self.en_hidden_size, True)
		self.encoder_lstm_layers.append(enclstm_f)    #append
		self.encoder_lstm_layers.append(enclstm_b)    #append
		self.params += enclstm_f.params + enclstm_b.params   #concatenate

		hs_f, Cs_f = enclstm_f.forward(state_below, mask_var_shuffle)
		hs_b, Cs_b = enclstm_b.forward(state_below, mask_var_shuffle)

		hs = T.concatenate([hs_f, hs_b], axis=2)
		Cs = T.concatenate([Cs_f, Cs_b], axis=2)

		hs0 = T.concatenate([hs_f[-1], hs_b[0]], axis=1)
		Cs0 = T.concatenate([Cs_f[-1], Cs_b[0]], axis=1)
		# The decoder starts from all-zero hidden and cell states.  The
		# trailing comma makes a 1-tuple, so += appends one element.
		self.hos += T.alloc(np.asarray(0., dtype=theano.config.floatX), input_var_shuffle.shape[1], self.de_hidden_size),
		self.Cos += T.alloc(np.asarray(0., dtype=theano.config.floatX), input_var_shuffle.shape[1], self.de_hidden_size),

		Encoder = hs

		state_below = self.de_lookuptable[target_var_in_shuffle.flatten()].reshape((target_var_in_shuffle.shape[0], target_var_in_shuffle.shape[1], self.de_hidden_size))

		for i in range(self.lstm_layers_num):
			declstm = LSTM(self.de_hidden_size, self.de_hidden_size)
			self.decoder_lstm_layers += declstm,    #append
			self.params += declstm.params    #concatenate
			ho, Co = self.hos[i], self.Cos[i]
			state_below, Cs = declstm.forward(state_below, mask_var_shuffle, ho, Co)

		decoder_lstm_outputs = T.concatenate([state_below, Encoder], axis=2)
		linear_outputs = T.dot(decoder_lstm_outputs, self.linear) + self.linear_bias[None, None, :]
		softmax_outputs, updates = theano.scan(
			fn=lambda x: T.nnet.softmax(x),
			sequences=[linear_outputs],
			)

		def _NLL(pred, y, m):
			return -m * T.log(pred[T.arange(input_var.shape[0]), y])

		# Disabled teacher-forcing training path, kept as an inert string.
		"""
		costs, _ = theano.scan(fn=_NLL, sequences=[softmax_outputs, target_var_shuffle, mask_var_shuffle])
		#loss = costs.sum() / mask_var.sum() + params.L2*sum(lasagne.regularization.l2(x) for x in self.params)
		loss = costs.sum() / mask_var.sum()

		updates = lasagne.updates.sgd(loss, self.params, self.eta)
		updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9)

		###################################################
		#### using the ground truth when training
		##################################################
		self._train = theano.function(
			inputs=[ei, em, di, dm, dt],
			outputs=[loss, softmax_outputs],
			updates=updates,
			givens={input_var:ei, mask_var:em, target_var_in:di, decoderMask:dm, target_var:dt}
			)
		"""

		def _step2(ctx_, state_, hs_, Cs_):
			"""One decoding step that feeds the previous prediction back in.

			``ctx_`` is the encoder output for this time step, ``state_`` the
			previous output distribution, and ``hs_`` / ``Cs_`` the stacked
			per-layer decoder hidden and cell states.
			"""
			hs, Cs = [], []
			# Greedy choice of the previous label, then its embedding.
			token_idxs = T.cast(state_.argmax(axis=-1), "int32")
			msk_ = T.fill((T.zeros_like(token_idxs, dtype="float32")), 1.)
			msk_ = msk_.dimshuffle('x', 0)
			state_below0 = self.de_lookuptable[token_idxs].reshape((1, ctx_.shape[0], self.de_hidden_size))
			for i, lstm in enumerate(self.decoder_lstm_layers):
				h, C = lstm.forward(state_below0, msk_, hs_[i], Cs_[i])    #mind msk
				hs += h[-1],
				Cs += C[-1],
				state_below0 = h

			hs, Cs = T.as_tensor_variable(hs), T.as_tensor_variable(Cs)
			state_below0 = state_below0.reshape((ctx_.shape[0], self.de_hidden_size))
			state_below0 = T.concatenate([ctx_, state_below0], axis=1)

			newpred = T.dot(state_below0, self.linear) + self.linear_bias[None, :]
			state_below = T.nnet.softmax(newpred)
			##### the beginning symbol probability is 0
			extra_p = T.zeros_like(hs[:, :, 0])
			state_below = T.concatenate([state_below, extra_p.T], axis=1)

			return state_below, hs, Cs

		hs0, Cs0 = T.as_tensor_variable(self.hos, name="hs0"), T.as_tensor_variable(self.Cos, name="Cs0")

		train_outputs, _ = theano.scan(
			fn=_step2,
			sequences=[Encoder],
			outputs_info=[decoderInputs0, hs0, Cs0],
			n_steps=input_var_shuffle.shape[0]
			)

		# Back to batch-major, drop the extra start-symbol column, mask padding.
		predy = train_outputs[0].dimshuffle(1, 0, 2)
		predy = predy[:, :, :-1] * mask_var[:, :, None]
		predy0 = predy.reshape((-1, self.num_labels))

		def inner_function(targets_one_step, mask_one_step, prev_label, tg_energy):
			"""
			:param targets_one_step: [batch_size, t]
			:param prev_label: [batch_size, t]
			:param tg_energy: [batch_size]
			:return: the current targets and the accumulated transition energy
			"""
			new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1])
			new_ta_energy_t = tg_energy + T.sum(new_ta_energy * targets_one_step, axis=1)
			# Masked positions keep the energy accumulated so far.
			tg_energy_t = T.switch(mask_one_step, new_ta_energy_t, tg_energy)

			return [targets_one_step, tg_energy_t]

		local_energy = lasagne.layers.get_output(l_local, {l_in_word: input_var, l_mask_word: mask_var, layer_char_input: char_input_var})
		local_energy = local_energy.reshape((-1, length, self.num_labels))
		local_energy = local_energy * mask_var[:, :, None]

		#####################
		# for the end symbol of a sequence
		####################

		end_term = Wyy[:-1, -1]
		local_energy = local_energy + end_term.dimshuffle('x', 'x', 0) * mask_var1[:, :, None]

		predy_in = T.argmax(predy0, axis=1)
		A = T.extra_ops.to_one_hot(predy_in, self.num_labels)
		A = A.reshape((-1, length, self.num_labels))

		targets_shuffled = predy.dimshuffle(1, 0, 2)
		target_time0 = targets_shuffled[0]

		masks_shuffled = mask_var.dimshuffle(1, 0)

		initial_energy0 = T.dot(target_time0, Wyy[-1, :-1])

		initials = [target_time0, initial_energy0]
		[_, target_energies], _ = theano.scan(fn=inner_function, outputs_info=initials, sequences=[targets_shuffled[1:], masks_shuffled[1:]])
		# Total energy = transition energy at the last step + masked local energy.
		cost11 = target_energies[-1] + T.sum(T.sum(local_energy * predy, axis=2) * mask_var, axis=1)

		# Training minimises the negated mean energy.
		cost = T.mean(-cost11)

		from momentum import momentum
		updates_a = momentum(cost, self.params, params.eta, momentum=0.9)

		self.train_fn = theano.function(
			inputs=[ei, ci, em, em1, length0, di0, use_dropout0],
			outputs=[cost],
			updates=updates_a,
			on_unused_input='ignore',
			givens={input_var: ei, char_input_var: ci, mask_var: em, mask_var1: em1, length: length0, decoderInputs0: di0, use_dropout: use_dropout0}
			)

		prediction = T.argmax(predy, axis=2)
		corr = T.eq(prediction, target_var)
		corr_train = (corr * mask_var).sum(dtype=theano.config.floatX)
		num_tokens = mask_var.sum(dtype=theano.config.floatX)

		self.eval_fn = theano.function(
			inputs=[ei, ci, em, em1, length0, di0, use_dropout0],
			outputs=[prediction, -cost11],
			on_unused_input='ignore',
			givens={input_var: ei, char_input_var: ci, mask_var: em, mask_var1: em1, length: length0, decoderInputs0: di0, use_dropout: use_dropout0}
			)
Пример #15
0
from theano import *
import theano.tensor as T
import numpy as np

# Logistic function: element-wise 1 / (1 + exp(-x)) compiled for a float32
# matrix input.
mat1 = [[0, 1], [-1, -2]]

x = T.matrix(name='x', dtype='float32')
op = 1 / (1 + T.exp(-x))
logistic = function(inputs=[x], outputs=op)

print(logistic(mat1))

# Multiple outputs: a single compiled function returns the element-wise
# difference, its absolute value and its square.
mat2 = [[10, 5], [5, 10]]
mat3 = [[5, 10], [10, 5]]

a, b = T.fmatrices('a', 'b')
diff = a - b
absDiff = abs(diff)
sqrDiff = diff ** 2
f = function(inputs=[a, b], outputs=[diff, absDiff, sqrDiff])

print(f(mat2, mat3))

# Default values
# (only the symbolic sum of two float32 scalars is set up here)
x, y = T.fscalars('x', 'y')
z = x + y
Пример #16
0
    def __init__(self,
                 steps      = 1,
                 num_layers = 2,
                 num_units  = 32,
                 eps        = 1e-2,
                 alpha      = 1e-2,
                 beta       = 2.0,
                 kappa      = 0.0):
        """Build the symbolic sigma-point filter graph and compile it.

        The state ``X`` is propagated through an ``AutoRegressiveModel``
        network at ``2 * steps + 1`` sigma points, the weighted mean and
        covariance of the propagated points are formed, and a gain-based
        correction with the observation ``Z`` is applied.

        Two Theano functions are compiled and stored on the instance:

        * ``self.prediction(X, P, Q, dt) -> [X_, P_]``
        * ``self.update(X, Z, P, Q, R, dt) -> [X__, P__]``

        Args:
            steps: state dimension; also fixes the number of sigma points
                (``2 * steps + 1``).
            num_layers: forwarded to ``AutoRegressiveModel``.
            num_units: forwarded to ``AutoRegressiveModel``.
            eps: forwarded to ``AutoRegressiveModel``.
            alpha: sigma-point scaling parameter used in ``lam`` and ``W_c``.
            beta: scaling parameter used in ``lam`` and ``W_c``.
            kappa: scaling parameter used in ``lam``.
        """

        # NOTE(review): the usual unscented-transform scaling is
        # alpha^2 * (n + kappa) - n; the extra "+ beta" here looks unusual --
        # confirm it is intended.
        lam = alpha * alpha * (steps + kappa) - steps + beta

        # Symbolic inputs: state X and observation Z (float32 vectors),
        # matrices P, Q, R (float32) and the scalar time step dt.
        self.X, self.Z         = T.fvectors('X','Z')
        self.P, self.Q, self.R = T.fmatrices('P','Q','R')
        self.dt                = T.scalar('dt')

        sqrtm = MatrixSqrt()
        self.matrix_inv = T.nlinalg.MatrixInverse()

        self.ar = AutoRegressiveModel(steps      = steps,
                                      num_layers = num_layers,
                                      num_units  = num_units,
                                      eps        = eps)

        def weighted_mean(A,w):
            """Return sum_i w[i] * A[:, i] as a column, over 2*steps+1 columns."""
            mu = T.zeros((steps, 1))
            for i in range(2 * steps + 1):
                mu += w[i] * A[:,i:i+1]
            return mu

        def weighted_covariance(A,B,a,b,w):
            """Return sum_i w[i] * (A[:, i] - a)(B[:, i] - b)^T over 2*steps+1 columns."""
            sigma = T.zeros((steps,steps))
            for i in range(2 * steps + 1):
                sigma += w[i] * T.dot((A[:,i:i+1] - a), (B[:,i:i+1] - b).T)
            return sigma


        # Sigma points: X replicated across 2*steps+1 columns, offset by
        # 0, +sqrt(steps + lam) * sqrtP and -sqrt(steps + lam) * sqrtP.
        self.sqrtP = sqrtm(self.P)
        self.XB = T.dot(T.stack(self.X).T, T.ones((1, 2 * steps +1))) + T.concatenate([T.zeros((steps,1)),
                                                                                       T.sqrt(steps + lam) * self.sqrtP,
                                                                                       -T.sqrt(steps + lam) * self.sqrtP], axis=1)

        # Feed every sigma point (one per row of XB.T) through the
        # autoregressive network; the output is reshaped to one row.
        l = InputLayer(input_var = self.XB.T,
                       shape     = (2 * steps + 1, steps))
        l = self.ar.network(l)
        l = ReshapeLayer(l, shape=(1, 2 * steps + 1))

        self.l_ = l
        self.f_ = get_output(self.l_)

        # Propagated sigma points: the network output becomes the first row,
        # followed by all rows of XB except the last one.
        self.XC = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.XB)], axis=0)

        # Mean and covariance weights; they differ only in the first entry.
        W_m = T.concatenate([(lam / (steps + lam)) * T.ones(1),
                             (1.0 / (2.0 * (steps + lam))) * T.ones(2 * steps)], axis=0)
        W_c = T.concatenate([(lam / (steps + lam) + (1.0 - alpha * alpha + beta)) * T.ones(1),
                             (1.0 / (2.0 * (steps + lam))) * T.ones(2 * steps)], axis=0)

        # Predicted mean and covariance; dt * Q is injected through the first
        # row/column selectors of the identity (presumably Q is 1x1 -- TODO
        # confirm).
        self.X_ = weighted_mean(self.XC, W_m)
        self.P_ = weighted_covariance(self.XC, self.XC, self.X_, self.X_, W_c) + \
                                      T.dot(T.dot(T.eye(steps)[:,0:1], self.dt * self.Q), T.eye(steps)[0:1,:])

        # Predicted observations: the first row of the propagated points.
        # NOTE(review): ZB has a single row while weighted_mean and
        # weighted_covariance accumulate into (steps, .) zero tensors; this
        # looks like it relies on steps == 1 -- confirm before using steps > 1.
        self.ZB = T.dot(T.eye(steps)[0:1,:], self.XC)

        self.Z_  = weighted_mean(self.ZB, W_m)
        self.S   = weighted_covariance(self.ZB, self.ZB, self.Z_, self.Z_, W_c) + self.R

        # Gain: cross-covariance of state and observation times inverse of S.
        self.K  = T.dot(weighted_covariance(self.XC, self.ZB, self.X_, self.Z_, W_c),
                        self.matrix_inv(self.S))

        # Corrected mean and covariance.
        self.X__ = self.X_ + T.dot(self.K, self.Z - self.Z_)
        self.P__ = self.P_ - T.dot(T.dot(self.K, self.S), self.K.T)

        # Compiled prediction step (no observation needed).
        self.prediction = theano.function(inputs  = [self.X,
                                                     self.P,
                                                     self.Q,
                                                     self.dt],
                                          outputs = [self.X_,
                                                     self.P_],
                                          allow_input_downcast = True)

        # Compiled prediction + correction step.
        self.update = theano.function(inputs  = [self.X,
                                                 self.Z,
                                                 self.P,
                                                 self.Q,
                                                 self.R,
                                                 self.dt],
                                      outputs = [self.X__,
                                                 self.P__],
                                      allow_input_downcast = True)
Пример #17
0
def exp(__lr):
    """Train a two-hidden-layer encoder/decoder model and log test statistics.

    Encoders (F) and decoders (G) are trained layer by layer against targets
    obtained from an iterative inference procedure; every fifth epoch the
    reconstruction cost on the test set and a Parzen-window log-likelihood of
    freshly drawn samples are recorded and printed.

    Args:
        __lr: learning rate handed to ``rms_prop``.
    """
    max_epochs, batch_size, n_batches = 1000, 100, 500  # = 50000/100
    n_vis, n_hid1, n_hid2 = 784, 1000, 100

    def init_scale(fan_a, fan_b):
        return np.sqrt(6. / (fan_a + fan_b))

    # Encoder parameters (creation order is kept: it drives the RNG).
    enc_w1 = rand_ortho((n_vis, n_hid1), init_scale(n_vis, n_hid1))
    enc_b1 = zeros((n_hid1, ))
    enc_w2 = rand_ortho((n_hid1, n_hid2), init_scale(n_hid1, n_hid2))
    enc_b2 = zeros((n_hid2, ))

    # Decoder parameters.
    dec_w1 = rand_ortho((n_hid1, n_vis), init_scale(n_hid1, n_vis))
    dec_b1 = zeros((n_vis, ))
    dec_w2 = rand_ortho((n_hid2, n_hid1), init_scale(n_hid2, n_hid1))
    dec_b2 = zeros((n_hid1, ))

    # Layer definitions.
    def encode1(v):
        return softplus(T.dot(v, enc_w1) + enc_b1)

    def decode1(h1):
        return sigm(T.dot(h1, dec_w1) + dec_b1)

    def encode2(h1):
        return sigm(T.dot(h1, enc_w2) + enc_b2)

    def decode2(h2):
        return softplus(T.dot(h2, dec_w2) + dec_b2)

    batch_idx, epoch_sym = T.lscalar(), T.fscalar()
    x_sym, y_sym = T.fmatrices(2)

    def givens_train(idx):
        return {
            x_sym: train_x[idx * batch_size:(idx + 1) * batch_size],
            y_sym: train_y[idx * batch_size:(idx + 1) * batch_size],
        }

    givens_valid = {x_sym: valid_x, y_sym: valid_y}
    givens_test = {x_sym: test_x, y_sym: test_y}
    givens_empty = {
        x_sym: sharedX(np.zeros((10000, 784))),
        y_sym: sharedX(np.zeros((10000, 10))),
    }

    def iteration(X, k, alpha, beta=0.01):
        """Infer (h1, h2) from X with k refinement steps."""
        h1 = encode1(X)
        h2 = encode2(h1)
        for _ in range(k):
            h2 = h2 + alpha * (encode2(h1) - encode2(decode2(h2)))
            h1 = h1 + alpha * (encode1(X) - encode1(decode1(h1))) + \
                alpha * beta * (decode2(h2) - h1)
        return h1, h2

    # Plain feed-forward codes (kept as in the original, not used below).
    ff_h1, ff_h2 = encode1(x_sym), encode2(encode1(x_sym))
    inf_h1, inf_h2 = iteration(x_sym, 15, 0.1)

    def avg_bin(x, k):
        """Average of k sampled random binary values of x."""
        total = 0. * x
        for _ in range(k):
            total = total + samp(x)
        return total / k

    # Gradients; the order of the noisy terms is preserved because each one
    # adds a random-stream node to the graph.
    grad_dec_w1, grad_dec_b1 = T.grad(
        mse(decode1(gaussian(inf_h1, 0.3)), x_sym),
        [dec_w1, dec_b1],
        consider_constant=[inf_h1, x_sym])
    grad_enc_w1, grad_enc_b1 = T.grad(
        mse(encode1(gaussian(x_sym, 0.5)), inf_h1),
        [enc_w1, enc_b1],
        consider_constant=[x_sym, inf_h1])

    grad_dec_w2, grad_dec_b2 = T.grad(
        mse(decode2(avg_bin(inf_h2, 3)), inf_h1),
        [dec_w2, dec_b2],
        consider_constant=[inf_h2, inf_h1])
    grad_enc_w2, grad_enc_b2 = T.grad(
        mse(encode2(gaussian(inf_h1, 0.5)), inf_h2),
        [enc_w2, enc_b2],
        consider_constant=[inf_h1, inf_h2])

    cost = mse(decode1(decode2(encode2(encode1(x_sym)))), x_sym)

    # Training function: one RMSprop step on a mini-batch.
    param_grads = {
        enc_w1: grad_enc_w1,
        enc_b1: grad_enc_b1,
        dec_w1: grad_dec_w1,
        dec_b1: grad_dec_b1,
        enc_w2: grad_enc_w2,
        enc_b2: grad_enc_b2,
        dec_w2: grad_dec_w2,
        dec_b2: grad_dec_b2,
    }
    train_sync = theano.function([batch_idx, epoch_sym], [cost],
                                 givens=givens_train(batch_idx),
                                 on_unused_input='ignore',
                                 updates=rms_prop(param_grads, __lr))

    def get_samples():
        """Draw samples from the model, save them to 'samples' and return them."""
        sx, sy = T.fmatrices(2)
        givens_train_samples = {sx: train_x[0:50000], sy: train_y[0:50000]}

        _, prior_h2 = iteration(sx, 15, 0.1)

        # Prior statistics of the top code (mean and std per unit).
        prior_mean = T.mean(prior_h2, axis=0)
        prior_std = T.std(prior_h2, axis=0)
        # Sample the top code from the (widened) prior.
        h2_draw = RNG.normal((10000, 100),
                             avg=prior_mean,
                             std=4 * prior_std,
                             ndim=None,
                             dtype=prior_h2.dtype,
                             nstreams=None)

        # Decode, then refine the samples by repeated inference/decoding.
        recon = decode1(decode2(h2_draw))
        for _ in range(3):
            h1_ref, h2_draw = iteration(recon, 15, 0.1, 3)
            recon = decode1(h1_ref)

        sampling = theano.function([],
                                   recon,
                                   on_unused_input='ignore',
                                   givens=givens_train_samples)
        samples = sampling()
        np.save('samples', samples)
        return samples

    def test_ll(sigma):
        """Parzen-window log-likelihood of the test set under fresh samples."""
        drawn = get_samples()
        return get_ll(np_test_x, theano_parzen(drawn, sigma), batch_size=10)

    test_cost = theano.function([batch_idx, epoch_sym], [cost],
                                on_unused_input='ignore',
                                givens=givens_test)

    print('epochs test_loglikelihood time')

    # Training loop.
    t = time.time()
    monitor = {
        'train': [],
        'valid': [],
        'test': [],
        'test_ll': [],
        'test_ll_base': []
    }
    for epoch in range(1, max_epochs + 1):
        batch_costs = np.array(
            [train_sync(batch, epoch) for batch in range(n_batches)])
        monitor['train'].append(batch_costs.mean(axis=0))

        if epoch % 5 != 0:
            continue
        monitor['test'].append(test_cost(0, 0))
        monitor['test_ll'].append(np.mean(test_ll(0.2)))
        print(epoch, monitor['test_ll'][-1], time.time() - t)
    # Input Layer
    l_in         = InputLayer((batch_size, n_in), input_var=input_var)
    # Recurrent EI Net
    l_in_hid     = DenseLayer(l_in, n_hid, nonlinearity=lasagne.nonlinearities.rectify)

    # Output Layer
    l_shp        = ReshapeLayer(l_in_hid, (-1, n_hid))
    l_dense      = DenseLayer(l_shp, num_units=n_out, nonlinearity=lasagne.nonlinearities.sigmoid)
    # To reshape back to our original shape, we can use the symbolic shape variables we retrieved above.
    l_out        = ReshapeLayer(l_dense, (batch_size, n_out))

    return l_out, l_in_hid

# Script entry point: sets up the symbolic inputs, the task generators, the
# network and the loss expression.
if __name__ == '__main__':
    # Define the input and expected output variable
    input_var, target_var = T.fmatrices('input', 'target')
    
    # The generator to sample examples from
    # (training and testing use different condition labels)
    tr_cond               = 'two_gains'
    test_cond             = 'all_gains'
    generator             = CausalInferenceTaskFFWD(max_iter=250001, batch_size=100, n_in=50, n_out=1, sigma_sq=100.0, tr_cond=tr_cond)
    test_generator        = CausalInferenceTaskFFWD(max_iter=2501,   batch_size=100, n_in=50, n_out=1, sigma_sq=100.0, tr_cond=test_cond)

    # The network input width is twice the generator's n_in.
    l_out, l_rec          = model(input_var, batch_size=generator.batch_size, n_in=2*generator.n_in, n_out=generator.n_out, n_hid=200)
    
    # The generated output variable and the loss function
    # (the L2 penalty below is currently disabled)
#    all_layers            = lasagne.layers.get_all_layers(l_out)
#    l2_penalty            = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * 1e-6
    # Predictions are clipped to [1e-6, 1 - 1e-6] before the mean squared error.
    pred_var              = T.clip(lasagne.layers.get_output(l_out), 1e-6, 1. - 1e-6)
    loss                  = T.mean(lasagne.objectives.squared_error(pred_var, target_var)) # + l2_penalty
    
Пример #19
0
 def RnnEvaluator(weights):
     """Build a Theano function that computes the internal state of the network
     when called.

     Args:
         weights: re-iterable sequence of 5-tuples
             ``(neurons, activator, isInput, isOutput, weightFrame)``, one per
             layer. ``weightFrame`` iterates over ``(srcIdx, w)`` pairs, where
             ``srcIdx`` is the index of the source layer and ``w`` the weight
             matrix applied to that layer's state.

     Returns:
         A callable ``fix_inputs(inputs, times=5)`` that evaluates the network
         for at most ``times`` scan steps (the scan stops early once no layer
         state changes) and returns the final states of the output layers.
         One-dimensional inputs are treated as a single data point and the
         outputs are un-batched accordingly.

     Raises:
         Exception: while building the graph, if a layer with incoming
             weights uses an unknown activator.
     """
     numInputs = sum(1 for _, _, isInput, _, _ in weights if isInput)

     def evaluate_net(*states):
         # One scan step: recompute every layer's activation from the
         # previous states of its source layers.
         activations = []
         for idx, (neurons, activator, isInput, isOutput,
                   weightFrame) in enumerate(weights):
             sumParts = [T.dot(states[srcIdx], w.transpose())
                         for srcIdx, w in weightFrame]

             if sumParts:
                 activity = T.sum(T.stack(*sumParts), axis=0)
                 if activator == TIDENTITY:
                     activation = activity
                 elif activator == TLOGISTIC:
                     activation = 1. / (1. + T.exp(-activity))
                 elif activator == THYPERBOLIC:
                     activation = T.tanh(activity)
                 elif activator == TTHRESHOLD:
                     activation = T.sgn(activity)
                 elif activator == TBIAS:
                     activation = T.ones_like(activity, dtype='float32')
                 elif activator == TRADIAL:
                     activation = T.exp(-activity*activity/2.0)
                 else:
                     # The original referenced an undefined ``layer`` here and
                     # never formatted the message; report the layer index.
                     raise Exception(
                         "Unknown activation function for layer {0}".format(idx))
             else:
                 # Layers without incoming weights are reset to zeros.
                 activation = T.zeros_like(states[idx])

             activations.append(activation)

         # Stop the scan as soon as every layer's state is unchanged.
         checklist = [T.all(T.eq(a, s)) for a, s in zip(activations, states)]
         condition = T.all(T.as_tensor_variable(checklist))
         return activations, {}, theano.scan_module.until(condition)

     def make_states(*inputs):
         # Initial states: input layers take the supplied matrices in order,
         # all other layers start as ones of shape (numPoints, neurons).
         states = []
         idx = 0
         numPoints = inputs[0].shape[0] if inputs else 1
         for neurons, activator, isInput, isOutput, weightFrame in weights:
             if isInput:
                 states.append(inputs[idx])
                 idx += 1
             else:
                 states.append(T.ones((numPoints, neurons), dtype='float32'))
         return states

     def project_output(states):
         # Keep only the states that belong to output layers.
         return [state
                 for state, (_, _, _, isOutput, _) in zip(states, weights)
                 if isOutput]

     inputs = T.fmatrices(numInputs)
     times = T.iscalar()
     netValue, updates = theano.scan(
         fn=evaluate_net,
         outputs_info=make_states(*inputs),
         n_steps=times
     )

     # Last scan step of every layer's state sequence.
     result = [n[-1] for n in netValue]

     outputs = project_output(result)

     net = theano.function(inputs + [times], outputs)

     def fix_inputs(inputs, times=5):
         """Evaluate the compiled network, accepting 1-D inputs as one point."""
         reshape = bool(len(inputs)) and len(np.shape(inputs[0])) == 1
         if reshape:
             # (1, -1) also works for plain Python lists, unlike ``i.shape``.
             inputs = [np.reshape(i, (1, -1)) for i in inputs]
         outputs = net(*(list(inputs) + [times]))
         if reshape:
             return [o[0] for o in outputs]
         return outputs

     return fix_inputs
Пример #20
0
    def __init__(self, We_initial, params):
        self.textfile = open(params.outfile, 'w')
        We = theano.shared(We_initial)
        embsize = We_initial.shape[1]
        hidden = params.hidden

        self.num_labels = params.num_labels
        self.de_hidden_size = params.de_hidden_size
        self.en_hidden_size = params.en_hidden_size

        print params.de_hidden_size, hidden, params.num_labels

        self.lstm_layers_num = 1

        input_var = T.imatrix(name='inputs')
        target_var = T.imatrix(name='targets')
        target_var_in = T.imatrix(name='in_targets')
        mask_var = T.fmatrix(name='masks')
        mask_var1 = T.fmatrix(name='masks1')
        length = T.iscalar()
        length0 = T.iscalar()
        t_t = T.fscalar()
        t_t0 = T.fscalar()

        Wyy0 = np.random.uniform(
            -0.02, 0.02,
            (self.num_labels + 1, self.num_labels + 1)).astype('float32')
        Wyy = theano.shared(Wyy0)

        l_in_word = lasagne.layers.InputLayer((None, None))
        l_mask_word = lasagne.layers.InputLayer(shape=(None, None))

        if params.emb == 1:
            l_emb_word = lasagne.layers.EmbeddingLayer(
                l_in_word,
                input_size=We_initial.shape[0],
                output_size=embsize,
                W=We)
        else:
            l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We)

        l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word,
                                                512,
                                                mask_input=l_mask_word)
        l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word,
                                                512,
                                                mask_input=l_mask_word,
                                                backwards=True)

        concat = lasagne.layers.concat([l_lstm_wordf, l_lstm_wordb], axis=2)

        l_reshape_concat = lasagne.layers.ReshapeLayer(concat, (-1, 2 * 512))

        l_local = lasagne.layers.DenseLayer(
            l_reshape_concat,
            num_units=self.num_labels,
            nonlinearity=lasagne.nonlinearities.linear)

        network_params = lasagne.layers.get_all_params(l_local, trainable=True)
        network_params.append(Wyy)

        print len(network_params)
        f = open(
            'ccctag_CRF_Bilstm_Viterbi_.Batchsize_10_dropout_0_LearningRate_0.01_0.0512_tagversoin_2.pickle',
            'r')
        data = pickle.load(f)
        f.close()

        for idx, p in enumerate(network_params):
            #print data[idx].shape
            p.set_value(data[idx])

        self.params = []
        self.hos = []
        self.Cos = []
        self.encoder_lstm_layers = []
        self.decoder_lstm_layers = []

        ei, di, dt = T.imatrices(3)  #place holders
        decoderInputs0, em, em1, dm, tf, di0 = T.fmatrices(6)

        #### the last one is for the stary symbole
        self.de_lookuptable = theano.shared(name="Decoder LookUpTable",
                                            value=init_xavier_uniform(
                                                self.num_labels + 1,
                                                self.de_hidden_size),
                                            borrow=True)

        self.linear = theano.shared(
            name="Linear",
            value=init_xavier_uniform(
                self.de_hidden_size + 2 * self.en_hidden_size,
                self.num_labels),
            borrow=True)
        self.linear_bias = theano.shared(
            name="Hidden to Bias",
            value=np.asarray(np.random.randn(self.num_labels, ) * 0.,
                             dtype=theano.config.floatX),
            borrow=True)
        #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*hidden, self.de_hidden_size), borrow = True)

        #self.hidden_bias = theano.shared(
        #        name="Hidden to Bias",
        #        value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) ,
        #        borrow=True
        #        )

        input_var_shuffle = input_var.dimshuffle(1, 0)
        mask_var_shuffle = mask_var.dimshuffle(1, 0)
        target_var_in_shuffle = target_var_in.dimshuffle(1, 0)
        target_var_shuffle = target_var.dimshuffle(1, 0)

        self.params += [self.linear, self.linear_bias,
                        self.de_lookuptable]  #concatenate
        state_below = We[input_var_shuffle.flatten()].reshape(
            (input_var_shuffle.shape[0], input_var_shuffle.shape[1], embsize))
        enclstm_f = LSTM(embsize, self.en_hidden_size)
        enclstm_b = LSTM(embsize, self.en_hidden_size, True)
        self.encoder_lstm_layers.append(enclstm_f)  #append
        self.encoder_lstm_layers.append(enclstm_b)  #append
        self.params += enclstm_f.params + enclstm_b.params  #concatenate

        hs_f, Cs_f = enclstm_f.forward(state_below, mask_var_shuffle)
        hs_b, Cs_b = enclstm_b.forward(state_below, mask_var_shuffle)

        hs = T.concatenate([hs_f, hs_b], axis=2)
        Cs = T.concatenate([Cs_f, Cs_b], axis=2)

        hs0 = T.concatenate([hs_f[-1], hs_b[0]], axis=1)
        Cs0 = T.concatenate([Cs_f[-1], Cs_b[0]], axis=1)
        #self.hos += T.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias),
        #self.Cos += T.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias),
        self.hos += T.alloc(np.asarray(0., dtype=theano.config.floatX),
                            input_var_shuffle.shape[1], self.de_hidden_size),
        self.Cos += T.alloc(np.asarray(0., dtype=theano.config.floatX),
                            input_var_shuffle.shape[1], self.de_hidden_size),

        Encoder = hs

        ei, di, dt = T.imatrices(3)  #place holders
        em, dm, tf, di0 = T.fmatrices(4)
        self.encoder_function = theano.function(inputs=[ei, em],
                                                outputs=Encoder,
                                                givens={
                                                    input_var: ei,
                                                    mask_var: em
                                                })

        state_below = self.de_lookuptable[
            target_var_in_shuffle.flatten()].reshape(
                (target_var_in_shuffle.shape[0],
                 target_var_in_shuffle.shape[1], self.de_hidden_size))
        for i in range(self.lstm_layers_num):
            declstm = LSTM(self.de_hidden_size, self.de_hidden_size)
            self.decoder_lstm_layers += declstm,  #append
            self.params += declstm.params  #concatenate
            ho, Co = self.hos[i], self.Cos[i]
            state_below, Cs = declstm.forward(state_below, mask_var_shuffle,
                                              ho, Co)

        decoder_lstm_outputs = T.concatenate([Encoder, state_below], axis=2)

        linear_outputs = T.dot(decoder_lstm_outputs,
                               self.linear) + self.linear_bias[None, None, :]
        softmax_outputs, updates = theano.scan(
            fn=lambda x: T.nnet.softmax(x),
            sequences=[linear_outputs],
        )

        def _NLL(pred, y, m):
            return -m * T.log(pred[T.arange(input_var.shape[0]), y])

        def _step2(ctx_, state_, hs_, Cs_):

            #print ctx_.shape, state_.shape, hs_.shape, Cs_.shape

            hs, Cs = [], []
            token_idxs = T.cast(state_.argmax(axis=-1), "int32")
            msk_ = T.fill((T.zeros_like(token_idxs, dtype="float32")), 1)
            msk_ = msk_.dimshuffle('x', 0)
            state_below0 = self.de_lookuptable[token_idxs].reshape(
                (1, ctx_.shape[0], self.de_hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below0, msk_, hs_[i],
                                    Cs_[i])  #mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below0 = h

            hs, Cs = T.as_tensor_variable(hs), T.as_tensor_variable(Cs)
            state_below0 = state_below0.reshape(
                (ctx_.shape[0], self.de_hidden_size))
            state_below0 = T.concatenate([ctx_, state_below0], axis=1)

            newpred = T.dot(state_below0,
                            self.linear) + self.linear_bias[None, :]
            state_below = T.nnet.softmax(newpred)

            extra_p = T.zeros_like(hs[:, :, 0])
            state_below = T.concatenate([state_below, extra_p.T], axis=1)

            return state_below, hs, Cs

        ctx_0, state_0 = T.fmatrices(2)
        hs_0 = T.ftensor3()
        Cs_0 = T.ftensor3()
        state_below_tmp, hs_tmp, Cs_tmp = _step2(ctx_0, state_0, hs_0, Cs_0)
        self.f_next = theano.function([ctx_0, state_0, hs_0, Cs_0],
                                      [state_below_tmp, hs_tmp, Cs_tmp],
                                      name='f_next')

        hs0, Cs0 = T.as_tensor_variable(
            self.hos, name="hs0"), T.as_tensor_variable(self.Cos, name="Cs0")
        train_outputs, _ = theano.scan(fn=_step2,
                                       sequences=[Encoder],
                                       outputs_info=[decoderInputs0, hs0, Cs0],
                                       n_steps=input_var_shuffle.shape[0])

        predy = train_outputs[0].dimshuffle(1, 0, 2)
        predy = predy[:, :, :-1] * mask_var[:, :, None]
        predy0 = predy.reshape((-1, self.num_labels))

        def inner_function(targets_one_step, mask_one_step, prev_label,
                           tg_energy):
            """
                        :param targets_one_step: [batch_size, t]
                        :param prev_label: [batch_size, t]
                        :param tg_energy: [batch_size]
                        :return:
                        """
            new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1])
            new_ta_energy_t = tg_energy + T.sum(
                new_ta_energy * targets_one_step, axis=1)
            tg_energy_t = T.switch(mask_one_step, new_ta_energy_t, tg_energy)

            return [targets_one_step, tg_energy_t]

        local_energy = lasagne.layers.get_output(l_local, {
            l_in_word: input_var,
            l_mask_word: mask_var
        })
        local_energy = local_energy.reshape((-1, length, self.num_labels))
        local_energy = local_energy * mask_var[:, :, None]

        #####################
        # for the end symbole of a sequence
        ####################

        end_term = Wyy[:-1, -1]
        local_energy = local_energy + end_term.dimshuffle(
            'x', 'x', 0) * mask_var1[:, :, None]

        #predy0 = lasagne.layers.get_output(l_local_a, {l_in_word_a:input_var, l_mask_word_a:mask_var})

        predy_in = T.argmax(predy0, axis=1)
        A = T.extra_ops.to_one_hot(predy_in, self.num_labels)
        A = A.reshape((-1, length, self.num_labels))

        #predy = predy0.reshape((-1, length, 25))
        #predy = predy*mask_var[:,:,None]

        targets_shuffled = predy.dimshuffle(1, 0, 2)
        target_time0 = targets_shuffled[0]

        masks_shuffled = mask_var.dimshuffle(1, 0)

        initial_energy0 = T.dot(target_time0, Wyy[-1, :-1])

        initials = [target_time0, initial_energy0]
        [_, target_energies], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials,
            sequences=[targets_shuffled[1:], masks_shuffled[1:]])
        cost11 = target_energies[-1] + T.sum(
            T.sum(local_energy * predy, axis=2) * mask_var, axis=1)

        # compute the ground-truth energy

        targets_shuffled0 = A.dimshuffle(1, 0, 2)
        target_time00 = targets_shuffled0[0]

        initial_energy00 = T.dot(target_time00, Wyy[-1, :-1])

        initials0 = [target_time00, initial_energy00]
        [_, target_energies0], _ = theano.scan(
            fn=inner_function,
            outputs_info=initials0,
            sequences=[targets_shuffled0[1:], masks_shuffled[1:]])
        cost110 = target_energies0[-1] + T.sum(
            T.sum(local_energy * A, axis=2) * mask_var, axis=1)

        #predy_f =  predy.reshape((-1, 25))
        y_f = target_var.flatten()

        if (params.annealing == 0):
            lamb = params.L3
        elif (params.annealing == 1):
            lamb = params.L3 * (1 - 0.01 * t_t)

        if (params.regutype == 0):
            ce_hinge = lasagne.objectives.categorical_crossentropy(
                predy0 + eps, y_f)
            ce_hinge = ce_hinge.reshape((-1, length))
            ce_hinge = T.sum(ce_hinge * mask_var, axis=1)
            cost = T.mean(-cost11) + lamb * T.mean(ce_hinge)
        else:

            entropy_term = -T.sum(predy0 * T.log(predy0 + eps), axis=1)
            entropy_term = entropy_term.reshape((-1, length))
            entropy_term = T.sum(entropy_term * mask_var, axis=1)
            cost = T.mean(-cost11) - lamb * T.mean(entropy_term)
        """
		f = open('F0_simple.pickle')
                PARA = pickle.load(f)
                f.close()
                l2_term = sum(lasagne.regularization.l2(x-PARA[index]) for index, x in enumerate(a_params))


                cost = T.mean(-cost11) + params.L2*l2_term
		"""

        ##from adam import adam
        ##updates_a = adam(cost, self.params, params.eta)

        #updates_a = lasagne.updates.sgd(cost, self.params, params.eta)
        #updates_a = lasagne.updates.apply_momentum(updates_a, self.params, momentum=0.9)

        from momentum import momentum
        updates_a = momentum(cost, self.params, params.eta, momentum=0.9)

        if (params.regutype == 0):
            self.train_fn = theano.function(
                inputs=[ei, dt, em, em1, length0, t_t0, di0],
                outputs=[cost, ce_hinge],
                updates=updates_a,
                on_unused_input='ignore',
                givens={
                    input_var: ei,
                    target_var: dt,
                    mask_var: em,
                    mask_var1: em1,
                    length: length0,
                    t_t: t_t0,
                    decoderInputs0: di0
                })
            #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, ce_hinge], updates = updates_a, on_unused_input='ignore')
        else:

            self.train_fn = theano.function(
                inputs=[ei, dt, em, em1, length0, t_t0, di0],
                outputs=[cost, entropy_term],
                updates=updates_a,
                on_unused_input='ignore',
                givens={
                    input_var: ei,
                    target_var: dt,
                    mask_var: em,
                    mask_var1: em1,
                    length: length0,
                    t_t: t_t0,
                    decoderInputs0: di0
                })
            #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, entropy_term], updates = updates_a, on_unused_input='ignore')

        prediction = T.argmax(predy, axis=2)
        corr = T.eq(prediction, target_var)
        corr_train = (corr * mask_var).sum(dtype=theano.config.floatX)
        num_tokens = mask_var.sum(dtype=theano.config.floatX)

        self.eval_fn = theano.function(
            inputs=[ei, dt, em, em1, length0, di0],
            outputs=[cost11, cost110, corr_train, num_tokens, prediction],
            on_unused_input='ignore',
            givens={
                input_var: ei,
                target_var: dt,
                mask_var: em,
                mask_var1: em1,
                length: length0,
                decoderInputs0: di0
            })
Пример #21
0
    def __init__(self,Ne,Ni,n_inp,W_inp=None,W_inner=None):
        '''Create the state, weights and compiled Theano functions of the group.

        Parameters:
            Ne:      number of excitatory neurons; they use indices [0, Ne)
            Ni:      number of inhibitory neurons; they use indices [Ne, Ne+Ni)
            n_inp:   number of input lines (rows of the input weight matrix)
            W_inp:   optional initial input weights as a numpy array; when None
                     an (n_inp, Ne+Ni) default is built
                     (assumes a floatX array of that shape - TODO confirm)
            W_inner: optional initial inner weights as a numpy array; when None
                     an (Ne+Ni, Ne+Ni) default with a zero diagonal is built
                     (assumes a floatX array of that shape - TODO confirm)

        Main attributes:
            self.A:       update matrix
            self.S:       neuron state variables, rows are [v; ge; gi]
            self.W_inner: inner-connect weights in the group
            self.W_inp:   input weights
            self.spkC:    spike container
            self.Spre_inner, self.Spost_inner, self.Spre_inp, self.Spost_inp:
                          pre/post synaptic traces of the inner and input weights

        Compiled functions:
            self.stat:                 S <- dot(A, S)
            self.UpdateSpre_inner/..:  multiply one synaptic trace by SynFresh
            self.spike_fun(U):         S[0,:] > U
            self.S_inner1/S_inner2/S_inp: add weight rows of spiking sources to S
            self.USinner_f_*:          STDP updates of traces and weights
            self.resetV_f, self.reset_S_fn: reset entries / rows of S

        Update schedule the functions are meant for:
            1. Update state variables of SNNgroup: dot(A,S)
            1. Update state variables of Synapses: dot(exp(-dt/tau),Ssynapse),
               including W_inp and W_inner
            2. Call thresholding function: S[0,:]>Vt
            3. Push spikes into SpikeContainer
            4. Propagate spikes via Connection (possibly with delays)
            5. Update state variables of Synapses (STDP)
            6. Call reset function on neurons which has spiked
        '''
        floatX = theano.config.floatX
        self.number = Ne+Ni
        self.Ne = Ne
        self.Ni = Ni
        self.mV=self.ms=1e-3    # units
        dt=1*self.ms     # timestep
        self.dt = dt
        taum=20*self.ms    # membrane time constant
        taue=5*self.ms     # decay time constant of ge
        taui=10*self.ms    # decay time constant of gi
        self.Vt = 15*self.mV     # threshold = -55+70
        self.Vr = 0*self.mV      # reset = -70+70
        self.Vi = -10*self.mV    # VI = -80+70
        self.dApre = .0001
        self.dApost = -self.dApre*1.05
        self.tauP = 20*self.ms
        self.n_inp = n_inp
        self.weight = .001
        self.weightIn = 1.
        self.wmax = 200*self.weight      # upper clipping bound used by the STDP updates
        zero = np.array([0]).astype(floatX)
        self.zero = theano.shared(zero,name='zero',borrow=True)

        # Equations
        # ---------
        #   dv/dt  = (ge*70mV-gi*10-(v+70*mV))/(20*ms) : volt
        #   dge/dt = -ge/(5*self.ms) : volt
        #   dgi/dt = -gi/(10*self.ms) : volt
        #
        # Update matrix
        A = np.array([[np.exp(-dt/taum),0,0],
                      [taue/(taum-taue)*(np.exp(-dt/taum)-np.exp(-dt/taue)),np.exp(-dt/taue),0],
                      [-taui/(taum-taui)*(np.exp(-dt/taum)-np.exp(-dt/taui)),0,np.exp(-dt/taui)]
                      ],dtype=floatX).T
        self.A = theano.shared(value=A,name='A',borrow=True)

        # State variable : [v;ge;gi] (size=3*self.number); v starts at Vr, ge and gi at 0
        S = np.ones((1,self.number),dtype=floatX)*self.Vr
        S = np.vstack((S,np.zeros((2,self.number),dtype=floatX)))
        self.S_init = S
        self.S = theano.shared(value=S,name='S',borrow=True)

        # Weights of inner connections (size= self.number*self.number).
        # `is None` is required here: `array == None` compares element-wise and
        # the resulting array has no single truth value.
        # NOTE(review): self.W_inner_ini / self.W_inp_ini are only set when the
        # defaults are built - confirm nothing reads them for user-supplied weights.
        if W_inner is None:
            self.W_inner_ini = np.ones((self.number,self.number),dtype=floatX)*self.weight
            # rows of the inhibitory neurons (currently the same value as the others)
            self.W_inner_ini[Ne:,:] = self.weight
            # no self-connections
            np.fill_diagonal(self.W_inner_ini,0)
            self.W_inner = theano.shared(value=self.W_inner_ini,name='W_inner',borrow=True)
        else:
            self.W_inner = theano.shared(W_inner,name='W_inner',borrow=True)

        # Weights of input connections (size=n_inp*self.number)
        if W_inp is None:
            self.W_inp_ini = np.ones((self.n_inp,self.number)).astype(floatX)*self.weight
            # columns of the inhibitory neurons get the larger input weight
            self.W_inp_ini[:,self.Ne:] = self.weightIn
            self.W_inp = theano.shared(self.W_inp_ini,name='W_inp',borrow=True)
        else:
            self.W_inp = theano.shared(W_inp,name='W_inp',borrow=True)

        # Spike Container (plain numpy array, not a shared variable)
        self.spkC = np.empty((1,self.number)).astype(floatX)
        # not sure the dtype of sp_history
        self.sp_history = np.array([])
        self.V_record = np.empty((1,self.number))
        self.ge_record = np.empty((1,self.number))
        self.gi_record = np.empty((1,self.number))

        #================================================
        # "Update state function:: stat()"
        # takes no input and returns nothing; replaces self.S by dot(A,S)
        self.stat = theano.function(
            inputs = [],
            outputs = [],
            updates = {self.S : T.dot(self.A,self.S)})

        #============================================================
        # State of Synapses
        # Spre_inner :: pre  synapse of inner connections
        # Spost_inner:: post synapse of inner connections
        # Spre_inp   :: pre  synapse of input connections
        # Spost_inp  :: post synapse of input connections
        self.Spre_inner_ini = np.zeros((self.number,self.number),dtype=floatX)
        self.Spre_inner = theano.shared(self.Spre_inner_ini,name='Spre_inner',borrow=True)
        self.Spost_inner_ini = np.zeros((self.number,self.number),dtype=floatX)
        self.Spost_inner = theano.shared(value=self.Spost_inner_ini,name='Spost_inner',borrow=True)
        self.Spre_inp_ini = np.zeros((self.n_inp,self.number)).astype(floatX) #needs specification later
        self.Spre_inp = theano.shared(value=self.Spre_inp_ini,name='Spre_inp',borrow=True)
        self.Spost_inp_ini = np.zeros((self.n_inp,self.number)).astype(floatX) #needs specification later
        self.Spost_inp = theano.shared(value=self.Spost_inp_ini,name='Spost_inp',borrow=True)

        # Decay factor applied to every synaptic trace per time step: exp(-dt/tauP)
        self.tmp = np.array(np.exp(-self.dt/self.tauP).astype(floatX))
        self.SynFresh = theano.shared(self.tmp,name='SynFresh',borrow=True)
        self.UpdateSpre_inner = theano.function(inputs=[],outputs=None,updates={self.Spre_inner:T.dot(self.SynFresh,self.Spre_inner)},allow_input_downcast=True)
        self.UpdateSpost_inner = theano.function(inputs=[],outputs=None,updates={self.Spost_inner:T.dot(self.SynFresh,self.Spost_inner)},allow_input_downcast=True)
        self.UpdateSpre_inp = theano.function(inputs=[],outputs=None,updates={self.Spre_inp:T.dot(self.SynFresh,self.Spre_inp)},allow_input_downcast=True)
        self.UpdateSpost_inp = theano.function(inputs=[],outputs=None,updates={self.Spost_inp:T.dot(self.SynFresh,self.Spost_inp)},allow_input_downcast=True)

        #================================================================
        # "thresholding function:: spike_fun(U)"
        # returns the comparison S[0,:] > U, one entry per neuron
        U = T.fscalar('U')
        self.spike_fun = theano.function(
            inputs = [U],
            outputs = (T.gt(self.S[0,:],U)))

        #====================================================================
        # Propagate spikes
        #   inner connection:
        #   Param:: inputs: spike 0-1 vector (from spike_fun)
        #   for i in spk[0:Ne].nonzero()[0]:
        #       S[1,:] = Winner[i,:]+S[1,:]  (excitatory connection)
        #   for j in spk[Ne:].nonzero()[0]:
        #       S[2,:] = Winner[j,:]+S[2,:]  (inhibitory connection)
        vinner = T.fvector(name='vinner')
        def add_f1(i,p,q):
            # add row i of the weights q to the ge row of the state p
            new_p = T.inc_subtensor(p[1,:],q[i,:]) #ge
            return {p:new_p}
        def add_f2(i,p,q):
            # add row i of the weights q to the gi row of the state p
            new_p = T.inc_subtensor(p[2,:],q[i,:]) #gi
            return {p:new_p}
        # Only the update dictionaries of the scans are used; their outputs are empty.
        _,updates1 = theano.scan(fn=add_f1, sequences=vinner[0:Ne].nonzero()[0],non_sequences=[self.S,self.W_inner])
        # indices found in vinner[Ne:] are relative to Ne, so shift them back
        _,updates2 = theano.scan(fn=add_f2, sequences=vinner[Ne:].nonzero()[0]+self.Ne,non_sequences=[self.S,self.W_inner])
        # S = S+W
        self.S_inner1 = theano.function(inputs=[vinner],outputs=None,updates=updates1,allow_input_downcast=True)
        self.S_inner2 = theano.function(inputs=[vinner],outputs=None,updates=updates2,allow_input_downcast=True)
        #------------------------------------------
        # outer connection (input spikes): every active input adds its row of W_inp to ge
        voutter = T.fvector(name='voutter')
        _,updatesout1 = theano.scan(fn=add_f1,sequences=voutter.nonzero()[0],non_sequences=[self.S,self.W_inp])
        self.S_inp = theano.function(inputs=[voutter],outputs=None,updates=updatesout1,allow_input_downcast=True)

        #=====================================================================
        # Update Synapses (STDP | STDC)
        # Pre::  Apre += self.dApre, w+=Apost
        # Post:: Apost+=self.dApost, w+=Apre
        def add_synap_pre(i,p,po,s,q):
            # i :: sequence element (index of a spiking source)
            # p :: trace that is incremented, po :: trace that is added to the weights
            # s :: dApre | dApost
            # q :: W
            # Row i of p is incremented by s where q[i,:Ne] is non-zero, row i of
            # q is incremented by row i of po and the weights are clipped to [0,wmax].
            index = T.nonzero(q[i,:self.Ne])
            new_p = T.inc_subtensor(p[i,index],s)
            new_w = T.inc_subtensor(q[i,:],po[i,:])
            new_w = T.clip(new_w,0,self.wmax)
            return {p:new_p,q:new_w}

        def add_synap_post(i,po,p,s,q):
            # i:: sequence element (index of a spiking target)
            # po:: post
            # p:: pre
            # s:: dA
            # q:: W
            # Column i of po is incremented by s where q[:Ne,i] is non-zero, column i
            # of q is incremented by column i of p and the weights are clipped to [0,wmax].
            index = T.nonzero(q[:self.Ne,i])
            new_po = T.inc_subtensor(po[index,i],s)
            new_w = T.inc_subtensor(q[:,i],p[:,i])
            new_w = T.clip(new_w,0,self.wmax)
            return {po:new_po,q:new_w}

        # The same two step functions serve the inner and the input connections;
        # only the shared variables handed to scan differ.
        add_dA = T.fscalar('add_dA')
        _,updatesUinner_inner_pre = theano.scan(fn=add_synap_pre,sequences=vinner[:self.Ne].nonzero()[0],non_sequences=[self.Spre_inner,self.Spost_inner,add_dA,self.W_inner])
        self.USinner_f_inner_pre = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUinner_inner_pre,allow_input_downcast=True)

        _,updatesUinner_inner_post = theano.scan(fn=add_synap_post,sequences=vinner[:self.Ne].nonzero()[0],non_sequences=[self.Spost_inner,self.Spre_inner,add_dA,self.W_inner])
        self.USinner_f_inner_post = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUinner_inner_post,allow_input_downcast=True)

        _,updatesUSinner_inp_pre = theano.scan(fn=add_synap_pre,sequences=vinner.nonzero()[0],non_sequences=[self.Spre_inp,self.Spost_inp,add_dA,self.W_inp])
        self.USinner_f_inp_pre = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUSinner_inp_pre,allow_input_downcast=True)

        _,updatesUSinner_inp_post = theano.scan(fn=add_synap_post,sequences=vinner[:self.Ne].nonzero()[0],non_sequences=[self.Spost_inp,self.Spre_inp,add_dA,self.W_inp])
        self.USinner_f_inp_post = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUSinner_inp_post,allow_input_downcast=True)

        # Call reset function: set v (row 0 of S) to the given value for every spiking neuron
        def reset_v(index,vr):
            new_S = T.set_subtensor(self.S[0,index],vr)
            return{self.S:new_S}
        _,resetV_update = theano.scan(fn=reset_v,sequences=vinner.nonzero()[0],non_sequences=[U])
        self.resetV_f = theano.function(inputs=[vinner,U],outputs=None,updates=resetV_update,allow_input_downcast=True)

        # Set whole rows of S (selected by the index vector iv) to one value
        setvalue = T.fscalar('setvalue')
        iv = T.ivector('iv')
        def reset_state(i,value,state):
            new_state = T.set_subtensor(state[i,:],value)
            return {state:new_state}
        _,Upreset_S_state = theano.scan(fn=reset_state,sequences=iv,non_sequences=[setvalue,self.S])
        self.reset_S_fn = theano.function(inputs=[iv,setvalue],outputs=None,updates=Upreset_S_state)
    def __init__(self, We, char_embedd_table_initial, params):
        """Build the symbolic encoder-decoder graph and compile its functions.

        Args:
            We: initial word-embedding matrix; wrapped in a shared variable
                that is added to ``self.params``. Its second dimension is
                taken as the word-embedding size.
            char_embedd_table_initial: initial character-embedding table,
                wrapped in a shared variable and used as the weights of the
                character embedding layer.
            params: configuration object. This method reads ``eta``,
                ``num_labels``, ``en_hidden_size``, ``de_hidden_size``,
                ``lstm_layers_num``, ``char_embedd_dim``, ``char_dic``,
                ``num_filters`` and ``L2`` from it.

        Two Theano functions are compiled:
            ``self._train2`` returns ``[train_loss, train_predict]`` and
            applies the momentum updates; ``self._utter`` returns the argmax
            of ``train_predict`` over its last axis.

        Relies on names defined outside this method: ``LSTM``,
        ``dropout_layer``, ``init_xavier_uniform``, ``trng`` and
        ``Max_Char_Length``.
        """

        # NOTE(review): this local is never read; the loops below use
        # self.lstm_layers_num taken from params.
        lstm_layers_num = 1
        emb_size = We.shape[1]
        self.eta = params.eta
        self.num_labels = params.num_labels
        self.en_hidden_size = params.en_hidden_size
        self.de_hidden_size = params.de_hidden_size

        self.lstm_layers_num = params.lstm_layers_num
        self._train = None
        self._utter = None
        self.params = []
        self.encoder_lstm_layers = []
        self.decoder_lstm_layers = []
        # initial hidden / cell states of the decoder, one entry per layer
        self.hos = []
        self.Cos = []

        char_embedd_dim = params.char_embedd_dim
        char_dic_size = len(params.char_dic)
        char_embedd_table = theano.shared(char_embedd_table_initial)

        # Symbolic inputs of the graph; they are substituted through `givens`
        # when the functions are compiled at the end of this method.
        encoderInputs = tensor.imatrix()
        decoderInputs, decoderTarget = tensor.imatrices(2)
        encoderMask, TF, decoderMask, decoderInputs0 = tensor.fmatrices(4)

        char_input_var = tensor.itensor3(name='char-inputs')
        ci = tensor.itensor3()

        use_dropout = tensor.fscalar()
        use_dropout0 = tensor.fscalar()

        self.lookuptable = theano.shared(We)

        #### the last one is for the start symbol
        self.de_lookuptable = theano.shared(name="Decoder LookUpTable",
                                            value=init_xavier_uniform(
                                                self.num_labels + 1,
                                                self.de_hidden_size),
                                            borrow=True)

        # Output projection: its input is the decoder state concatenated with
        # the bidirectional encoder state (de_hidden_size + 2*en_hidden_size).
        self.linear = theano.shared(
            name="Linear",
            value=init_xavier_uniform(
                self.de_hidden_size + 2 * self.en_hidden_size,
                self.num_labels),
            borrow=True)
        # randn(...) * 0. yields an all-zero bias vector of length num_labels
        self.linear_bias = theano.shared(
            name="Hidden to Bias",
            value=np.asarray(np.random.randn(self.num_labels, ) * 0.,
                             dtype=theano.config.floatX),
            borrow=True)

        #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*en_hidden_size, self.de_hidden_size), borrow = True)

        #self.hidden_bias = theano.shared(
        #        name="Hidden to Bias",
        #        value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) ,
        #        borrow=True
        #        )

        #self.params += [self.linear, self.de_lookuptable, self.hidden_decode, self.hidden_bias]    #concatenate
        self.params += [
            self.lookuptable, self.linear, self.linear_bias,
            self.de_lookuptable
        ]  #the initial hidden state of decoder lstm is zeros
        #(max_sent_size, batch_size, hidden_size)
        state_below = self.lookuptable[encoderInputs.flatten()].reshape(
            (encoderInputs.shape[0], encoderInputs.shape[1], emb_size))

        # Character-level branch: embedding -> 1D convolution -> max pooling
        # over the whole convolution output, built with Lasagne layers.
        layer_char_input = lasagne.layers.InputLayer(shape=(None, None,
                                                            Max_Char_Length),
                                                     input_var=char_input_var,
                                                     name='char-input')

        # merge the two leading axes so every word is one row of characters
        layer_char = lasagne.layers.reshape(layer_char_input, (-1, [2]))
        layer_char_embedding = lasagne.layers.EmbeddingLayer(
            layer_char,
            input_size=char_dic_size,
            output_size=char_embedd_dim,
            W=char_embedd_table,
            name='char_embedding')

        # swap the last two axes so the embedding dimension comes before the
        # character position, as the convolution layer below consumes it
        layer_char = lasagne.layers.DimshuffleLayer(layer_char_embedding,
                                                    pattern=(0, 2, 1))

        # first get some necessary dimensions or parameters
        conv_window = 3
        num_filters = params.num_filters

        # construct convolution layer
        cnn_layer = lasagne.layers.Conv1DLayer(
            layer_char,
            num_filters=num_filters,
            filter_size=conv_window,
            pad='full',
            nonlinearity=lasagne.nonlinearities.tanh,
            name='cnn')
        # infer the pool size for pooling (pool size should go through all time step of cnn)
        _, _, pool_size = cnn_layer.output_shape

        # construct max pool layer
        pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer,
                                                   pool_size=pool_size)
        # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters]
        output_cnn_layer = lasagne.layers.reshape(
            pool_layer, (-1, encoderInputs.shape[0], [1]))

        # trainable parameters of the character branch join the model parameters
        char_params = lasagne.layers.get_all_params(output_cnn_layer,
                                                    trainable=True)
        self.params += char_params

        char_state_below = lasagne.layers.get_output(output_cnn_layer)

        char_state_below = dropout_layer(char_state_below, use_dropout, trng)

        # bring the character features into the axis order of the word
        # embeddings and append them along the feature axis
        char_state_shuff = char_state_below.dimshuffle(1, 0, 2)
        state_below = tensor.concatenate([state_below, char_state_shuff],
                                         axis=2)
        state_below = dropout_layer(state_below, use_dropout, trng)

        # Encoder: per layer one forward and one backward LSTM whose outputs
        # are concatenated on the feature axis.
        # NOTE(review): every layer is created with input size
        # emb_size + num_filters, but from the second layer on the input is
        # `hs` (presumably 2*en_hidden_size wide) - confirm against LSTM
        # before using lstm_layers_num > 1.
        for _ in range(self.lstm_layers_num):

            enclstm_f = LSTM(emb_size + num_filters, self.en_hidden_size)
            enclstm_b = LSTM(emb_size + num_filters, self.en_hidden_size, True)
            self.encoder_lstm_layers.append(enclstm_f)  #append
            self.encoder_lstm_layers.append(enclstm_b)  #append
            self.params += enclstm_f.params + enclstm_b.params  #concatenate

            hs_f, Cs_f = enclstm_f.forward(state_below, encoderMask)
            hs_b, Cs_b = enclstm_b.forward(state_below, encoderMask)

            hs = tensor.concatenate([hs_f, hs_b], axis=2)
            Cs = tensor.concatenate([Cs_f, Cs_b], axis=2)
            # hs0 / Cs0 are only needed by the commented-out initialisation
            # below; both names are rebound after the decoder is built
            hs0 = tensor.concatenate([hs_f[-1], hs_b[0]], axis=1)
            Cs0 = tensor.concatenate([Cs_f[-1], Cs_b[0]], axis=1)
            #self.hos += tensor.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias),
            #self.Cos += tensor.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias),
            # The trailing comma makes a 1-tuple, so `+=` appends one all-zero
            # (batch, de_hidden_size) initial state per layer.
            self.hos += tensor.alloc(
                np.asarray(0., dtype=theano.config.floatX),
                encoderInputs.shape[1], self.de_hidden_size),
            self.Cos += tensor.alloc(
                np.asarray(0., dtype=theano.config.floatX),
                encoderInputs.shape[1], self.de_hidden_size),
            state_below = hs

        Encoder = state_below

        # Decoder fed with the given decoder inputs. This loop also creates
        # the decoder LSTM layers that _step2 reuses further down.
        state_below = self.de_lookuptable[decoderInputs.flatten()].reshape(
            (decoderInputs.shape[0], decoderInputs.shape[1],
             self.de_hidden_size))
        for i in range(self.lstm_layers_num):
            declstm = LSTM(self.de_hidden_size, self.de_hidden_size)
            self.decoder_lstm_layers += declstm,  #append
            self.params += declstm.params  #concatenate
            ho, Co = self.hos[i], self.Cos[i]
            state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co)

        ##### Here we include the representation from the decoder
        decoder_lstm_outputs = tensor.concatenate([state_below, Encoder],
                                                  axis=2)

        ei, di, dt = tensor.imatrices(3)  #place holders
        em, dm, tf, di0 = tensor.fmatrices(4)
        #####################################################
        #####################################################
        linear_outputs = tensor.dot(decoder_lstm_outputs,
                                    self.linear) + self.linear_bias[None,
                                                                    None, :]
        # softmax is applied separately to each slice along the first axis
        softmax_outputs, _ = theano.scan(
            fn=lambda x: tensor.nnet.softmax(x),
            sequences=[linear_outputs],
        )

        # Masked negative log-likelihood of the target labels for one step.
        def _NLL(pred, y, m):
            return -m * tensor.log(pred[tensor.arange(encoderInputs.shape[1]),
                                        y])

        # NOTE(review): `loss` (built from the given decoder inputs) is only
        # referenced by the commented-out `_train` function below.
        costs, _ = theano.scan(
            fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask])
        loss = costs.sum() / decoderMask.sum() + params.L2 * sum(
            lasagne.regularization.l2(x) for x in self.params)

        #updates = lasagne.updates.adam(loss, self.params, self.eta)
        #updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9)

        ###################################################
        #### using the ground truth when training
        ##################################################
        #self._train = theano.function(
        #	inputs=[ei, em, di, dm, dt],
        #	outputs=[loss, softmax_outputs],
        #	updates=updates,
        #	givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt}
        #	)

        #########################################################################
        ### For schedule sampling
        #########################################################################

        ###### always use previous prediction as next input
        # One decoding step: ctx_ is the encoder output for this position,
        # state_ the previous output distribution, hs_ / Cs_ the stacked
        # per-layer hidden and cell states of the previous step.
        def _step2(ctx_, state_, hs_, Cs_):

            hs, Cs = [], []
            # label with the highest probability in the previous step
            token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")
            # all-ones mask with a leading broadcastable axis
            msk_ = tensor.fill(
                (tensor.zeros_like(token_idxs, dtype="float32")), 1.)
            msk_ = msk_.dimshuffle('x', 0)
            state_below0 = self.de_lookuptable[token_idxs].reshape(
                (1, encoderInputs.shape[1], self.de_hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below0, msk_, hs_[i],
                                    Cs_[i])  #mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below0 = h

            hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(
                Cs)
            state_below0 = state_below0.reshape(
                (encoderInputs.shape[1], self.de_hidden_size))
            state_below0 = tensor.concatenate([ctx_, state_below0], axis=1)
            newpred = tensor.dot(state_below0,
                                 self.linear) + self.linear_bias[None, :]
            state_below = tensor.nnet.softmax(newpred)
            ##### the beginning symbol probability is 0
            # NOTE(review): extra_p.T has one column per decoder layer, so the
            # output only has num_labels + 1 columns with a single layer - confirm.
            extra_p = tensor.zeros_like(hs[:, :, 0])
            state_below = tensor.concatenate([state_below, extra_p.T], axis=1)

            return state_below, hs, Cs

        # stack the per-layer initial states into single tensors for scan
        hs0, Cs0 = tensor.as_tensor_variable(
            self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos,
                                                             name="Cs0")
        # run _step2 over the encoder outputs, starting from decoderInputs0
        train_outputs, _ = theano.scan(fn=_step2,
                                       sequences=[Encoder],
                                       outputs_info=[decoderInputs0, hs0, Cs0],
                                       n_steps=encoderInputs.shape[0])

        train_predict = train_outputs[0]
        train_costs, _ = theano.scan(
            fn=_NLL, sequences=[train_predict, decoderTarget, decoderMask])

        # mean masked NLL plus the L2 penalty over all parameters
        train_loss = train_costs.sum() / decoderMask.sum() + params.L2 * sum(
            lasagne.regularization.l2(x) for x in self.params)

        #from adam import adam
        #train_updates = adam(train_loss, self.params, self.eta)
        #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9)
        #train_updates = lasagne.updates.sgd(train_loss, self.params, self.eta)
        #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9)
        # project-local module, imported here at function scope
        from momentum import momentum
        train_updates = momentum(train_loss,
                                 self.params,
                                 params.eta,
                                 momentum=0.9)

        # Training function: returns the loss and the per-step distributions
        # and applies train_updates on every call.
        self._train2 = theano.function(
            inputs=[ei, ci, em, di0, dm, dt, use_dropout0],
            outputs=[train_loss, train_predict],
            updates=train_updates,
            givens={
                encoderInputs: ei,
                char_input_var: ci,
                encoderMask: em,
                decoderInputs0: di0,
                decoderMask: dm,
                decoderTarget: dt,
                use_dropout: use_dropout0
            }
            #givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt, TF:tf}
        )

        # Prediction function: argmax over the last axis of the per-step
        # distributions; it has no updates.
        listof_token_idx = train_predict.argmax(axis=-1)
        self._utter = theano.function(inputs=[ei, ci, em, di0, use_dropout0],
                                      outputs=listof_token_idx,
                                      givens={
                                          encoderInputs: ei,
                                          char_input_var: ci,
                                          encoderMask: em,
                                          decoderInputs0: di0,
                                          use_dropout: use_dropout0
                                      })
def UnitTest_OnestepAttend():
	"""Compile onestep_attend_tell on fixed inputs and print its output shapes.

	All inputs are deterministic np.linspace / np.zeros arrays, so repeated
	runs feed identical values. The function prints the shapes of the first
	three outputs and returns nothing.

	The interactive pdb breakpoints of the earlier version were removed so
	that the check runs to completion without a debugger attached.
	"""
	N = 2 #number of sample
	D = 5 #dimension of input
	H = 4 #dimension of hidden
	T_new = 1 #length of per each sample
	context_dim = 3
	K = 5

	# numeric inputs
	x = np.linspace(-0.4, 0.6, num=N*T_new*D, dtype = theano.config.floatX).reshape(T_new, N, D)
	h0= np.linspace(-0.4, 0.8, num=N*H, dtype = theano.config.floatX).reshape(N, H)
	# the four gate matrices are stored side by side and sliced apart at call time
	Wx= np.linspace(-0.2, 0.9, num=4*D*H, dtype = theano.config.floatX).reshape(D, 4*H)
	Wh= np.linspace(-0.3,0.6, num =4*H*H, dtype = theano.config.floatX).reshape(H,4*H)
	b = np.linspace(0.0, 0.0, num = 4*H, dtype = theano.config.floatX)
	Wz= np.linspace(-0.3, 0.6, num=4*H*context_dim, dtype = theano.config.floatX).reshape(context_dim, 4*H)
	Hcontext = np.linspace(-0.2, 0.6, num=H*K, dtype = theano.config.floatX).reshape(H, K)
	Zcontext = np.linspace(-0.2, 0.5, num=context_dim*K, dtype= theano.config.floatX).reshape(context_dim, K)
	Va= np.linspace(0.1, 0.4, num=K, dtype = theano.config.floatX)
	Va_reshape = Va.reshape(K,1)

	image_feature_3D = np.linspace(-0.2, 0.5, num=10*N*context_dim, dtype = theano.config.floatX).reshape(N,10, context_dim)

	# initial states get a leading axis of length 1
	h0_theano = h0.reshape(1, N, H)
	c0_theano = np.zeros((1, N, H), dtype = theano.config.floatX)
	z0_theano = np.zeros((1, N, context_dim), dtype = theano.config.floatX)

	image_feature_input = image_feature_3D

	weight_y_in_value = np.zeros(( 10, context_dim) , dtype= theano.config.floatX)
	b_theano= b.reshape(1, 1, 4*H)

	#symbolic variables
	# NOTE(review): several of them are fixed to float32 (fmatrix, ftensor3,
	# fcol) while the values above use theano.config.floatX - confirm the
	# check is only run with floatX=float32.
	initial_h0_layer_out = theano.tensor.tensor3(name = 'h0_initial', dtype = theano.config.floatX)
	initial_c0_layer_out = theano.tensor.tensor3(name = 'c0_initial', dtype = theano.config.floatX)
	initial_z0 = T.tensor3(name= 'z0_initial', dtype = theano.config.floatX)
	weight_y_in = theano.tensor.fmatrix("weight_y")
	input_data = theano.tensor.tensor3(name ='x', dtype=theano.config.floatX)
	image_feature_region = theano.tensor.tensor3(name = 'feature_region', dtype = theano.config.floatX)

	Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym = T.fmatrices(12)
	Zcontext_sym, Hcontext_sym = T.fmatrices(2)
	bi  = T.ftensor3("bi")
	bf  = T.ftensor3("bf")
	bc  = T.ftensor3("bc")
	bo  = T.ftensor3("bo")
	Va_sym = T.fcol("Va")

	# the symbolic and the numeric argument lists below must stay in the same order
	out_sym = onestep_attend_tell(input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0,
		Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym,
		Zcontext_sym, Hcontext_sym, Va_sym,
		bi, bf, bc, bo, image_feature_region, weight_y_in)

	onestep_func = theano.function([input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0,
		Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym,
		Zcontext_sym, Hcontext_sym, Va_sym,
		bi, bf, bc, bo, image_feature_region, weight_y_in], out_sym)

	# column blocks of width H select the i, f, c and o gate parameters
	list_output = onestep_func(x, h0_theano, c0_theano, z0_theano,
		Wx[:, :H], Wx[:, H:2*H], Wx[:, 2*H:3*H], Wx[:, 3*H:],
		Wh[:, :H], Wh[:, H:2*H], Wh[:, 2*H:3*H], Wh[:, 3*H:],
		Wz[:, :H], Wz[:, H:2*H], Wz[:, 2*H:3*H], Wz[:, 3*H:],
		Zcontext,Hcontext,
		Va_reshape,
		b_theano[:,: , :H], b_theano[:, :, H:2*H], b_theano[:, :, 2*H:3*H], b_theano[:, :, 3*H:],
		image_feature_input, weight_y_in_value)

	print(list_output[0].shape)
	print(list_output[1].shape)
	print(list_output[2].shape)
Пример #24
0
def find_all_step_visible_data(all_step_original_data_shared,
                               all_step_visible_data_shared,
                               sigmas_shared,
                               N,
                               steps,
                               output_dims,
                               n_epochs,
                               initial_lr,
                               final_lr,
                               lr_switch,
                               initial_momentum,
                               final_momentum,
                               momentum_switch,
                               penalty_lambda,
                               metric,
                               verbose=0):
    """Fit the visible data of every step jointly by momentum gradient descent.

    The objective is the sum of ``cost_var`` over all steps plus
    ``penalty_lambda`` times ``movement_penalty`` of the visible data. Each
    epoch first refreshes one velocity matrix per step and then adds those
    velocities to the visible data held in ``all_step_visible_data_shared``.

    Args:
        all_step_original_data_shared: per-step values substituted for the
            float32 matrix inputs of ``cost_var``; indexed ``0..steps-1``.
        all_step_visible_data_shared: per-step shared variables that are
            optimized in place; indexed ``0..steps-1``.
        sigmas_shared: per-step values substituted for float32 vectors.
        N: first dimension of each velocity matrix; also forwarded to
            ``movement_penalty``.
        steps: number of steps optimized simultaneously.
        output_dims: second dimension of each velocity matrix.
        n_epochs: number of descent iterations.
        initial_lr, final_lr: learning rate before / from epoch ``lr_switch``.
        lr_switch: epoch index at which ``final_lr`` takes effect.
        initial_momentum, final_momentum: momentum before / from epoch
            ``momentum_switch``.
        momentum_switch: epoch index at which ``final_momentum`` takes effect.
        penalty_lambda: weight of the movement penalty.
        metric: forwarded unchanged to ``cost_var``.
        verbose: when truthy, print the cost after every epoch.

    Returns:
        A list with ``steps`` NumPy arrays (dtype ``floath``) holding the
        final visible data of each step.
    """

    def _as_floath(value):
        # Hyper-parameters are stored as arrays of the module's float dtype.
        return np.array(value, dtype=floath)

    initial_lr = _as_floath(initial_lr)
    final_lr = _as_floath(final_lr)
    initial_momentum = _as_floath(initial_momentum)
    final_momentum = _as_floath(final_momentum)

    # Symbolic scalars and the shared storage that backs them through givens.
    lr = T.fscalar('lr')
    lr_shared = theano.shared(initial_lr)
    momentum = T.fscalar('momentum')
    momentum_shared = theano.shared(initial_momentum)
    penalty_lambda_var = T.fscalar('penalty_lambda')
    penalty_lambda_shared = theano.shared(_as_floath(penalty_lambda))

    # One independent, zero-initialised velocity matrix per step.
    zero_velocities = np.zeros((N, output_dims), dtype=floath)
    velocity_shared = [
        theano.shared(np.array(zero_velocities)) for _ in range(steps)
    ]

    # Symbolic placeholders, one of each kind per step.
    original_vars = T.fmatrices(steps)
    visible_vars = T.fmatrices(steps)
    velocity_vars = T.fmatrices(steps)
    sigmas_vars = T.fvectors(steps)

    step_costs = [
        cost_var(original_vars[t], visible_vars[t], sigmas_vars[t], metric)
        for t in range(steps)
    ]
    cost = T.sum(step_costs) + penalty_lambda_var * movement_penalty(
        visible_vars, N)
    gradients = T.grad(cost, visible_vars)

    # Velocity refresh: v <- momentum * v - lr * dC/dY, for every step.
    velocity_givens = {
        lr: lr_shared,
        momentum: momentum_shared,
        penalty_lambda_var: penalty_lambda_shared,
    }
    velocity_updates = []
    for t in range(steps):
        new_velocity = momentum * velocity_vars[t] - lr * gradients[t]
        velocity_updates.append((velocity_shared[t], new_velocity))
        velocity_givens[original_vars[t]] = all_step_original_data_shared[t]
        velocity_givens[visible_vars[t]] = all_step_visible_data_shared[t]
        velocity_givens[velocity_vars[t]] = velocity_shared[t]
        velocity_givens[sigmas_vars[t]] = sigmas_shared[t]

    update_Yvs = theano.function([],
                                 cost,
                                 givens=velocity_givens,
                                 updates=velocity_updates)

    # Position refresh: Y <- Y + v, for every step.
    position_givens = dict()
    position_updates = []
    for t in range(steps):
        position_updates.append(
            (all_step_visible_data_shared[t],
             visible_vars[t] + velocity_vars[t]))
        position_givens[visible_vars[t]] = all_step_visible_data_shared[t]
        position_givens[velocity_vars[t]] = velocity_shared[t]

    update_all_step_visible_data = theano.function([], [],
                                                   givens=position_givens,
                                                   updates=position_updates)

    # Descent loop; the schedule switches take effect at the given epochs.
    for epoch in range(n_epochs):
        if epoch == lr_switch:
            lr_shared.set_value(final_lr)
        if epoch == momentum_switch:
            momentum_shared.set_value(final_momentum)

        epoch_cost = update_Yvs()
        update_all_step_visible_data()
        if verbose:
            print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1,
                                                      float(epoch_cost)))

    return [
        np.array(all_step_visible_data_shared[t].get_value(), dtype=floath)
        for t in range(steps)
    ]
Пример #25
0
def SGD(eta,
        n_epochs,
        valid_steps,
        momentum,
        low,
        high,
        init,
        random_init='gaussian'):
    t0 = time.time()
    index = T.iscalar('index')
    x, y, z, alpha = T.fmatrices('x', 'y', 'z', 'alpha')
    n_minibatch = max_minibatch - 2
    model = Model(n_tree, n_nodes, low, high, init, random_init)
    model_op, auto_upd = model.op(x)
    valid_op, valid_upd = model.valid_op(z, valid_steps)

    loss = model.loss(y, model_op)
    valid_loss = model.loss(alpha, valid_op)

    print "Updation to be compiled yet"

    params = model.params
    train_upd = gradient_updates_momentum(loss, params, eta,
                                          momentum) + auto_upd
    train_output = [model_op, loss]
    valid_output = [valid_op, valid_loss]

    print "Train function to be compiled"
    train_fn = theano.function(
        [index],
        train_output,
        updates=train_upd,
        givens={
            x: train_x[:, n_in * index:n_in * (index + 1)],
            y: train_x[:, (n_in * index + n_tree):(n_in * (index + 1) + 1)]
        },
        name='train_fn')

    valid_fn = theano.function(
        [index],
        valid_output,
        updates=valid_upd,
        givens={
            z:
            train_x[:, n_tree * index:n_tree * (index + 1)],
            alpha:
            train_x[:, (n_in * index + n_tree):(n_in * index + n_tree +
                                                valid_steps)]
        },
        name='valid_fn')

    print "Train function compiled"

    # Compilation over
    #################
    ## TRAIN MODEL ##
    #################
    print 'The compilation time is', time.time() - t0
    loss_list = []
    for i in range(n_epochs):
        epoch_loss = 0

        t1 = time.time()
        for idx in range(n_minibatch):
            print 'The current idx is ', idx, ' and the epoch number is  ', i
            output, loss_ = train_fn(idx)[:-1], train_fn(idx)[-1]
            if idx % 500 == 0:
                v_output, v_loss = valid_fn(idx / 500)[:-1][0], valid_fn(
                    idx / 500)[-1]
                print 'v_pred is', ' '.join(
                    [mappings_words[prediction(abc)] for abc in v_output])
                print 'v_loss is', np.array(v_loss)
            print 'The loss is', loss_
            epoch_loss += loss_
            loss_list.append(loss_)

            print '==' * 20
        print 'The mean loss for the epoch was', epoch_loss / float(
            n_minibatch)
        print 'Time taken by this epoch is', time.time() - t1
        print '-' * 50
    pyplot.plot(loss_list)
    pyplot.show()
Пример #26
0
'''
A theano implementation of the T-LSTM
'''

import theano.tensor as T
from theano import function
import numpy as np
import collections
import pdb
import os
#np.seterr(under='warn')
# Symbolic float32 placeholders shared by every helper compiled below:
# two vectors (h, b) and two matrices (W, X).
h, b = T.fvectors('h', 'b')
W, X = T.fmatrices('W', 'X')

# NOTE: the vector arguments must already be 1-D. Earlier drafts wrapped the
# compiled functions in lambdas that called `.squeeze()` on their arguments;
# those wrappers are not active.

# Inner product of two vectors.
dotvec = function(inputs=[h, b], outputs=T.dot(h, b))

# Matrix-vector product.
dot = function(inputs=[W, h], outputs=T.dot(W, h))

# Matrix-matrix product.
dotW = function(inputs=[W, X], outputs=T.dot(W, X))

# Affine map W.h + b.
layer = function(inputs=[W, h, b], outputs=T.dot(W, h) + b)

# Element-wise non-linearities on a vector.
sigmoid = function(inputs=[h], outputs=T.nnet.ultra_fast_sigmoid(h))
tanh = function(inputs=[h], outputs=T.tanh(h))

# Element-wise sum of two vectors.
add = function(inputs=[h, b], outputs=h + b)
Пример #27
0
    def __init__(self, We, params):
        """Build the encoder/decoder graph and compile its Theano functions.

        Args:
            We: embedding matrix; it becomes the shared encoder lookup table
                and ``We.shape[1]`` is used as the encoder hidden size.
            params: object providing ``eta``, ``num_labels``,
                ``de_hidden_size`` and ``lstm_layers_num``.

        Compiles ``self._train`` (decoder fed with the given decoder inputs),
        ``self._train2`` (decoder fed with its own previous prediction) and
        ``self._utter`` (arg-max label indices of the self-fed decoder).
        """

        # NOTE(review): this local is never read; the layer count actually
        # used below is self.lstm_layers_num, taken from params.
        lstm_layers_num = 1
        en_hidden_size = We.shape[1]
        self.eta = params.eta
        self.num_labels = params.num_labels
        self.en_hidden_size = en_hidden_size
        self.de_hidden_size = params.de_hidden_size

        self.lstm_layers_num = params.lstm_layers_num
        self._train = None
        self._utter = None
        self.params = []
        self.encoder_lstm_layers = []
        self.decoder_lstm_layers = []
        # Per-layer initial decoder hidden / cell states derived from the encoder.
        self.hos = []
        self.Cos = []

        # Graph inputs. TF is created here but not used in this method.
        encoderInputs = tensor.imatrix()
        decoderInputs, decoderTarget = tensor.imatrices(2)
        encoderMask, TF, decoderMask, decoderInputs0 = tensor.fmatrices(4)

        # Encoder embeddings; not added to self.params, so not updated by adam.
        self.lookuptable = theano.shared(We)

        # Decoder embeddings: num_labels rows plus one extra row for the
        # start symbol.
        self.de_lookuptable = theano.shared(name="Decoder LookUpTable",
                                            value=init_xavier_uniform(
                                                self.num_labels + 1,
                                                self.de_hidden_size),
                                            borrow=True)

        # Projection from decoder hidden states to label scores.
        self.linear = theano.shared(name="Linear",
                                    value=init_xavier_uniform(
                                        self.de_hidden_size, self.num_labels),
                                    borrow=True)

        # Projection from the concatenated (2 * en_hidden_size) encoder state
        # to the decoder's initial state.
        self.hidden_decode = theano.shared(name="Hidden to Decode",
                                           value=init_xavier_uniform(
                                               2 * en_hidden_size,
                                               self.de_hidden_size),
                                           borrow=True)

        # Zero-initialised bias: the random draw is multiplied by 0., which
        # still consumes values from NumPy's global random stream.
        self.hidden_bias = theano.shared(
            name="Hidden to Bias",
            value=np.asarray(np.random.randn(self.de_hidden_size, ) * 0.,
                             dtype=theano.config.floatX),
            borrow=True)

        self.params += [
            self.linear, self.de_lookuptable, self.hidden_decode,
            self.hidden_bias
        ]  # list concatenation

        # Embed the encoder tokens: (max_sent_size, batch_size, hidden_size)
        state_below = self.lookuptable[encoderInputs.flatten()].reshape(
            (encoderInputs.shape[0], encoderInputs.shape[1],
             self.en_hidden_size))
        for _ in range(self.lstm_layers_num):

            # Two LSTMs per encoder layer; the second is built with an extra
            # True argument (presumably the reverse direction -- confirm
            # against the LSTM class).
            enclstm_f = LSTM(self.en_hidden_size)
            enclstm_b = LSTM(self.en_hidden_size, True)
            self.encoder_lstm_layers.append(enclstm_f)  # append
            self.encoder_lstm_layers.append(enclstm_b)  # append
            self.params += enclstm_f.params + enclstm_b.params  # concatenate

            hs_f, Cs_f = enclstm_f.forward(state_below, encoderMask)
            hs_b, Cs_b = enclstm_b.forward(state_below, encoderMask)

            # Join both LSTM outputs along the feature axis.
            hs = tensor.concatenate([hs_f, hs_b], axis=2)
            Cs = tensor.concatenate([Cs_f, Cs_b], axis=2)
            # `lst += x,` appends x via a one-element tuple. The last time
            # step of each state is projected to the decoder size.
            self.hos += tensor.tanh(
                tensor.dot(hs[-1], self.hidden_decode) + self.hidden_bias),
            self.Cos += tensor.tanh(
                tensor.dot(Cs[-1], self.hidden_decode) + self.hidden_bias),
            state_below = hs

        # Embed the decoder input tokens and run the decoder stack, each layer
        # starting from the matching encoder-derived state.
        state_below = self.de_lookuptable[decoderInputs.flatten()].reshape(
            (decoderInputs.shape[0], decoderInputs.shape[1],
             self.de_hidden_size))
        for i in range(self.lstm_layers_num):
            declstm = LSTM(self.de_hidden_size)
            self.decoder_lstm_layers += declstm,  # append
            self.params += declstm.params  # concatenate
            ho, Co = self.hos[i], self.Cos[i]
            state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co)

        decoder_lstm_outputs = state_below

        # Placeholders used as the compiled functions' explicit inputs and
        # mapped onto the graph inputs through `givens`. tf is not used.
        ei, di, dt = tensor.imatrices(3)  # place holders
        em, dm, tf, di0 = tensor.fmatrices(4)
        #####################################################
        #####################################################
        # Label scores, then a softmax applied independently per time step.
        linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear)
        softmax_outputs, updates = theano.scan(
            fn=lambda x: tensor.nnet.softmax(x),
            sequences=[linear_outputs],
        )

        def _NLL(pred, y, m):
            # Masked negative log-probability of label y for each batch row;
            # the batch size is read from encoderInputs.shape[1].
            return -m * tensor.log(pred[tensor.arange(encoderInputs.shape[1]),
                                        y])

        costs, _ = theano.scan(
            fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask])
        # Average over the unmasked positions.
        loss = costs.sum() / decoderMask.sum()

        # Replaces the `updates` returned by the softmax scan above.
        updates = lasagne.updates.adam(loss, self.params, self.eta)
        #updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9)

        ###################################################
        #### using the ground truth when training
        ##################################################
        self._train = theano.function(inputs=[ei, em, di, dm, dt],
                                      outputs=[loss, softmax_outputs],
                                      updates=updates,
                                      givens={
                                          encoderInputs: ei,
                                          encoderMask: em,
                                          decoderInputs: di,
                                          decoderMask: dm,
                                          decoderTarget: dt
                                      })

        #########################################################################
        ### For schedule sampling
        #########################################################################

        ###### always use the previous prediction as the next input
        def _step2(state_, hs_, Cs_):
            # One decoding step: state_ is the previous output distribution,
            # hs_ / Cs_ hold the per-layer hidden and cell states.

            hs, Cs = [], []
            # Token chosen at the previous step.
            token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")
            # All-ones mask with a leading broadcast axis for the single step.
            msk_ = tensor.fill(
                (tensor.zeros_like(token_idxs, dtype="float32")), 1)
            msk_ = msk_.dimshuffle('x', 0)
            state_below0 = self.de_lookuptable[token_idxs].reshape(
                (1, encoderInputs.shape[1], self.de_hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below0, msk_, hs_[i],
                                    Cs_[i])  # mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below0 = h

            # Stack the per-layer states back into single tensors for scan.
            hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(
                Cs)

            newpred = tensor.dot(state_below0, self.linear).reshape(
                (encoderInputs.shape[1], self.num_labels))
            state_below = tensor.nnet.softmax(newpred)

            return state_below, hs, Cs

        hs0, Cs0 = tensor.as_tensor_variable(
            self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos,
                                                             name="Cs0")
        # The self-fed decoder runs for as many steps as the encoder input has
        # rows (encoderInputs.shape[0]).
        train_outputs, _ = theano.scan(fn=_step2,
                                       outputs_info=[decoderInputs0, hs0, Cs0],
                                       n_steps=encoderInputs.shape[0])

        train_predict = train_outputs[0]
        train_costs, _ = theano.scan(
            fn=_NLL, sequences=[train_predict, decoderTarget, decoderMask])

        train_loss = train_costs.sum() / decoderMask.sum()

        train_updates = lasagne.updates.adam(train_loss, self.params, self.eta)
        #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9)

        # Training function for the self-fed decoder; di0 supplies the initial
        # output distribution instead of ground-truth decoder inputs.
        self._train2 = theano.function(
            inputs=[ei, em, di0, dm, dt],
            outputs=[train_loss, train_predict],
            updates=train_updates,
            givens={
                encoderInputs: ei,
                encoderMask: em,
                decoderInputs0: di0,
                decoderMask: dm,
                decoderTarget: dt
            }
            #givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt, TF:tf}
        )

        # Inference: arg-max label index at every step of the self-fed decoder.
        listof_token_idx = train_predict.argmax(axis=-1)
        self._utter = theano.function(inputs=[ei, em, di0],
                                      outputs=listof_token_idx,
                                      givens={
                                          encoderInputs: ei,
                                          encoderMask: em,
                                          decoderInputs0: di0
                                      })
Пример #28
0
import mnist

def init_weights(n_in, n_out):
    """Return a shared ``(n_in, n_out)`` weight matrix of scaled Gaussian noise.

    Entries are standard-normal draws divided by ``sqrt(n_in)`` and stored
    with dtype ``theano.config.floatX``.
    """
    gaussian = np.random.randn(n_in, n_out)
    scaled = gaussian / np.sqrt(n_in)
    as_floatx = np.asarray(scaled, dtype=theano.config.floatX)
    return theano.shared(as_floatx)

def feed_forward(X, w_h, w_o):
    """Return the symbolic softmax output of a one-hidden-layer network.

    ``X`` is multiplied by ``w_h`` and passed through a sigmoid; the result
    is multiplied by ``w_o`` and passed through a softmax. No bias terms.
    """
    hidden_pre = T.dot(X, w_h)
    hidden = T.nnet.sigmoid(hidden_pre)
    scores = T.dot(hidden, w_o)
    return T.nnet.softmax(scores)

# Load train/test splits; labels are requested one-hot encoded.
trX, trY, teX, teY = mnist.load_data(one_hot=True)

# 784 -> 100 -> 10 network weights (no biases) and training hyper-parameters.
w_h, w_o = init_weights(28*28, 100), init_weights(100, 10)
num_epochs, batch_size, learn_rate = 30, 10, 0.2

# Symbolic float32 inputs (X) and one-hot targets (Y); y_ is the softmax output.
X, Y = T.fmatrices('X', 'Y')
y_ = feed_forward(X, w_h, w_o)

weights = [w_h, w_o]
# Gradients of the mean categorical cross-entropy w.r.t. both weight matrices.
grads = T.grad(cost=T.nnet.categorical_crossentropy(y_, Y).mean(), wrt=weights)
# One plain gradient-descent step per call; the function has no outputs.
train = theano.function(
    inputs=[X, Y],
    updates=[[w, w - g * learn_rate] for w, g in zip(weights, grads)],
    allow_input_downcast=True)
# NOTE(review): unlike `train`, this is compiled without
# allow_input_downcast, so teX presumably must already be float32 -- confirm.
predict = theano.function(inputs=[X], outputs=T.argmax(y_, axis=1))

# Mini-batch training; after each epoch print the epoch index and test accuracy.
for i in range(num_epochs):
    for j in xrange(0, len(trX), batch_size):
        train(trX[j:j+batch_size], trY[j:j+batch_size])
    print i, np.mean(predict(teX) == np.argmax(teY, axis=1))
Пример #29
0
    def __init__(self,
                 state       = 'x',
                 measurement = 'z',
                 motion_transition      = None,
                 measurement_transition = None):
        """Build the symbolic Kalman-style predict/update graph and compile it.

        Args:
            state: space-separated names of the state components; only their
                count is used (``self.N``).
            measurement: space-separated names of the measurement components;
                only their count is used (``self.M``).
            motion_transition: optional array-like stored as
                ``self.motion_transition``; defaults to the ``N x N`` identity.
            measurement_transition: optional array-like stored as
                ``self.measurement_transition``; defaults to the ``M x M``
                identity.

        Compiles ``self.prediction(X, P, Q, F, dt) -> [X_, P_]`` and
        ``self.update(X, Z, P, Q, R, F, H, dt) -> [X__, P__]``.
        """

        self.N = len(state.split(' '))
        self.M = len(measurement.split(' '))

        # Symbolic inputs: state X, measurement Z, covariances P/Q/R,
        # transition matrices F/H and the time step dt.
        self.X, self.Z         = T.fvectors('X','Z')
        self.P, self.Q, self.R = T.fmatrices('P','Q','R')
        self.F, self.H         = T.matrices('F','H')
        self.dt                = T.scalar('dt')

        # Prediction: propagate the state, then the covariance through the
        # Jacobian of the propagated state, adding process noise scaled by dt.
        self.X_  = T.dot(self.F, self.X)
        self.fX_ = G.jacobian(T.flatten(self.X_), self.X)
        self.P_  = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + self.dt * self.Q

        # Innovation: measurement minus the predicted measurement.
        self.h = T.dot(self.H, self.X_)
        self.y = self.Z - self.h

        self.hX_ = G.jacobian(self.h, self.X_)

        self.matrix_inv = T.nlinalg.MatrixInverse()

        # Innovation covariance S and gain K.
        self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
        self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

        # Corrected state and covariance.
        self.X__ = self.X_ + T.dot(self.K, self.y)
        self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)

        self.prediction = theano.function(inputs  = [self.X,
                                                     self.P,
                                                     self.Q,
                                                     self.F,
                                                     self.dt],
                                          outputs = [self.X_,
                                                     self.P_],
                                          allow_input_downcast = True)

        self.update = theano.function(inputs  = [self.X,
                                                 self.Z,
                                                 self.P,
                                                 self.Q,
                                                 self.R,
                                                 self.F,
                                                 self.H,
                                                 self.dt],
                                      outputs = [self.X__,
                                                 self.P__],
                                      allow_input_downcast = True)

        # `is None` rather than `== None`: comparing an ndarray argument with
        # `==` is element-wise and makes the `if` raise on ambiguous truth.
        if motion_transition is None:
            self.motion_transition = np.eye(self.N)
        else:
            self.motion_transition = np.array(motion_transition)

        if measurement_transition is None:
            self.measurement_transition = np.eye(self.M)
        else:
            # Previously this stored a copy of `motion_transition` by mistake.
            self.measurement_transition = np.array(measurement_transition)
Пример #30
0
def find_Ys(Xs_shared,
            Ys_shared,
            sigmas_shared,
            N,
            steps,
            output_dims,
            n_epochs,
            initial_lr,
            final_lr,
            lr_switch,
            init_stdev,
            initial_momentum,
            final_momentum,
            momentum_switch,
            lmbda,
            metric,
            verbose=0):
    """Minimise the total cost over all ``Ys[t]`` at once with momentum descent.

    The cost is the sum of ``cost_var`` over every step plus ``lmbda`` times
    ``movement_penalty(Ys, N)``. Every epoch updates one velocity matrix per
    step and then adds it to the matching entry of ``Ys_shared`` in place.

    Args:
        Xs_shared, Ys_shared, sigmas_shared: per-step values indexed
            ``0..steps-1``; ``Ys_shared`` entries are shared variables that
            receive the updates.
        N: first dimension of the velocity matrices; also forwarded to
            ``movement_penalty``.
        steps: number of steps optimized together.
        output_dims: second dimension of the velocity matrices.
        n_epochs: number of iterations.
        initial_lr, final_lr, lr_switch: learning-rate schedule; ``final_lr``
            is installed at epoch ``lr_switch``.
        init_stdev: accepted but not used by this function.
        initial_momentum, final_momentum, momentum_switch: momentum schedule;
            ``final_momentum`` is installed at epoch ``momentum_switch``.
        lmbda: weight of the movement penalty.
        metric: forwarded unchanged to ``cost_var``.
        verbose: when truthy, print the cost after each epoch.

    Returns:
        A list of ``steps`` NumPy arrays (dtype ``floath``) with the final Ys.
    """
    step_range = range(steps)

    # Hyper-parameters as arrays of the module's float dtype.
    initial_lr, final_lr = (np.array(initial_lr, dtype=floath),
                            np.array(final_lr, dtype=floath))
    initial_momentum, final_momentum = (np.array(initial_momentum,
                                                 dtype=floath),
                                        np.array(final_momentum,
                                                 dtype=floath))

    # Symbolic scalars together with the shared values that feed them.
    lr = T.fscalar('lr')
    lr_shared = theano.shared(initial_lr)
    momentum = T.fscalar('momentum')
    momentum_shared = theano.shared(initial_momentum)
    lmbda_var = T.fscalar('lmbda')
    lmbda_shared = theano.shared(np.array(lmbda, dtype=floath))

    # Zero-initialised velocities, one independent copy per step.
    zero_velocities = np.zeros((N, output_dims), dtype=floath)
    Yvs_shared = [theano.shared(np.array(zero_velocities))
                  for _ in step_range]

    # Per-step symbolic placeholders.
    Xvars = T.fmatrices(steps)
    Yvars = T.fmatrices(steps)
    Yv_vars = T.fmatrices(steps)
    sigmas_vars = T.fvectors(steps)

    per_step_cost = [
        cost_var(Xvars[t], Yvars[t], sigmas_vars[t], metric)
        for t in step_range
    ]
    cost = T.sum(per_step_cost) + lmbda_var * movement_penalty(Yvars, N)
    grad_Y = T.grad(cost, Yvars)

    # Compiled function 1: refresh velocities, v <- momentum * v - lr * grad.
    v_givens = {
        lr: lr_shared,
        momentum: momentum_shared,
        lmbda_var: lmbda_shared,
    }
    v_updates = []
    for t in step_range:
        v_updates.append(
            (Yvs_shared[t], momentum * Yv_vars[t] - lr * grad_Y[t]))
        v_givens[Xvars[t]] = Xs_shared[t]
        v_givens[Yvars[t]] = Ys_shared[t]
        v_givens[Yv_vars[t]] = Yvs_shared[t]
        v_givens[sigmas_vars[t]] = sigmas_shared[t]
    update_Yvs = theano.function([], cost, givens=v_givens, updates=v_updates)

    # Compiled function 2: move positions, Y <- Y + v.
    y_givens = dict()
    y_updates = []
    for t in step_range:
        y_updates.append((Ys_shared[t], Yvars[t] + Yv_vars[t]))
        y_givens[Yvars[t]] = Ys_shared[t]
        y_givens[Yv_vars[t]] = Yvs_shared[t]
    update_Ys = theano.function([], [], givens=y_givens, updates=y_updates)

    # Optimization loop with the two schedule switches.
    for epoch in range(n_epochs):
        if epoch == lr_switch:
            lr_shared.set_value(final_lr)
        if epoch == momentum_switch:
            momentum_shared.set_value(final_momentum)

        current_cost = update_Yvs()
        update_Ys()
        if verbose:
            print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1,
                                                      float(current_cost)))

    return [np.array(Ys_shared[t].get_value(), dtype=floath)
            for t in step_range]
Пример #31
0
    def __init__(self, inf=1e37):
        """Build a symbolic particle simulation and compile three functions.

        ``self.f`` returns the position and velocity trajectories,
        ``self.g`` a scalar computed from the final velocities, and
        ``self.h`` a normalised per-particle weight at the final step. All
        three take ``(pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps)``.

        Args:
            inf: accepted but not used in this method.
        """

        # Symbolic inputs: float32 matrices, int32 scalars, float32 scalars.
        pos, vel = T.fmatrices(['pos', 'vel'])
        nc, N, n_steps = T.iscalars(['nc', 'N', 'n_steps'])
        ra, rb, re, r0 = T.fscalars(['ra', 'rb', 're', 'r0'])
        v0, j, b = T.fscalars(['v0', 'J', 'b'])

        # Two uniform random numbers in [0, 3.14159) per particle, drawn from
        # `trng` (a random stream defined outside this method).
        nu = trng.uniform(size=(N, 2), low=0.0, high=3.14159, dtype='floatX')

        def distance_tensor(X):
            # Pairwise Euclidean distances: broadcast row differences, then
            # take the norm over the coordinate axis.
            E = X.reshape((X.shape[0], 1, -1)) - X.reshape((1, X.shape[0], -1))
            D = T.sqrt(T.sum(T.square(E), axis=2))
            return D

        def direction_tensor(X):
            # Pairwise difference vectors divided by their length. The
            # identity added to L keeps the zero self-distances on the
            # diagonal from causing a division by zero.
            E = X.reshape((X.shape[0], 1, -1)) - X.reshape((1, X.shape[0], -1))
            L = T.sqrt(T.sum(T.square(E), axis=2))
            L = T.pow(L + T.identity_like(L), -1)
            # Three copies of L along axis 2 -- assumes 3 coordinates per row.
            L = T.stack([L, L, L], axis=2)
            return L * E

        def neighbourhood(X):
            # Returns rows 1..nc of the argsort of the distance matrix along
            # axis 0 (row 0 is skipped -- presumably the particle itself),
            # plus a float32 mask.
            # NOTE(review): the mask compares the argsort *indices* with nc,
            # not their rank -- confirm this is intended.
            D = distance_tensor(X)
            N = T.argsort(D, axis=0)
            mask = T.cast(T.lt(N, nc), 'float32')
            return N[1:nc + 1], mask

        def alignment(X, Y):
            # Sum of Y gathered at the neighbour indices; the mask is unused.
            n, d = neighbourhood(X)
            return T.sum(Y[n], axis=0)

        def cohesion(X, inf=100.0):
            # Distance-banded force along the pairwise directions; the `inf`
            # parameter is not used.
            D = distance_tensor(X)
            E = direction_tensor(X)
            n, d = neighbourhood(X)

            F = T.zeros_like(E)
            D = T.stack([D, D, D], axis=2)
            d = T.stack([d, d, d], axis=2)

            # Bands: D < rb, rb < D < ra, ra < D < r0.
            c1 = T.lt(D, rb)
            c2 = T.and_(T.gt(D, rb), T.lt(D, ra))
            c3 = T.and_(T.gt(D, ra), T.lt(D, r0))

            # -E in the first band, a linear ramp in the second, E in the
            # third; entries outside all bands stay zero.
            F = T.set_subtensor(F[c1], -E[c1])
            F = T.set_subtensor(F[c2], 0.25 * (D[c2] - re) / (ra - re) * E[c2])
            F = T.set_subtensor(F[c3], E[c3])

            # Weight by the neighbourhood mask and sum over axis 0.
            return T.sum(d * F, axis=0)

        def perturbation(nu=nu):
            # Vector with spherical-coordinate components built from the two
            # random angles of each particle (theta is doubled).
            phi = nu[:, 0]
            theta = 2.0 * nu[:, 1]

            return T.stack([
                T.sin(theta) * T.sin(phi),
                T.cos(theta) * T.sin(phi),
                T.cos(phi)
            ],
                           axis=1)

        def step(X, dX):
            # One simulation step: advance positions by dX and build the new
            # velocity from alignment, cohesion and perturbation terms.
            X_ = X + dX
            V_ = j * nc / v0 * (alignment(
                X, dX)) + b * (cohesion(X)) + nc * (perturbation())
            # NOTE(review): in Theano's reshape the second positional
            # argument is `ndim`, so this likely yields a 1-D vector of norms
            # rather than an (n, 1) column; the stack below then gives
            # (n, 3) -- confirm.
            dV = T.sqrt(T.sum(T.square(V_), axis=1)).reshape(V_.shape[0], 1)
            dV = T.stack([dV, dV, dV], axis=1)
            # Rescale every velocity by v0 over its norm.
            V = v0 * V_ / dV

            return T.cast(X_, 'float32'), T.cast(V, 'float32')

        def probability(X, Y):
            # exp(j/2 * summed dot products with neighbours' Y rows),
            # normalised to sum to one.
            n, d = neighbourhood(X)
            vDv = T.batched_dot(Y[n].swapaxes(0, 1), Y)
            p = T.exp((j / 2.0) * T.sum(vDv, axis=1))

            return p / T.sum(p)

        # Iterate `step` n_steps times starting from (pos, vel). The `update`
        # returned by scan is not passed to the compiled functions below.
        sim, update = theano.scan(step,
                                  outputs_info=[pos, vel],
                                  n_steps=n_steps)

        pos_, vel_ = sim

        # Norm of the summed final velocities divided by N * v0.
        mean_final_velocity = 1 / (N * v0) * T.sqrt(
            T.sum(T.square(T.sum(vel_[-1], axis=0))))

        particle_probability = probability(pos_[-1], vel_[-1])

        # All three functions share one input order; note that r0 precedes re.
        self.f = theano.function(
            [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps], [pos_, vel_],
            allow_input_downcast=True)

        self.g = theano.function(
            [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps],
            mean_final_velocity,
            allow_input_downcast=True)

        self.h = theano.function(
            [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps],
            particle_probability,
            allow_input_downcast=True)
Пример #32
0
    def __init__(self, Ne, Ni, n_inp, W_inp=None, W_inner=None):
        '''class SNNgroup's self Parameters:
            self.A:       update matrix
            self.S:       neuron state varaibles
            self.W_inner: inner-connect weights in the group
            self.W_inp:   input weights
            self.spikes:  the spikes matrix in the time t
            self.SpkC  :  spike containers
            input : '''
        self.number = Ne + Ni
        self.Ne = Ne
        self.Ni = Ni
        self.mV = self.ms = 1e-3  # units
        dt = 1 * self.ms  # timestep
        self.dt = dt
        taum = 20 * self.ms  # membrane time constant
        taue = 5 * self.ms
        taui = 10 * self.ms
        #self.Vt=-1*self.mV      # threshold = -50+49
        self.Vt = 15 * self.mV  #threshold = -55+70
        #self.Vr=-11*self.mV     # reset = -60+49
        self.Vr = 0 * self.mV  # reset = -70+70
        self.Vi = -10 * self.mV  # VI = -80+70
        self.dApre = .0001
        #self.dApre = .95 #changed into .95
        self.dApost = -self.dApre * 1.05
        self.tauP = 20 * self.ms
        #self.input = input
        self.n_inp = n_inp
        self.weight = .001
        self.weightIn = 1.
        self.wmax = 200 * self.weight
        zero = np.array([0]).astype(theano.config.floatX)
        self.zero = theano.shared(zero, name='zero', borrow=True)
        """
        Equations
        ---------
        eqs='''
        dv/dt = (ge*70mV-gi*10-(v+70*mV))/(20*ms) : volt
        dge/dt = -ge/(5*self.ms) : volt
        dgi/dt = -gi/(10*self.ms) : volt
        '''
        """
        # Update matrix
        A = np.array([[np.exp(-dt / taum), 0, 0],
                      [
                          taue / (taum - taue) *
                          (np.exp(-dt / taum) - np.exp(-dt / taue)),
                          np.exp(-dt / taue), 0
                      ],
                      [
                          -taui / (taum - taui) *
                          (np.exp(-dt / taum) - np.exp(-dt / taui)), 0,
                          np.exp(-dt / taui)
                      ]],
                     dtype=theano.config.floatX).T
        A = theano.shared(value=A, name='A', borrow=True)
        self.A = A
        # State varible : [v;ge;gi] (size=3*self.number)
        S = np.ones((1, self.number), dtype=theano.config.floatX) * self.Vr
        S = np.vstack((S, np.zeros((2, self.number),
                                   dtype=theano.config.floatX)))
        self.S_init = S
        S = theano.shared(value=S, name='S', borrow=True)
        self.S = S
        if W_inner == None:
            # weights of inner connections (size= self.number*self.number)
            self.W_inner_ini = np.ones(
                (self.number, self.number),
                dtype=theano.config.floatX) * self.weight
            #self.W_inner_ini[Ne:,:] = self.weightIn
            self.W_inner_ini[Ne:, :] = self.weight
            wtmp = np.eye(self.number)
            ind = wtmp.nonzero()
            self.W_inner_ini[ind] = 0
            W_inner = theano.shared(value=self.W_inner_ini,
                                    name='W_inner',
                                    borrow=True)
            self.W_inner = W_inner
        else:
            self.W_inner = theano.shared(W_inner, name='W_inner', borrow=True)
        # weights of input connections (size=n_inp*self.number
        rng = np.random.RandomState(1234)
        if W_inp == None:
            #W_inp = np.ones((self.n_inp,self.number)).astype(theano.config.floatX) #needs specification later
            #W_inp = np.random.rand(self.n_inp,self.number).astype(theano.config.floatX)*.00001*self.ms #needs specification later
            self.W_inp_ini = np.ones((self.n_inp, self.number)).astype(
                theano.config.floatX) * self.weight
            self.W_inp_ini[:, self.Ne:] = self.weightIn
            W_inp = theano.shared(self.W_inp_ini, name='W_inp', borrow=True)
            self.W_inp = W_inp
        else:
            self.W_inp = theano.shared(W_inp, name='W_inp', borrow=True)
        # Spike Container
        #spkC = theano.shared(value=np.empty((1,self.number)).astype(theano.config.floatX),name='spkC',borrow=True)
        spkC = np.empty((1, self.number)).astype(theano.config.floatX)
        self.spkC = spkC
        #spikes=np.empty((self.number,1),dtype=theano.config.floatX)
        #self.spikes = theano.shared(value=spikes,name='spikes',borrow=True)
        # not sure the dtype of sp_history
        self.sp_history = np.array([])
        #output = np.empty(self.number,dtype=theano.config.floatX)
        #self.output = theano.shared(value=output,name='output',borrow=True)
        self.V_record = np.empty((1, self.number))
        self.ge_record = np.empty((1, self.number))
        self.gi_record = np.empty((1, self.number))
        #================================================
        # Process Function Initial
        # input:: 0-1 vector
        '''Update Schedule:
        1.Update state variables of SNNgroup: dot(A,S)
        1.Update state variables of Synapses: dot(exp(-dt/tau),Ssynapse), including W_inp and W_inner
        2.Call thresholding function: S[0,:]>Vt
        3.Push spikes into SpikeContainer
        4.Propagate spikes via Connection(possibly with delays)
        5.Update state variables of Synapses (STDP)
        6.Call reset function on neurons which has spiked'''
        Ne = self.Ne
        Ni = self.Ni
        m = T.fmatrix(name='m')
        #self.Vt = T.as_tensor_variable(self.Vt,'Vt')

        # "Update state function:: stat()"
        # return np array
        # shape(stat()) = shape(self.S)
        S_update = T.dot(self.A, self.S)
        self.stat = theano.function(inputs=[],
                                    outputs=[],
                                    updates={self.S: S_update})
        #============================================================
        # Update state of Synapses
        # Update matrix of Synapse
        A_STDP = np.array([[np.exp(-self.dt / self.tauP), 0],
                           [0, np.exp(-self.dt / self.tauP)]],
                          dtype=theano.config.floatX)
        # Spre_inner :: pre  synapse of inner connections
        # Spost_inner:: post synapse of inner connections
        # Spre_inp   :: pre  synapse of input conenctions
        # Spost_inp  :: post synapse of input connections
        self.Spre_inner_ini = np.zeros((self.number, self.number),
                                       dtype=theano.config.floatX)
        Spre_inner = theano.shared(self.Spre_inner_ini,
                                   name='Spre_inner',
                                   borrow=True)
        self.Spre_inner = Spre_inner
        self.Spost_inner_ini = np.zeros((self.number, self.number),
                                        dtype=theano.config.floatX)
        Spost_inner = theano.shared(value=self.Spost_inner_ini,
                                    name='Spost_inner',
                                    borrow=True)
        self.Spost_inner = Spost_inner
        self.Spre_inp_ini = np.zeros((self.n_inp, self.number)).astype(
            theano.config.floatX)  #needs specification later
        Spre_inp = theano.shared(value=self.Spre_inp_ini,
                                 name='Spre_inp',
                                 borrow=True)
        self.Spre_inp = Spre_inp
        self.Spost_inp_ini = np.zeros((self.n_inp, self.number)).astype(
            theano.config.floatX)  #needs specification later
        Spost_inp = theano.shared(value=self.Spost_inp_ini,
                                  name='Spost_inp',
                                  borrow=True)
        self.Spost_inp = Spost_inp
        U = T.fscalar('U')
        UM = T.fmatrix('UM')
        #UpreV = theano.shared(A_STDP[0,0],name='UpreV',borrow=True) # Wpre = UpreV*Wpre
        #UpostV = theano.shared(A_STDP[1,1],name='UpostV',borrow=True)
        self.tmp = np.array(
            np.exp(-self.dt / self.tauP).astype(theano.config.floatX))
        self.SynFresh = theano.shared(self.tmp, name='SynFresh', borrow=True)
        self.UpdateSpre_inner = theano.function(
            inputs=[],
            outputs=None,
            updates={self.Spre_inner: T.dot(self.SynFresh, self.Spre_inner)},
            allow_input_downcast=True)
        self.UpdateSpost_inner = theano.function(
            inputs=[],
            outputs=None,
            updates={self.Spost_inner: T.dot(self.SynFresh, self.Spost_inner)},
            allow_input_downcast=True)
        self.UpdateSpre_inp = theano.function(
            inputs=[],
            outputs=None,
            updates={self.Spre_inp: T.dot(self.SynFresh, self.Spre_inp)},
            allow_input_downcast=True)
        self.UpdateSpost_inp = theano.function(
            inputs=[],
            outputs=None,
            updates={self.Spost_inp: T.dot(self.SynFresh, self.Spost_inp)},
            allow_input_downcast=True)
        #------------------------------------------

        #tmp = math.exp(-self.dt/self.tauP)
        #tmp = T.as_tensor(0.95122945)
        #================================================================
        #------------------------------------------
        # "thresholding function:: spike_fun()"
        # type return :: np.ndarray list
        # shape return:: shape(spike_fun()) = (self.number,)
        self.spike_fun = theano.function(
            inputs=[U],  #[self.S]
            outputs=(T.gt(self.S[0, :],
                          U)))  #type outputs: np.ndarray,shape::(nL,)
        #'outputs = (self.S[0,:]>Vt).astype(theano.config.floatX)), #type outputs: list'
        #'updates={self.spikes:(self.S[0,:]>Vt).astype(theano.config.floatX)}'

        #------------------------------------
        #------------------------------------
        #=================================================================
        # "Push spike into Container function:: spCfun(vector)"
        # type vector :: np.array([],dtype=theano.config.floatX)!!!
        # type return :: np array
        # shape return:: shape(spCfun()) = ( shape(self.spkC)[0]+1 , shape(self.spkC)[1] )
        #updates={self.spkC:T.stack(self.spkC,sp)})
        '''spike_prop = theano.function( #wrong
            inputs = [],
            outputs =[],
            updates = {self.S:np.dot(self.W_inner,self.spikes)+self.S})#wrong'''
        #-------------------------------
        #--------------------------------
        #====================================================================
        # Propagate spikes
        # inner connection:
        # S_inner = f(inputs, outputs, updates)
        #   Param:: inputs: spike 0-1 vector
        #   Param:: inputs: spike is from function-> spike_fun
        #   S_inner(spk)::-> for i in spk[0:Ne].nonzero()[0]:
        #                        S[1,:] = Winner[i,:]+S[1,:]  (excitatory conenction)
        #                    for j in spk[Ne,:].nonzero()[0]:
        #                        S[2,:] = Winner[j,:]+S[2,:]  (inhibitory connection)
        vinner = T.fvector(
            name='vinner')  # vinner = spk :: np.array((1,self.number)

        def add_f1(i, p, q):
            np = T.inc_subtensor(p[1, :], q[i, :])  #ge
            return {p: np}

        def add_f2(i, p, q):
            np = T.inc_subtensor(p[2, :], q[i, :])  #gi
            return {p: np}

        #deltaWinner1,updates1 = theano.scan(fn=lambda i: self.W_inner[i,:]*i+self.S[1,:], sequences=vinner[0:Ne])
        deltaWinner1, updates1 = theano.scan(
            fn=add_f1,
            sequences=vinner[0:Ne].nonzero()[0],
            non_sequences=[self.S, self.W_inner])
        #deltaWinner2,updates2 = theano.scan(fn=lambda i: self.W_inner[i,:]*i+self.S[2,:], sequences=vinner[Ne:])
        deltaWinner2, updates2 = theano.scan(
            fn=add_f2,
            sequences=vinner[Ne:].nonzero()[0] + self.Ne,
            non_sequences=[self.S, self.W_inner])
        # S = S+W
        self.S_inner1 = theano.function(inputs=[vinner],
                                        outputs=None,
                                        updates=updates1,
                                        allow_input_downcast=True)
        self.S_inner2 = theano.function(inputs=[vinner],
                                        outputs=None,
                                        updates=updates2,
                                        allow_input_downcast=True)
        #------------------------------------------
        #------------------------------------------
        # outter connection (input spikes):
        # type input: index list
        voutter = T.fvector(name='voutter')
        #deltaWoutter = theano.scan(fn=lambda j: self.W_inp[j,:]+self.S[1,:],sequences=voutter)
        deltaWoutter, updatesout1 = theano.scan(
            fn=add_f1,
            sequences=voutter.nonzero()[0],
            non_sequences=[self.S, self.W_inp])
        self.S_inp = theano.function(inputs=[voutter],
                                     outputs=None,
                                     updates=updatesout1,
                                     allow_input_downcast=True)

        #------------------------------------
        #-------------------------------------
        #=====================================================================

        # Update Synapses (STDP | STDC)

        # Pre::  Apre += self.dApre, w+=Apost
        # Post:: Apost+=self.dApost, w+=Apre
        #
        # USpreInner :: Perform Pre function No.1 in inner connections
        # UWInner    :: Perform Pre function No.2 in inner connections
        # UpreInner  :: Function
        def add_synap_pre(i, p, po, s, q):
            # i :: sequence
            # p :: pre | post
            # s :: dApre | dApost
            # q :: W
            index = T.nonzero(q[i, :self.Ne])
            np = T.inc_subtensor(p[i, index], s)
            ##            tmp = p[i,:]
            ##            tmp=T.inc_subtensor(tmp[index],s)
            ##            np=T.set_subtensor(p[i,:],tmp)
            #np = T.inc_subtensor(p[i,:],s)
            nw = T.inc_subtensor(q[i, :], po[i, :])
            nw = T.clip(nw, 0, self.wmax)
            return {p: np, q: nw}

        def add_synap_pre_inp(i, p, po, s, q):
            # i :: sequence
            # p :: pre | post
            # s :: dApre | dApost
            # q :: W
            index = T.nonzero(q[i, :self.Ne])
            np = T.inc_subtensor(p[i, index], s)
            ##            tmp = p[i,:]
            ##            tmp=T.inc_subtensor(tmp[index],s)
            ##            np=T.set_subtensor(p[i,:],tmp)
            #np = T.inc_subtensor(p[i,:],s)
            nw = T.inc_subtensor(q[i, :], po[i, :])
            nw = T.clip(nw, 0, self.wmax)
            return {p: np, q: nw}

        def add_synap_post(i, po, p, s, q):
            # i:: sequence
            # po:: post
            # p:: pre
            # s:: dA
            # q:: W
            index = T.nonzero(q[:self.Ne, i])
            npo = T.inc_subtensor(po[index, i], s)
            nw = T.inc_subtensor(q[:, i], p[:, i])
            nw = T.clip(nw, 0, self.wmax)
            return {po: npo, q: nw}

        def add_synap_post_inp(i, po, p, s, q):
            # i:: sequence
            # po:: post
            # p:: pre
            # s:: dA
            # q:: W
            index = T.nonzero(q[:self.Ne, i])
            npo = T.inc_subtensor(po[index, i], s)
            nw = T.inc_subtensor(q[:, i], p[:, i])
            nw = T.clip(nw, 0, self.wmax)
            return {po: npo, q: nw}

        add_dA = T.fscalar('add_dA')
        add_p, add_po, add_q = T.fmatrices('add_p', 'add_po', 'add_q')
        #-------------------------------------------------------------------------
        #USinner,updatesUinner = theano.scan(fn=add_synap_pre,sequences=vinner,non_sequences=[self.Spre_inner,self.Spost_inp,self.dApre,self.W_inner])
        'USinner,updatesUinner = theano.scan(fn=add_synap_pre,sequences=vinner.nonzero()[0],non_sequences=[add_p,add_po,add_dA,add_q])'
        #USinner1,updatesUinner1 = theano.scan(fn=add_synap_pre,sequences=vinner,non_sequences=[self.Spost_inner,self.Spre_inner,self.dApost,self.W_inner])
        #-------------------------------------------------------------------------
        #UpostInner = theano.function(inputs[vinner],updates={self.Spost_inner:USpostInner})
        #UpostInp = theano.function(inputs=[vinner],updates={self.W_inner:UWInnerpost})
        'USinner_f = theano.function(inputs=[vinner,add_p,add_po,add_dA,add_q],outputs=None,updates=updatesUinner)'
        #USinner_step2 = theano.function(inputs=[vinner,add_p,add_po,add_dA,add_q],outputs=None,updates=updatesUinner)
        USinner_inner_pre, updatesUinner_inner_pre = theano.scan(
            fn=add_synap_pre,
            sequences=vinner[:self.Ne].nonzero()[0],
            non_sequences=[
                self.Spre_inner, self.Spost_inner, add_dA, self.W_inner
            ])
        self.USinner_f_inner_pre = theano.function(
            inputs=[vinner, add_dA],
            outputs=None,
            updates=updatesUinner_inner_pre,
            allow_input_downcast=True)

        USinner_innerpost, updatesUinner_inner_post = theano.scan(
            fn=add_synap_post,
            sequences=vinner[:self.Ne].nonzero()[0],
            non_sequences=[
                self.Spost_inner, self.Spre_inner, add_dA, self.W_inner
            ])
        self.USinner_f_inner_post = theano.function(
            inputs=[vinner, add_dA],
            outputs=None,
            updates=updatesUinner_inner_post,
            allow_input_downcast=True)

        USinner_inp_pre, updatesUSinner_inp_pre = theano.scan(
            fn=add_synap_pre_inp,
            sequences=vinner.nonzero()[0],
            non_sequences=[self.Spre_inp, self.Spost_inp, add_dA, self.W_inp])
        self.USinner_f_inp_pre = theano.function(
            inputs=[vinner, add_dA],
            outputs=None,
            updates=updatesUSinner_inp_pre,
            allow_input_downcast=True)

        USinner_inp_post, updatesUSinner_inp_post = theano.scan(
            fn=add_synap_post_inp,
            sequences=vinner[:self.Ne].nonzero()[0],
            non_sequences=[self.Spost_inp, self.Spre_inp, add_dA, self.W_inp])
        self.USinner_f_inp_post = theano.function(
            inputs=[vinner, add_dA],
            outputs=None,
            updates=updatesUSinner_inp_post,
            allow_input_downcast=True)

        # Call reset function
        def reset_v(index, vr):
            nv = T.set_subtensor(self.S[0, index], vr)
            return {self.S: nv}

        resetV, resetV_update = theano.scan(fn=reset_v,
                                            sequences=vinner.nonzero()[0],
                                            non_sequences=[U])
        self.resetV_f = theano.function(inputs=[vinner, U],
                                        outputs=None,
                                        updates=resetV_update,
                                        allow_input_downcast=True)

        setvalue = T.fscalar('setvalue')
        iv = T.ivector('iv')

        def reset_state(i, value, state):
            nstate = T.set_subtensor(state[i, :], value)
            return {state: nstate}

        reset_S_state, Upreset_S_state = theano.scan(
            fn=reset_state, sequences=iv, non_sequences=[setvalue, self.S])
        self.reset_S_fn = theano.function(inputs=[iv, setvalue],
                                          outputs=None,
                                          updates=Upreset_S_state)
Пример #33
0
def init_weights(n_in, n_out):
    """Return a Theano shared weight matrix of shape ``(n_in, n_out)``.

    Entries are standard-normal draws divided by ``sqrt(n_in)`` and stored
    with dtype ``theano.config.floatX``.
    """
    fan_in_scale = np.sqrt(n_in)
    gaussian = np.random.randn(n_in, n_out)
    scaled = np.asarray(gaussian / fan_in_scale, dtype=theano.config.floatX)
    return theano.shared(scaled)


def feed_forward(X, w_h, w_o):
    """Symbolic forward pass: sigmoid hidden layer, then softmax output.

    ``X`` is multiplied by ``w_h``, squashed with a sigmoid, multiplied by
    ``w_o`` and normalised with a softmax. Returns the softmax expression.
    """
    hidden_pre = T.dot(X, w_h)
    hidden_act = T.nnet.sigmoid(hidden_pre)
    output_pre = T.dot(hidden_act, w_o)
    return T.nnet.softmax(output_pre)


# Train a one-hidden-layer softmax classifier on MNIST with plain SGD and
# report the test accuracy after every epoch.
trX, trY, teX, teY = mnist.load_data(one_hot=True)

# 784 -> 100 -> 10 network.
w_h, w_o = init_weights(28 * 28, 100), init_weights(100, 10)
num_epochs, batch_size, learn_rate = 30, 10, 0.2

X, Y = T.fmatrices('X', 'Y')
y_ = feed_forward(X, w_h, w_o)

weights = [w_h, w_o]
grads = T.grad(cost=T.nnet.categorical_crossentropy(y_, Y).mean(), wrt=weights)
# One call to train() performs a single SGD step on the given mini-batch.
train = theano.function(inputs=[X, Y],
                        updates=[[w, w - g * learn_rate]
                                 for w, g in zip(weights, grads)],
                        allow_input_downcast=True)
# X is a float32 matrix; allow the same silent downcast as train() so that
# test data coming from the same loader is accepted here as well.
predict = theano.function(inputs=[X],
                          outputs=T.argmax(y_, axis=1),
                          allow_input_downcast=True)

# range() and a single-argument print() behave the same on Python 2 and 3;
# the '%s %s' formatting reproduces the output of the old "print a, b".
for i in range(num_epochs):
    for j in range(0, len(trX), batch_size):
        train(trX[j:j + batch_size], trY[j:j + batch_size])
    print('%s %s' % (i, np.mean(predict(teX) == np.argmax(teY, axis=1))))
    def __init__(self, We, params):
        """Build the symbolic encoder/decoder graph and compile its functions.

        The encoder is a stack of paired LSTMs (one built with a second
        positional argument of ``True`` -- presumably the backward direction,
        TODO confirm against the LSTM class) whose outputs are concatenated.
        The decoder is a stack of LSTMs followed by a linear layer and a
        softmax over ``params.num_labels`` classes.

        Compiled functions stored on the instance:
            encoder_function -- encoder outputs for (ei, em).
            _train   -- loss/softmax with ground-truth decoder inputs,
                        parameters updated with lasagne's adam.
            f_next   -- one decoding step (see ``_step2``).
            _train2  -- loss/predictions where each step is fed the previous
                        prediction, parameters updated with ``momentum``.
            _utter   -- argmax label indices of the ``_train2``-style decoder.

        Args:
            We: embedding matrix; ``We.shape[1]`` is used as the encoder
                hidden size and the matrix becomes the encoder lookup table.
            params: configuration object; this method reads ``eta``,
                ``num_labels``, ``de_hidden_size``, ``lstm_layers_num`` and
                ``L2`` from it.
        """

        # NOTE(review): this local is never read below; the layer loops use
        # self.lstm_layers_num, which comes from params.
        lstm_layers_num = 1
        en_hidden_size = We.shape[1]
        self.eta = params.eta
        self.num_labels = params.num_labels
        self.en_hidden_size = en_hidden_size
        self.de_hidden_size = params.de_hidden_size

        self.lstm_layers_num = params.lstm_layers_num
        self._train = None
        self._utter = None
        self.params = []
        self.encoder_lstm_layers = []
        self.decoder_lstm_layers = []
        # Initial hidden / cell states of the decoder, one entry per layer.
        self.hos = []
        self.Cos = []

        # Symbolic graph inputs; they are substituted through `givens` when
        # the functions are compiled further down.
        # TF is not used by any function compiled in this method.
        encoderInputs = tensor.imatrix()
        decoderInputs, decoderTarget = tensor.imatrices(2)
        encoderMask, TF, decoderMask, decoderInputs0 = tensor.fmatrices(4)

        self.lookuptable = theano.shared(We)

        #### the last row is for the start symbol
        self.de_lookuptable = theano.shared(name="Decoder LookUpTable",
                                            value=init_xavier_uniform(
                                                self.num_labels + 1,
                                                self.de_hidden_size),
                                            borrow=True)

        # Output projection: takes the decoder state concatenated with the
        # (2 * en_hidden_size)-wide encoder state and maps it to label scores.
        self.linear = theano.shared(
            name="Linear",
            value=init_xavier_uniform(self.de_hidden_size + 2 * en_hidden_size,
                                      self.num_labels),
            borrow=True)
        # Multiplying the random draw by 0. makes the bias start at zero.
        self.linear_bias = theano.shared(
            name="Hidden to Bias",
            value=np.asarray(np.random.randn(self.num_labels, ) * 0.,
                             dtype=theano.config.floatX),
            borrow=True)

        #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*en_hidden_size, self.de_hidden_size), borrow = True)

        #self.hidden_bias = theano.shared(
        #        name="Hidden to Bias",
        #        value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) ,
        #        borrow=True
        #        )

        #self.params += [self.linear, self.de_lookuptable, self.hidden_decode, self.hidden_bias]    #concatenate
        self.params += [self.linear, self.linear_bias, self.de_lookuptable
                        ]  #the initial hidden state of decoder lstm is zeros
        #(max_sent_size, batch_size, hidden_size)
        state_below = self.lookuptable[encoderInputs.flatten()].reshape(
            (encoderInputs.shape[0], encoderInputs.shape[1],
             self.en_hidden_size))
        for _ in range(self.lstm_layers_num):

            enclstm_f = LSTM(self.en_hidden_size)
            enclstm_b = LSTM(self.en_hidden_size, True)
            self.encoder_lstm_layers.append(enclstm_f)  #append
            self.encoder_lstm_layers.append(enclstm_b)  #append
            self.params += enclstm_f.params + enclstm_b.params  #concatenate

            hs_f, Cs_f = enclstm_f.forward(state_below, encoderMask)
            hs_b, Cs_b = enclstm_b.forward(state_below, encoderMask)

            hs = tensor.concatenate([hs_f, hs_b], axis=2)
            # NOTE(review): Cs, hs0 and Cs0 are only referenced by the
            # commented-out lines below; the decoder starts from zeros.
            Cs = tensor.concatenate([Cs_f, Cs_b], axis=2)
            hs0 = tensor.concatenate([hs_f[-1], hs_b[0]], axis=1)
            Cs0 = tensor.concatenate([Cs_f[-1], Cs_b[0]], axis=1)
            #self.hos += tensor.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias),
            #self.Cos += tensor.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias),
            # The trailing comma builds a 1-tuple, so `+=` appends a single
            # zero-filled (encoderInputs.shape[1], de_hidden_size) tensor.
            self.hos += tensor.alloc(
                np.asarray(0., dtype=theano.config.floatX),
                encoderInputs.shape[1], self.de_hidden_size),
            self.Cos += tensor.alloc(
                np.asarray(0., dtype=theano.config.floatX),
                encoderInputs.shape[1], self.de_hidden_size),
            state_below = hs

        Encoder = state_below

        # Placeholders that become the actual inputs of the compiled
        # functions. tf is not used by any function compiled in this method.
        ei, di, dt = tensor.imatrices(3)  #place holders
        em, dm, tf, di0 = tensor.fmatrices(4)

        self.encoder_function = theano.function(inputs=[ei, em],
                                                outputs=Encoder,
                                                givens={
                                                    encoderInputs: ei,
                                                    encoderMask: em
                                                })

        #####################################################
        # Decoder driven by the given decoder inputs
        #####################################################
        state_below = self.de_lookuptable[decoderInputs.flatten()].reshape(
            (decoderInputs.shape[0], decoderInputs.shape[1],
             self.de_hidden_size))
        for i in range(self.lstm_layers_num):
            declstm = LSTM(self.de_hidden_size)
            self.decoder_lstm_layers += declstm,  #append
            self.params += declstm.params  #concatenate
            ho, Co = self.hos[i], self.Cos[i]
            state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co)

        ##### Here we include the representation from the encoder.
        # Concatenating along axis 2 requires the decoder and encoder outputs
        # to agree on the first two dimensions.
        decoder_lstm_outputs = tensor.concatenate([state_below, Encoder],
                                                  axis=2)

        linear_outputs = tensor.dot(decoder_lstm_outputs,
                                    self.linear) + self.linear_bias[None,
                                                                    None, :]
        # Softmax is applied to one slice along the first axis at a time.
        softmax_outputs, _ = theano.scan(
            fn=lambda x: tensor.nnet.softmax(x),
            sequences=[linear_outputs],
        )

        def _NLL(pred, y, m):
            """Masked negative log-likelihood of targets `y` for one step.

            Picks pred[row, y[row]] for rows 0..encoderInputs.shape[1]-1 and
            multiplies the negated log by the mask `m`.
            """
            return -m * tensor.log(pred[tensor.arange(encoderInputs.shape[1]),
                                        y])

        costs, _ = theano.scan(
            fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask])
        # Mask-normalised NLL plus an L2 penalty over every parameter.
        loss = costs.sum() / decoderMask.sum() + params.L2 * sum(
            lasagne.regularization.l2(x) for x in self.params)

        updates = lasagne.updates.adam(loss, self.params, self.eta)
        #updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9)

        ###################################################
        #### using the ground truth when training
        ##################################################
        self._train = theano.function(inputs=[ei, em, di, dm, dt],
                                      outputs=[loss, softmax_outputs],
                                      updates=updates,
                                      givens={
                                          encoderInputs: ei,
                                          encoderMask: em,
                                          decoderInputs: di,
                                          decoderMask: dm,
                                          decoderTarget: dt
                                      })

        #########################################################################
        ### For schedule sampling
        #########################################################################

        ###### always use the previous prediction as the next input
        def _step2(ctx_, state_, hs_, Cs_):
            """Run one decoding step that is fed its own previous prediction.

            Returns the new label distribution (with one extra zero column
            appended) and the stacked per-layer hidden and cell states.
            """
            ### ctx_: b x h
            ### state_ : b x h
            ### hs_ : 1 x b x h    the first dimension is the number of the decoder layers
            ### Cs_ : 1 x b x h    the first dimension is the number of the decoder layers

            hs, Cs = [], []
            # The arg-max of the previous distribution is the next input token.
            token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")
            # All-ones mask with a leading broadcast axis for this single step.
            msk_ = tensor.fill(
                (tensor.zeros_like(token_idxs, dtype="float32")), 1)
            msk_ = msk_.dimshuffle('x', 0)
            state_below0 = self.de_lookuptable[token_idxs].reshape(
                (1, ctx_.shape[0], self.de_hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below0, msk_, hs_[i],
                                    Cs_[i])  #mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below0 = h

            hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(
                Cs)
            state_below0 = state_below0.reshape(
                (ctx_.shape[0], self.de_hidden_size))
            # NOTE(review): the encoder context comes first here, whereas the
            # teacher-forced path above concatenates [decoder, encoder] before
            # the same self.linear -- confirm which order is intended.
            state_below0 = tensor.concatenate([ctx_, state_below0], axis=1)
            newpred = tensor.dot(state_below0,
                                 self.linear) + self.linear_bias[None, :]
            state_below = tensor.nnet.softmax(newpred)

            ##### the begin symbol probability is 0
            extra_p = tensor.zeros_like(hs[:, :, 0])
            state_below = tensor.concatenate([state_below, extra_p.T], axis=1)

            return state_below, hs, Cs

        # Stand-alone inputs for the compiled single-step function.
        ctx_0, state_0 = tensor.fmatrices(2)
        hs_0 = tensor.ftensor3()
        Cs_0 = tensor.ftensor3()

        state_below_tmp, hs_tmp, Cs_tmp = _step2(ctx_0, state_0, hs_0, Cs_0)
        self.f_next = theano.function([ctx_0, state_0, hs_0, Cs_0],
                                      [state_below_tmp, hs_tmp, Cs_tmp],
                                      name='f_next')

        # Stack the per-layer zero initial states into single tensors.
        hs0, Cs0 = tensor.as_tensor_variable(
            self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos,
                                                             name="Cs0")
        # Unroll _step2 over the encoder outputs, starting from the given
        # initial distribution decoderInputs0 and the zero states.
        train_outputs, _ = theano.scan(fn=_step2,
                                       sequences=[Encoder],
                                       outputs_info=[decoderInputs0, hs0, Cs0],
                                       n_steps=encoderInputs.shape[0])

        train_predict = train_outputs[0]
        train_costs, _ = theano.scan(
            fn=_NLL, sequences=[train_predict, decoderTarget, decoderMask])

        train_loss = train_costs.sum() / decoderMask.sum() + params.L2 * sum(
            lasagne.regularization.l2(x) for x in self.params)

        ##from adam import adam
        ##train_updates = adam(train_loss, self.params, self.eta)
        #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9)
        #train_updates = lasagne.updates.sgd(train_loss, self.params, self.eta)
        #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9)
        # Project-local optimiser module, imported at the point of use.
        from momentum import momentum
        train_updates = momentum(train_loss,
                                 self.params,
                                 params.eta,
                                 momentum=0.9)

        self._train2 = theano.function(
            inputs=[ei, em, di0, dm, dt],
            outputs=[train_loss, train_predict],
            updates=train_updates,
            givens={
                encoderInputs: ei,
                encoderMask: em,
                decoderInputs0: di0,
                decoderMask: dm,
                decoderTarget: dt
            }
            #givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt, TF:tf}
        )

        # Prediction: arg-max label index at every step of the self-fed decoder.
        listof_token_idx = train_predict.argmax(axis=-1)
        self._utter = theano.function(inputs=[ei, em, di0],
                                      outputs=listof_token_idx,
                                      givens={
                                          encoderInputs: ei,
                                          encoderMask: em,
                                          decoderInputs0: di0
                                      })
Пример #35
0
def find_Ys(Xs_shared, Ys_shared, sigmas_shared, N, steps, output_dims,
            n_epochs, initial_lr, final_lr, lr_switch, init_stdev,
            initial_momentum, final_momentum, momentum_switch, lmbda, metric,
            verbose=0):
    """Jointly optimise the embeddings ``Ys[t]`` of all time steps.

    Runs ``n_epochs`` of momentum gradient descent on the summed per-step
    cost plus ``lmbda`` times the movement penalty. The learning rate and
    momentum switch to their final values at epochs ``lr_switch`` and
    ``momentum_switch``. ``Ys_shared`` is updated in place and a list with
    one ``floath`` array per step is returned. ``init_stdev`` is accepted
    but not used here.
    """
    time_steps = range(steps)

    # Hyperparameters, converted once to floath arrays.
    initial_lr, final_lr, initial_momentum, final_momentum = [
        np.array(value, dtype=floath)
        for value in (initial_lr, final_lr, initial_momentum, final_momentum)
    ]

    # Symbolic scalars and the shared variables that feed them.
    lr = T.fscalar('lr')
    momentum = T.fscalar('momentum')
    lmbda_var = T.fscalar('lmbda')
    lr_shared = theano.shared(initial_lr)
    momentum_shared = theano.shared(initial_momentum)
    lmbda_shared = theano.shared(np.array(lmbda, dtype=floath))

    # One zero-initialised velocity matrix per time step.
    Yvs_shared = [
        theano.shared(np.zeros((N, output_dims), dtype=floath))
        for _ in time_steps
    ]

    # Symbolic per-step inputs.
    Xvars = T.fmatrices(steps)
    Yvars = T.fmatrices(steps)
    Yv_vars = T.fmatrices(steps)
    sigmas_vars = T.fvectors(steps)

    # Total cost: sum of the per-step costs plus the weighted penalty.
    step_costs = [
        cost_var(Xvars[t], Yvars[t], sigmas_vars[t], metric)
        for t in time_steps
    ]
    cost = T.sum(step_costs) + lmbda_var*movement_penalty(Yvars, N)
    grad_Y = T.grad(cost, Yvars)

    # Compiled function 1: refresh the velocities and return the cost.
    velocity_updates = [
        (Yvs_shared[t], momentum*Yv_vars[t] - lr*grad_Y[t])
        for t in time_steps
    ]
    velocity_givens = {lr: lr_shared, momentum: momentum_shared,
                       lmbda_var: lmbda_shared}
    for t in time_steps:
        velocity_givens[Xvars[t]] = Xs_shared[t]
        velocity_givens[Yvars[t]] = Ys_shared[t]
        velocity_givens[Yv_vars[t]] = Yvs_shared[t]
        velocity_givens[sigmas_vars[t]] = sigmas_shared[t]
    update_Yvs = theano.function([], cost, givens=velocity_givens,
                                 updates=velocity_updates)

    # Compiled function 2: move every Y by its current velocity.
    position_updates = [
        (Ys_shared[t], Yvars[t] + Yv_vars[t]) for t in time_steps
    ]
    position_givens = {}
    for t in time_steps:
        position_givens[Yvars[t]] = Ys_shared[t]
        position_givens[Yv_vars[t]] = Yvs_shared[t]
    update_Ys = theano.function([], [], givens=position_givens,
                                updates=position_updates)

    # Momentum-based gradient descent.
    for epoch in range(n_epochs):
        if epoch == lr_switch:
            lr_shared.set_value(final_lr)
        if epoch == momentum_switch:
            momentum_shared.set_value(final_momentum)

        c = update_Yvs()
        update_Ys()
        if verbose:
            print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1, float(c)))

    return [
        np.array(Ys_shared[t].get_value(), dtype=floath) for t in time_steps
    ]