def advance(u, u_1, u_2, f_a, Cx2, Cy2, dt2, V=None, step1=False):
    u_in, u_1_in, u_2_in = T.fmatrices('u_in', 'u_1_in', 'u_2_in')
    f_a_in, V_in = T.fmatrices('f_in', 'V_in')
    step1_in = T.lscalar('step1_in')
    Cx2_in, Cy2_in, dt2_in = T.fscalars('Cx2_in', 'Cy2_in', 'dt2_in')
    if V is None:
        V = np.zeros_like(f_a)
    # `step` is the symbolic expression for the updated field; it is built
    # elsewhere in the module from the *_in variables above.
    step_f = theano.function([u_in, u_1_in, u_2_in, f_a_in, Cx2_in, Cy2_in,
                              dt2_in, V_in, step1_in],
                             step, on_unused_input='ignore')
    u_out = step_f(u, u_1, u_2, f_a, Cx2, Cy2, dt2, V, step1)
    return u_out
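# --- Sketch only (assumption, not the original module code) ---
# `advance` compiles a graph named `step` that is defined elsewhere. For a
# standard 2-D leapfrog wave update it could be built roughly as below,
# ignoring the V_in / step1_in special cases handled by the real code.
u_1_in, u_2_in, f_a_in = T.fmatrices('u_1_in', 'u_2_in', 'f_in')
Cx2_in, Cy2_in, dt2_in = T.fscalars('Cx2_in', 'Cy2_in', 'dt2_in')
u_xx = u_1_in[:-2, 1:-1] - 2 * u_1_in[1:-1, 1:-1] + u_1_in[2:, 1:-1]
u_yy = u_1_in[1:-1, :-2] - 2 * u_1_in[1:-1, 1:-1] + u_1_in[1:-1, 2:]
interior = (2 * u_1_in[1:-1, 1:-1] - u_2_in[1:-1, 1:-1]
            + Cx2_in * u_xx + Cy2_in * u_yy + dt2_in * f_a_in[1:-1, 1:-1])
step = T.set_subtensor(u_1_in[1:-1, 1:-1], interior)  # boundary rows/cols kept from u_1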
def get_samples():
    # get samples from the model
    X, Y = T.fmatrices(2)
    givens_train_samples = {X: train_x[0:50000], Y: train_y[0:50000]}
    H1, H2 = iteration(X, 15, 0.1)

    # get prior statistics (100 mean and std)
    H2_mean = T.mean(H2, axis=0)
    H2_std = T.std(H2, axis=0)

    # sampling h2 from prior
    H2_ = RNG.normal((10000, 100), avg=H2_mean, std=4 * H2_std,
                     ndim=None, dtype=H2.dtype, nstreams=None)

    # iterative sampling from samples h2
    X_ = G1(G2(H2_))
    for i in range(3):
        H1_, H2_ = iteration(X_, 15, 0.1, 3)
        X_ = G1(H1_)
    #H1_, H2_ = iteration(X_, 1, 0.1, 3)
    #X_ = G1(H1_)

    sampling = theano.function([], X_, on_unused_input='ignore',
                               givens=givens_train_samples)
    samples = sampling()
    np.save('samples', samples)
    return samples
def __init__(self, steps=1, num_layers=2, num_units=32, eps=1e-2):
    self.X, self.Z = T.fvectors('X', 'Z')
    self.P, self.Q, self.R = T.fmatrices('P', 'Q', 'R')
    self.dt = T.scalar('dt')
    self.matrix_inv = T.nlinalg.MatrixInverse()

    self.ar = AutoRegressiveModel(steps=steps, num_layers=num_layers,
                                  num_units=num_units, eps=eps)

    l = InputLayer(input_var=self.X, shape=(steps,))
    l = ReshapeLayer(l, shape=(1, steps,))
    l = self.ar.network(l)
    l = ReshapeLayer(l, shape=(1,))
    self.l_ = l
    self.f_ = get_output(self.l_)

    self.X_ = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.X)], axis=0)
    self.fX_ = G.jacobian(self.X_.flatten(), self.X)

    self.P_ = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + \
        T.dot(T.dot(T.eye(steps)[:, 0:1], self.dt * self.Q), T.eye(steps)[0:1, :])

    self.h = T.dot(T.eye(steps)[0:1], self.X_)
    self.y = self.Z - self.h
    self.hX_ = G.jacobian(self.h, self.X_)

    self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
    self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

    self.X__ = self.X_ + T.dot(self.K, self.y)
    self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)

    self.prediction = theano.function(inputs=[self.X, self.P, self.Q, self.dt],
                                      outputs=[self.X_, self.P_],
                                      allow_input_downcast=True)
    self.update = theano.function(inputs=[self.X, self.Z, self.P, self.Q,
                                          self.R, self.dt],
                                  outputs=[self.X__, self.P__],
                                  allow_input_downcast=True)
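# Minimal usage sketch for the filter class defined above. The class name is a
# placeholder (the original snippet does not show it); shapes follow the graph:
# X has length `steps`, P is steps x steps, Q and R are 1 x 1, Z has length 1.
import numpy as np

kf = NeuralKalmanFilter(steps=4, num_layers=2, num_units=32)  # hypothetical name

X = np.zeros(4, dtype='float32')
P = np.eye(4, dtype='float32')
Q = np.eye(1, dtype='float32') * 1e-3
R = np.eye(1, dtype='float32') * 1e-2
dt = 0.1

for z in measurements:                      # `measurements` assumed to exist
    if z is None:
        X, P = kf.prediction(X, P, Q, dt)   # propagate only
    else:
        X, P = kf.update(X, np.float32([z]), P, Q, R, dt)  # predict + correct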
def SGD(eta, n_epochs, valid_steps, momentum, low, high, init, random_init='gaussian'):
    t0 = time.time()
    index = T.iscalar('index')
    x, y, z, alpha = T.fmatrices('x', 'y', 'z', 'alpha')
    n_minibatch = max_minibatch - 2

    model = Model(n_tree, n_nodes, low, high, init, random_init)
    model_op, auto_upd = model.op(x)
    valid_op, valid_upd = model.valid_op(z, valid_steps)
    loss = model.loss(y, model_op)
    valid_loss = model.loss(alpha, valid_op)

    print "Updation to be compiled yet"
    params = model.params
    train_upd = gradient_updates_momentum(loss, params, eta, momentum) + auto_upd
    train_output = [model_op, loss]
    valid_output = [valid_op, valid_loss]
    print "Train function to be compiled"

    train_fn = theano.function(
        [index], train_output, updates=train_upd,
        givens={x: train_x[:, n_in * index: n_in * (index + 1)],
                y: train_x[:, (n_in * index + n_tree): (n_in * (index + 1) + 1)]},
        name='train_fn')
    valid_fn = theano.function(
        [index], valid_output, updates=valid_upd,
        givens={z: train_x[:, n_tree * index: n_tree * (index + 1)],
                alpha: train_x[:, (n_in * index + n_tree): (n_in * index + n_tree + valid_steps)]},
        name='valid_fn')
    print "Train function compiled"
    # Compilation over

    #################
    ## TRAIN MODEL ##
    #################
    print 'The compilation time is', time.time() - t0
    loss_list = []
    for i in range(n_epochs):
        epoch_loss = 0
        t1 = time.time()
        for idx in range(n_minibatch):
            print 'The current idx is ', idx, ' and the epoch number is ', i
            # call each compiled function once per minibatch instead of twice
            train_result = train_fn(idx)
            output, loss_ = train_result[:-1], train_result[-1]
            if idx % 500 == 0:
                valid_result = valid_fn(idx / 500)
                v_output, v_loss = valid_result[:-1][0], valid_result[-1]
                print 'v_pred is', ' '.join([mappings_words[prediction(abc)] for abc in v_output])
                print 'v_loss is', np.array(v_loss)
            print 'The loss is', loss_
            epoch_loss += loss_
            loss_list.append(loss_)
        print '==' * 20
        print 'The mean loss for the epoch was', epoch_loss / float(n_minibatch)
        print 'Time taken by this epoch is', time.time() - t1
        print '-' * 50
    pyplot.plot(loss_list)
    pyplot.show()
def get_cost_updates(self, corruption_level, learning_rate, sample_method, enc_function):
    """ This function computes the cost and the updates for one training step of the dA """
    tilde_x = self.get_corrupted_input(self.x, corruption_level)
    y = self.get_hidden_values(tilde_x, enc_function)
    z = self.get_reconstructed_input(y, enc_function)

    L = T.fmatrices()  # placeholder; overwritten below
    # if only encoding but not sampling
    if sample_method == -1:
        if self.error_type == 1:
            L = - T.sum(tilde_x * T.log(z) + (1 - tilde_x) * T.log(1 - z), axis=1)
        # square error, added by feng
        #print 'using'
        if self.error_type == 0:
            L = T.sum((tilde_x - z) ** 2, axis=1)
    else:
        sampled_x = self.get_sampled(tilde_x)
        sampled_z = self.get_sampled(z)
        # sampled version
        if self.error_type == 1:
            L = - T.sum(sampled_x * T.log(sampled_z) + (1 - sampled_x) * T.log(1 - sampled_z), axis=1)
        # square error, added by feng
        #print 'using'
        if self.error_type == 0:
            L = T.sum((sampled_x - sampled_z) ** 2, axis=1)

    # note : L is now a vector, where each element is the
    # cross-entropy cost of the reconstruction of the
    # corresponding example of the minibatch. We need to
    # compute the average of all these to get the cost of
    # the minibatch
    cost = T.mean(L)

    # compute the gradients of the cost of the `dA` with respect
    # to its parameters
    gparams = T.grad(cost, self.params)
    # generate the list of updates
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(self.params, gparams)
    ]

    return (cost, updates)
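# Sketch of the usual way such a (cost, updates) pair is compiled into a
# training step, following the standard denoising-autoencoder tutorial.
# `da`, `train_set_x`, `batch_size`, and `n_train_batches` are assumed names,
# not taken from the original snippet.
index = T.lscalar('index')
cost, updates = da.get_cost_updates(corruption_level=0.3, learning_rate=0.1,
                                    sample_method=-1, enc_function=T.nnet.sigmoid)
train_da = theano.function(
    [index], cost, updates=updates,
    givens={da.x: train_set_x[index * batch_size:(index + 1) * batch_size]})
for epoch in range(15):
    epoch_cost = np.mean([train_da(b) for b in range(n_train_batches)])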
def test_advinc_subtensor1():
    """ Test the second case in the opt local_gpu_advanced_incsubtensor1 """
    shared = cuda.shared_constructor
    # shared = tensor.shared
    xval = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="float32")
    yval = numpy.asarray([[10, 10, 10], [10, 10, 10]], dtype="float32")
    x = shared(xval, name="x")
    y = T.fmatrices("y")
    expr = T.advanced_inc_subtensor1(x, y, [0, 2])
    f = theano.function([y], expr, mode=mode_with_gpu)
    assert sum([isinstance(node.op, cuda.GpuAdvancedIncSubtensor1)
                for node in f.maker.env.toposort()]) == 1
    assert numpy.allclose(f(yval), [[11.0, 12.0, 13.0],
                                    [4.0, 5.0, 6.0],
                                    [17.0, 18.0, 19.0]])
def test_set_subtensor():
    shared = cuda.shared_constructor
    # shared = tensor.shared
    x, y = T.fmatrices('x', 'y')
    xval = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype='float32')
    yval = numpy.asarray([[10, 10, 10], [10, 10, 10], [10, 10, 10]], dtype='float32')
    expr = T.set_subtensor(x[:, 1:3], y[:, 1:3])
    f = theano.function([x, y], expr, mode=mode_with_gpu)
    assert sum([isinstance(node.op, cuda.GpuSubtensor)
                for node in f.maker.env.toposort()]) == 1
    assert sum([isinstance(node.op, cuda.GpuIncSubtensor) and
                node.op.set_instead_of_inc == True
                for node in f.maker.env.toposort()]) == 1
    print f(xval, yval)
def test_abs_cost():
    ySym, yhatSym = T.fmatrices('y', 'yhat')
    ac = theano.function([yhatSym, ySym], outputs=absoluteError(yhatSym, ySym))
    yhat = np.asarray([[1], [2], [3]], dtype=theano.config.floatX)
    y = np.asarray([[1], [2], [3]], dtype=theano.config.floatX)
    assert np.abs(ac(yhat, y)) < 1e-5
    yhat = np.asarray([[1], [2.1], [3]], dtype=theano.config.floatX)
    assert np.abs(ac(yhat, y) - 0.1 / 3) < 1e-5
def test_squared_error_cost():
    ySym, yhatSym = T.fmatrices('y', 'yhat')
    sqerr = theano.function([yhatSym, ySym], outputs=squaredError(yhatSym, ySym))
    yhat = np.asarray([[1], [2], [3]], dtype=theano.config.floatX)
    y = np.asarray([[1], [2], [3]], dtype=theano.config.floatX)
    assert np.abs(sqerr(yhat, y)) < 1e-5
    yhat = np.asarray([[1], [2.1], [3]], dtype=theano.config.floatX)
    assert np.abs(sqerr(yhat, y) - 0.01 / 3) < 1e-5
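# The two tests above rely on `absoluteError` and `squaredError` helpers that
# are not shown here. A minimal sketch consistent with the asserted values
# (0.1/3 and 0.01/3 for one wrong entry out of three) would be:
def absoluteError(yhat, y):
    # mean absolute deviation over all elements
    return T.mean(T.abs_(yhat - y))

def squaredError(yhat, y):
    # mean squared deviation over all elements
    return T.mean(T.sqr(yhat - y))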
def test_inc_subtensor():
    shared = cuda.shared_constructor
    # shared = tensor.shared
    x, y = T.fmatrices('x', 'y')
    xval = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype='float32')
    yval = numpy.asarray([[10, 10, 10], [10, 10, 10], [10, 10, 10]], dtype='float32')
    expr = T.inc_subtensor(x[:, 1:3], y[:, 1:3])
    f = theano.function([x, y], expr, mode=mode_with_gpu)
    assert sum([isinstance(node.op, cuda.GpuSubtensor)
                for node in f.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, cuda.GpuIncSubtensor) and
                node.op.set_instead_of_inc == False
                for node in f.maker.fgraph.toposort()]) == 1
    assert numpy.allclose(f(xval, yval), [[1., 12., 13.],
                                          [4., 15., 16.],
                                          [7., 18., 19.]])
from theano import *
import theano.tensor as T
import numpy as np

# Logistic function
x = T.matrix('x', 'float32')
op = 1 / (1 + T.exp(-x))
logistic = function([x], op)
mat1 = [[0, 1], [-1, -2]]
print(logistic(mat1))

# Multiple outputs
a, b = T.fmatrices('a', 'b')
diff = a - b
absDiff = abs(diff)
sqrDiff = diff**2
f = function([a, b], [diff, absDiff, sqrDiff])
mat2 = [[10, 5], [5, 10]]
mat3 = [[5, 10], [10, 5]]
print(f(mat2, mat3))

# Default values
x, y = T.fscalars('x', 'y')
z = x + y
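# The snippet stops just as the "Default values" example begins. The standard
# Theano continuation uses In (available via the star import above) to give an
# input a default value; this is a tutorial-style completion, not part of the
# original snippet.
addDefault = function([x, In(y, value=np.float32(1))], z,
                      allow_input_downcast=True)
print(addDefault(5))     # 6.0 -- y falls back to its default
print(addDefault(5, 3))  # 8.0 -- explicit y overrides the default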
def __init__(self, We_initial, char_embedd_table_initial, params): We = theano.shared(We_initial) # initial embedding for the InfNet We_inf = theano.shared(We_initial) embsize = We_initial.shape[1] hidden = params.hidden self.en_hidden_size = params.hidden_inf self.num_labels = 17 self.de_hidden_size = params.de_hidden_size char_embedd_dim = params.char_embedd_dim char_dic_size = len(params.char_dic) char_embedd_table = theano.shared(char_embedd_table_initial) char_embedd_table_inf = theano.shared(char_embedd_table_initial) input_var = T.imatrix(name='inputs') target_var = T.imatrix(name='targets') target_var_in = T.imatrix(name='targets') mask_var = T.fmatrix(name='masks') mask_var1 = T.fmatrix(name='masks1') char_input_var = T.itensor3(name='char-inputs') length = T.iscalar() length0 = T.iscalar() t_t = T.fscalar() t_t0 = T.fscalar() use_dropout = T.fscalar() use_dropout0 = T.fscalar() Wyy0 = np.random.uniform(-0.02, 0.02, (self.num_labels +1 , self.num_labels + 1)).astype('float32') Wyy = theano.shared(Wyy0) l_in_word = lasagne.layers.InputLayer((None, None)) l_mask_word = lasagne.layers.InputLayer(shape=(None, None)) if params.emb ==1: l_emb_word = lasagne.layers.EmbeddingLayer(l_in_word, input_size= We_initial.shape[0] , output_size = embsize, W =We, name='word_embedding') else: l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We) layer_char_input = lasagne.layers.InputLayer(shape=(None, None, Max_Char_Length ), input_var=char_input_var, name='char-input') layer_char = lasagne.layers.reshape(layer_char_input, (-1, [2])) layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char, input_size=char_dic_size, output_size=char_embedd_dim, W=char_embedd_table, name='char_embedding') layer_char = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1)) # first get some necessary dimensions or parameters conv_window = 3 num_filters = params.num_filters # construct convolution layer cnn_layer = lasagne.layers.Conv1DLayer(layer_char, num_filters=num_filters, filter_size=conv_window, pad='full', nonlinearity=lasagne.nonlinearities.tanh, name='cnn') # infer the pool size for pooling (pool size should go through all time step of cnn) _, _, pool_size = cnn_layer.output_shape # construct max pool layer pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size) # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters] output_cnn_layer = lasagne.layers.reshape(pool_layer, (-1, length, [1])) # finally, concatenate the two incoming layers together. 
incoming = lasagne.layers.concat([output_cnn_layer, l_emb_word], axis=2) l_lstm_wordf = lasagne.layers.LSTMLayer(incoming, hidden, mask_input=l_mask_word) l_lstm_wordb = lasagne.layers.LSTMLayer(incoming, hidden, mask_input=l_mask_word, backwards = True) concat = lasagne.layers.concat([l_lstm_wordf, l_lstm_wordb], axis=2) l_reshape_concat = lasagne.layers.ReshapeLayer(concat,(-1,2*hidden)) l_local = lasagne.layers.DenseLayer(l_reshape_concat, num_units= self.num_labels + 1, nonlinearity=lasagne.nonlinearities.linear) network_params = lasagne.layers.get_all_params(l_local, trainable=True) network_params.append(Wyy) print len(network_params) f = open('NER_BiLSTM_CNN_CRF_.Batchsize_10_dropout_1_LearningRate_0.005_0.0_50_hidden_200.pickle','r') data = pickle.load(f) f.close() for idx, p in enumerate(network_params): p.set_value(data[idx]) self.params = [] self.hos = [] self.Cos = [] self.encoder_lstm_layers = [] self.decoder_lstm_layers = [] self.lstm_layers_num = 1 ei, di, dt = T.imatrices(3) #place holders decoderInputs0 ,em, em1, dm, tf, di0 =T.fmatrices(6) ci = T.itensor3() #### the last one is for the stary symbole self.de_lookuptable = theano.shared(name="Decoder LookUpTable", value=init_xavier_uniform(self.num_labels +1, self.de_hidden_size), borrow=True) self.linear = theano.shared(name="Linear", value = init_xavier_uniform(self.de_hidden_size + 2*self.en_hidden_size, self.num_labels), borrow= True) self.linear_bias = theano.shared(name="Hidden to Bias", value=np.asarray(np.random.randn(self.num_labels, )*0., dtype=theano.config.floatX), borrow=True) #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*hidden, self.de_hidden_size), borrow = True) #self.hidden_bias = theano.shared( # name="Hidden to Bias", # value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) , # borrow=True # ) input_var_shuffle = input_var.dimshuffle(1, 0) mask_var_shuffle = mask_var.dimshuffle(1, 0) target_var_in_shuffle = target_var_in.dimshuffle(1,0) target_var_shuffle = target_var.dimshuffle(1,0) self.params += [We_inf, self.linear, self.de_lookuptable, self.linear_bias] ######[batch, sent_length, embsize] state_below = We_inf[input_var_shuffle.flatten()].reshape((input_var_shuffle.shape[0], input_var_shuffle.shape[1], embsize)) ###### character word embedding layer_char_input_inf = lasagne.layers.InputLayer(shape=(None, None, Max_Char_Length ), input_var=char_input_var, name='char-input') layer_char_inf = lasagne.layers.reshape(layer_char_input_inf, (-1, [2])) layer_char_embedding_inf = lasagne.layers.EmbeddingLayer(layer_char_inf, input_size=char_dic_size, output_size=char_embedd_dim, W=char_embedd_table_inf, name='char_embedding_inf') layer_char_inf = lasagne.layers.DimshuffleLayer(layer_char_embedding_inf, pattern=(0, 2, 1)) #layer_char_inf = lasagne.layers.DropoutLayer(layer_char_inf, p=0.5) cnn_layer_inf = lasagne.layers.Conv1DLayer(layer_char_inf, num_filters=num_filters, filter_size=conv_window, pad='full', nonlinearity=lasagne.nonlinearities.tanh, name='cnn_inf') pool_layer_inf = lasagne.layers.MaxPool1DLayer(cnn_layer_inf, pool_size=pool_size) output_cnn_layer_inf = lasagne.layers.reshape(pool_layer_inf, (-1, length, [1])) char_params = lasagne.layers.get_all_params(output_cnn_layer_inf, trainable=True) self.params += char_params ###### [batch, sent_length, num_filters] #char_state_below = lasagne.layers.get_output(output_cnn_layer_inf, {layer_char_input_inf:char_input_var}) char_state_below = 
lasagne.layers.get_output(output_cnn_layer_inf) char_state_below = dropout_layer(char_state_below, use_dropout, trng) char_state_shuff = char_state_below.dimshuffle(1,0, 2) state_below = T.concatenate([state_below, char_state_shuff], axis=2) state_below = dropout_layer(state_below, use_dropout, trng) enclstm_f = LSTM(embsize+num_filters, self.en_hidden_size) enclstm_b = LSTM(embsize+num_filters, self.en_hidden_size, True) self.encoder_lstm_layers.append(enclstm_f) #append self.encoder_lstm_layers.append(enclstm_b) #append self.params += enclstm_f.params + enclstm_b.params #concatenate hs_f, Cs_f = enclstm_f.forward(state_below, mask_var_shuffle) hs_b, Cs_b = enclstm_b.forward(state_below, mask_var_shuffle) hs = T.concatenate([hs_f, hs_b], axis=2) Cs = T.concatenate([Cs_f, Cs_b], axis=2) hs0 = T.concatenate([hs_f[-1], hs_b[0]], axis=1) Cs0 = T.concatenate([Cs_f[-1], Cs_b[0]], axis=1) #self.hos += T.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias), #self.Cos += T.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias), self.hos += T.alloc(np.asarray(0., dtype=theano.config.floatX), input_var_shuffle.shape[1], self.de_hidden_size), self.Cos += T.alloc(np.asarray(0., dtype=theano.config.floatX), input_var_shuffle.shape[1], self.de_hidden_size), Encoder = hs state_below = self.de_lookuptable[target_var_in_shuffle.flatten()].reshape((target_var_in_shuffle.shape[0], target_var_in_shuffle.shape[1], self.de_hidden_size)) for i in range(self.lstm_layers_num): declstm = LSTM(self.de_hidden_size, self.de_hidden_size) self.decoder_lstm_layers += declstm, #append self.params += declstm.params #concatenate ho, Co = self.hos[i], self.Cos[i] state_below, Cs = declstm.forward(state_below, mask_var_shuffle, ho, Co) decoder_lstm_outputs = T.concatenate([state_below, Encoder], axis=2) linear_outputs = T.dot(decoder_lstm_outputs, self.linear) + self.linear_bias[None, None, :] softmax_outputs, updates = theano.scan( fn=lambda x: T.nnet.softmax(x), sequences=[linear_outputs], ) def _NLL(pred, y, m): return -m * T.log(pred[T.arange(input_var.shape[0]), y]) """ costs, _ = theano.scan(fn=_NLL, sequences=[softmax_outputs, target_var_shuffle, mask_var_shuffle]) #loss = costs.sum() / mask_var.sum() + params.L2*sum(lasagne.regularization.l2(x) for x in self.params) loss = costs.sum() / mask_var.sum() updates = lasagne.updates.sgd(loss, self.params, self.eta) updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9) ################################################### #### using the ground truth when training ################################################## self._train = theano.function( inputs=[ei, em, di, dm, dt], outputs=[loss, softmax_outputs], updates=updates, givens={input_var:ei, mask_var:em, target_var_in:di, decoderMask:dm, target_var:dt} ) """ def _step2(ctx_, state_, hs_, Cs_): hs, Cs = [], [] token_idxs = T.cast(state_.argmax(axis=-1), "int32" ) msk_ = T.fill( (T.zeros_like(token_idxs, dtype="float32")), 1.) 
msk_ = msk_.dimshuffle('x', 0) state_below0 = self.de_lookuptable[token_idxs].reshape((1, ctx_.shape[0], self.de_hidden_size)) for i, lstm in enumerate(self.decoder_lstm_layers): h, C = lstm.forward(state_below0, msk_, hs_[i], Cs_[i]) #mind msk hs += h[-1], Cs += C[-1], state_below0 = h hs, Cs = T.as_tensor_variable(hs), T.as_tensor_variable(Cs) state_below0 = state_below0.reshape((ctx_.shape[0], self.de_hidden_size)) state_below0 = T.concatenate([ctx_, state_below0], axis =1) newpred = T.dot(state_below0, self.linear) + self.linear_bias[None, :] state_below = T.nnet.softmax(newpred) ##### the beging symbole probablity is 0 extra_p = T.zeros_like(hs[:,:,0]) state_below = T.concatenate([state_below, extra_p.T], axis=1) return state_below, hs, Cs hs0, Cs0 = T.as_tensor_variable(self.hos, name="hs0"), T.as_tensor_variable(self.Cos, name="Cs0") train_outputs, _ = theano.scan( fn=_step2, sequences = [Encoder], outputs_info=[decoderInputs0, hs0, Cs0], n_steps=input_var_shuffle.shape[0] ) predy = train_outputs[0].dimshuffle(1, 0 , 2) predy = predy[:,:,:-1]*mask_var[:,:,None] predy0 = predy.reshape((-1, 17)) def inner_function( targets_one_step, mask_one_step, prev_label, tg_energy): """ :param targets_one_step: [batch_size, t] :param prev_label: [batch_size, t] :param tg_energy: [batch_size] :return: """ new_ta_energy = T.dot(prev_label, Wyy[:-1,:-1]) new_ta_energy_t = tg_energy + T.sum(new_ta_energy*targets_one_step, axis =1) tg_energy_t = T.switch(mask_one_step, new_ta_energy_t, tg_energy) return [targets_one_step, tg_energy_t] local_energy = lasagne.layers.get_output(l_local, {l_in_word: input_var, l_mask_word: mask_var, layer_char_input:char_input_var}) local_energy = local_energy.reshape((-1, length, 17)) local_energy = local_energy*mask_var[:,:,None] ##################### # for the end symbole of a sequence #################### end_term = Wyy[:-1,-1] local_energy = local_energy + end_term.dimshuffle('x', 'x', 0)*mask_var1[:,:, None] #predy0 = lasagne.layers.get_output(l_local_a, {l_in_word_a:input_var, l_mask_word_a:mask_var}) predy_in = T.argmax(predy0, axis=1) A = T.extra_ops.to_one_hot(predy_in, 17) A = A.reshape((-1, length, 17)) #predy = predy0.reshape((-1, length, 25)) #predy = predy*mask_var[:,:,None] targets_shuffled = predy.dimshuffle(1, 0, 2) target_time0 = targets_shuffled[0] masks_shuffled = mask_var.dimshuffle(1, 0) initial_energy0 = T.dot(target_time0, Wyy[-1,:-1]) initials = [target_time0, initial_energy0] [ _, target_energies], _ = theano.scan(fn=inner_function, outputs_info=initials, sequences=[targets_shuffled[1:], masks_shuffled[1:]]) cost11 = target_energies[-1] + T.sum(T.sum(local_energy*predy, axis=2)*mask_var, axis=1) cost = T.mean(-cost11) from momentum import momentum updates_a = momentum(cost, self.params, params.eta, momentum=0.9) self.train_fn = theano.function( inputs=[ei, ci, em, em1, length0, di0, use_dropout0], outputs=[cost], updates=updates_a, on_unused_input='ignore', givens={input_var:ei, char_input_var:ci, mask_var:em, mask_var1:em1, length: length0, decoderInputs0:di0, use_dropout:use_dropout0} ) prediction = T.argmax(predy, axis=2) corr = T.eq(prediction, target_var) corr_train = (corr * mask_var).sum(dtype=theano.config.floatX) num_tokens = mask_var.sum(dtype=theano.config.floatX) self.eval_fn = theano.function( inputs=[ei, ci, em, em1, length0, di0, use_dropout0], outputs=[prediction, -cost11], on_unused_input='ignore', givens={input_var:ei, char_input_var:ci, mask_var:em, mask_var1:em1, length: length0, decoderInputs0:di0, use_dropout:use_dropout0} )
def __init__(self, steps=1, num_layers=2, num_units=32, eps=1e-2,
             alpha=1e-2, beta=2.0, kappa=0.0):
    lam = alpha * alpha * (steps + kappa) - steps + beta

    self.X, self.Z = T.fvectors('X', 'Z')
    self.P, self.Q, self.R = T.fmatrices('P', 'Q', 'R')
    self.dt = T.scalar('dt')

    sqrtm = MatrixSqrt()
    self.matrix_inv = T.nlinalg.MatrixInverse()

    self.ar = AutoRegressiveModel(steps=steps, num_layers=num_layers,
                                  num_units=num_units, eps=eps)

    def weighted_mean(A, w):
        mu = T.zeros((steps, 1))
        for i in range(2 * steps + 1):
            mu += w[i] * A[:, i:i + 1]
        return mu

    def weighted_covariance(A, B, a, b, w):
        sigma = T.zeros((steps, steps))
        for i in range(2 * steps + 1):
            sigma += w[i] * T.dot((A[:, i:i + 1] - a), (B[:, i:i + 1] - b).T)
        return sigma

    self.sqrtP = sqrtm(self.P)
    self.XB = T.dot(T.stack(self.X).T, T.ones((1, 2 * steps + 1))) + \
        T.concatenate([T.zeros((steps, 1)),
                       T.sqrt(steps + lam) * self.sqrtP,
                       -T.sqrt(steps + lam) * self.sqrtP], axis=1)

    l = InputLayer(input_var=self.XB.T, shape=(2 * steps + 1, steps))
    l = self.ar.network(l)
    l = ReshapeLayer(l, shape=(1, 2 * steps + 1))
    self.l_ = l
    self.f_ = get_output(self.l_)

    self.XC = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.XB)], axis=0)

    W_m = T.concatenate([(lam / (steps + lam)) * T.ones(1),
                         (1.0 / (2.0 * (steps + lam))) * T.ones(2 * steps)], axis=0)
    W_c = T.concatenate([(lam / (steps + lam) + (1.0 - alpha * alpha + beta)) * T.ones(1),
                         (1.0 / (2.0 * (steps + lam))) * T.ones(2 * steps)], axis=0)

    self.X_ = weighted_mean(self.XC, W_m)
    self.P_ = weighted_covariance(self.XC, self.XC, self.X_, self.X_, W_c) + \
        T.dot(T.dot(T.eye(steps)[:, 0:1], self.dt * self.Q), T.eye(steps)[0:1, :])

    self.ZB = T.dot(T.eye(steps)[0:1, :], self.XC)
    self.Z_ = weighted_mean(self.ZB, W_m)

    self.S = weighted_covariance(self.ZB, self.ZB, self.Z_, self.Z_, W_c) + self.R
    self.K = T.dot(weighted_covariance(self.XC, self.ZB, self.X_, self.Z_, W_c),
                   self.matrix_inv(self.S))

    self.X__ = self.X_ + T.dot(self.K, self.Z - self.Z_)
    self.P__ = self.P_ - T.dot(T.dot(self.K, self.S), self.K.T)

    self.prediction = theano.function(inputs=[self.X, self.P, self.Q, self.dt],
                                      outputs=[self.X_, self.P_],
                                      allow_input_downcast=True)
    self.update = theano.function(inputs=[self.X, self.Z, self.P, self.Q,
                                          self.R, self.dt],
                                  outputs=[self.X__, self.P__],
                                  allow_input_downcast=True)
def exp(__lr):
    max_epochs, batch_size, n_batches = 1000, 100, 500  # = 50000/100
    nX, nH1, nH2 = 784, 1000, 100

    W1 = rand_ortho((nX, nH1), np.sqrt(6. / (nX + nH1)))
    B1 = zeros((nH1,))
    W2 = rand_ortho((nH1, nH2), np.sqrt(6. / (nH1 + nH2)))
    B2 = zeros((nH2,))

    V1 = rand_ortho((nH1, nX), np.sqrt(6. / (nH1 + nX)))
    C1 = zeros((nX,))
    V2 = rand_ortho((nH2, nH1), np.sqrt(6. / (nH2 + nH1)))
    C2 = zeros((nH1,))

    # layer definitions - functions of layers
    F1 = lambda x: softplus(T.dot(x, W1) + B1)
    G1 = lambda h1: sigm(T.dot(h1, V1) + C1)
    F2 = lambda h1: sigm(T.dot(h1, W2) + B2)
    G2 = lambda h2: softplus(T.dot(h2, V2) + C2)

    i, e = T.lscalar(), T.fscalar()
    X, Y = T.fmatrices(2)

    givens_train = lambda i: {
        X: train_x[i * batch_size:(i + 1) * batch_size],
        Y: train_y[i * batch_size:(i + 1) * batch_size]
    }
    givens_valid, givens_test = {X: valid_x, Y: valid_y}, {X: test_x, Y: test_y}
    givens_empty = {X: sharedX(np.zeros((10000, 784))),
                    Y: sharedX(np.zeros((10000, 10)))}

    def iteration(X, k, alpha, beta=0.01):
        # infer h1 and h2 from x
        H1 = F1(X)
        H2 = F2(H1)
        for i in range(k):
            H2 = H2 + alpha * (F2(H1) - F2(G2(H2)))
            H1 = H1 + alpha * (F1(X) - F1(G1(H1))) + alpha * beta * (G2(H2) - H1)
        return H1, H2

    H1, H2 = F1(X), F2(F1(X))
    H1_, H2_ = iteration(X, 15, 0.1)

    def avg_bin(x, k):
        # average of sampled random binary values
        S = 0. * x
        for i in range(k):
            S = S + samp(x)
        return S / k

    # get gradients
    g_V1, g_C1 = T.grad(mse(G1(gaussian(H1_, 0.3)), X), [V1, C1],
                        consider_constant=[H1_, X])
    g_W1, g_B1 = T.grad(mse(F1(gaussian(X, 0.5)), H1_), [W1, B1],
                        consider_constant=[X, H1_])
    g_V2, g_C2 = T.grad(mse(G2(avg_bin(H2_, 3)), H1_), [V2, C2],
                        consider_constant=[H2_, H1_])
    g_W2, g_B2 = T.grad(mse(F2(gaussian(H1_, 0.5)), H2_), [W2, B2],
                        consider_constant=[H1_, H2_])

    cost = mse(G1(G2(F2(F1(X)))), X)

    # training
    train_sync = theano.function([i, e], [cost], givens=givens_train(i),
                                 on_unused_input='ignore',
                                 updates=rms_prop({W1: g_W1, B1: g_B1, V1: g_V1, C1: g_C1,
                                                   W2: g_W2, B2: g_B2, V2: g_V2, C2: g_C2},
                                                  __lr))

    def get_samples():
        # get samples from the model
        X, Y = T.fmatrices(2)
        givens_train_samples = {X: train_x[0:50000], Y: train_y[0:50000]}
        H1, H2 = iteration(X, 15, 0.1)

        # get prior statistics (100 mean and std)
        H2_mean = T.mean(H2, axis=0)
        H2_std = T.std(H2, axis=0)

        # sampling h2 from prior
        H2_ = RNG.normal((10000, 100), avg=H2_mean, std=4 * H2_std,
                         ndim=None, dtype=H2.dtype, nstreams=None)

        # iterative sampling from samples h2
        X_ = G1(G2(H2_))
        for i in range(3):
            H1_, H2_ = iteration(X_, 15, 0.1, 3)
            X_ = G1(H1_)
        #H1_, H2_ = iteration(X_, 1, 0.1, 3)
        #X_ = G1(H1_)

        sampling = theano.function([], X_, on_unused_input='ignore',
                                   givens=givens_train_samples)
        samples = sampling()
        np.save('samples', samples)
        return samples

    # get test log-likelihood
    def test_ll(sigma):
        samples = get_samples()
        return get_ll(np_test_x, theano_parzen(samples, sigma), batch_size=10)

    test_cost = theano.function([i, e], [cost], on_unused_input='ignore',
                                givens=givens_test)

    print('epochs test_loglikelihood time')

    # training loop
    t = time.time()
    monitor = {'train': [], 'valid': [], 'test': [], 'test_ll': [], 'test_ll_base': []}
    for e in range(1, max_epochs + 1):
        monitor['train'].append(
            np.array([train_sync(i, e) for i in range(n_batches)]).mean(axis=0))
        if e % 5 == 0:
            monitor['test'].append(test_cost(0, 0))
            monitor['test_ll'].append(np.mean(test_ll(0.2)))
            print(e, monitor['test_ll'][-1], time.time() - t)
    # Input Layer
    l_in = InputLayer((batch_size, n_in), input_var=input_var)
    # Recurrent EI Net
    l_in_hid = DenseLayer(l_in, n_hid, nonlinearity=lasagne.nonlinearities.rectify)
    # Output Layer
    l_shp = ReshapeLayer(l_in_hid, (-1, n_hid))
    l_dense = DenseLayer(l_shp, num_units=n_out,
                         nonlinearity=lasagne.nonlinearities.sigmoid)
    # To reshape back to our original shape, we can use the symbolic shape
    # variables we retrieved above.
    l_out = ReshapeLayer(l_dense, (batch_size, n_out))
    return l_out, l_in_hid


if __name__ == '__main__':
    # Define the input and expected output variables
    input_var, target_var = T.fmatrices('input', 'target')

    # The generators to sample examples from
    tr_cond = 'two_gains'
    test_cond = 'all_gains'
    generator = CausalInferenceTaskFFWD(max_iter=250001, batch_size=100, n_in=50,
                                        n_out=1, sigma_sq=100.0, tr_cond=tr_cond)
    test_generator = CausalInferenceTaskFFWD(max_iter=2501, batch_size=100, n_in=50,
                                             n_out=1, sigma_sq=100.0, tr_cond=test_cond)

    l_out, l_rec = model(input_var, batch_size=generator.batch_size,
                         n_in=2 * generator.n_in, n_out=generator.n_out, n_hid=200)

    # The generated output variable and the loss function
    # all_layers = lasagne.layers.get_all_layers(l_out)
    # l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * 1e-6
    pred_var = T.clip(lasagne.layers.get_output(l_out), 1e-6, 1. - 1e-6)
    loss = T.mean(lasagne.objectives.squared_error(pred_var, target_var))  # + l2_penalty
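    # Sketch of turning the loss above into a Lasagne training loop. The Adam
    # update rule and the generator's (inputs, targets) iteration protocol are
    # assumptions; the original snippet stops after defining `loss`.
    params = lasagne.layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=1e-3)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    for i, (x_batch, y_batch) in enumerate(generator):
        batch_loss = train_fn(x_batch, y_batch)
        if i % 1000 == 0:
            print('iter %d, loss %.6f' % (i, batch_loss))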
def RnnEvaluator(weights):
    """Build a Theano function that computes the internal state of the
    network when called.
    """
    numInputs = 0
    numOutputs = 0
    for neurons, activator, isInput, isOutput, weightFrame in weights:
        if isInput:
            numInputs += 1
        if isOutput:
            numOutputs += 1

    def evaluate_net(*states):
        activations = T.fvectors(len(weights))
        idx = 0
        for neurons, activator, isInput, isOutput, weightFrame in weights:
            sumParts = []
            for i, info in enumerate(weightFrame):
                srcIdx, w = info
                sumParts.append(T.dot(states[srcIdx], w.transpose()))

            if len(sumParts):
                sumParts = T.stack(*sumParts)
                activity = T.sum(sumParts, axis=0)
                if activator == TIDENTITY:
                    activation = activity
                elif activator == TLOGISTIC:
                    activation = 1. / (1. + T.exp(-activity))
                elif activator == THYPERBOLIC:
                    activation = T.tanh(activity)
                elif activator == TTHRESHOLD:
                    activation = T.sgn(activity)
                elif activator == TBIAS:
                    activation = T.ones_like(activity, dtype='float32')
                elif activator == TRADIAL:
                    activation = T.exp(-activity * activity / 2.0)
                else:
                    raise Exception("Unknown activation function for layer {0}" + layer.id)
            else:
                activation = T.zeros_like(states[idx])  # states[idx]

            activations[idx] = activation
            idx += 1

        checklist = [T.all(T.eq(a, s)) for a, s in zip(activations, states)]
        condition = T.all(T.as_tensor_variable(checklist))
        return activations, {}, theano.scan_module.until(condition)

    def make_states(*inputs):
        states = []
        idx = 0
        numPoints = len(inputs) and inputs[0].shape[0] or 1
        for neurons, activator, isInput, isOutput, weightFrame in weights:
            if isInput:
                states.append(inputs[idx])
                idx += 1
            else:
                states.append(T.ones((numPoints, neurons), dtype='float32'))
        return states

    def project_output(states):
        outputs = []
        idx = 0
        for neurons, activator, isInput, isOutput, weightFrame in weights:
            if isOutput:
                outputs.append(states[idx])
            idx += 1
        return outputs

    inputs = T.fmatrices(numInputs)
    times = T.iscalar()
    netValue, updates = theano.scan(
        fn=evaluate_net,
        outputs_info=make_states(*inputs),
        n_steps=times)
    result = [n[-1] for n in netValue]
    outputs = project_output(result)
    net = theano.function(inputs + [times], outputs)

    def fix_inputs(inputs, times=5):
        reshape = False
        if len(inputs) and (len(np.shape(inputs[0])) == 1):
            reshape = True
            inputs = [np.reshape(i, (1, i.shape[0])) for i in inputs]
        args = list(inputs) + [times]
        outputs = net(*args)
        if reshape:
            return [o[0] for o in outputs]
        return outputs

    return fix_inputs
def __init__(self, We_initial, params): self.textfile = open(params.outfile, 'w') We = theano.shared(We_initial) embsize = We_initial.shape[1] hidden = params.hidden self.num_labels = params.num_labels self.de_hidden_size = params.de_hidden_size self.en_hidden_size = params.en_hidden_size print params.de_hidden_size, hidden, params.num_labels self.lstm_layers_num = 1 input_var = T.imatrix(name='inputs') target_var = T.imatrix(name='targets') target_var_in = T.imatrix(name='in_targets') mask_var = T.fmatrix(name='masks') mask_var1 = T.fmatrix(name='masks1') length = T.iscalar() length0 = T.iscalar() t_t = T.fscalar() t_t0 = T.fscalar() Wyy0 = np.random.uniform( -0.02, 0.02, (self.num_labels + 1, self.num_labels + 1)).astype('float32') Wyy = theano.shared(Wyy0) l_in_word = lasagne.layers.InputLayer((None, None)) l_mask_word = lasagne.layers.InputLayer(shape=(None, None)) if params.emb == 1: l_emb_word = lasagne.layers.EmbeddingLayer( l_in_word, input_size=We_initial.shape[0], output_size=embsize, W=We) else: l_emb_word = lasagne_embedding_layer_2(l_in_word, embsize, We) l_lstm_wordf = lasagne.layers.LSTMLayer(l_emb_word, 512, mask_input=l_mask_word) l_lstm_wordb = lasagne.layers.LSTMLayer(l_emb_word, 512, mask_input=l_mask_word, backwards=True) concat = lasagne.layers.concat([l_lstm_wordf, l_lstm_wordb], axis=2) l_reshape_concat = lasagne.layers.ReshapeLayer(concat, (-1, 2 * 512)) l_local = lasagne.layers.DenseLayer( l_reshape_concat, num_units=self.num_labels, nonlinearity=lasagne.nonlinearities.linear) network_params = lasagne.layers.get_all_params(l_local, trainable=True) network_params.append(Wyy) print len(network_params) f = open( 'ccctag_CRF_Bilstm_Viterbi_.Batchsize_10_dropout_0_LearningRate_0.01_0.0512_tagversoin_2.pickle', 'r') data = pickle.load(f) f.close() for idx, p in enumerate(network_params): #print data[idx].shape p.set_value(data[idx]) self.params = [] self.hos = [] self.Cos = [] self.encoder_lstm_layers = [] self.decoder_lstm_layers = [] ei, di, dt = T.imatrices(3) #place holders decoderInputs0, em, em1, dm, tf, di0 = T.fmatrices(6) #### the last one is for the stary symbole self.de_lookuptable = theano.shared(name="Decoder LookUpTable", value=init_xavier_uniform( self.num_labels + 1, self.de_hidden_size), borrow=True) self.linear = theano.shared( name="Linear", value=init_xavier_uniform( self.de_hidden_size + 2 * self.en_hidden_size, self.num_labels), borrow=True) self.linear_bias = theano.shared( name="Hidden to Bias", value=np.asarray(np.random.randn(self.num_labels, ) * 0., dtype=theano.config.floatX), borrow=True) #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*hidden, self.de_hidden_size), borrow = True) #self.hidden_bias = theano.shared( # name="Hidden to Bias", # value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) , # borrow=True # ) input_var_shuffle = input_var.dimshuffle(1, 0) mask_var_shuffle = mask_var.dimshuffle(1, 0) target_var_in_shuffle = target_var_in.dimshuffle(1, 0) target_var_shuffle = target_var.dimshuffle(1, 0) self.params += [self.linear, self.linear_bias, self.de_lookuptable] #concatenate state_below = We[input_var_shuffle.flatten()].reshape( (input_var_shuffle.shape[0], input_var_shuffle.shape[1], embsize)) enclstm_f = LSTM(embsize, self.en_hidden_size) enclstm_b = LSTM(embsize, self.en_hidden_size, True) self.encoder_lstm_layers.append(enclstm_f) #append self.encoder_lstm_layers.append(enclstm_b) #append self.params += enclstm_f.params + enclstm_b.params #concatenate 
hs_f, Cs_f = enclstm_f.forward(state_below, mask_var_shuffle) hs_b, Cs_b = enclstm_b.forward(state_below, mask_var_shuffle) hs = T.concatenate([hs_f, hs_b], axis=2) Cs = T.concatenate([Cs_f, Cs_b], axis=2) hs0 = T.concatenate([hs_f[-1], hs_b[0]], axis=1) Cs0 = T.concatenate([Cs_f[-1], Cs_b[0]], axis=1) #self.hos += T.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias), #self.Cos += T.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias), self.hos += T.alloc(np.asarray(0., dtype=theano.config.floatX), input_var_shuffle.shape[1], self.de_hidden_size), self.Cos += T.alloc(np.asarray(0., dtype=theano.config.floatX), input_var_shuffle.shape[1], self.de_hidden_size), Encoder = hs ei, di, dt = T.imatrices(3) #place holders em, dm, tf, di0 = T.fmatrices(4) self.encoder_function = theano.function(inputs=[ei, em], outputs=Encoder, givens={ input_var: ei, mask_var: em }) state_below = self.de_lookuptable[ target_var_in_shuffle.flatten()].reshape( (target_var_in_shuffle.shape[0], target_var_in_shuffle.shape[1], self.de_hidden_size)) for i in range(self.lstm_layers_num): declstm = LSTM(self.de_hidden_size, self.de_hidden_size) self.decoder_lstm_layers += declstm, #append self.params += declstm.params #concatenate ho, Co = self.hos[i], self.Cos[i] state_below, Cs = declstm.forward(state_below, mask_var_shuffle, ho, Co) decoder_lstm_outputs = T.concatenate([Encoder, state_below], axis=2) linear_outputs = T.dot(decoder_lstm_outputs, self.linear) + self.linear_bias[None, None, :] softmax_outputs, updates = theano.scan( fn=lambda x: T.nnet.softmax(x), sequences=[linear_outputs], ) def _NLL(pred, y, m): return -m * T.log(pred[T.arange(input_var.shape[0]), y]) def _step2(ctx_, state_, hs_, Cs_): #print ctx_.shape, state_.shape, hs_.shape, Cs_.shape hs, Cs = [], [] token_idxs = T.cast(state_.argmax(axis=-1), "int32") msk_ = T.fill((T.zeros_like(token_idxs, dtype="float32")), 1) msk_ = msk_.dimshuffle('x', 0) state_below0 = self.de_lookuptable[token_idxs].reshape( (1, ctx_.shape[0], self.de_hidden_size)) for i, lstm in enumerate(self.decoder_lstm_layers): h, C = lstm.forward(state_below0, msk_, hs_[i], Cs_[i]) #mind msk hs += h[-1], Cs += C[-1], state_below0 = h hs, Cs = T.as_tensor_variable(hs), T.as_tensor_variable(Cs) state_below0 = state_below0.reshape( (ctx_.shape[0], self.de_hidden_size)) state_below0 = T.concatenate([ctx_, state_below0], axis=1) newpred = T.dot(state_below0, self.linear) + self.linear_bias[None, :] state_below = T.nnet.softmax(newpred) extra_p = T.zeros_like(hs[:, :, 0]) state_below = T.concatenate([state_below, extra_p.T], axis=1) return state_below, hs, Cs ctx_0, state_0 = T.fmatrices(2) hs_0 = T.ftensor3() Cs_0 = T.ftensor3() state_below_tmp, hs_tmp, Cs_tmp = _step2(ctx_0, state_0, hs_0, Cs_0) self.f_next = theano.function([ctx_0, state_0, hs_0, Cs_0], [state_below_tmp, hs_tmp, Cs_tmp], name='f_next') hs0, Cs0 = T.as_tensor_variable( self.hos, name="hs0"), T.as_tensor_variable(self.Cos, name="Cs0") train_outputs, _ = theano.scan(fn=_step2, sequences=[Encoder], outputs_info=[decoderInputs0, hs0, Cs0], n_steps=input_var_shuffle.shape[0]) predy = train_outputs[0].dimshuffle(1, 0, 2) predy = predy[:, :, :-1] * mask_var[:, :, None] predy0 = predy.reshape((-1, self.num_labels)) def inner_function(targets_one_step, mask_one_step, prev_label, tg_energy): """ :param targets_one_step: [batch_size, t] :param prev_label: [batch_size, t] :param tg_energy: [batch_size] :return: """ new_ta_energy = T.dot(prev_label, Wyy[:-1, :-1]) new_ta_energy_t = tg_energy + T.sum( new_ta_energy * 
targets_one_step, axis=1) tg_energy_t = T.switch(mask_one_step, new_ta_energy_t, tg_energy) return [targets_one_step, tg_energy_t] local_energy = lasagne.layers.get_output(l_local, { l_in_word: input_var, l_mask_word: mask_var }) local_energy = local_energy.reshape((-1, length, self.num_labels)) local_energy = local_energy * mask_var[:, :, None] ##################### # for the end symbole of a sequence #################### end_term = Wyy[:-1, -1] local_energy = local_energy + end_term.dimshuffle( 'x', 'x', 0) * mask_var1[:, :, None] #predy0 = lasagne.layers.get_output(l_local_a, {l_in_word_a:input_var, l_mask_word_a:mask_var}) predy_in = T.argmax(predy0, axis=1) A = T.extra_ops.to_one_hot(predy_in, self.num_labels) A = A.reshape((-1, length, self.num_labels)) #predy = predy0.reshape((-1, length, 25)) #predy = predy*mask_var[:,:,None] targets_shuffled = predy.dimshuffle(1, 0, 2) target_time0 = targets_shuffled[0] masks_shuffled = mask_var.dimshuffle(1, 0) initial_energy0 = T.dot(target_time0, Wyy[-1, :-1]) initials = [target_time0, initial_energy0] [_, target_energies], _ = theano.scan( fn=inner_function, outputs_info=initials, sequences=[targets_shuffled[1:], masks_shuffled[1:]]) cost11 = target_energies[-1] + T.sum( T.sum(local_energy * predy, axis=2) * mask_var, axis=1) # compute the ground-truth energy targets_shuffled0 = A.dimshuffle(1, 0, 2) target_time00 = targets_shuffled0[0] initial_energy00 = T.dot(target_time00, Wyy[-1, :-1]) initials0 = [target_time00, initial_energy00] [_, target_energies0], _ = theano.scan( fn=inner_function, outputs_info=initials0, sequences=[targets_shuffled0[1:], masks_shuffled[1:]]) cost110 = target_energies0[-1] + T.sum( T.sum(local_energy * A, axis=2) * mask_var, axis=1) #predy_f = predy.reshape((-1, 25)) y_f = target_var.flatten() if (params.annealing == 0): lamb = params.L3 elif (params.annealing == 1): lamb = params.L3 * (1 - 0.01 * t_t) if (params.regutype == 0): ce_hinge = lasagne.objectives.categorical_crossentropy( predy0 + eps, y_f) ce_hinge = ce_hinge.reshape((-1, length)) ce_hinge = T.sum(ce_hinge * mask_var, axis=1) cost = T.mean(-cost11) + lamb * T.mean(ce_hinge) else: entropy_term = -T.sum(predy0 * T.log(predy0 + eps), axis=1) entropy_term = entropy_term.reshape((-1, length)) entropy_term = T.sum(entropy_term * mask_var, axis=1) cost = T.mean(-cost11) - lamb * T.mean(entropy_term) """ f = open('F0_simple.pickle') PARA = pickle.load(f) f.close() l2_term = sum(lasagne.regularization.l2(x-PARA[index]) for index, x in enumerate(a_params)) cost = T.mean(-cost11) + params.L2*l2_term """ ##from adam import adam ##updates_a = adam(cost, self.params, params.eta) #updates_a = lasagne.updates.sgd(cost, self.params, params.eta) #updates_a = lasagne.updates.apply_momentum(updates_a, self.params, momentum=0.9) from momentum import momentum updates_a = momentum(cost, self.params, params.eta, momentum=0.9) if (params.regutype == 0): self.train_fn = theano.function( inputs=[ei, dt, em, em1, length0, t_t0, di0], outputs=[cost, ce_hinge], updates=updates_a, on_unused_input='ignore', givens={ input_var: ei, target_var: dt, mask_var: em, mask_var1: em1, length: length0, t_t: t_t0, decoderInputs0: di0 }) #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, ce_hinge], updates = updates_a, on_unused_input='ignore') else: self.train_fn = theano.function( inputs=[ei, dt, em, em1, length0, t_t0, di0], outputs=[cost, entropy_term], updates=updates_a, on_unused_input='ignore', givens={ input_var: ei, target_var: dt, 
mask_var: em, mask_var1: em1, length: length0, t_t: t_t0, decoderInputs0: di0 }) #self.train_fn = theano.function([input_var, target_var, mask_var, mask_var1, length, t_t], [cost, entropy_term], updates = updates_a, on_unused_input='ignore') prediction = T.argmax(predy, axis=2) corr = T.eq(prediction, target_var) corr_train = (corr * mask_var).sum(dtype=theano.config.floatX) num_tokens = mask_var.sum(dtype=theano.config.floatX) self.eval_fn = theano.function( inputs=[ei, dt, em, em1, length0, di0], outputs=[cost11, cost110, corr_train, num_tokens, prediction], on_unused_input='ignore', givens={ input_var: ei, target_var: dt, mask_var: em, mask_var1: em1, length: length0, decoderInputs0: di0 })
def __init__(self,Ne,Ni,n_inp,W_inp=None,W_inner=None): '''class SNNgroup's self Parameters: self.A: update matrix self.S: neuron state varaibles self.W_inner: inner-connect weights in the group self.W_inp: input weights self.spikes: the spikes matrix in the time t self.SpkC : spike containers input : ''' self.number = Ne+Ni self.Ne = Ne self.Ni = Ni self.mV=self.ms=1e-3 # units dt=1*self.ms # timestep self.dt = dt taum=20*self.ms # membrane time constant taue=5*self.ms taui=10*self.ms #self.Vt=-1*self.mV # threshold = -50+49 self.Vt = 15*self.mV #threshold = -55+70 #self.Vr=-11*self.mV # reset = -60+49 self.Vr = 0*self.mV # reset = -70+70 self.Vi = -10*self.mV # VI = -80+70 self.dApre = .0001 #self.dApre = .95 #changed into .95 self.dApost = -self.dApre*1.05 self.tauP = 20*self.ms #self.input = input self.n_inp = n_inp self.weight = .001 self.weightIn = 1. self.wmax = 200*self.weight zero = np.array([0]).astype(theano.config.floatX) self.zero = theano.shared(zero,name='zero',borrow=True) """ Equations --------- eqs=''' dv/dt = (ge*70mV-gi*10-(v+70*mV))/(20*ms) : volt dge/dt = -ge/(5*self.ms) : volt dgi/dt = -gi/(10*self.ms) : volt ''' """ # Update matrix A = np.array([[np.exp(-dt/taum),0,0], [taue/(taum-taue)*(np.exp(-dt/taum)-np.exp(-dt/taue)),np.exp(-dt/taue),0], [-taui/(taum-taui)*(np.exp(-dt/taum)-np.exp(-dt/taui)),0,np.exp(-dt/taui)] ],dtype=theano.config.floatX).T A = theano.shared(value=A,name='A',borrow=True) self.A = A # State varible : [v;ge;gi] (size=3*self.number) S = np.ones((1,self.number),dtype=theano.config.floatX)*self.Vr S = np.vstack((S,np.zeros((2,self.number),dtype=theano.config.floatX))) self.S_init = S S = theano.shared(value=S,name='S',borrow=True) self.S = S if W_inner == None: # weights of inner connections (size= self.number*self.number) self.W_inner_ini = np.ones((self.number,self.number),dtype=theano.config.floatX)*self.weight #self.W_inner_ini[Ne:,:] = self.weightIn self.W_inner_ini[Ne:,:] = self.weight wtmp = np.eye(self.number) ind = wtmp.nonzero() self.W_inner_ini[ind]=0 W_inner = theano.shared(value=self.W_inner_ini,name='W_inner',borrow=True) self.W_inner = W_inner else: self.W_inner = theano.shared(W_inner,name='W_inner',borrow=True) # weights of input connections (size=n_inp*self.number rng = np.random.RandomState(1234) if W_inp ==None: #W_inp = np.ones((self.n_inp,self.number)).astype(theano.config.floatX) #needs specification later #W_inp = np.random.rand(self.n_inp,self.number).astype(theano.config.floatX)*.00001*self.ms #needs specification later self.W_inp_ini = np.ones((self.n_inp,self.number)).astype(theano.config.floatX)*self.weight self.W_inp_ini[:,self.Ne:] = self.weightIn W_inp = theano.shared(self.W_inp_ini,name='W_inp',borrow=True) self.W_inp = W_inp else: self.W_inp = theano.shared(W_inp,name='W_inp',borrow=True) # Spike Container #spkC = theano.shared(value=np.empty((1,self.number)).astype(theano.config.floatX),name='spkC',borrow=True) spkC = np.empty((1,self.number)).astype(theano.config.floatX) self.spkC = spkC #spikes=np.empty((self.number,1),dtype=theano.config.floatX) #self.spikes = theano.shared(value=spikes,name='spikes',borrow=True) # not sure the dtype of sp_history self.sp_history = np.array([]) #output = np.empty(self.number,dtype=theano.config.floatX) #self.output = theano.shared(value=output,name='output',borrow=True) self.V_record = np.empty((1,self.number)) self.ge_record = np.empty((1,self.number)) self.gi_record = np.empty((1,self.number)) #================================================ # Process Function Initial # 
input:: 0-1 vector '''Update Schedule: 1.Update state variables of SNNgroup: dot(A,S) 1.Update state variables of Synapses: dot(exp(-dt/tau),Ssynapse), including W_inp and W_inner 2.Call thresholding function: S[0,:]>Vt 3.Push spikes into SpikeContainer 4.Propagate spikes via Connection(possibly with delays) 5.Update state variables of Synapses (STDP) 6.Call reset function on neurons which has spiked''' Ne = self.Ne Ni = self.Ni m = T.fmatrix(name='m') #self.Vt = T.as_tensor_variable(self.Vt,'Vt') # "Update state function:: stat()" # return np array # shape(stat()) = shape(self.S) S_update = T.dot(self.A,self.S) self.stat = theano.function( inputs = [], outputs = [], updates = {self.S : S_update}) #============================================================ # Update state of Synapses # Update matrix of Synapse A_STDP = np.array([[np.exp(-self.dt/self.tauP),0],[0,np.exp(-self.dt/self.tauP)]],dtype=theano.config.floatX) # Spre_inner :: pre synapse of inner connections # Spost_inner:: post synapse of inner connections # Spre_inp :: pre synapse of input conenctions # Spost_inp :: post synapse of input connections self.Spre_inner_ini = np.zeros((self.number,self.number),dtype=theano.config.floatX) Spre_inner = theano.shared(self.Spre_inner_ini,name='Spre_inner',borrow=True) self.Spre_inner = Spre_inner self.Spost_inner_ini = np.zeros((self.number,self.number),dtype=theano.config.floatX) Spost_inner = theano.shared(value=self.Spost_inner_ini,name='Spost_inner',borrow=True) self.Spost_inner = Spost_inner self.Spre_inp_ini = np.zeros((self.n_inp,self.number)).astype(theano.config.floatX) #needs specification later Spre_inp = theano.shared(value=self.Spre_inp_ini,name='Spre_inp',borrow=True) self.Spre_inp = Spre_inp self.Spost_inp_ini = np.zeros((self.n_inp,self.number)).astype(theano.config.floatX) #needs specification later Spost_inp = theano.shared(value=self.Spost_inp_ini,name='Spost_inp',borrow=True) self.Spost_inp = Spost_inp U = T.fscalar('U') UM = T.fmatrix('UM') #UpreV = theano.shared(A_STDP[0,0],name='UpreV',borrow=True) # Wpre = UpreV*Wpre #UpostV = theano.shared(A_STDP[1,1],name='UpostV',borrow=True) self.tmp = np.array(np.exp(-self.dt/self.tauP).astype(theano.config.floatX)) self.SynFresh = theano.shared(self.tmp,name='SynFresh',borrow=True) self.UpdateSpre_inner = theano.function(inputs=[],outputs=None,updates={self.Spre_inner:T.dot(self.SynFresh,self.Spre_inner)},allow_input_downcast=True) self.UpdateSpost_inner = theano.function(inputs=[],outputs=None,updates={self.Spost_inner:T.dot(self.SynFresh,self.Spost_inner)},allow_input_downcast=True) self.UpdateSpre_inp = theano.function(inputs=[],outputs=None,updates={self.Spre_inp:T.dot(self.SynFresh,self.Spre_inp)},allow_input_downcast=True) self.UpdateSpost_inp = theano.function(inputs=[],outputs=None,updates={self.Spost_inp:T.dot(self.SynFresh,self.Spost_inp)},allow_input_downcast=True) #------------------------------------------ #tmp = math.exp(-self.dt/self.tauP) #tmp = T.as_tensor(0.95122945) #================================================================ #------------------------------------------ # "thresholding function:: spike_fun()" # type return :: np.ndarray list # shape return:: shape(spike_fun()) = (self.number,) self.spike_fun = theano.function( inputs = [U], #[self.S] outputs = (T.gt(self.S[0,:],U))) #type outputs: np.ndarray,shape::(nL,) #'outputs = (self.S[0,:]>Vt).astype(theano.config.floatX)), #type outputs: list' #'updates={self.spikes:(self.S[0,:]>Vt).astype(theano.config.floatX)}' 
#------------------------------------ #------------------------------------ #================================================================= # "Push spike into Container function:: spCfun(vector)" # type vector :: np.array([],dtype=theano.config.floatX)!!! # type return :: np array # shape return:: shape(spCfun()) = ( shape(self.spkC)[0]+1 , shape(self.spkC)[1] ) #updates={self.spkC:T.stack(self.spkC,sp)}) '''spike_prop = theano.function( #wrong inputs = [], outputs =[], updates = {self.S:np.dot(self.W_inner,self.spikes)+self.S})#wrong''' #------------------------------- #-------------------------------- #==================================================================== # Propagate spikes # inner connection: # S_inner = f(inputs, outputs, updates) # Param:: inputs: spike 0-1 vector # Param:: inputs: spike is from function-> spike_fun # S_inner(spk)::-> for i in spk[0:Ne].nonzero()[0]: # S[1,:] = Winner[i,:]+S[1,:] (excitatory conenction) # for j in spk[Ne,:].nonzero()[0]: # S[2,:] = Winner[j,:]+S[2,:] (inhibitory connection) vinner = T.fvector(name='vinner') # vinner = spk :: np.array((1,self.number) def add_f1(i,p,q): np = T.inc_subtensor(p[1,:],q[i,:]) #ge return {p:np} def add_f2(i,p,q): np = T.inc_subtensor(p[2,:],q[i,:]) #gi return {p:np} #deltaWinner1,updates1 = theano.scan(fn=lambda i: self.W_inner[i,:]*i+self.S[1,:], sequences=vinner[0:Ne]) deltaWinner1,updates1 = theano.scan(fn=add_f1, sequences=vinner[0:Ne].nonzero()[0],non_sequences=[self.S,self.W_inner]) #deltaWinner2,updates2 = theano.scan(fn=lambda i: self.W_inner[i,:]*i+self.S[2,:], sequences=vinner[Ne:]) deltaWinner2,updates2 = theano.scan(fn=add_f2, sequences=vinner[Ne:].nonzero()[0]+self.Ne,non_sequences=[self.S,self.W_inner]) # S = S+W self.S_inner1 = theano.function(inputs=[vinner],outputs=None,updates=updates1,allow_input_downcast=True) self.S_inner2 = theano.function(inputs=[vinner],outputs=None,updates=updates2,allow_input_downcast=True) #------------------------------------------ #------------------------------------------ # outter connection (input spikes): # type input: index list voutter = T.fvector(name='voutter') #deltaWoutter = theano.scan(fn=lambda j: self.W_inp[j,:]+self.S[1,:],sequences=voutter) deltaWoutter,updatesout1 = theano.scan(fn=add_f1,sequences=voutter.nonzero()[0],non_sequences=[self.S,self.W_inp]) self.S_inp = theano.function(inputs=[voutter],outputs=None,updates=updatesout1,allow_input_downcast=True) #------------------------------------ #------------------------------------- #===================================================================== # Update Synapses (STDP | STDC) # Pre:: Apre += self.dApre, w+=Apost # Post:: Apost+=self.dApost, w+=Apre # # USpreInner :: Perform Pre function No.1 in inner connections # UWInner :: Perform Pre function No.2 in inner connections # UpreInner :: Function def add_synap_pre(i,p,po,s,q): # i :: sequence # p :: pre | post # s :: dApre | dApost # q :: W index = T.nonzero(q[i,:self.Ne]) np = T.inc_subtensor(p[i,index],s) ## tmp = p[i,:] ## tmp=T.inc_subtensor(tmp[index],s) ## np=T.set_subtensor(p[i,:],tmp) #np = T.inc_subtensor(p[i,:],s) nw = T.inc_subtensor(q[i,:],po[i,:]) nw=T.clip(nw,0,self.wmax) return {p:np,q:nw} def add_synap_pre_inp(i,p,po,s,q): # i :: sequence # p :: pre | post # s :: dApre | dApost # q :: W index = T.nonzero(q[i,:self.Ne]) np = T.inc_subtensor(p[i,index],s) ## tmp = p[i,:] ## tmp=T.inc_subtensor(tmp[index],s) ## np=T.set_subtensor(p[i,:],tmp) #np = T.inc_subtensor(p[i,:],s) nw = T.inc_subtensor(q[i,:],po[i,:]) 
nw=T.clip(nw,0,self.wmax) return {p:np,q:nw} def add_synap_post(i,po,p,s,q): # i:: sequence # po:: post # p:: pre # s:: dA # q:: W index = T.nonzero(q[:self.Ne,i]) npo = T.inc_subtensor(po[index,i],s) nw = T.inc_subtensor(q[:,i],p[:,i]) nw = T.clip(nw,0,self.wmax) return {po:npo,q:nw} def add_synap_post_inp(i,po,p,s,q): # i:: sequence # po:: post # p:: pre # s:: dA # q:: W index = T.nonzero(q[:self.Ne,i]) npo = T.inc_subtensor(po[index,i],s) nw = T.inc_subtensor(q[:,i],p[:,i]) nw = T.clip(nw,0,self.wmax) return {po:npo,q:nw} add_dA = T.fscalar('add_dA') add_p,add_po,add_q = T.fmatrices('add_p','add_po','add_q') #------------------------------------------------------------------------- #USinner,updatesUinner = theano.scan(fn=add_synap_pre,sequences=vinner,non_sequences=[self.Spre_inner,self.Spost_inp,self.dApre,self.W_inner]) 'USinner,updatesUinner = theano.scan(fn=add_synap_pre,sequences=vinner.nonzero()[0],non_sequences=[add_p,add_po,add_dA,add_q])' #USinner1,updatesUinner1 = theano.scan(fn=add_synap_pre,sequences=vinner,non_sequences=[self.Spost_inner,self.Spre_inner,self.dApost,self.W_inner]) #------------------------------------------------------------------------- #UpostInner = theano.function(inputs[vinner],updates={self.Spost_inner:USpostInner}) #UpostInp = theano.function(inputs=[vinner],updates={self.W_inner:UWInnerpost}) 'USinner_f = theano.function(inputs=[vinner,add_p,add_po,add_dA,add_q],outputs=None,updates=updatesUinner)' #USinner_step2 = theano.function(inputs=[vinner,add_p,add_po,add_dA,add_q],outputs=None,updates=updatesUinner) USinner_inner_pre,updatesUinner_inner_pre = theano.scan(fn=add_synap_pre,sequences=vinner[:self.Ne].nonzero()[0],non_sequences=[self.Spre_inner,self.Spost_inner,add_dA,self.W_inner]) self.USinner_f_inner_pre = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUinner_inner_pre,allow_input_downcast=True) USinner_innerpost,updatesUinner_inner_post = theano.scan(fn=add_synap_post,sequences=vinner[:self.Ne].nonzero()[0],non_sequences=[self.Spost_inner,self.Spre_inner,add_dA,self.W_inner]) self.USinner_f_inner_post = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUinner_inner_post,allow_input_downcast=True) USinner_inp_pre,updatesUSinner_inp_pre =theano.scan(fn=add_synap_pre_inp,sequences=vinner.nonzero()[0],non_sequences=[self.Spre_inp,self.Spost_inp,add_dA,self.W_inp]) self.USinner_f_inp_pre = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUSinner_inp_pre,allow_input_downcast=True) USinner_inp_post,updatesUSinner_inp_post =theano.scan(fn=add_synap_post_inp,sequences=vinner[:self.Ne].nonzero()[0],non_sequences=[self.Spost_inp,self.Spre_inp,add_dA,self.W_inp]) self.USinner_f_inp_post = theano.function(inputs=[vinner,add_dA],outputs=None,updates=updatesUSinner_inp_post,allow_input_downcast=True) # Call reset function def reset_v(index,vr): nv = T.set_subtensor(self.S[0,index],vr) return{self.S:nv} resetV,resetV_update = theano.scan(fn=reset_v,sequences=vinner.nonzero()[0],non_sequences=[U]) self.resetV_f = theano.function(inputs=[vinner,U],outputs=None,updates=resetV_update,allow_input_downcast=True) setvalue = T.fscalar('setvalue') iv = T.ivector('iv') def reset_state(i,value,state): nstate = T.set_subtensor(state[i,:],value) return {state:nstate} reset_S_state,Upreset_S_state = theano.scan(fn=reset_state,sequences=iv,non_sequences=[setvalue,self.S]) self.reset_S_fn = theano.function(inputs=[iv,setvalue],outputs=None,updates=Upreset_S_state)
def __init__(self, We, char_embedd_table_initial, params): lstm_layers_num = 1 emb_size = We.shape[1] self.eta = params.eta self.num_labels = params.num_labels self.en_hidden_size = params.en_hidden_size self.de_hidden_size = params.de_hidden_size self.lstm_layers_num = params.lstm_layers_num self._train = None self._utter = None self.params = [] self.encoder_lstm_layers = [] self.decoder_lstm_layers = [] self.hos = [] self.Cos = [] char_embedd_dim = params.char_embedd_dim char_dic_size = len(params.char_dic) char_embedd_table = theano.shared(char_embedd_table_initial) encoderInputs = tensor.imatrix() decoderInputs, decoderTarget = tensor.imatrices(2) encoderMask, TF, decoderMask, decoderInputs0 = tensor.fmatrices(4) char_input_var = tensor.itensor3(name='char-inputs') ci = tensor.itensor3() use_dropout = tensor.fscalar() use_dropout0 = tensor.fscalar() self.lookuptable = theano.shared(We) #### the last one is for the stary symbole self.de_lookuptable = theano.shared(name="Decoder LookUpTable", value=init_xavier_uniform( self.num_labels + 1, self.de_hidden_size), borrow=True) self.linear = theano.shared( name="Linear", value=init_xavier_uniform( self.de_hidden_size + 2 * self.en_hidden_size, self.num_labels), borrow=True) self.linear_bias = theano.shared( name="Hidden to Bias", value=np.asarray(np.random.randn(self.num_labels, ) * 0., dtype=theano.config.floatX), borrow=True) #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*en_hidden_size, self.de_hidden_size), borrow = True) #self.hidden_bias = theano.shared( # name="Hidden to Bias", # value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) , # borrow=True # ) #self.params += [self.linear, self.de_lookuptable, self.hidden_decode, self.hidden_bias] #concatenate self.params += [ self.lookuptable, self.linear, self.linear_bias, self.de_lookuptable ] #the initial hidden state of decoder lstm is zeros #(max_sent_size, batch_size, hidden_size) state_below = self.lookuptable[encoderInputs.flatten()].reshape( (encoderInputs.shape[0], encoderInputs.shape[1], emb_size)) layer_char_input = lasagne.layers.InputLayer(shape=(None, None, Max_Char_Length), input_var=char_input_var, name='char-input') layer_char = lasagne.layers.reshape(layer_char_input, (-1, [2])) layer_char_embedding = lasagne.layers.EmbeddingLayer( layer_char, input_size=char_dic_size, output_size=char_embedd_dim, W=char_embedd_table, name='char_embedding') layer_char = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1)) # first get some necessary dimensions or parameters conv_window = 3 num_filters = params.num_filters # construct convolution layer cnn_layer = lasagne.layers.Conv1DLayer( layer_char, num_filters=num_filters, filter_size=conv_window, pad='full', nonlinearity=lasagne.nonlinearities.tanh, name='cnn') # infer the pool size for pooling (pool size should go through all time step of cnn) _, _, pool_size = cnn_layer.output_shape # construct max pool layer pool_layer = lasagne.layers.MaxPool1DLayer(cnn_layer, pool_size=pool_size) # reshape the layer to match lstm incoming layer [batch * sent_length, num_filters, 1] --> [batch, sent_length, num_filters] output_cnn_layer = lasagne.layers.reshape( pool_layer, (-1, encoderInputs.shape[0], [1])) char_params = lasagne.layers.get_all_params(output_cnn_layer, trainable=True) self.params += char_params char_state_below = lasagne.layers.get_output(output_cnn_layer) char_state_below = dropout_layer(char_state_below, use_dropout, trng) 
char_state_shuff = char_state_below.dimshuffle(1, 0, 2) state_below = tensor.concatenate([state_below, char_state_shuff], axis=2) state_below = dropout_layer(state_below, use_dropout, trng) for _ in range(self.lstm_layers_num): enclstm_f = LSTM(emb_size + num_filters, self.en_hidden_size) enclstm_b = LSTM(emb_size + num_filters, self.en_hidden_size, True) self.encoder_lstm_layers.append(enclstm_f) #append self.encoder_lstm_layers.append(enclstm_b) #append self.params += enclstm_f.params + enclstm_b.params #concatenate hs_f, Cs_f = enclstm_f.forward(state_below, encoderMask) hs_b, Cs_b = enclstm_b.forward(state_below, encoderMask) hs = tensor.concatenate([hs_f, hs_b], axis=2) Cs = tensor.concatenate([Cs_f, Cs_b], axis=2) hs0 = tensor.concatenate([hs_f[-1], hs_b[0]], axis=1) Cs0 = tensor.concatenate([Cs_f[-1], Cs_b[0]], axis=1) #self.hos += tensor.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias), #self.Cos += tensor.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias), self.hos += tensor.alloc( np.asarray(0., dtype=theano.config.floatX), encoderInputs.shape[1], self.de_hidden_size), self.Cos += tensor.alloc( np.asarray(0., dtype=theano.config.floatX), encoderInputs.shape[1], self.de_hidden_size), state_below = hs Encoder = state_below state_below = self.de_lookuptable[decoderInputs.flatten()].reshape( (decoderInputs.shape[0], decoderInputs.shape[1], self.de_hidden_size)) for i in range(self.lstm_layers_num): declstm = LSTM(self.de_hidden_size, self.de_hidden_size) self.decoder_lstm_layers += declstm, #append self.params += declstm.params #concatenate ho, Co = self.hos[i], self.Cos[i] state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co) ##### Here we include the representation from the decoder decoder_lstm_outputs = tensor.concatenate([state_below, Encoder], axis=2) ei, di, dt = tensor.imatrices(3) #place holders em, dm, tf, di0 = tensor.fmatrices(4) ##################################################### ##################################################### linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear) + self.linear_bias[None, None, :] softmax_outputs, _ = theano.scan( fn=lambda x: tensor.nnet.softmax(x), sequences=[linear_outputs], ) def _NLL(pred, y, m): return -m * tensor.log(pred[tensor.arange(encoderInputs.shape[1]), y]) costs, _ = theano.scan( fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask]) loss = costs.sum() / decoderMask.sum() + params.L2 * sum( lasagne.regularization.l2(x) for x in self.params) #updates = lasagne.updates.adam(loss, self.params, self.eta) #updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9) ################################################### #### using the ground truth when training ################################################## #self._train = theano.function( # inputs=[ei, em, di, dm, dt], # outputs=[loss, softmax_outputs], # updates=updates, # givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt} # ) ######################################################################### ### For schedule sampling ######################################################################### ###### always use privous predict as next input def _step2(ctx_, state_, hs_, Cs_): hs, Cs = [], [] token_idxs = tensor.cast(state_.argmax(axis=-1), "int32") msk_ = tensor.fill( (tensor.zeros_like(token_idxs, dtype="float32")), 1.) 
msk_ = msk_.dimshuffle('x', 0) state_below0 = self.de_lookuptable[token_idxs].reshape( (1, encoderInputs.shape[1], self.de_hidden_size)) for i, lstm in enumerate(self.decoder_lstm_layers): h, C = lstm.forward(state_below0, msk_, hs_[i], Cs_[i]) #mind msk hs += h[-1], Cs += C[-1], state_below0 = h hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable( Cs) state_below0 = state_below0.reshape( (encoderInputs.shape[1], self.de_hidden_size)) state_below0 = tensor.concatenate([ctx_, state_below0], axis=1) newpred = tensor.dot(state_below0, self.linear) + self.linear_bias[None, :] state_below = tensor.nnet.softmax(newpred) ##### the beging symbole probablity is 0 extra_p = tensor.zeros_like(hs[:, :, 0]) state_below = tensor.concatenate([state_below, extra_p.T], axis=1) return state_below, hs, Cs hs0, Cs0 = tensor.as_tensor_variable( self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos, name="Cs0") train_outputs, _ = theano.scan(fn=_step2, sequences=[Encoder], outputs_info=[decoderInputs0, hs0, Cs0], n_steps=encoderInputs.shape[0]) train_predict = train_outputs[0] train_costs, _ = theano.scan( fn=_NLL, sequences=[train_predict, decoderTarget, decoderMask]) train_loss = train_costs.sum() / decoderMask.sum() + params.L2 * sum( lasagne.regularization.l2(x) for x in self.params) #from adam import adam #train_updates = adam(train_loss, self.params, self.eta) #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9) #train_updates = lasagne.updates.sgd(train_loss, self.params, self.eta) #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9) from momentum import momentum train_updates = momentum(train_loss, self.params, params.eta, momentum=0.9) self._train2 = theano.function( inputs=[ei, ci, em, di0, dm, dt, use_dropout0], outputs=[train_loss, train_predict], updates=train_updates, givens={ encoderInputs: ei, char_input_var: ci, encoderMask: em, decoderInputs0: di0, decoderMask: dm, decoderTarget: dt, use_dropout: use_dropout0 } #givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt, TF:tf} ) listof_token_idx = train_predict.argmax(axis=-1) self._utter = theano.function(inputs=[ei, ci, em, di0, use_dropout0], outputs=listof_token_idx, givens={ encoderInputs: ei, char_input_var: ci, encoderMask: em, decoderInputs0: di0, use_dropout: use_dropout0 })
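# The class above compiles its training and decoding functions on caller-facing
# placeholders (ei, ci, em, di0, ...) and maps them onto the graph's symbolic
# variables through `givens`. A minimal, self-contained sketch of that pattern,
# with illustrative variable names (not taken from the class):
import numpy as np
import theano
import theano.tensor as T

enc_inputs = T.imatrix('enc_inputs')   # symbolic variable used inside the graph
enc_mask = T.fmatrix('enc_mask')
masked_sum = (T.cast(enc_inputs, 'float32') * enc_mask).sum()

ei_ph = T.imatrix('ei_ph')             # placeholders exposed to the caller
em_ph = T.fmatrix('em_ph')
masked_sum_fn = theano.function([ei_ph, em_ph], masked_sum,
                                givens={enc_inputs: ei_ph, enc_mask: em_ph})

print(masked_sum_fn(np.ones((3, 2), dtype='int32'),
                    np.ones((3, 2), dtype='float32')))   # 6.0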
def UnitTest_OnestepAttend(): N = 2 #number of sample D = 5 #dimension of input H = 4 #dimension of hidden T_new = 1 #length of per each sample context_dim = 3 K = 5 x = np.linspace(-0.4, 0.6, num=N*T_new*D, dtype = theano.config.floatX).reshape(T_new, N, D) h0= np.linspace(-0.4, 0.8, num=N*H, dtype = theano.config.floatX).reshape(N, H) Wx= np.linspace(-0.2, 0.9, num=4*D*H, dtype = theano.config.floatX).reshape(D, 4*H) Wh= np.linspace(-0.3,0.6, num =4*H*H, dtype = theano.config.floatX).reshape(H,4*H) b = np.linspace(0.0, 0.0, num = 4*H, dtype = theano.config.floatX) Wz= np.linspace(-0.3, 0.6, num=4*H*context_dim, dtype = theano.config.floatX).reshape(context_dim, 4*H) Hcontext = np.linspace(-0.2, 0.6, num=H*K, dtype = theano.config.floatX).reshape(H, K) Zcontext = np.linspace(-0.2, 0.5, num=context_dim*K, dtype= theano.config.floatX).reshape(context_dim, K) Va= np.linspace(0.1, 0.4, num=K, dtype = theano.config.floatX) Va_reshape = Va.reshape(K,1) image_feature_3D = np.linspace(-0.2, 0.5, num=10*N*context_dim, dtype = theano.config.floatX).reshape(N,10, context_dim) h0_theano = h0.reshape(1, N, H) # h0_symb = theano.tensor.ftensor3("h_symb") # lstm_theano_layer.h_m1.set_value(h0_theano) c0_theano = np.zeros((1, N, H), dtype = theano.config.floatX) # c0_symb = theano.tensor.ftensor3("c_symb") # lstm_theano_layer.c_m1.set_value(c0_theano) z0_theano = np.zeros((1, N, context_dim), dtype = theano.config.floatX) x_theano = x.reshape(T_new, N, D, 1) image_feature_input = image_feature_3D weight_y_in_value = np.zeros(( 10, context_dim) , dtype= theano.config.floatX) b_theano= b.reshape(1, 1, 4*H) pdb.set_trace() #symbolic variables initial_h0_layer_out = theano.tensor.tensor3(name = 'h0_initial', dtype = theano.config.floatX) initial_c0_layer_out = theano.tensor.tensor3(name = 'c0_initial', dtype = theano.config.floatX) initial_z0 = T.tensor3(name= 'z0_initial', dtype = theano.config.floatX) weight_y_in = theano.tensor.fmatrix("weight_y") input_data = theano.tensor.tensor3(name ='x', dtype=theano.config.floatX) image_feature_region = theano.tensor.tensor3(name = 'feature_region', dtype = theano.config.floatX) Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym = T.fmatrices(12) Zcontext_sym, Hcontext_sym = T.fmatrices(2) bi = T.ftensor3("bi") bf = T.ftensor3("bf") bc = T.ftensor3("bc") bo = T.ftensor3("bo") Va_sym = T.fcol("Va") out_sym = onestep_attend_tell(input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0, Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym, Zcontext_sym, Hcontext_sym, Va_sym, bi, bf, bc, bo, image_feature_region, weight_y_in) onestep_func = theano.function([input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0, Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym, Zcontext_sym, Hcontext_sym, Va_sym, bi, bf, bc, bo, image_feature_region, weight_y_in], out_sym) list_output = onestep_func(x, h0_theano, c0_theano, z0_theano, Wx[:, :H], Wx[:, H:2*H], Wx[:, 2*H:3*H], Wx[:, 3*H:], Wh[:, :H], Wh[:, H:2*H], Wh[:, 2*H:3*H], Wh[:, 3*H:], Wz[:, :H], Wz[:, H:2*H], Wz[:, 2*H:3*H], Wz[:, 3*H:], Zcontext,Hcontext, Va_reshape, b_theano[:,: , :H], b_theano[:, :, H:2*H], b_theano[:, :, 2*H:3*H], b_theano[:, :, 3*H:], image_feature_input, weight_y_in_value) pdb.set_trace() print(list_output[0].shape) print(list_output[1].shape) print(list_output[2].shape) pdb.set_trace()
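# The call above hands onestep_attend_tell the 4H-wide matrices Wx, Wh and Wz as
# four H-wide gate blocks. A NumPy-only illustration of that slicing, using the
# same shapes as the unit test:
import numpy as np

D, H = 5, 4
Wx = np.linspace(-0.2, 0.9, num=4 * D * H, dtype='float32').reshape(D, 4 * H)

# one H-wide block per LSTM gate: input, forget, cell, output
Wi, Wf, Wc, Wo = Wx[:, :H], Wx[:, H:2 * H], Wx[:, 2 * H:3 * H], Wx[:, 3 * H:]
assert Wi.shape == Wf.shape == Wc.shape == Wo.shape == (D, H)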
def find_all_step_visible_data(all_step_original_data_shared, all_step_visible_data_shared, sigmas_shared, N, steps, output_dims, n_epochs, initial_lr, final_lr, lr_switch, initial_momentum, final_momentum, momentum_switch, penalty_lambda, metric, verbose=0): """Optimize cost wrt all_step_visible_data[t], simultaneously for all t""" # Optimization hyper-parameters initial_lr = np.array(initial_lr, dtype=floath) final_lr = np.array(final_lr, dtype=floath) initial_momentum = np.array(initial_momentum, dtype=floath) final_momentum = np.array(final_momentum, dtype=floath) lr = T.fscalar('lr') lr_shared = theano.shared(initial_lr) momentum = T.fscalar('momentum') momentum_shared = theano.shared(initial_momentum) # Penalty hyper-parameter penalty_lambda_var = T.fscalar('penalty_lambda') penalty_lambda_shared = theano.shared( np.array(penalty_lambda, dtype=floath)) # Yv velocities all_step_visible_progress_shared = [] zero_velocities = np.zeros((N, output_dims), dtype=floath) for t in range(steps): all_step_visible_progress_shared.append( theano.shared(np.array(zero_velocities))) # Cost all_step_original_data_vars = T.fmatrices(steps) all_step_visible_data_vars = T.fmatrices(steps) all_step_visible_progress_vars = T.fmatrices(steps) sigmas_vars = T.fvectors(steps) c_vars = [] for t in range(steps): c_vars.append( cost_var(all_step_original_data_vars[t], all_step_visible_data_vars[t], sigmas_vars[t], metric)) cost = T.sum(c_vars) + penalty_lambda_var * movement_penalty( all_step_visible_data_vars, N) # Setting update for all_step_visible_data velocities grad_Y = T.grad(cost, all_step_visible_data_vars) givens = { lr: lr_shared, momentum: momentum_shared, penalty_lambda_var: penalty_lambda_shared } updates = [] for t in range(steps): updates.append( (all_step_visible_progress_shared[t], momentum * all_step_visible_progress_vars[t] - lr * grad_Y[t])) givens[ all_step_original_data_vars[t]] = all_step_original_data_shared[t] givens[all_step_visible_data_vars[t]] = all_step_visible_data_shared[t] givens[all_step_visible_progress_vars[ t]] = all_step_visible_progress_shared[t] givens[sigmas_vars[t]] = sigmas_shared[t] update_Yvs = theano.function([], cost, givens=givens, updates=updates) # Setting update for all_step_visible_data positions updates = [] givens = dict() for t in range(steps): updates.append( (all_step_visible_data_shared[t], all_step_visible_data_vars[t] + all_step_visible_progress_vars[t])) givens[all_step_visible_data_vars[t]] = all_step_visible_data_shared[t] givens[all_step_visible_progress_vars[ t]] = all_step_visible_progress_shared[t] update_all_step_visible_data = theano.function([], [], givens=givens, updates=updates) # Momentum-based gradient descent for epoch in range(n_epochs): if epoch == lr_switch: lr_shared.set_value(final_lr) if epoch == momentum_switch: momentum_shared.set_value(final_momentum) c = update_Yvs() update_all_step_visible_data() if verbose: print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1, float(c))) all_step_visible_data = [] for t in range(steps): all_step_visible_data.append( np.array(all_step_visible_data_shared[t].get_value(), dtype=floath)) return all_step_visible_data
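# The helper above builds one symbolic matrix per time step with
# T.fmatrices(steps) and sums the per-step costs into a single graph. A minimal
# sketch of that pattern with a toy cost (the squared norm stands in for
# cost_var):
import numpy as np
import theano
import theano.tensor as T

steps = 3
Xvars = T.fmatrices(steps)                    # list of `steps` float32 matrices
c_vars = [T.sqr(X).sum() for X in Xvars]      # one toy cost term per step
cost = T.sum(c_vars)

cost_fn = theano.function(Xvars, cost)
data = [np.ones((2, 2), dtype='float32') for _ in range(steps)]
print(cost_fn(*data))                         # 3 steps * 4 ones = 12.0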
'''
A theano implementation of the T-LSTM
'''
import theano.tensor as T
from theano import function
import numpy as np
import collections
import pdb
import os

#np.seterr(under='warn')

h, b = T.fvectors('h', 'b')
W, X = T.fmatrices('W', 'X')

dotvec = function([h, b], T.dot(h, b))
dot = function([W, h], T.dot(W, h))
#dotF = function([W, h], T.dot(W, h))
#dot = lambda W, h: dotF(W, h.squeeze())
dotW = function([W, X], T.dot(W, X))
layer = function([W, h, b], T.dot(W, h) + b)
#layerF = function([W, h, b], T.dot(W, h) + b)
#layer = lambda W, h, b: layerF(W, h.squeeze(), b.squeeze())
sigmoid = function([h], T.nnet.ultra_fast_sigmoid(h))
#sigmoidF = function([h], T.nnet.ultra_fast_sigmoid(h))
#sigmoid = lambda h: sigmoidF(h.squeeze())
tanh = function([h], T.tanh(h))
#tanhF = function([h], T.tanh(h))
#tanh = lambda h: tanhF(h.squeeze())
add = function([h, b], h + b)
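# Quick sanity check of the compiled helpers above (input values are arbitrary
# examples; ultra_fast_sigmoid is an approximation, so outputs are approximate):
W_val = np.eye(3, dtype='float32')
h_val = np.arange(3, dtype='float32')           # [0, 1, 2]
b_val = np.ones(3, dtype='float32')

print(layer(W_val, h_val, b_val))               # [1. 2. 3.]
print(dotvec(h_val, b_val))                     # 3.0
print(sigmoid(np.zeros(3, dtype='float32')))    # roughly [0.5 0.5 0.5]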
def __init__(self, We, params): lstm_layers_num = 1 en_hidden_size = We.shape[1] self.eta = params.eta self.num_labels = params.num_labels self.en_hidden_size = en_hidden_size self.de_hidden_size = params.de_hidden_size self.lstm_layers_num = params.lstm_layers_num self._train = None self._utter = None self.params = [] self.encoder_lstm_layers = [] self.decoder_lstm_layers = [] self.hos = [] self.Cos = [] encoderInputs = tensor.imatrix() decoderInputs, decoderTarget = tensor.imatrices(2) encoderMask, TF, decoderMask, decoderInputs0 = tensor.fmatrices(4) self.lookuptable = theano.shared(We) #### the last one is for the stary symbole self.de_lookuptable = theano.shared(name="Decoder LookUpTable", value=init_xavier_uniform( self.num_labels + 1, self.de_hidden_size), borrow=True) self.linear = theano.shared(name="Linear", value=init_xavier_uniform( self.de_hidden_size, self.num_labels), borrow=True) self.hidden_decode = theano.shared(name="Hidden to Decode", value=init_xavier_uniform( 2 * en_hidden_size, self.de_hidden_size), borrow=True) self.hidden_bias = theano.shared( name="Hidden to Bias", value=np.asarray(np.random.randn(self.de_hidden_size, ) * 0., dtype=theano.config.floatX), borrow=True) self.params += [ self.linear, self.de_lookuptable, self.hidden_decode, self.hidden_bias ] #concatenate #(max_sent_size, batch_size, hidden_size) state_below = self.lookuptable[encoderInputs.flatten()].reshape( (encoderInputs.shape[0], encoderInputs.shape[1], self.en_hidden_size)) for _ in range(self.lstm_layers_num): enclstm_f = LSTM(self.en_hidden_size) enclstm_b = LSTM(self.en_hidden_size, True) self.encoder_lstm_layers.append(enclstm_f) #append self.encoder_lstm_layers.append(enclstm_b) #append self.params += enclstm_f.params + enclstm_b.params #concatenate hs_f, Cs_f = enclstm_f.forward(state_below, encoderMask) hs_b, Cs_b = enclstm_b.forward(state_below, encoderMask) hs = tensor.concatenate([hs_f, hs_b], axis=2) Cs = tensor.concatenate([Cs_f, Cs_b], axis=2) self.hos += tensor.tanh( tensor.dot(hs[-1], self.hidden_decode) + self.hidden_bias), self.Cos += tensor.tanh( tensor.dot(Cs[-1], self.hidden_decode) + self.hidden_bias), state_below = hs state_below = self.de_lookuptable[decoderInputs.flatten()].reshape( (decoderInputs.shape[0], decoderInputs.shape[1], self.de_hidden_size)) for i in range(self.lstm_layers_num): declstm = LSTM(self.de_hidden_size) self.decoder_lstm_layers += declstm, #append self.params += declstm.params #concatenate ho, Co = self.hos[i], self.Cos[i] state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co) decoder_lstm_outputs = state_below ei, di, dt = tensor.imatrices(3) #place holders em, dm, tf, di0 = tensor.fmatrices(4) ##################################################### ##################################################### linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear) softmax_outputs, updates = theano.scan( fn=lambda x: tensor.nnet.softmax(x), sequences=[linear_outputs], ) def _NLL(pred, y, m): return -m * tensor.log(pred[tensor.arange(encoderInputs.shape[1]), y]) costs, _ = theano.scan( fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask]) loss = costs.sum() / decoderMask.sum() updates = lasagne.updates.adam(loss, self.params, self.eta) #updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9) ################################################### #### using the ground truth when training ################################################## self._train = theano.function(inputs=[ei, em, di, dm, dt], outputs=[loss, 
softmax_outputs], updates=updates, givens={ encoderInputs: ei, encoderMask: em, decoderInputs: di, decoderMask: dm, decoderTarget: dt }) ######################################################################### ### For schedule sampling ######################################################################### ###### always use privous predict as next input def _step2(state_, hs_, Cs_): hs, Cs = [], [] token_idxs = tensor.cast(state_.argmax(axis=-1), "int32") msk_ = tensor.fill( (tensor.zeros_like(token_idxs, dtype="float32")), 1) msk_ = msk_.dimshuffle('x', 0) state_below0 = self.de_lookuptable[token_idxs].reshape( (1, encoderInputs.shape[1], self.de_hidden_size)) for i, lstm in enumerate(self.decoder_lstm_layers): h, C = lstm.forward(state_below0, msk_, hs_[i], Cs_[i]) #mind msk hs += h[-1], Cs += C[-1], state_below0 = h hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable( Cs) newpred = tensor.dot(state_below0, self.linear).reshape( (encoderInputs.shape[1], self.num_labels)) state_below = tensor.nnet.softmax(newpred) return state_below, hs, Cs hs0, Cs0 = tensor.as_tensor_variable( self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos, name="Cs0") train_outputs, _ = theano.scan(fn=_step2, outputs_info=[decoderInputs0, hs0, Cs0], n_steps=encoderInputs.shape[0]) train_predict = train_outputs[0] train_costs, _ = theano.scan( fn=_NLL, sequences=[train_predict, decoderTarget, decoderMask]) train_loss = train_costs.sum() / decoderMask.sum() train_updates = lasagne.updates.adam(train_loss, self.params, self.eta) #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9) self._train2 = theano.function( inputs=[ei, em, di0, dm, dt], outputs=[train_loss, train_predict], updates=train_updates, givens={ encoderInputs: ei, encoderMask: em, decoderInputs0: di0, decoderMask: dm, decoderTarget: dt } #givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt, TF:tf} ) listof_token_idx = train_predict.argmax(axis=-1) self._utter = theano.function(inputs=[ei, em, di0], outputs=listof_token_idx, givens={ encoderInputs: ei, encoderMask: em, decoderInputs0: di0 })
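# The decoder cost above scans _NLL over time and normalizes by the mask sum. A
# self-contained sketch of that masked negative log-likelihood with illustrative
# shapes (time, batch, labels):
import numpy as np
import theano
import theano.tensor as T

probs = T.ftensor3('probs')      # (time, batch, labels) softmax outputs
targets = T.imatrix('targets')   # (time, batch) gold label indices
mask = T.fmatrix('mask')         # (time, batch), 1.0 where a real token exists

def nll_step(pred, y, m):
    # pred: (batch, labels); pick the probability assigned to the gold label
    return -m * T.log(pred[T.arange(pred.shape[0]), y])

costs, _ = theano.scan(fn=nll_step, sequences=[probs, targets, mask])
loss = costs.sum() / mask.sum()
loss_fn = theano.function([probs, targets, mask], loss)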
import mnist
import numpy as np
import theano
import theano.tensor as T


def init_weights(n_in, n_out):
    weights = np.random.randn(n_in, n_out) / np.sqrt(n_in)
    return theano.shared(np.asarray(weights, dtype=theano.config.floatX))


def feed_forward(X, w_h, w_o):
    h = T.nnet.sigmoid(T.dot(X, w_h))
    return T.nnet.softmax(T.dot(h, w_o))


trX, trY, teX, teY = mnist.load_data(one_hot=True)

w_h, w_o = init_weights(28 * 28, 100), init_weights(100, 10)
num_epochs, batch_size, learn_rate = 30, 10, 0.2

X, Y = T.fmatrices('X', 'Y')
y_ = feed_forward(X, w_h, w_o)

weights = [w_h, w_o]
grads = T.grad(cost=T.nnet.categorical_crossentropy(y_, Y).mean(), wrt=weights)
train = theano.function(
    inputs=[X, Y],
    updates=[[w, w - g * learn_rate] for w, g in zip(weights, grads)],
    allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=T.argmax(y_, axis=1))

for i in range(num_epochs):
    for j in xrange(0, len(trX), batch_size):
        train(trX[j:j + batch_size], trY[j:j + batch_size])
    print i, np.mean(predict(teX) == np.argmax(teY, axis=1))
def __init__(self,
             state='x',
             measurement='z',
             motion_transition=None,
             measurement_transition=None):

    self.N = len(state.split(' '))
    self.M = len(measurement.split(' '))

    self.X, self.Z = T.fvectors('X', 'Z')
    self.P, self.Q, self.R = T.fmatrices('P', 'Q', 'R')
    self.F, self.H = T.matrices('F', 'H')
    self.dt = T.scalar('dt')

    self.X_ = T.dot(self.F, self.X)
    self.fX_ = G.jacobian(T.flatten(self.X_), self.X)

    self.P_ = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + self.dt * self.Q

    self.h = T.dot(self.H, self.X_)
    self.y = self.Z - self.h
    self.hX_ = G.jacobian(self.h, self.X_)

    self.matrix_inv = T.nlinalg.MatrixInverse()

    self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
    self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

    self.X__ = self.X_ + T.dot(self.K, self.y)
    self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)

    self.prediction = theano.function(
        inputs=[self.X, self.P, self.Q, self.F, self.dt],
        outputs=[self.X_, self.P_],
        allow_input_downcast=True)

    self.update = theano.function(
        inputs=[self.X, self.Z, self.P, self.Q, self.R, self.F, self.H, self.dt],
        outputs=[self.X__, self.P__],
        allow_input_downcast=True)

    if motion_transition is None:
        self.motion_transition = np.eye(self.N)
    else:
        self.motion_transition = np.array(motion_transition)

    if measurement_transition is None:
        self.measurement_transition = np.eye(self.M)
    else:
        self.measurement_transition = np.array(measurement_transition)
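# A hypothetical usage sketch of the filter above for a 1-D constant-velocity
# model (the class name and all numeric values are assumptions for
# illustration; allow_input_downcast handles the float64 inputs):
import numpy as np

ekf = ExtendedKalmanFilter(state='x v', measurement='z')   # class name assumed

dt = 0.1
F = np.array([[1.0, dt], [0.0, 1.0]])   # motion Jacobian: x += v * dt
H = np.array([[1.0, 0.0]])              # only the position is measured
X = np.zeros(2)                         # initial state [x, v]
P = np.eye(2)                           # initial covariance
Q = 0.01 * np.eye(2)                    # process noise
R = np.array([[0.1]])                   # measurement noise
z = np.array([0.5])                     # a single position measurement

X_pred, P_pred = ekf.prediction(X, P, Q, F, dt)
X_post, P_post = ekf.update(X_pred, z, P_pred, Q, R, F, H, dt)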
def find_Ys(Xs_shared, Ys_shared, sigmas_shared, N, steps, output_dims, n_epochs, initial_lr, final_lr, lr_switch, init_stdev, initial_momentum, final_momentum, momentum_switch, lmbda, metric, verbose=0): """Optimize cost wrt Ys[t], simultaneously for all t""" # Optimization hyperparameters initial_lr = np.array(initial_lr, dtype=floath) final_lr = np.array(final_lr, dtype=floath) initial_momentum = np.array(initial_momentum, dtype=floath) final_momentum = np.array(final_momentum, dtype=floath) lr = T.fscalar('lr') lr_shared = theano.shared(initial_lr) momentum = T.fscalar('momentum') momentum_shared = theano.shared(initial_momentum) # Penalty hyperparameter lmbda_var = T.fscalar('lmbda') lmbda_shared = theano.shared(np.array(lmbda, dtype=floath)) # Yv velocities Yvs_shared = [] zero_velocities = np.zeros((N, output_dims), dtype=floath) for t in range(steps): Yvs_shared.append(theano.shared(np.array(zero_velocities))) # Cost Xvars = T.fmatrices(steps) Yvars = T.fmatrices(steps) Yv_vars = T.fmatrices(steps) sigmas_vars = T.fvectors(steps) c_vars = [] for t in range(steps): c_vars.append(cost_var(Xvars[t], Yvars[t], sigmas_vars[t], metric)) cost = T.sum(c_vars) + lmbda_var * movement_penalty(Yvars, N) # Setting update for Ys velocities grad_Y = T.grad(cost, Yvars) givens = { lr: lr_shared, momentum: momentum_shared, lmbda_var: lmbda_shared } updates = [] for t in range(steps): updates.append((Yvs_shared[t], momentum * Yv_vars[t] - lr * grad_Y[t])) givens[Xvars[t]] = Xs_shared[t] givens[Yvars[t]] = Ys_shared[t] givens[Yv_vars[t]] = Yvs_shared[t] givens[sigmas_vars[t]] = sigmas_shared[t] update_Yvs = theano.function([], cost, givens=givens, updates=updates) # Setting update for Ys positions updates = [] givens = dict() for t in range(steps): updates.append((Ys_shared[t], Yvars[t] + Yv_vars[t])) givens[Yvars[t]] = Ys_shared[t] givens[Yv_vars[t]] = Yvs_shared[t] update_Ys = theano.function([], [], givens=givens, updates=updates) # Momentum-based gradient descent for epoch in range(n_epochs): if epoch == lr_switch: lr_shared.set_value(final_lr) if epoch == momentum_switch: momentum_shared.set_value(final_momentum) c = update_Yvs() update_Ys() if verbose: print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1, float(c))) Ys = [] for t in range(steps): Ys.append(np.array(Ys_shared[t].get_value(), dtype=floath)) return Ys
def __init__(self, inf=1e37): pos, vel = T.fmatrices(['pos', 'vel']) nc, N, n_steps = T.iscalars(['nc', 'N', 'n_steps']) ra, rb, re, r0 = T.fscalars(['ra', 'rb', 're', 'r0']) v0, j, b = T.fscalars(['v0', 'J', 'b']) nu = trng.uniform(size=(N, 2), low=0.0, high=3.14159, dtype='floatX') def distance_tensor(X): E = X.reshape((X.shape[0], 1, -1)) - X.reshape((1, X.shape[0], -1)) D = T.sqrt(T.sum(T.square(E), axis=2)) return D def direction_tensor(X): E = X.reshape((X.shape[0], 1, -1)) - X.reshape((1, X.shape[0], -1)) L = T.sqrt(T.sum(T.square(E), axis=2)) L = T.pow(L + T.identity_like(L), -1) L = T.stack([L, L, L], axis=2) return L * E def neighbourhood(X): D = distance_tensor(X) N = T.argsort(D, axis=0) mask = T.cast(T.lt(N, nc), 'float32') return N[1:nc + 1], mask def alignment(X, Y): n, d = neighbourhood(X) return T.sum(Y[n], axis=0) def cohesion(X, inf=100.0): D = distance_tensor(X) E = direction_tensor(X) n, d = neighbourhood(X) F = T.zeros_like(E) D = T.stack([D, D, D], axis=2) d = T.stack([d, d, d], axis=2) c1 = T.lt(D, rb) c2 = T.and_(T.gt(D, rb), T.lt(D, ra)) c3 = T.and_(T.gt(D, ra), T.lt(D, r0)) F = T.set_subtensor(F[c1], -E[c1]) F = T.set_subtensor(F[c2], 0.25 * (D[c2] - re) / (ra - re) * E[c2]) F = T.set_subtensor(F[c3], E[c3]) return T.sum(d * F, axis=0) def perturbation(nu=nu): phi = nu[:, 0] theta = 2.0 * nu[:, 1] return T.stack([ T.sin(theta) * T.sin(phi), T.cos(theta) * T.sin(phi), T.cos(phi) ], axis=1) def step(X, dX): X_ = X + dX V_ = j * nc / v0 * (alignment( X, dX)) + b * (cohesion(X)) + nc * (perturbation()) dV = T.sqrt(T.sum(T.square(V_), axis=1)).reshape(V_.shape[0], 1) dV = T.stack([dV, dV, dV], axis=1) V = v0 * V_ / dV return T.cast(X_, 'float32'), T.cast(V, 'float32') def probability(X, Y): n, d = neighbourhood(X) vDv = T.batched_dot(Y[n].swapaxes(0, 1), Y) p = T.exp((j / 2.0) * T.sum(vDv, axis=1)) return p / T.sum(p) sim, update = theano.scan(step, outputs_info=[pos, vel], n_steps=n_steps) pos_, vel_ = sim mean_final_velocity = 1 / (N * v0) * T.sqrt( T.sum(T.square(T.sum(vel_[-1], axis=0)))) particle_probability = probability(pos_[-1], vel_[-1]) self.f = theano.function( [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps], [pos_, vel_], allow_input_downcast=True) self.g = theano.function( [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps], mean_final_velocity, allow_input_downcast=True) self.h = theano.function( [pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps], particle_probability, allow_input_downcast=True)
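# A hypothetical call to the compiled simulation above, assuming the module
# defines the random stream `trng` used by perturbation(); the class name,
# particle count and model parameters are illustrative, and arguments follow
# the positional order of self.f:
import numpy as np

swarm = Swarm()                             # class name assumed
n_particles, n_steps = 50, 100
pos0 = np.random.rand(n_particles, 3).astype('float32')
vel0 = np.random.rand(n_particles, 3).astype('float32')

# inputs: pos, vel, nc, ra, rb, r0, re, j, v0, b, N, n_steps
positions, velocities = swarm.f(pos0, vel0, 10, 0.8, 0.2, 1.0, 0.5,
                                1.0, 0.05, 5.0, n_particles, n_steps)
mean_speed = swarm.g(pos0, vel0, 10, 0.8, 0.2, 1.0, 0.5,
                     1.0, 0.05, 5.0, n_particles, n_steps)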
def __init__(self, Ne, Ni, n_inp, W_inp=None, W_inner=None): '''class SNNgroup's self Parameters: self.A: update matrix self.S: neuron state varaibles self.W_inner: inner-connect weights in the group self.W_inp: input weights self.spikes: the spikes matrix in the time t self.SpkC : spike containers input : ''' self.number = Ne + Ni self.Ne = Ne self.Ni = Ni self.mV = self.ms = 1e-3 # units dt = 1 * self.ms # timestep self.dt = dt taum = 20 * self.ms # membrane time constant taue = 5 * self.ms taui = 10 * self.ms #self.Vt=-1*self.mV # threshold = -50+49 self.Vt = 15 * self.mV #threshold = -55+70 #self.Vr=-11*self.mV # reset = -60+49 self.Vr = 0 * self.mV # reset = -70+70 self.Vi = -10 * self.mV # VI = -80+70 self.dApre = .0001 #self.dApre = .95 #changed into .95 self.dApost = -self.dApre * 1.05 self.tauP = 20 * self.ms #self.input = input self.n_inp = n_inp self.weight = .001 self.weightIn = 1. self.wmax = 200 * self.weight zero = np.array([0]).astype(theano.config.floatX) self.zero = theano.shared(zero, name='zero', borrow=True) """ Equations --------- eqs=''' dv/dt = (ge*70mV-gi*10-(v+70*mV))/(20*ms) : volt dge/dt = -ge/(5*self.ms) : volt dgi/dt = -gi/(10*self.ms) : volt ''' """ # Update matrix A = np.array([[np.exp(-dt / taum), 0, 0], [ taue / (taum - taue) * (np.exp(-dt / taum) - np.exp(-dt / taue)), np.exp(-dt / taue), 0 ], [ -taui / (taum - taui) * (np.exp(-dt / taum) - np.exp(-dt / taui)), 0, np.exp(-dt / taui) ]], dtype=theano.config.floatX).T A = theano.shared(value=A, name='A', borrow=True) self.A = A # State varible : [v;ge;gi] (size=3*self.number) S = np.ones((1, self.number), dtype=theano.config.floatX) * self.Vr S = np.vstack((S, np.zeros((2, self.number), dtype=theano.config.floatX))) self.S_init = S S = theano.shared(value=S, name='S', borrow=True) self.S = S if W_inner == None: # weights of inner connections (size= self.number*self.number) self.W_inner_ini = np.ones( (self.number, self.number), dtype=theano.config.floatX) * self.weight #self.W_inner_ini[Ne:,:] = self.weightIn self.W_inner_ini[Ne:, :] = self.weight wtmp = np.eye(self.number) ind = wtmp.nonzero() self.W_inner_ini[ind] = 0 W_inner = theano.shared(value=self.W_inner_ini, name='W_inner', borrow=True) self.W_inner = W_inner else: self.W_inner = theano.shared(W_inner, name='W_inner', borrow=True) # weights of input connections (size=n_inp*self.number rng = np.random.RandomState(1234) if W_inp == None: #W_inp = np.ones((self.n_inp,self.number)).astype(theano.config.floatX) #needs specification later #W_inp = np.random.rand(self.n_inp,self.number).astype(theano.config.floatX)*.00001*self.ms #needs specification later self.W_inp_ini = np.ones((self.n_inp, self.number)).astype( theano.config.floatX) * self.weight self.W_inp_ini[:, self.Ne:] = self.weightIn W_inp = theano.shared(self.W_inp_ini, name='W_inp', borrow=True) self.W_inp = W_inp else: self.W_inp = theano.shared(W_inp, name='W_inp', borrow=True) # Spike Container #spkC = theano.shared(value=np.empty((1,self.number)).astype(theano.config.floatX),name='spkC',borrow=True) spkC = np.empty((1, self.number)).astype(theano.config.floatX) self.spkC = spkC #spikes=np.empty((self.number,1),dtype=theano.config.floatX) #self.spikes = theano.shared(value=spikes,name='spikes',borrow=True) # not sure the dtype of sp_history self.sp_history = np.array([]) #output = np.empty(self.number,dtype=theano.config.floatX) #self.output = theano.shared(value=output,name='output',borrow=True) self.V_record = np.empty((1, self.number)) self.ge_record = np.empty((1, self.number)) 
self.gi_record = np.empty((1, self.number)) #================================================ # Process Function Initial # input:: 0-1 vector '''Update Schedule: 1.Update state variables of SNNgroup: dot(A,S) 1.Update state variables of Synapses: dot(exp(-dt/tau),Ssynapse), including W_inp and W_inner 2.Call thresholding function: S[0,:]>Vt 3.Push spikes into SpikeContainer 4.Propagate spikes via Connection(possibly with delays) 5.Update state variables of Synapses (STDP) 6.Call reset function on neurons which has spiked''' Ne = self.Ne Ni = self.Ni m = T.fmatrix(name='m') #self.Vt = T.as_tensor_variable(self.Vt,'Vt') # "Update state function:: stat()" # return np array # shape(stat()) = shape(self.S) S_update = T.dot(self.A, self.S) self.stat = theano.function(inputs=[], outputs=[], updates={self.S: S_update}) #============================================================ # Update state of Synapses # Update matrix of Synapse A_STDP = np.array([[np.exp(-self.dt / self.tauP), 0], [0, np.exp(-self.dt / self.tauP)]], dtype=theano.config.floatX) # Spre_inner :: pre synapse of inner connections # Spost_inner:: post synapse of inner connections # Spre_inp :: pre synapse of input conenctions # Spost_inp :: post synapse of input connections self.Spre_inner_ini = np.zeros((self.number, self.number), dtype=theano.config.floatX) Spre_inner = theano.shared(self.Spre_inner_ini, name='Spre_inner', borrow=True) self.Spre_inner = Spre_inner self.Spost_inner_ini = np.zeros((self.number, self.number), dtype=theano.config.floatX) Spost_inner = theano.shared(value=self.Spost_inner_ini, name='Spost_inner', borrow=True) self.Spost_inner = Spost_inner self.Spre_inp_ini = np.zeros((self.n_inp, self.number)).astype( theano.config.floatX) #needs specification later Spre_inp = theano.shared(value=self.Spre_inp_ini, name='Spre_inp', borrow=True) self.Spre_inp = Spre_inp self.Spost_inp_ini = np.zeros((self.n_inp, self.number)).astype( theano.config.floatX) #needs specification later Spost_inp = theano.shared(value=self.Spost_inp_ini, name='Spost_inp', borrow=True) self.Spost_inp = Spost_inp U = T.fscalar('U') UM = T.fmatrix('UM') #UpreV = theano.shared(A_STDP[0,0],name='UpreV',borrow=True) # Wpre = UpreV*Wpre #UpostV = theano.shared(A_STDP[1,1],name='UpostV',borrow=True) self.tmp = np.array( np.exp(-self.dt / self.tauP).astype(theano.config.floatX)) self.SynFresh = theano.shared(self.tmp, name='SynFresh', borrow=True) self.UpdateSpre_inner = theano.function( inputs=[], outputs=None, updates={self.Spre_inner: T.dot(self.SynFresh, self.Spre_inner)}, allow_input_downcast=True) self.UpdateSpost_inner = theano.function( inputs=[], outputs=None, updates={self.Spost_inner: T.dot(self.SynFresh, self.Spost_inner)}, allow_input_downcast=True) self.UpdateSpre_inp = theano.function( inputs=[], outputs=None, updates={self.Spre_inp: T.dot(self.SynFresh, self.Spre_inp)}, allow_input_downcast=True) self.UpdateSpost_inp = theano.function( inputs=[], outputs=None, updates={self.Spost_inp: T.dot(self.SynFresh, self.Spost_inp)}, allow_input_downcast=True) #------------------------------------------ #tmp = math.exp(-self.dt/self.tauP) #tmp = T.as_tensor(0.95122945) #================================================================ #------------------------------------------ # "thresholding function:: spike_fun()" # type return :: np.ndarray list # shape return:: shape(spike_fun()) = (self.number,) self.spike_fun = theano.function( inputs=[U], #[self.S] outputs=(T.gt(self.S[0, :], U))) #type outputs: np.ndarray,shape::(nL,) #'outputs = 
(self.S[0,:]>Vt).astype(theano.config.floatX)), #type outputs: list' #'updates={self.spikes:(self.S[0,:]>Vt).astype(theano.config.floatX)}' #------------------------------------ #------------------------------------ #================================================================= # "Push spike into Container function:: spCfun(vector)" # type vector :: np.array([],dtype=theano.config.floatX)!!! # type return :: np array # shape return:: shape(spCfun()) = ( shape(self.spkC)[0]+1 , shape(self.spkC)[1] ) #updates={self.spkC:T.stack(self.spkC,sp)}) '''spike_prop = theano.function( #wrong inputs = [], outputs =[], updates = {self.S:np.dot(self.W_inner,self.spikes)+self.S})#wrong''' #------------------------------- #-------------------------------- #==================================================================== # Propagate spikes # inner connection: # S_inner = f(inputs, outputs, updates) # Param:: inputs: spike 0-1 vector # Param:: inputs: spike is from function-> spike_fun # S_inner(spk)::-> for i in spk[0:Ne].nonzero()[0]: # S[1,:] = Winner[i,:]+S[1,:] (excitatory conenction) # for j in spk[Ne,:].nonzero()[0]: # S[2,:] = Winner[j,:]+S[2,:] (inhibitory connection) vinner = T.fvector( name='vinner') # vinner = spk :: np.array((1,self.number) def add_f1(i, p, q): np = T.inc_subtensor(p[1, :], q[i, :]) #ge return {p: np} def add_f2(i, p, q): np = T.inc_subtensor(p[2, :], q[i, :]) #gi return {p: np} #deltaWinner1,updates1 = theano.scan(fn=lambda i: self.W_inner[i,:]*i+self.S[1,:], sequences=vinner[0:Ne]) deltaWinner1, updates1 = theano.scan( fn=add_f1, sequences=vinner[0:Ne].nonzero()[0], non_sequences=[self.S, self.W_inner]) #deltaWinner2,updates2 = theano.scan(fn=lambda i: self.W_inner[i,:]*i+self.S[2,:], sequences=vinner[Ne:]) deltaWinner2, updates2 = theano.scan( fn=add_f2, sequences=vinner[Ne:].nonzero()[0] + self.Ne, non_sequences=[self.S, self.W_inner]) # S = S+W self.S_inner1 = theano.function(inputs=[vinner], outputs=None, updates=updates1, allow_input_downcast=True) self.S_inner2 = theano.function(inputs=[vinner], outputs=None, updates=updates2, allow_input_downcast=True) #------------------------------------------ #------------------------------------------ # outter connection (input spikes): # type input: index list voutter = T.fvector(name='voutter') #deltaWoutter = theano.scan(fn=lambda j: self.W_inp[j,:]+self.S[1,:],sequences=voutter) deltaWoutter, updatesout1 = theano.scan( fn=add_f1, sequences=voutter.nonzero()[0], non_sequences=[self.S, self.W_inp]) self.S_inp = theano.function(inputs=[voutter], outputs=None, updates=updatesout1, allow_input_downcast=True) #------------------------------------ #------------------------------------- #===================================================================== # Update Synapses (STDP | STDC) # Pre:: Apre += self.dApre, w+=Apost # Post:: Apost+=self.dApost, w+=Apre # # USpreInner :: Perform Pre function No.1 in inner connections # UWInner :: Perform Pre function No.2 in inner connections # UpreInner :: Function def add_synap_pre(i, p, po, s, q): # i :: sequence # p :: pre | post # s :: dApre | dApost # q :: W index = T.nonzero(q[i, :self.Ne]) np = T.inc_subtensor(p[i, index], s) ## tmp = p[i,:] ## tmp=T.inc_subtensor(tmp[index],s) ## np=T.set_subtensor(p[i,:],tmp) #np = T.inc_subtensor(p[i,:],s) nw = T.inc_subtensor(q[i, :], po[i, :]) nw = T.clip(nw, 0, self.wmax) return {p: np, q: nw} def add_synap_pre_inp(i, p, po, s, q): # i :: sequence # p :: pre | post # s :: dApre | dApost # q :: W index = T.nonzero(q[i, :self.Ne]) np = 
T.inc_subtensor(p[i, index], s) ## tmp = p[i,:] ## tmp=T.inc_subtensor(tmp[index],s) ## np=T.set_subtensor(p[i,:],tmp) #np = T.inc_subtensor(p[i,:],s) nw = T.inc_subtensor(q[i, :], po[i, :]) nw = T.clip(nw, 0, self.wmax) return {p: np, q: nw} def add_synap_post(i, po, p, s, q): # i:: sequence # po:: post # p:: pre # s:: dA # q:: W index = T.nonzero(q[:self.Ne, i]) npo = T.inc_subtensor(po[index, i], s) nw = T.inc_subtensor(q[:, i], p[:, i]) nw = T.clip(nw, 0, self.wmax) return {po: npo, q: nw} def add_synap_post_inp(i, po, p, s, q): # i:: sequence # po:: post # p:: pre # s:: dA # q:: W index = T.nonzero(q[:self.Ne, i]) npo = T.inc_subtensor(po[index, i], s) nw = T.inc_subtensor(q[:, i], p[:, i]) nw = T.clip(nw, 0, self.wmax) return {po: npo, q: nw} add_dA = T.fscalar('add_dA') add_p, add_po, add_q = T.fmatrices('add_p', 'add_po', 'add_q') #------------------------------------------------------------------------- #USinner,updatesUinner = theano.scan(fn=add_synap_pre,sequences=vinner,non_sequences=[self.Spre_inner,self.Spost_inp,self.dApre,self.W_inner]) 'USinner,updatesUinner = theano.scan(fn=add_synap_pre,sequences=vinner.nonzero()[0],non_sequences=[add_p,add_po,add_dA,add_q])' #USinner1,updatesUinner1 = theano.scan(fn=add_synap_pre,sequences=vinner,non_sequences=[self.Spost_inner,self.Spre_inner,self.dApost,self.W_inner]) #------------------------------------------------------------------------- #UpostInner = theano.function(inputs[vinner],updates={self.Spost_inner:USpostInner}) #UpostInp = theano.function(inputs=[vinner],updates={self.W_inner:UWInnerpost}) 'USinner_f = theano.function(inputs=[vinner,add_p,add_po,add_dA,add_q],outputs=None,updates=updatesUinner)' #USinner_step2 = theano.function(inputs=[vinner,add_p,add_po,add_dA,add_q],outputs=None,updates=updatesUinner) USinner_inner_pre, updatesUinner_inner_pre = theano.scan( fn=add_synap_pre, sequences=vinner[:self.Ne].nonzero()[0], non_sequences=[ self.Spre_inner, self.Spost_inner, add_dA, self.W_inner ]) self.USinner_f_inner_pre = theano.function( inputs=[vinner, add_dA], outputs=None, updates=updatesUinner_inner_pre, allow_input_downcast=True) USinner_innerpost, updatesUinner_inner_post = theano.scan( fn=add_synap_post, sequences=vinner[:self.Ne].nonzero()[0], non_sequences=[ self.Spost_inner, self.Spre_inner, add_dA, self.W_inner ]) self.USinner_f_inner_post = theano.function( inputs=[vinner, add_dA], outputs=None, updates=updatesUinner_inner_post, allow_input_downcast=True) USinner_inp_pre, updatesUSinner_inp_pre = theano.scan( fn=add_synap_pre_inp, sequences=vinner.nonzero()[0], non_sequences=[self.Spre_inp, self.Spost_inp, add_dA, self.W_inp]) self.USinner_f_inp_pre = theano.function( inputs=[vinner, add_dA], outputs=None, updates=updatesUSinner_inp_pre, allow_input_downcast=True) USinner_inp_post, updatesUSinner_inp_post = theano.scan( fn=add_synap_post_inp, sequences=vinner[:self.Ne].nonzero()[0], non_sequences=[self.Spost_inp, self.Spre_inp, add_dA, self.W_inp]) self.USinner_f_inp_post = theano.function( inputs=[vinner, add_dA], outputs=None, updates=updatesUSinner_inp_post, allow_input_downcast=True) # Call reset function def reset_v(index, vr): nv = T.set_subtensor(self.S[0, index], vr) return {self.S: nv} resetV, resetV_update = theano.scan(fn=reset_v, sequences=vinner.nonzero()[0], non_sequences=[U]) self.resetV_f = theano.function(inputs=[vinner, U], outputs=None, updates=resetV_update, allow_input_downcast=True) setvalue = T.fscalar('setvalue') iv = T.ivector('iv') def reset_state(i, value, state): nstate = 
T.set_subtensor(state[i, :], value) return {state: nstate} reset_S_state, Upreset_S_state = theano.scan( fn=reset_state, sequences=iv, non_sequences=[setvalue, self.S]) self.reset_S_fn = theano.function(inputs=[iv, setvalue], outputs=None, updates=Upreset_S_state)
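# A hypothetical single-timestep driver following the update schedule listed in
# the class docstring (constructor arguments, thresholds and the guard on empty
# spike vectors are assumptions for illustration):
import numpy as np

group = SNNgroup(Ne=80, Ni=20, n_inp=10)
inp_spikes = np.zeros(group.n_inp, dtype='float32')        # input spike 0-1 vector

group.stat()                                               # 1. neuron state: S <- A . S
group.UpdateSpre_inner(); group.UpdateSpost_inner()        # 1. decay synaptic traces
group.UpdateSpre_inp();   group.UpdateSpost_inp()

spikes = group.spike_fun(np.float32(group.Vt)).astype('float32')  # 2./3. threshold

if spikes.any():
    group.S_inner1(spikes)                                 # 4. excitatory propagation
    group.S_inner2(spikes)                                 #    inhibitory propagation
    group.USinner_f_inner_pre(spikes, group.dApre)         # 5. STDP on inner synapses
    group.resetV_f(spikes, group.Vr)                       # 6. reset spiking neurons
if inp_spikes.any():
    group.S_inp(inp_spikes)                                # 4. input propagation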
def __init__(self, We, params): lstm_layers_num = 1 en_hidden_size = We.shape[1] self.eta = params.eta self.num_labels = params.num_labels self.en_hidden_size = en_hidden_size self.de_hidden_size = params.de_hidden_size self.lstm_layers_num = params.lstm_layers_num self._train = None self._utter = None self.params = [] self.encoder_lstm_layers = [] self.decoder_lstm_layers = [] self.hos = [] self.Cos = [] encoderInputs = tensor.imatrix() decoderInputs, decoderTarget = tensor.imatrices(2) encoderMask, TF, decoderMask, decoderInputs0 = tensor.fmatrices(4) self.lookuptable = theano.shared(We) #### the last one is for the stary symbole self.de_lookuptable = theano.shared(name="Decoder LookUpTable", value=init_xavier_uniform( self.num_labels + 1, self.de_hidden_size), borrow=True) self.linear = theano.shared( name="Linear", value=init_xavier_uniform(self.de_hidden_size + 2 * en_hidden_size, self.num_labels), borrow=True) self.linear_bias = theano.shared( name="Hidden to Bias", value=np.asarray(np.random.randn(self.num_labels, ) * 0., dtype=theano.config.floatX), borrow=True) #self.hidden_decode = theano.shared(name="Hidden to Decode", value= init_xavier_uniform(2*en_hidden_size, self.de_hidden_size), borrow = True) #self.hidden_bias = theano.shared( # name="Hidden to Bias", # value=np.asarray(np.random.randn(self.de_hidden_size, )*0., dtype=theano.config.floatX) , # borrow=True # ) #self.params += [self.linear, self.de_lookuptable, self.hidden_decode, self.hidden_bias] #concatenate self.params += [self.linear, self.linear_bias, self.de_lookuptable ] #the initial hidden state of decoder lstm is zeros #(max_sent_size, batch_size, hidden_size) state_below = self.lookuptable[encoderInputs.flatten()].reshape( (encoderInputs.shape[0], encoderInputs.shape[1], self.en_hidden_size)) for _ in range(self.lstm_layers_num): enclstm_f = LSTM(self.en_hidden_size) enclstm_b = LSTM(self.en_hidden_size, True) self.encoder_lstm_layers.append(enclstm_f) #append self.encoder_lstm_layers.append(enclstm_b) #append self.params += enclstm_f.params + enclstm_b.params #concatenate hs_f, Cs_f = enclstm_f.forward(state_below, encoderMask) hs_b, Cs_b = enclstm_b.forward(state_below, encoderMask) hs = tensor.concatenate([hs_f, hs_b], axis=2) Cs = tensor.concatenate([Cs_f, Cs_b], axis=2) hs0 = tensor.concatenate([hs_f[-1], hs_b[0]], axis=1) Cs0 = tensor.concatenate([Cs_f[-1], Cs_b[0]], axis=1) #self.hos += tensor.tanh(tensor.dot(hs0, self.hidden_decode) + self.hidden_bias), #self.Cos += tensor.tanh(tensor.dot(Cs0, self.hidden_decode) + self.hidden_bias), self.hos += tensor.alloc( np.asarray(0., dtype=theano.config.floatX), encoderInputs.shape[1], self.de_hidden_size), self.Cos += tensor.alloc( np.asarray(0., dtype=theano.config.floatX), encoderInputs.shape[1], self.de_hidden_size), state_below = hs Encoder = state_below ei, di, dt = tensor.imatrices(3) #place holders em, dm, tf, di0 = tensor.fmatrices(4) self.encoder_function = theano.function(inputs=[ei, em], outputs=Encoder, givens={ encoderInputs: ei, encoderMask: em }) ##################################################### ##################################################### state_below = self.de_lookuptable[decoderInputs.flatten()].reshape( (decoderInputs.shape[0], decoderInputs.shape[1], self.de_hidden_size)) for i in range(self.lstm_layers_num): declstm = LSTM(self.de_hidden_size) self.decoder_lstm_layers += declstm, #append self.params += declstm.params #concatenate ho, Co = self.hos[i], self.Cos[i] state_below, Cs = declstm.forward(state_below, decoderMask, ho, Co) 
##### Here we include the representation from the decoder decoder_lstm_outputs = tensor.concatenate([state_below, Encoder], axis=2) linear_outputs = tensor.dot(decoder_lstm_outputs, self.linear) + self.linear_bias[None, None, :] softmax_outputs, _ = theano.scan( fn=lambda x: tensor.nnet.softmax(x), sequences=[linear_outputs], ) def _NLL(pred, y, m): return -m * tensor.log(pred[tensor.arange(encoderInputs.shape[1]), y]) costs, _ = theano.scan( fn=_NLL, sequences=[softmax_outputs, decoderTarget, decoderMask]) loss = costs.sum() / decoderMask.sum() + params.L2 * sum( lasagne.regularization.l2(x) for x in self.params) updates = lasagne.updates.adam(loss, self.params, self.eta) #updates = lasagne.updates.apply_momentum(updates, self.params, momentum=0.9) ################################################### #### using the ground truth when training ################################################## self._train = theano.function(inputs=[ei, em, di, dm, dt], outputs=[loss, softmax_outputs], updates=updates, givens={ encoderInputs: ei, encoderMask: em, decoderInputs: di, decoderMask: dm, decoderTarget: dt }) ######################################################################### ### For schedule sampling ######################################################################### ###### always use privous predict as next input def _step2(ctx_, state_, hs_, Cs_): ### ctx_: b x h ### state_ : b x h ### hs_ : 1 x b x h the first dimension is the number of the decoder layers ### Cs_ : 1 x b x h the first dimension is the number of the decoder layers hs, Cs = [], [] token_idxs = tensor.cast(state_.argmax(axis=-1), "int32") msk_ = tensor.fill( (tensor.zeros_like(token_idxs, dtype="float32")), 1) msk_ = msk_.dimshuffle('x', 0) state_below0 = self.de_lookuptable[token_idxs].reshape( (1, ctx_.shape[0], self.de_hidden_size)) for i, lstm in enumerate(self.decoder_lstm_layers): h, C = lstm.forward(state_below0, msk_, hs_[i], Cs_[i]) #mind msk hs += h[-1], Cs += C[-1], state_below0 = h hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable( Cs) state_below0 = state_below0.reshape( (ctx_.shape[0], self.de_hidden_size)) state_below0 = tensor.concatenate([ctx_, state_below0], axis=1) newpred = tensor.dot(state_below0, self.linear) + self.linear_bias[None, :] state_below = tensor.nnet.softmax(newpred) ##### the beging symbole probablity is 0 extra_p = tensor.zeros_like(hs[:, :, 0]) state_below = tensor.concatenate([state_below, extra_p.T], axis=1) return state_below, hs, Cs ctx_0, state_0 = tensor.fmatrices(2) hs_0 = tensor.ftensor3() Cs_0 = tensor.ftensor3() state_below_tmp, hs_tmp, Cs_tmp = _step2(ctx_0, state_0, hs_0, Cs_0) self.f_next = theano.function([ctx_0, state_0, hs_0, Cs_0], [state_below_tmp, hs_tmp, Cs_tmp], name='f_next') hs0, Cs0 = tensor.as_tensor_variable( self.hos, name="hs0"), tensor.as_tensor_variable(self.Cos, name="Cs0") train_outputs, _ = theano.scan(fn=_step2, sequences=[Encoder], outputs_info=[decoderInputs0, hs0, Cs0], n_steps=encoderInputs.shape[0]) train_predict = train_outputs[0] train_costs, _ = theano.scan( fn=_NLL, sequences=[train_predict, decoderTarget, decoderMask]) train_loss = train_costs.sum() / decoderMask.sum() + params.L2 * sum( lasagne.regularization.l2(x) for x in self.params) ##from adam import adam ##train_updates = adam(train_loss, self.params, self.eta) #train_updates = lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9) #train_updates = lasagne.updates.sgd(train_loss, self.params, self.eta) #train_updates = 
lasagne.updates.apply_momentum(train_updates, self.params, momentum=0.9) from momentum import momentum train_updates = momentum(train_loss, self.params, params.eta, momentum=0.9) self._train2 = theano.function( inputs=[ei, em, di0, dm, dt], outputs=[train_loss, train_predict], updates=train_updates, givens={ encoderInputs: ei, encoderMask: em, decoderInputs0: di0, decoderMask: dm, decoderTarget: dt } #givens={encoderInputs:ei, encoderMask:em, decoderInputs:di, decoderMask:dm, decoderTarget:dt, TF:tf} ) listof_token_idx = train_predict.argmax(axis=-1) self._utter = theano.function(inputs=[ei, em, di0], outputs=listof_token_idx, givens={ encoderInputs: ei, encoderMask: em, decoderInputs0: di0 })
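# A hypothetical greedy-decoding call for the model above (the class name,
# shapes and the start-symbol convention for decoderInputs0 are assumptions):
import numpy as np

model = Seq2SeqModel(We, params)                  # class name assumed

sent_len, batch = 12, 4
ei = np.random.randint(0, We.shape[0], size=(sent_len, batch)).astype('int32')
em = np.ones((sent_len, batch), dtype='float32')

# initial decoder "prediction": one-hot on the extra start symbol
di0 = np.zeros((batch, params.num_labels + 1), dtype='float32')
di0[:, -1] = 1.0

label_idx = model._utter(ei, em, di0)             # (sent_len, batch) argmax labels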