import helper.reberGrammar as reberGrammar
import numpy as np
import theano
import theano.tensor as T
import helper.dt_utils as du

dtype = theano.config.floatX
theano.config.exception_verbosity = "high"

#train_data = reberGrammar.get_n_embedded_examples(1000)
train_data = du.laod_pose()

# squashing of the gates should result in values between 0 and 1,
# therefore we use the logistic function
sigma = lambda x: 1 / (1 + T.exp(-x))

# for the other activation function we use tanh
act = T.tanh

# sequences: x_t
# prior results: h_tm1, c_tm1
# non-sequences: W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y
def one_lstm_step(x_t, h_tm1, c_tm1,
                  W_xi, W_hi, W_ci, b_i,
                  W_xf, W_hf, W_cf, b_f,
                  W_xc, W_hc, b_c,
                  W_xo, W_ho, W_co, b_o,
                  W_hy, b_y):
    # input gate, forget gate, cell update and output gate (with peephole connections)
    i_t = sigma(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi) + theano.dot(c_tm1, W_ci) + b_i)
    f_t = sigma(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf) + theano.dot(c_tm1, W_cf) + b_f)
    c_t = f_t * c_tm1 + i_t * act(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c)
    o_t = sigma(theano.dot(x_t, W_xo) + theano.dot(h_tm1, W_ho) + theano.dot(c_t, W_co) + b_o)
    h_t = o_t * act(c_t)
    y_t = sigma(theano.dot(h_t, W_hy) + b_y)
    return [h_t, c_t, y_t]
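# A minimal sketch of driving one_lstm_step over a whole sequence with
# theano.scan. The sizes n_in / n_hidden / n_out and the random shared
# variables below are illustrative assumptions, not values taken from the
# rest of this script.
n_in, n_hidden, n_out = 54, 128, 54

def shared_mat(n_rows, n_cols):
    return theano.shared(np.random.uniform(-0.1, 0.1, (n_rows, n_cols)).astype(dtype))

def shared_vec(n):
    return theano.shared(np.zeros(n, dtype=dtype))

# parameters in the same order as the one_lstm_step signature
W_xi, W_hi, W_ci, b_i = shared_mat(n_in, n_hidden), shared_mat(n_hidden, n_hidden), shared_mat(n_hidden, n_hidden), shared_vec(n_hidden)
W_xf, W_hf, W_cf, b_f = shared_mat(n_in, n_hidden), shared_mat(n_hidden, n_hidden), shared_mat(n_hidden, n_hidden), shared_vec(n_hidden)
W_xc, W_hc, b_c = shared_mat(n_in, n_hidden), shared_mat(n_hidden, n_hidden), shared_vec(n_hidden)
W_xo, W_ho, W_co, b_o = shared_mat(n_in, n_hidden), shared_mat(n_hidden, n_hidden), shared_mat(n_hidden, n_hidden), shared_vec(n_hidden)
W_hy, b_y = shared_mat(n_hidden, n_out), shared_vec(n_out)
params = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f,
          W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y]

x = T.matrix('x')   # one sequence, shape (n_timesteps, n_in)
h0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))

# scan feeds x_t plus the previous h and c into one_lstm_step at every step;
# y_t is collected but not fed back (hence the None in outputs_info)
[h_vals, c_vals, y_vals], _ = theano.scan(fn=one_lstm_step,
                                          sequences=x,
                                          outputs_info=[h0, c0, None],
                                          non_sequences=params)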
            # ... tail of the optimizer construction inside the lstm class
            lr=lr)

        # plain SGD updates, kept for reference:
        # gparams = T.grad(cost, self.params)
        # updates = OrderedDict()
        # for param, gparam in zip(self.params, gparams):
        #     updates[param] = param - gparam * lr
        # self.loss = theano.function(inputs=[X, Y], outputs=[cxe, mse, cost])
        # self.train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        self.train = theano.function(inputs=[X, Y], outputs=cost,
                                     updates=optimizer.getUpdates(), allow_input_downcast=True)
        self.predictions = theano.function(inputs=[X], outputs=y_vals.dimshuffle(1, 0, 2),
                                           allow_input_downcast=True)
        self.debug = theano.function(inputs=[X, Y],
                                     outputs=[X.shape, Y.shape, y_vals.shape, cxe.shape])

(X_train, Y_train, X_test, Y_test) = du.laod_pose()
batch_size = 64
n_train_batches = len(X_train) // batch_size
n_test_batches = len(X_test) // batch_size
print "Number of batches: " + str(n_train_batches)
print "Training size: " + str(len(X_train))

model = lstm(1024, 2, 54, batch_size=batch_size, single_output=False,
             output_activation=theano.tensor.nnet.sigmoid, cost_function='mse')
print("Model loaded")

lrate = 0.0001   # learning rate for sgd (not used for adadelta and rmsprop)
nb_epochs = 250
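# A hypothetical minibatch training loop for the model above; it assumes
# X_train / Y_train can be sliced per batch and that model.train takes one
# minibatch of inputs and targets at a time (the lstm class body is not
# shown here, so treat this as a sketch rather than the actual script).
for epoch in xrange(nb_epochs):
    epoch_cost = 0.0
    for b in xrange(n_train_batches):
        x_batch = X_train[b * batch_size:(b + 1) * batch_size]
        y_batch = Y_train[b * batch_size:(b + 1) * batch_size]
        epoch_cost += model.train(x_batch, y_batch)
    print "Epoch " + str(epoch) + " mean training cost: " + str(epoch_cost / n_train_batches)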
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.recurrent import LSTM
import helper.dt_utils as du
import numpy

maxlen = 1024        # number of timesteps per sequence
n_features = 54      # per-timestep input dimension (assumed; must be > 0)

# build the model: LSTM -> Dropout -> linear regression output
print('Build model...')
model = Sequential()
model.add(LSTM(2, return_sequences=False, input_shape=(maxlen, n_features)))
model.add(Dropout(0.2))
model.add(Dense(54))
model.add(Activation('linear'))

model.compile(loss='mean_squared_error', optimizer='rmsprop')

train_data = du.laod_pose()
i, o = train_data[1]    # one (input, target) pair from the pose helper (assumed layout)
model.fit(i, o, batch_size=1, nb_epoch=1)
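# Hypothetical usage after fitting: generate a pose prediction for new input.
# x_new and its shape (1, maxlen, n_features) are illustrative assumptions,
# not data produced by the script above.
x_new = numpy.zeros((1, maxlen, n_features), dtype='float32')
predicted = model.predict(x_new, batch_size=1)
print(predicted.shape)   # one 54-dimensional pose vector per input sequence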