def __init__(self):
    self.dt = 1
    self.xdim = 1
    self.udim = 1
    # symbolic state and control inputs: float32 column vectors of shape (n, 1)
    self.x = T.fcol()
    self.u = T.fcol()
    # stack state and control into one flat vector, then back into a column
    self.xu_flat = T.flatten(T.concatenate([self.x, self.u]))
    self.xu = self.xu_flat.dimshuffle(0, 'x')
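# Hedged illustration (standalone sketch, not part of the class above): it shows the
# same shape bookkeeping outside the class — two fcol inputs are concatenated,
# flattened to a vector, and dimshuffled back into a column. All names are made up.
import numpy as np
import theano
import theano.tensor as T

x = T.fcol('x')
u = T.fcol('u')
xu = T.flatten(T.concatenate([x, u])).dimshuffle(0, 'x')
f = theano.function([x, u], xu)
out = f(np.ones((1, 1), dtype='float32'), np.zeros((1, 1), dtype='float32'))
assert out.shape == (2, 1)   # xdim + udim rows, a single column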
def test_ndim_mismatch(self):
    data = self.rng.rand(5).astype('float32')
    x = f32sc(data)
    y = tensor.fcol('y')
    cond = theano.tensor.iscalar('cond')
    self.assertRaises(TypeError, ifelse, cond, x, y)
    self.assertRaises(TypeError, ifelse, cond, y, x)
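# Hedged illustration (not part of the test suite above): `ifelse` requires both
# branches to have the same number of dimensions, so the vector/column mix in the
# test raises TypeError, while two fcol branches compile fine. All names below are
# made up for the example.
import numpy
import theano
import theano.tensor as tensor
from theano.ifelse import ifelse

cond = tensor.iscalar('cond')
a = tensor.fcol('a')
b = tensor.fcol('b')
pick = theano.function([cond, a, b], ifelse(cond, a, b))
col = numpy.ones((3, 1), dtype='float32')
assert (pick(1, col, 2 * col) == col).all()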
def test_theano_grad(self):
    quagga.processor_type = 'gpu'
    r = []
    for i in xrange(self.N):
        for sparse in [True, False]:
            batch_size, dim = self.rng.random_integers(2000, size=2)
            if sparse:
                true_labels = np.zeros((batch_size, dim), np.float32)
                for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                    true_labels[k, j] = 1.0
            else:
                true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
            x = self.rng.randn(batch_size, dim).astype(np.float32)
            mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
            device_id = 0
            for with_mask in [False, True]:
                # Theano model
                th_x = T.fmatrix()
                th_mask = T.fcol()
                th_true_labels = T.fmatrix() if sparse else T.ivector()
                if with_mask:
                    probs = T.nnet.softmax(th_mask * th_x)
                else:
                    probs = T.nnet.softmax(th_x)
                loss = T.mean(T.nnet.categorical_crossentropy(probs, th_true_labels))
                if with_mask:
                    get_theano_grads = theano.function([th_x, th_true_labels, th_mask],
                                                       T.grad(loss, wrt=th_x))
                    th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0], mask)
                else:
                    get_theano_grads = theano.function([th_x, th_true_labels],
                                                       T.grad(loss, wrt=th_x))
                    th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0])
                # quagga model
                x_gpu = Connector(Matrix.from_npa(x), device_id)
                true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                x_gpu.fprop()
                true_labels_gpu.fprop()
                if with_mask:
                    mask_gpu.fprop()
                softmax_ce_block.fprop()
                softmax_ce_block.bprop()
                q_dL_dx = x_gpu.backward_matrix.to_host()
                r.append(np.allclose(th_dL_dx, q_dL_dx))
    self.assertEqual(sum(r), len(r))
def run_networklg(train_wins, train_wins_labels, test_wins, test_wins_labels,
                  mode="same_random", batch_size=None, weights=None):
    input_var = T.tensor3('inputs')
    target_var = T.fcol('targets')
    if batch_size is None:
        batch_size = len(train_wins_labels)
    #print("Will create Lasagne network")
    netlg = create_networklg(input_var, sliding_window_length, mode=mode, weights=weights)
    #print("Will train network")
    validation_function = train_networklg(netlg, input_var, target_var,
                                          train_wins, train_wins_labels, batch_size)
    test_loss, predictions = test_networklg(netlg, input_var, target_var, validation_function,
                                            test_wins, test_wins_labels, batch_size)
    return test_loss, predictions
def main(num_epochs=1, n_songs_train=1, n_songs_val=1, n_songs_test=1,
         batch_size=256, learning_rate=1e-4):
    """ Main function """
    # Theano config
    theano.config.floatX = 'float32'

    train, val, test = None, None, None
    try:
        train, val, test = use_preparsed_data(outputdir='/zap/tsob/audio/', )
    except:
        train, val, test = get_data(n_songs_train=n_songs_train,
                                    n_songs_val=n_songs_val,
                                    n_songs_test=n_songs_test,
                                    outputdir='/zap/tsob/audio/',
                                    seed=None)

    # Save the returned metadata
    np.savez('/zap/tsob/audio/metadata', train, val, test)

    # Print the dimensions
    print "Data dimensions:"
    for datapt in [train['Xshape'], train['yshape'],
                   val['Xshape'], val['yshape'],
                   test['Xshape'], test['yshape']]:
        print datapt

    # Parse dimensions
    n_train = train['yshape'][0]
    n_val = val['yshape'][0]
    n_test = test['yshape'][0]
    n_chan = train['Xshape'][1]
    n_feats = train['Xshape'][2]
    n_frames = train['Xshape'][3]
    print "n_train = {0}".format(n_train)
    print "n_val = {0}".format(n_val)
    print "n_test = {0}".format(n_test)
    print "n_chan = {0}".format(n_chan)
    print "n_feats = {0}".format(n_feats)
    print "n_frames = {0}".format(n_frames)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4(name='inputs')
    target_var = T.fcol(name='targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions..."),
    network = build_cnn(input_var)
    print("Done.")

    # Create a loss expression for training, i.e., a scalar objective we want to minimize
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.binary_hinge_loss(prediction, target_var)
    loss = loss.mean()

    # Create update expressions for training
    # Here, we'll use adam
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params,
                                   learning_rate=learning_rate,
                                   beta1=0.95, beta2=0.999, epsilon=1e-08)

    # Create a loss expression for validation/testing.
    # The crucial difference here is that we do a deterministic forward pass
    # through the network, disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.binary_hinge_loss(test_prediction, target_var)
    test_loss = test_loss.mean()
    test_pred_fn = theano.function([input_var], test_prediction,
                                   allow_input_downcast=True)

    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function(
        [input_var, target_var],
        loss,
        updates=updates,
        mode=NanGuardMode(  # TODO remove
            nan_is_error=True, inf_is_error=True, big_is_error=True  # TODO remove
        ),  # TODO remove
        allow_input_downcast=True)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc],
                             allow_input_downcast=True)

    # Finally, launch the training loop.
    print("Starting training...")
    train_error_hist = []

    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(train, batch_size, shuffle=True):
            inputs, targets = batch
            train_err_increment = train_fn(inputs, targets)
            train_err += train_err_increment
            train_error_hist.append(train_err_increment)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(val, batch_size, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.8f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.8f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
    print("Done training.")

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    test_predictions = []
    for batch in iterate_minibatches(test, batch_size, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_predictions.append(test_pred_fn(inputs))
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    timestr = str(time.time())
    np.savez('/zap/tsob/audio/model' + timestr + '.npz',
             *lasagne.layers.get_all_param_values(network))
    np.save('/zap/tsob/audio/train_error_hist' + timestr + '.npy', train_error_hist)
    np.save('/zap/tsob/audio/test_predictions' + timestr + '.npy', test_predictions)
    print "Wrote model to {0}, test error histogram to {1}, and test predictions to {2}".format(
        'model' + timestr + '.npz',
        'train_error_hist' + timestr + '.npy',
        'test_predictions' + timestr + '.npy')
def buildModel(self):
    print(' -- Building...')
    x_init = sparse.csr_matrix('x', dtype='float32')
    y_init = T.imatrix('y')
    g_init = T.imatrix('g')
    ind_init = T.ivector('ind')
    sub_path_init = T.imatrix('subPathsBatch')
    mask_init = T.fmatrix('subMask')

    # step train
    x_input = lgl.InputLayer(shape=(None, self.x.shape[1]), input_var=x_init)
    g_input = lgl.InputLayer(shape=(None, 2), input_var=g_init)
    ind_input = lgl.InputLayer(shape=(None, ), input_var=ind_init)
    pair_second = lgl.SliceLayer(g_input, indices=1, axis=1)
    pair_first = lgl.SliceLayer(g_input, indices=0, axis=1)
    pair_first_emd = lgl.EmbeddingLayer(pair_first,
                                        input_size=self.num_ver,
                                        output_size=self.embedding_size)
    emd_to_numver = layers.DenseLayer(pair_first_emd, self.num_ver,
                                      nonlinearity=lg.nonlinearities.softmax)
    index_emd = lgl.EmbeddingLayer(ind_input,
                                   input_size=self.num_ver,
                                   output_size=self.embedding_size,
                                   W=pair_first_emd.W)
    x_to_ydim = layers.SparseLayer(x_input, self.y.shape[1],
                                   nonlinearity=lg.nonlinearities.softmax)
    index_emd = layers.DenseLayer(index_emd, self.y.shape[1],
                                  nonlinearity=lg.nonlinearities.softmax)
    concat_two = lgl.ConcatLayer([x_to_ydim, index_emd], axis=1)
    concat_two = layers.DenseLayer(concat_two, self.y.shape[1],
                                   nonlinearity=lg.nonlinearities.softmax)
    concat_two_output = lgl.get_output(concat_two)
    step_loss = lgo.categorical_crossentropy(concat_two_output, y_init).mean()
    hid_loss = lgl.get_output(x_to_ydim)
    step_loss += lgo.categorical_crossentropy(hid_loss, y_init).mean()
    emd_loss = lgl.get_output(index_emd)
    step_loss += lgo.categorical_crossentropy(emd_loss, y_init).mean()
    step_params = [index_emd.W, index_emd.b, x_to_ydim.W, x_to_ydim.b,
                   concat_two.W, concat_two.b]
    step_updates = lg.updates.sgd(step_loss, step_params,
                                  learning_rate=self.step_learning_rate)
    self.step_train = theano.function([x_init, y_init, ind_init], step_loss,
                                      updates=step_updates,
                                      on_unused_input='ignore')
    self.test_fn = theano.function([x_init, ind_init], concat_two_output,
                                   on_unused_input='ignore')

    # supervised train
    fc_output = lgl.get_output(emd_to_numver)
    pair_second_output = lgl.get_output(pair_second)
    sup_loss = lgo.categorical_crossentropy(fc_output, pair_second_output).sum()
    sup_params = lgl.get_all_params(emd_to_numver, trainable=True)
    sup_updates = lg.updates.sgd(sup_loss, sup_params,
                                 learning_rate=self.sup_learning_rate)
    self.sup_train = theano.function([g_init], sup_loss,
                                     updates=sup_updates,
                                     on_unused_input='ignore')
    cross_entropy = lgo.categorical_crossentropy(fc_output, pair_second_output)
    cross_entropy = T.reshape(cross_entropy, (1, self.unsup_batch_size), ndim=None)
    mask_input = lgl.InputLayer(shape=(None, self.window_size + 1),
                                input_var=mask_init)
    subPath_in = lgl.InputLayer(shape=(None, self.window_size + 1),
                                input_var=sub_path_init)
    sub_path_emd = lgl.EmbeddingLayer(subPath_in,
                                      input_size=self.num_ver,
                                      output_size=self.embedding_size,
                                      W=pair_first_emd.W)
    lstm_layer = lgl.LSTMLayer(sub_path_emd, self.lstm_hidden_units,
                               grad_clipping=3, mask_input=mask_input)

    # handle path weight
    max1 = T.mean(lgl.get_output(lstm_layer), axis=1)
    max2 = T.mean(max1, axis=1)
    max2_init = T.fcol('max2')
    max2_init = T.reshape(max2, ((self.subpath_num, 1)))
    max2_input = lgl.InputLayer(shape=(self.subpath_num, 1), input_var=max2_init)
    max2_input = lgl.BatchNormLayer(max2_input)
    path_weight = lgl.get_output(max2_input)
    path_weight = lg.nonlinearities.sigmoid(path_weight)
    path_weight = 1 + 0.3 * path_weight

    # unsupervised train
    reweight_loss = T.dot(cross_entropy, path_weight)[0][0]
    lstm_params_all = lgl.get_all_params(lstm_layer, trainable=True)
    lstm_params = list(set(lstm_params_all).difference(set(sup_params)))
    lstm_updates = lg.updates.sgd(reweight_loss, lstm_params, learning_rate=0.01)
    self.lstm_fn = theano.function([sub_path_init, g_init, mask_init],
                                   reweight_loss,
                                   updates=lstm_updates,
                                   on_unused_input='ignore')
    alpha_updates = lg.updates.sgd(reweight_loss, sup_params, learning_rate=0.001)
    self.alpha_fn = theano.function([sub_path_init, g_init, mask_init],
                                    reweight_loss,
                                    updates=alpha_updates,
                                    on_unused_input='ignore')
    print(' -- Done!')
def main(num_epochs=100, num_points=1200, compute_flag='cpu'):
    # Arguments passed as string need to be converted to int
    num_epochs = int(num_epochs)
    num_points = int(num_points)
    # Define name of output files
    results_file_name = 'exp_' + str(num_epochs) + '_' + str(num_points) + '_' + compute_flag + '.csv'
    network_file_name = 'network_' + str(num_epochs) + '_' + str(num_points) + '_' + compute_flag
    print 'Saving file to: %s' % results_file_name
    print 'Number of points: %d ' % num_points
    print 'Compute Flag: %s ' % compute_flag
    save_file(results_file_name)
    Deep_learner = DCNN_network.DCNN_network()
    # Define the input tensor
    input_var = T.tensor4('inputs')
    # Define the output tensor (in this case it is a real value or reflectivity)
    if compute_flag == 'gpu3_softmax':
        output_var = T.ivector('targets')
    else:
        output_var = T.fcol('targets')
    # User input to decide which experiment to run, cpu runs were performed
    # to check if the network was working correctly
    if compute_flag == 'cpu':
        network, l_hidden1 = Deep_learner.build_CNN(input_var)
    elif compute_flag == 'cpu2':
        network, l_hidden1 = Deep_learner.build_CNN_2(input_var)
    elif compute_flag == 'cpu3':
        network, l_hidden1 = Deep_learner.build_CNN_3(input_var)
    elif compute_flag == 'gpu2':
        print('gpu2 experiment')
        network, l_hidden1 = Deep_learner.build_DCNN_2(input_var)
    elif compute_flag == 'gpu3':
        print('gpu3 experiment')
        network, l_hidden1 = Deep_learner.build_DCNN_3(input_var)
    elif compute_flag == 'deep':
        network, l_hidden1 = Deep_learner.build_DCNN_deep(input_var)
    elif compute_flag == 'gpu3_softmax':
        network, l_hidden1 = Deep_learner.build_DCNN_3_softmax(input_var)
    else:
        network, l_hidden1 = Deep_learner.build_DCNN(input_var)
    train_prediction = lasagne.layers.get_output(network)
    test_prediction = lasagne.layers.get_output(network)
    if compute_flag == 'gpu3_softmax':
        loss = lasagne.objectives.categorical_crossentropy(train_prediction, output_var)
        loss = loss.mean()
    else:
        # Define the mean square error objective function
        loss = T.mean(lasagne.objectives.squared_error(train_prediction, output_var))
        test_loss = T.mean(lasagne.objectives.squared_error(test_prediction, output_var))
    # Add an L1 regularization on the fully connected dense layer
    l1_penalty = regularize_layer_params(l_hidden1, l1)
    loss = loss + l1_penalty
    test_loss = loss + l1_penalty
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=0.0000001,
                                                momentum=0.9)
    train_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), output_var),
                       dtype=theano.config.floatX)
    # Define theano function which generates and compiles C code for the optimization problem
    train_fn = theano.function([input_var, output_var], [loss, train_acc], updates=updates)
    # test_fn = theano.function([input_var, output_var],test_loss, updates=updates)
    base_path = '/home/an67a/deep_nowcaster/data/dataset2/'
    training_set_list = os.listdir(base_path)
    training_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' not in x, training_set_list)
    validation_set_list = os.listdir(base_path)
    validation_set_list = filter(lambda x: x[-4:] == '.pkl' and 'val' in x, validation_set_list)
    experiment_start_time = time.time()
    # Load Data Set
    DataSet = []
    print('Loading data set...')
    for file_name in training_set_list[:3]:
        print file_name
        temp_file = file(base_path + file_name, 'rb')
        X_train, Y_train = cPickle.load(temp_file)
        temp_file.close()
        Y_train = Y_train.reshape(-1, ).astype('uint8')
        DataSet.append((X_train, Y_train))
    print('Start training...')
    for epoch in range(num_epochs):
        print('Epoch number : %d ' % epoch)
        train_err = 0
        train_batches = 0
        train_acc = 0
        start_time = time.time()
        for data in DataSet:
            # for file_name in training_set_list:
            #     print file_name
            #     temp_file = file(base_path + file_name,'rb')
            #     X_train,Y_train = cPickle.load(temp_file)
            #     Y_train = Y_train.astype('uint8')
            #     temp_file.close()
            for batch in iterate_minibatches(data[0], data[1], 1059, shuffle=False):
                inputs, targets = batch
                err, acc = train_fn(inputs, targets)
                train_err += err
                train_acc += acc
                train_batches += 1
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
        append_file(results_file_name, epoch + 1,
                    round(train_err / train_batches, 2),
                    round((train_acc / train_batches) * 100, 2))
        # Dump the network file every 100 epochs
        if (epoch + 1) % 100 == 0:
            print('creating network file')
            network_file = file('/home/an67a/deep_nowcaster/output/' + network_file_name +
                                '_' + str(epoch + 1) + '.pkl', 'wb')
            cPickle.dump(network, network_file, protocol=cPickle.HIGHEST_PROTOCOL)
            network_file.close()
    time_taken = round(time.time() - experiment_start_time, 2)
    print('The experiment took {:.3f}s'.format(time.time() - experiment_start_time))
    append_file(results_file_name, 'The experiment took', time_taken, 0)
def buildModel(self):
    print(' -- Building...')
    x_init = sparse.csr_matrix('x', dtype='float32')
    y_init = T.imatrix('y')
    gx_init = sparse.csr_matrix('gx', dtype='float32')
    gy_init = T.ivector('gy')
    gz_init = T.vector('gz')
    mask_init = T.fmatrix('subMask')

    # step train
    x_input = lgl.InputLayer(shape=(None, self.x.shape[1]), input_var=x_init)
    x_to_label = layers.SparseLayer(x_input, self.y.shape[1],
                                    nonlinearity=lg.nonlinearities.softmax)
    x_to_emd = layers.SparseLayer(x_input, self.embedding_size)
    W = x_to_emd.W
    x_to_emd = layers.DenseLayer(x_to_emd, self.y.shape[1],
                                 nonlinearity=lg.nonlinearities.softmax)
    x_concat = lgl.ConcatLayer([x_to_label, x_to_emd], axis=1)
    x_concat = layers.DenseLayer(x_concat, self.y.shape[1],
                                 nonlinearity=lg.nonlinearities.softmax)
    pred = lgl.get_output(x_concat)
    step_loss = lgo.categorical_crossentropy(pred, y_init).mean()
    hid_loss = lgl.get_output(x_to_label)
    step_loss += lgo.categorical_crossentropy(hid_loss, y_init).mean()
    emd_loss = lgl.get_output(x_to_emd)
    step_loss += lgo.categorical_crossentropy(emd_loss, y_init).mean()
    step_params = lgl.get_all_params(x_concat)
    step_updates = lg.updates.sgd(step_loss, step_params,
                                  learning_rate=self.step_learning_rate)
    self.step_train = theano.function([x_init, y_init], step_loss,
                                      updates=step_updates)
    self.test_fn = theano.function([x_init], pred)

    # supervised train
    gx_input = lgl.InputLayer(shape=(None, self.x.shape[1]), input_var=gx_init)
    gx_to_emd = layers.SparseLayer(gx_input, self.embedding_size, W=W)
    gx_to_emd = lgl.DenseLayer(gx_to_emd, self.num_ver,
                               nonlinearity=lg.nonlinearities.softmax)
    gx_pred = lgl.get_output(gx_to_emd)
    g_loss = lgo.categorical_crossentropy(gx_pred, gy_init).sum()
    sup_params = lgl.get_all_params(gx_to_emd)
    sup_updates = lg.updates.sgd(g_loss, sup_params,
                                 learning_rate=self.sup_learning_rate)
    self.sup_train = theano.function([gx_init, gy_init, gz_init], g_loss,
                                     updates=sup_updates,
                                     on_unused_input='ignore')

    # handle lstm input
    cross_entropy = lgo.categorical_crossentropy(gx_pred, gy_init)
    cross_entropy = T.reshape(cross_entropy, (1, self.subpath_num), ndim=None)
    mask_input = lgl.InputLayer(shape=(None, self.window_size + 1),
                                input_var=mask_init)
    sub_path_batch1 = sparse.csr_matrix('x', dtype='float32')
    sub_path_input1 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                     input_var=sub_path_batch1)
    sub_path_batch2 = sparse.csr_matrix('x', dtype='float32')
    sub_path_input2 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                     input_var=sub_path_batch2)
    sub_path_batch3 = sparse.csr_matrix('x', dtype='float32')
    sub_path_input3 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                     input_var=sub_path_batch3)
    sub_path_batch4 = sparse.csr_matrix('x', dtype='float32')
    sub_path_input4 = lgl.InputLayer(shape=(None, self.x.shape[1]),
                                     input_var=sub_path_batch4)
    sub_path_emd1 = layers.SparseLayer(sub_path_input1, self.embedding_size, W=W)
    sub_path_emd1 = T.reshape(lgl.get_output(sub_path_emd1),
                              (self.subpath_num, 1, self.embedding_size))
    sub_path_emd2 = layers.SparseLayer(sub_path_input2, self.embedding_size, W=W)
    sub_path_emd2 = T.reshape(lgl.get_output(sub_path_emd2),
                              (self.subpath_num, 1, self.embedding_size))
    sub_path_emd3 = layers.SparseLayer(sub_path_input3, self.embedding_size, W=W)
    sub_path_emd3 = T.reshape(lgl.get_output(sub_path_emd3),
                              (self.subpath_num, 1, self.embedding_size))
    sub_path_emd4 = layers.SparseLayer(sub_path_input4, self.embedding_size, W=W)
    sub_path_emd4 = T.reshape(lgl.get_output(sub_path_emd4),
                              (self.subpath_num, 1, self.embedding_size))
    sub_path_concat = T.concatenate([sub_path_emd1, sub_path_emd2,
                                     sub_path_emd3, sub_path_emd4], axis=1)
    sub_path_concat_layer = lgl.InputLayer(
        shape=(None, self.window_size + 1, self.embedding_size),
        input_var=sub_path_concat)

    # lstm layer
    lstm_layer = lgl.LSTMLayer(sub_path_concat_layer, self.lstm_hidden_units,
                               grad_clipping=3, mask_input=mask_input)

    # handle path weight
    max1 = T.mean(lgl.get_output(lstm_layer), axis=1)
    max2 = T.mean(max1, axis=1)
    max2_init = T.fcol('max2')
    max2_init = T.reshape(max2, ((self.subpath_num, 1)))
    max2_input = lgl.InputLayer(shape=(self.subpath_num, 1), input_var=max2_init)
    max2_input = lgl.BatchNormLayer(max2_input)
    path_weight = lgl.get_output(max2_input)
    path_weight = lg.nonlinearities.sigmoid(path_weight)
    path_weight = 1 + 0.3 * path_weight

    # unsupervised train
    reweight_loss = T.dot(cross_entropy, path_weight)[0][0]
    lstm_params = lgl.get_all_params(lstm_layer, trainable=True)
    lstm_updates = lg.updates.sgd(reweight_loss, lstm_params, learning_rate=0.01)
    self.lstm_fn = theano.function([gx_init, gy_init, gz_init,
                                    sub_path_batch1, sub_path_batch2,
                                    sub_path_batch3, sub_path_batch4,
                                    mask_init],
                                   reweight_loss,
                                   updates=lstm_updates,
                                   on_unused_input='ignore')
    alpha_updates = lg.updates.sgd(reweight_loss, sup_params, learning_rate=0.001)
    self.alpha_fn = theano.function([gx_init, gy_init, gz_init,
                                     sub_path_batch1, sub_path_batch2,
                                     sub_path_batch3, sub_path_batch4,
                                     mask_init],
                                    reweight_loss,
                                    updates=alpha_updates,
                                    on_unused_input='ignore')
    print(' -- Done!')
outputLayerSize)).astype("float32"), name="W2")
b2 = theano.shared(np.zeros(outputLayerSize).astype("float32"), name="b2")
#%%
# Forward propagation
X = T.matrix("X")
z1 = T.dot(X, W1) + b1
a1 = T.nnet.sigmoid(z1)
z2 = T.dot(a1, W2) + b2
# using ReLU improves the results a lot
y_hat = T.nnet.relu(z2)
forward = theano.function([X], y_hat)
#%%
# cost function, gradient and optimizer
epsilon = 0.01
y = T.fcol("y")
loss = 0.5 * ((y - y_hat)**2).sum()
calloss = theano.function([X, y], loss)
# gradient
dW1, dW2 = T.grad(loss, [W1, W2])
db1, db2 = T.grad(loss, [b1, b2])
# optimizer
#%%
train = theano.function(inputs=[X, y],
                        outputs=[y_hat, loss],
                        updates=[[W2, W2 - epsilon * dW2],
                                 [W1, W1 - epsilon * dW1],
                                 [b2, b2 - epsilon * db2],
                                 [b1, b1 - epsilon * db1]])
#%%
cost = []
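#%%
# A minimal sketch (not from the original script, whose opening is truncated above)
# of how the compiled `train` function might be driven. `inputLayerSize` is assumed
# to be defined in the missing part of the script, and the output layer is assumed
# to have a single unit so the targets form a float32 column matching T.fcol.
trainX = np.random.rand(100, inputLayerSize).astype("float32")  # hypothetical toy inputs
trainY = np.random.rand(100, 1).astype("float32")               # hypothetical toy targets
for epoch in range(1000):
    y_pred, batch_loss = train(trainX, trainY)
    cost.append(batch_loss)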
def conv_net(tr_block, val_block, num_epochs, exp_no,
             load_model_weights=False, model_file_name=''):
    #------------------------------------------
    # Model
    input_var_ipw = T.tensor4('inputs')
    input_var_refl = T.tensor4('inputs')
    target_var = T.fcol('targets')
    net, l1_hidden = build_DCNN_softmax_mod_special_refl(input_var_refl)
#    net, l1_hidden = build_2DCNN_softmax_special(input_var_ipw, input_var_refl)
    l2_penalty = regularize_layer_params(l1_hidden, l2)
    prediction = lasagne.layers.get_output(net)
    loss = lasagne.objectives.squared_error(prediction, target_var)
#    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean() + l2_penalty
    if load_model_weights:
        print 'Loading existing model parameters...'
        model_file_name = '../data/1CNNneural_network_refl_' + str(exp_no) + '_100.pkl'
        model_file = file(model_file_name, 'rb')
        model_weights = pkl.load(model_file)
        model_file.close()
        lasagne.layers.set_all_param_values(net, model_weights)
    params = lasagne.layers.get_all_params(net, trainable=True)
    updates = lasagne.updates.adadelta(loss, params)
#    updates = lasagne.updates.nesterov_momentum(
#        loss, params, learning_rate=0.0001, momentum=0.9)
    test_prediction = lasagne.layers.get_output(net, deterministic=True)
    test_loss = lasagne.objectives.squared_error(test_prediction, target_var)
#    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
#                                                            target_var)
    test_loss = test_loss.mean()
#    test_accuracy = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
#                           dtype=theano.config.floatX)
#    train_fn = theano.function([input_var_ipw, input_var_refl, target_var], loss, updates=updates)
    train_fn = theano.function([input_var_refl, target_var], loss, updates=updates)
#    val_fn = theano.function([input_var_ipw, input_var_refl, target_var], [test_loss])
    val_fn = theano.function([input_var_refl, target_var], [test_loss])
    #------------------------------------------
    performance_metrics = {}
    base_path = '/project/uma_michael_zink/deep_nowcaster/data/TrainTest/points_regression/'  # '../data/TrainTest/points/'
    point_files = filter(lambda x: x[-4:] == '.pkl', os.listdir(base_path))
    val_indices = determine_indices(base_path + point_files[8], val_block)
    first_pass = True
#    val_batches = []
#    X_train_full_data = []
    for ep in range(num_epochs):
        performance_metrics[ep + 1] = []
        print 'Train Model for epoch: %d' % (ep)
        print '-' * 50
        train_err = 0.
        train_batches = 0
        if first_pass:
            print 'Loading the entire data to memory!!!!'
            X_train, Y_train, X_val, Y_val = load_data_to_memory(point_files, val_indices)
            first_pass = False
#        X_mean_list = []
#        for ea_point in point_files[:2]:
#            temp_matrix = np.load(base_path + ea_point)
#            # Add this index for reflectivity features alone [:,4:,...]
#            X_train = np.vstack(map(lambda x: temp_matrix[x][1][:,4:,...],[i for i in range(len(temp_matrix)) if i not in val_indices])).astype('float')
#            Y_train = np.vstack(map(lambda x: temp_matrix[x][2],[i for i in range(len(temp_matrix)) if i not in val_indices]))
#            X_mean = X_train.mean(axis = 0)
#            X_train -= X_mean
#            if not first_pass:
#                X_val = np.vstack(map(lambda x: temp_matrix[x][1][:,4:,...],val_indices))
#                Y_val = np.vstack(map(lambda x: temp_matrix[x][2],val_indices))
#                val_batches.append((X_val,Y_val,X_mean))
#            # over here we are going to kill 1 sample at random and divide the
#            # training examples to 4 batches
#            X_train_full_data.append((X_train,Y_train))
        for X_, Y_ in iterate_minibatches(X_train, Y_train, 250, shuffle=True):
#            train_err += train_fn(X_[:,:4,...].astype('float32'),X_[:,4:,...].astype('float32'),Y_.reshape(-1,1).astype('float32'))
            train_err += train_fn(X_[:, 4:, ...].astype('float32'),
                                  Y_.reshape(-1, 1).astype('float32'))
            train_batches += 1
        print 'Number of batches %d ' % train_batches
        print 'Training loss = %.6f' % (train_err / train_batches)
        val_loss = 0.
        val_batches_ctr = 0
#        if not first_pass:
#            x_batch = np.vstack(map(lambda x: x[0] ,val_batches))
#            y_batch = np.vstack(map(lambda x: x[1],val_batches))
##            x_batch_means = np.vstack(map(lambda x: x[2],val_batches))
#            del val_batches
#            x_batch -= X_mean
        for x_val_batch, y_val_batch in iterate_minibatches(X_val, Y_val, 250, shuffle=True):
#            x_val_batch = x_val_batch.astype('float')
#            temp = val_fn(x_val_batch[:,:4,...].astype('float32'),x_val_batch[:,4:,...].astype('float32'),y_val_batch.reshape(-1,1).astype('float32'))
            temp = val_fn(x_val_batch[:, 4:, ...].astype('float32'),
                          y_val_batch.reshape(-1, 1).astype('float32'))
#            val_acc += temp[0]
            val_loss += temp[0]
            val_batches_ctr += 1
        print 'Number of validation batches %d' % val_batches_ctr
#        print 'Validation accuracy for epoch %d = %.6f' % (ep, val_acc / val_batches_ctr)
        print 'Validation loss for epoch %d = %.6f' % (ep, val_loss / val_batches_ctr)
        performance_metrics[ep + 1].append({'val_loss': val_loss / val_batches_ctr,
#                                            'val_acc': val_acc / val_batches_ctr,
                                            'train_loss': train_err / train_batches})
        if (ep + 1) % 10 == 0:
            params = convert_gpu_cpu(net)
            network_file_name = '1CNN_0maxpool_2048neural_network_p20_mod_special_refl8_regression_adadelta'
            network_file = file('../output/' + network_file_name + '_' + str(exp_no) +
                                '_' + str(ep + 1) + '.pkl', 'wb')
            pkl.dump(params, network_file, protocol=pkl.HIGHEST_PROTOCOL)
            network_file.close()
            f1 = file('../output/performance_metrics_' + network_file_name + '_' +
                      str(exp_no) + '.pkl', 'wb')
            pkl.dump(performance_metrics, f1, protocol=pkl.HIGHEST_PROTOCOL)
            f1.close()
def __init__(self, stateSize, actionSize, numFrames, batchSize, discount,
             rho, momentum, learningRate, rmsEpsilon, rng, updateRule,
             batchAccumulator, freezeInterval):
    self.stateSize = stateSize
    self.actionSize = actionSize
    self.numFrames = numFrames
    self.batchSize = batchSize
    self.discount = discount
    self.rho = rho
    self.momentum = momentum
    self.learningRate = learningRate
    self.rmsEpsilon = rmsEpsilon
    self.rng = rng
    self.updateRule = updateRule
    self.batchAccumulator = batchAccumulator
    self.freezeInterval = freezeInterval
    lasagne.random.set_rng(self.rng)
    self.updateCounter = 0
    self.lOut = self.buildNetwork(self.stateSize, self.actionSize,
                                  self.numFrames, self.batchSize)
    if self.freezeInterval > 0:
        self.nextLOut = self.buildNetwork(self.stateSize, self.actionSize,
                                          self.numFrames, self.batchSize)
        self.resetQHat()
    states = T.ftensor3('states')
    nextStates = T.ftensor3('nextStates')
    rewards = T.fcol('rewards')
    actions = T.icol('actions')
    terminals = T.icol('terminals')

    # Shared variables for training from a minibatch of replayed
    # state transitions, each consisting of num_frames + 1 (due to
    # overlap) states, along with the chosen action and resulting
    # reward and terminal status.
    self.states_shared = theano.shared(
        numpy.zeros((self.batchSize, self.numFrames + 1, self.stateSize),
                    dtype=theano.config.floatX))
    self.rewards_shared = theano.shared(
        numpy.zeros((self.batchSize, 1), dtype=theano.config.floatX),
        broadcastable=(False, True))
    self.actions_shared = theano.shared(
        numpy.zeros((self.batchSize, 1), dtype='int32'),
        broadcastable=(False, True))
    self.terminals_shared = theano.shared(
        numpy.zeros((self.batchSize, 1), dtype='int32'),
        broadcastable=(False, True))

    # Shared variable for a single state, to calculate qVals
    self.state_shared = theano.shared(
        numpy.zeros((self.numFrames, self.stateSize),
                    dtype=theano.config.floatX))

    qVals = lasagne.layers.get_output(self.lOut, states)
    if self.freezeInterval > 0:
        nextQVals = lasagne.layers.get_output(self.nextLOut, nextStates)
    else:
        nextQVals = lasagne.layers.get_output(self.lOut, nextStates)
        nextQVals = theano.gradient.disconnected_grad(nextQVals)

    # Cast terminals to floatX
    terminalsX = terminals.astype(theano.config.floatX)
    # T.eq(a, b) returns a variable representing the logical equality (a == b)
    actionmask = T.eq(T.arange(self.actionSize).reshape((1, -1)),
                      actions.reshape((-1, 1))).astype(theano.config.floatX)

    # Q-learning target: r + gamma * max_a' Q(s', a'), with the bootstrap term
    # zeroed out on terminal transitions.
    target = (rewards +
              (T.ones_like(terminalsX) - terminalsX) *
              self.discount * T.max(nextQVals, axis=1, keepdims=True))
    output = (qVals * actionmask).sum(axis=1).reshape((-1, 1))
    diff = target - output

    # no clip-delta branch here, since clip_delta = 0
    loss = diff ** 2
    if self.batchAccumulator == 'sum':
        loss = T.sum(loss)
    elif self.batchAccumulator == 'mean':
        loss = T.mean(loss)
    else:
        raise ValueError('Bad accumulator: {}'.format(self.batchAccumulator))

    params = lasagne.layers.helper.get_all_params(self.lOut)
    train_givens = {
        states: self.states_shared[:, :-1],
        nextStates: self.states_shared[:, 1:],
        rewards: self.rewards_shared,
        actions: self.actions_shared,
        terminals: self.terminals_shared
    }
    if self.updateRule == 'rmsprop':
        updates = lasagne.updates.rmsprop(loss, params, self.learningRate,
                                          self.rho, self.rmsEpsilon)
    elif self.updateRule == 'deepmind_rmsprop':
        updates = deepmind_rmsprop(loss, params, self.learningRate,
                                   self.rho, self.rmsEpsilon)
    else:
        raise ValueError('Unrecognized update: {}'.format(self.updateRule))
    if self.momentum > 0:
        updates = lasagne.updates.apply_momentum(updates, None, self.momentum)

    self._train = theano.function([], [loss], updates=updates,
                                  givens=train_givens)
    q_givens = {
        states: self.state_shared.reshape((1, self.numFrames, self.stateSize))
    }
    self._q_vals = theano.function([], qVals[0], givens=q_givens)
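# Hedged usage sketch (assumed method, not in the original class): a train-on-batch
# helper would copy a replayed minibatch into the shared variables declared above
# and then call the compiled self._train(), roughly:
#
#     def trainOnBatch(self, states, actions, rewards, terminals):
#         self.states_shared.set_value(states)        # (batchSize, numFrames + 1, stateSize) floatX
#         self.actions_shared.set_value(actions)      # (batchSize, 1) int32
#         self.rewards_shared.set_value(rewards)      # (batchSize, 1) floatX, matches the fcol input
#         self.terminals_shared.set_value(terminals)  # (batchSize, 1) int32
#         return self._train()[0]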
def causalNeuralNetwork(data_dim, hidden_dim, output_dim, parameters):
    X = T.fcol('X')
    Y = T.fcol('Y')
    global XWh, Xbh, hWY, hbY
    XWh = theano.shared(np.random.uniform(-np.sqrt(1. / data_dim),
                                          np.sqrt(1. / data_dim),
                                          (data_dim, hidden_dim)), name='XWh')
    Xbh = theano.shared(np.random.uniform(-np.sqrt(1. / hidden_dim),
                                          np.sqrt(1. / hidden_dim),
                                          (hidden_dim)), name='Xbh')
    hWY = theano.shared(np.random.uniform(-np.sqrt(1. / hidden_dim),
                                          np.sqrt(1. / hidden_dim),
                                          (hidden_dim, output_dim)), name='hWY')
    hbY = theano.shared(np.random.uniform(-np.sqrt(1. / output_dim),
                                          np.sqrt(1. / output_dim),
                                          (output_dim)), name='hbY')
    if (parameters['input_shift_to_tanh']):
        X = (X - .5) * 2.
    hidden = X.dot(XWh)
    if (parameters['use_bias']):
        hidden = hidden + Xbh
    if (parameters['hidden_activation'] == 1):
        hidden = T.tanh(hidden)
    elif (parameters['hidden_activation'] == 2):
        # ReLu doesn't work well with the intuition that negative values are also
        # significant vs. interpretations as activations
        hidden = T.maximum(T.zeros_like(hidden), hidden)
    output = hidden.dot(hWY)
    if (parameters['use_bias']):
        output = output + hbY
    if (parameters['hidden_activation'] == 1):
        output = T.tanh(output)
    if (parameters['output_shift_to_prob']):
        output = (output / 2.) + .5
    weights_sum = (T.sum(abs(XWh)) + T.sum(abs(Xbh)) +
                   T.sum(abs(hWY)) + T.sum(abs(hbY)))
    factor = 2.
    causal = (causal_loss(XWh, factor) + causal_loss(Xbh, factor) +
              causal_loss(hWY, factor) + causal_loss(hbY, factor))
    softOutput = T.minimum(T.ones_like(output) * 0.999999999999, output)
    softOutput = T.maximum(T.ones_like(output) * 0.000000000001, softOutput)
    # Predictions
    hidden_prediction = hidden > 0.
    if (parameters['loss_function'] == 0):
        loss = -T.sum(Y * T.log(softOutput) + (1. - Y) * (T.log(1. - softOutput)))
    elif (parameters['loss_function'] == 1):
        loss = T.mean(T.sqr(Y - output))
    else:
        loss = -T.mean(Y * T.log(softOutput) + (1. - Y) * (T.log(1. - softOutput)))
    if (parameters['loss_weights_sum']):
        loss += weights_sum
    if (parameters['loss_causal_linear']):
        loss += causal
    if (parameters['use_bias']):
        var_list = [XWh, Xbh, hWY, hbY]
    else:
        var_list = [XWh, hWY]
    gradients = T.grad(loss, var_list)
    # updates = lasagne.updates.rmsprop(gradients, var_list, learning_rate=learning_rate)
    updates = lasagne.updates.nesterov_momentum(gradients, var_list, learning_rate=0.01)
    sgd = theano.function([X, Y], [loss], updates=updates)
    predict = theano.function([X], [output, weights_sum])
    graph = theano.function([X], [output, hidden_prediction, hidden, weights_sum])
    return sgd, predict, graph
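# Hedged usage sketch (not from the original module): drives the three compiled
# functions returned above on tiny made-up data. It assumes the module-level
# imports and the causal_loss helper that the function already relies on are
# available; the parameter values and toy arrays below are illustrative only.
toy_parameters = {
    'input_shift_to_tanh': False,
    'use_bias': True,
    'hidden_activation': 1,      # tanh hidden layer
    'loss_function': 1,          # mean squared error
    'loss_weights_sum': False,
    'loss_causal_linear': False,
    'output_shift_to_prob': False,
}
sgd_fn, predict_fn, graph_fn = causalNeuralNetwork(data_dim=1, hidden_dim=8,
                                                   output_dim=1,
                                                   parameters=toy_parameters)
X_toy = np.asarray([[0.], [1.]], dtype='float32')  # T.fcol expects an (n, 1) float32 column
Y_toy = np.asarray([[0.], [1.]], dtype='float32')
for _ in range(200):
    sgd_fn(X_toy, Y_toy)
print(predict_fn(X_toy)[0])  # learned outputs for the two toy inputs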
def __init__(self, x_train, dim_z=10, batch_size=10, filter_no=[5., 5., 5.],
             filter_l=[10., 10., 10.], pooling_d=3, pooling_s=2,
             learning_rate=0.0008, dim_y=None, y_train=None, diff=None,
             magic=5000):
    ####################################### SETTINGS ###################################
    self.x_train = x_train
    self.y_train = y_train
    if y_train is not None:
        self.dim_y = dim_y
        self.diff = diff
    self.batch_size = batch_size
    self.learning_rate = theano.shared(np.float32(learning_rate))
    self.performance = {"train": []}
    self.inpt = T.ftensor4(name='input')
    self.Y = T.fcol(name='label')
    self.df = T.fmatrix(name='differential')
    self.dim_z = dim_z
    self.magic = magic
    self.pooling_d = pooling_d
    self.pooling_s = pooling_s
    self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
    self.generative_hid = theano.shared(np.float32(np.zeros([1, magic])))
    self.activation = relu
    self.out_distribution = False
    self.in_filters = filter_l
    self.filter_lengths = filter_no
    self.params = []
    self.d_o_prob = theano.shared(np.float32(0.0))

    ####################################### LAYERS ######################################
    # LAYER 1 ##############################
    self.conv1 = one_d_conv_layer(self.inpt, self.in_filters[0], 1,
                                  self.filter_lengths[0],
                                  param_names=["W1", 'b1'])
    self.params += self.conv1.params
    self.bn1 = batchnorm(self.conv1.output)
    self.nl1 = self.activation(self.bn1.X)
    self.maxpool1 = ds.max_pool_2d(self.nl1, [self.pooling_d, 1],
                                   st=[self.pooling_s, 1],
                                   ignore_border=False).astype(theano.config.floatX)
    self.layer1_out = dropout(self.maxpool1, self.d_o_prob)
    self.flattened = T.flatten(self.layer1_out, outdim=2)

    # Conditional + variational layer #####################
    if y_train is not None:
        self.c_enc = hidden_layer(self.Y, 1, self.dim_y)
        self.c_dec = hidden_layer(self.Y, 1, self.dim_y, param_names=["W10", 'b10'])
        self.params += self.c_enc.params
        self.params += self.c_dec.params
        self.c_nl = self.activation(self.c_enc.output)
        self.c_nl_dec = self.activation(self.c_dec.output)
        self.concatenated = T.concatenate((self.flattened, self.c_nl), axis=1)
        self.latent_layer = variational_gauss_layer(self.concatenated,
                                                    self.magic + self.dim_y, dim_z)
    else:
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic, dim_z)
    self.params += self.latent_layer.params
    self.latent_out = self.latent_layer.output

    # Hidden Layer #########################
    if y_train is not None:
        self.dec_concat = T.concatenate((self.latent_out, self.c_nl_dec), axis=1)
        self.hidden_layer = hidden_layer(self.dec_concat,
                                         self.dim_z + self.dim_y, self.magic)
    else:
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
    self.params += self.hidden_layer.params
    self.hid_out = dropout(
        self.activation(self.hidden_layer.output).reshape(
            (self.inpt.shape[0], self.in_filters[-1],
             int(self.magic / self.in_filters[-1]), 1)),
        self.d_o_prob)

    # Deconvolutional 1 ######################
    self.deconv1 = one_d_deconv_layer(self.hid_out, 1, self.in_filters[2],
                                      self.filter_lengths[2],
                                      pool=self.pooling_d,
                                      param_names=["W3", 'b3'],
                                      distribution=False)
    self.params += self.deconv1.params
    #self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
    self.tanh_out = self.deconv1.output
    self.last_layer = self.deconv1

    if self.out_distribution == True:
        self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
    self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

    self.cost = self.MSE()
    self.mse = self.MSE()
    #self.likelihood = self.log_px_z()
    #self.get_cost = theano.function([self.inpt],[self.cost,self.mse])
    #self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
    self.derivatives = T.grad(self.cost, self.params)
    #self.get_gradients = theano.function([self.inpt],self.derivatives)
    self.updates = adam(self.params, self.derivatives, self.learning_rate)

    ################################### FUNCTIONS ######################################################
    #self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
    #self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
    #self.convolve1 = theano.function([self.inpt],self.layer1_out)
    #self.convolve2 = theano.function([self.inpt],self.layer2_out)
    #self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
    #self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
    #self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
    #self.output = theano.function([self.inpt],self.trunc_output,givens=[[self.dropout_symbolic,self.dropout_prob]])
    #self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
    #self.get_cost = theano.function([self.inpt],[self.cost,self.mse])
    #self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
    #self.get_gradients = theano.function([self.inpt],self.derivatives)
    self.generate_from_hid = theano.function([self.inpt], self.trunc_output,
                                             givens=[[self.hidden_layer.output,
                                                      self.generative_hid]])
    self.get_flattened = theano.function([self.inpt], self.flattened)
    if self.y_train is not None:
        self.generate_from_z = theano.function([self.inpt, self.Y], self.trunc_output,
                                               givens=[[self.latent_out, self.generative_z]])
        self.train_model = theano.function(inputs=[self.inpt, self.df, self.Y],
                                           outputs=self.cost, updates=self.updates)
        self.get_latent_states = theano.function([self.inpt, self.Y], self.latent_out)
        self.get_c_enc = theano.function([self.Y], self.c_enc.output)
        self.output = theano.function([self.inpt, self.Y], self.trunc_output)
        self.get_concat = theano.function([self.inpt, self.Y], self.concatenated)
    else:
        self.generate_from_z = theano.function([self.inpt], self.trunc_output,
                                               givens=[[self.latent_out, self.generative_z]])
        self.train_model = theano.function(inputs=[self.inpt, self.df],
                                           outputs=self.cost, updates=self.updates)
        self.output = theano.function([self.inpt], self.trunc_output)
        self.get_latent_states = theano.function([self.inpt], self.latent_out)
def UnitTest_OnestepAttend():
    N = 2            # number of samples
    D = 5            # dimension of input
    H = 4            # dimension of hidden state
    T_new = 1        # length of each sample sequence
    context_dim = 3
    K = 5

    x = np.linspace(-0.4, 0.6, num=N * T_new * D, dtype=theano.config.floatX).reshape(T_new, N, D)
    h0 = np.linspace(-0.4, 0.8, num=N * H, dtype=theano.config.floatX).reshape(N, H)
    Wx = np.linspace(-0.2, 0.9, num=4 * D * H, dtype=theano.config.floatX).reshape(D, 4 * H)
    Wh = np.linspace(-0.3, 0.6, num=4 * H * H, dtype=theano.config.floatX).reshape(H, 4 * H)
    b = np.linspace(0.0, 0.0, num=4 * H, dtype=theano.config.floatX)
    Wz = np.linspace(-0.3, 0.6, num=4 * H * context_dim, dtype=theano.config.floatX).reshape(context_dim, 4 * H)
    Hcontext = np.linspace(-0.2, 0.6, num=H * K, dtype=theano.config.floatX).reshape(H, K)
    Zcontext = np.linspace(-0.2, 0.5, num=context_dim * K, dtype=theano.config.floatX).reshape(context_dim, K)
    Va = np.linspace(0.1, 0.4, num=K, dtype=theano.config.floatX)
    Va_reshape = Va.reshape(K, 1)
    image_feature_3D = np.linspace(-0.2, 0.5, num=10 * N * context_dim, dtype=theano.config.floatX).reshape(N, 10, context_dim)

    h0_theano = h0.reshape(1, N, H)
    # h0_symb = theano.tensor.ftensor3("h_symb")
    # lstm_theano_layer.h_m1.set_value(h0_theano)
    c0_theano = np.zeros((1, N, H), dtype=theano.config.floatX)
    # c0_symb = theano.tensor.ftensor3("c_symb")
    # lstm_theano_layer.c_m1.set_value(c0_theano)
    z0_theano = np.zeros((1, N, context_dim), dtype=theano.config.floatX)
    x_theano = x.reshape(T_new, N, D, 1)
    image_feature_input = image_feature_3D
    weight_y_in_value = np.zeros((10, context_dim), dtype=theano.config.floatX)
    b_theano = b.reshape(1, 1, 4 * H)

    pdb.set_trace()

    # symbolic variables
    initial_h0_layer_out = theano.tensor.tensor3(name='h0_initial', dtype=theano.config.floatX)
    initial_c0_layer_out = theano.tensor.tensor3(name='c0_initial', dtype=theano.config.floatX)
    initial_z0 = T.tensor3(name='z0_initial', dtype=theano.config.floatX)
    weight_y_in = theano.tensor.fmatrix("weight_y")
    input_data = theano.tensor.tensor3(name='x', dtype=theano.config.floatX)
    image_feature_region = theano.tensor.tensor3(name='feature_region', dtype=theano.config.floatX)
    Wi_sym, Wf_sym, Wc_sym, Wo_sym, Ui_sym, Uf_sym, Uc_sym, Uo_sym, Zi_sym, Zf_sym, Zc_sym, Zo_sym = T.fmatrices(12)
    Zcontext_sym, Hcontext_sym = T.fmatrices(2)
    bi = T.ftensor3("bi")
    bf = T.ftensor3("bf")
    bc = T.ftensor3("bc")
    bo = T.ftensor3("bo")
    Va_sym = T.fcol("Va")

    out_sym = onestep_attend_tell(input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0,
                                  Wi_sym, Wf_sym, Wc_sym, Wo_sym,
                                  Ui_sym, Uf_sym, Uc_sym, Uo_sym,
                                  Zi_sym, Zf_sym, Zc_sym, Zo_sym,
                                  Zcontext_sym, Hcontext_sym, Va_sym,
                                  bi, bf, bc, bo,
                                  image_feature_region, weight_y_in)

    onestep_func = theano.function([input_data, initial_h0_layer_out, initial_c0_layer_out, initial_z0,
                                    Wi_sym, Wf_sym, Wc_sym, Wo_sym,
                                    Ui_sym, Uf_sym, Uc_sym, Uo_sym,
                                    Zi_sym, Zf_sym, Zc_sym, Zo_sym,
                                    Zcontext_sym, Hcontext_sym, Va_sym,
                                    bi, bf, bc, bo,
                                    image_feature_region, weight_y_in],
                                   out_sym)

    list_output = onestep_func(x, h0_theano, c0_theano, z0_theano,
                               Wx[:, :H], Wx[:, H:2 * H], Wx[:, 2 * H:3 * H], Wx[:, 3 * H:],
                               Wh[:, :H], Wh[:, H:2 * H], Wh[:, 2 * H:3 * H], Wh[:, 3 * H:],
                               Wz[:, :H], Wz[:, H:2 * H], Wz[:, 2 * H:3 * H], Wz[:, 3 * H:],
                               Zcontext, Hcontext, Va_reshape,
                               b_theano[:, :, :H], b_theano[:, :, H:2 * H],
                               b_theano[:, :, 2 * H:3 * H], b_theano[:, :, 3 * H:],
                               image_feature_input, weight_y_in_value)

    pdb.set_trace()
    print(list_output[0].shape)
    print(list_output[1].shape)
    print(list_output[2].shape)
    pdb.set_trace()