def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='/Users/gabrielsynnaeve/postdoc/htk_mfc/TIMIT/',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear model

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the dataset (a TIMIT directory by default
                    here; the original tutorial used the MNIST file from
                    http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz)
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rows of feature vectors
                        # (rasterized images in the original tutorial)
    y = T.ivector('y')  # the labels are presented as a 1D vector of
                        # [int] labels

    # construct the logistic regression class
    # n_in: 39 coefficients x 11 frames of context; n_out: 40 classes
    classifier = LogisticRegression(input=x, n_in=39 * 11, n_out=40)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function(inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    # compute the gradient of cost with respect to theta = (W, b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # specify how to update the parameters of the model as a dictionary
    updates = {classifier.W: classifier.W - learning_rate * g_W,
               classifier.b: classifier.b - learning_rate * g_b}

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of best'
                           ' model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))

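
# Every training loop in this file uses the same patience-based early
# stopping. A minimal, self-contained sketch of just that mechanism (numpy
# only, driven by a fake validation-loss sequence; all names below are
# illustrative, not part of the original code):

import numpy

def early_stopping_demo(val_losses, patience=5, patience_increase=2,
                        improvement_threshold=0.995):
    """Return the step at which patience-based early stopping would halt."""
    best = numpy.inf
    for t, loss in enumerate(val_losses):
        if loss < best:
            # a "significant" improvement stretches the patience horizon
            if loss < best * improvement_threshold:
                patience = max(patience, t * patience_increase)
            best = loss
        if patience <= t:
            return t  # ran out of patience
    return len(val_losses)

# losses that plateau after step 4: training stops well before the end
print early_stopping_demo([1.0, 0.8, 0.6, 0.5, 0.49, 0.49, 0.49, 0.49,
                           0.49, 0.49, 0.49, 0.49])  # prints 8
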
def test_DBN(finetune_lr=0.01, pretraining_epochs=0,
             pretrain_lr=0.01, k=1, training_epochs=500,  # TODO 100+
             dataset=DATASET, batch_size=100):
    """
    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    print "loading dataset from", dataset
    datasets = load_data(dataset, nframes=1, features='fbank',
                         scaling='student', cv_frac='fixed',
                         speakers=True, numpy_array_only=True)

    train_set_x, train_set_y = datasets[0]  # if speakers, do test/test/test
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]
    print "#spkrs in train", len(set(train_set_y[1]))
    print "#spkrs in valid", len(set(valid_set_y[1]))
    print "#spkrs in test", len(set(test_set_y[1]))
    # test_set_y = test_set_y[0], test_set_y[1] - test_set_y[1].min()
    # print "checking that both y_phn and y_spkr are 0-indexed (for the Softmax)"
    # print "y_phn min:", test_set_y[0].min(),
    # print "y_spkr min:", test_set_y[1].min()
    # assert test_set_y[0].min() == 0
    # assert test_set_y[1].min() == 0

    to_int = {}
    with open('timit_to_int_and_to_state_dicts_tuple.pickle') as f:  # TODO
        to_int, _ = cPickle.load(f)
    train_set_iterator = DatasetSentencesIteratorPhnSpkr(train_set_x,
            train_set_y, to_int, N_FRAMES)
    valid_set_iterator = DatasetSentencesIteratorPhnSpkr(valid_set_x,
            valid_set_y, to_int, N_FRAMES)
    test_set_iterator = DatasetSentencesIteratorPhnSpkr(test_set_x,
            test_set_y, to_int, N_FRAMES)

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=N_FRAMES * N_FEATURES,
              hidden_layers_sizes=[2048, 2048, 2048],
              # n_phn=len(set(test_set_y[0])), n_spkr=len(set(test_set_y[1])))
              n_phn=len(set(test_set_y[0])),
              n_spkr=len(set(train_set_y[1])) + len(set(valid_set_y[1]))
                     + len(set(test_set_y[1])))

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    #train_fn = dbn.get_adadelta_trainer()
    train_fn_phn, train_fn_spkr = dbn.get_adadelta_trainers()
    train_clf_phn, train_clf_spkr = dbn.train_only_classif()
    train_scoref = dbn.score_classif(train_set_iterator)
    valid_scoref = dbn.score_classif(valid_set_iterator)
    test_scoref = dbn.score_classif(test_set_iterator)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss_phn = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        avg_costs_phn = []
        avg_costs_spkr = []
        for iteration, (x, y_phn, y_spkr) in enumerate(train_set_iterator):
            if random.random() > 0.8:  # TODO play with this ratio
                avg_cost_phn = train_fn_phn(x, y_phn)
                avg_costs_phn.append(avg_cost_phn)
            else:
                avg_cost_spkr = train_fn_spkr(x, y_spkr)
                avg_costs_spkr.append(avg_cost_spkr)
            #avg_cost_phn = train_clf_phn(x, y_phn)
            #avg_costs_phn.append(avg_cost_phn)
            #avg_cost_spkr = train_clf_spkr(x, y_spkr)
            #avg_costs_spkr.append(avg_cost_spkr)
            #avg_cost = train_fn(x, y_phn, y_spkr)
            #avg_costs_phn.append(avg_cost[0])
            #avg_costs_spkr.append(avg_cost[1])
        print('  epoch %i, avg costs phn %f' % \
              (epoch, numpy.mean(avg_costs_phn)))
        print('  epoch %i, avg costs spkr %f' % \
              (epoch, numpy.mean(avg_costs_spkr)))
        zipped = zip(*train_scoref())
        print('  epoch %i, training error phn %f %%' % \
              (epoch, numpy.mean(zipped[0]) * 100.))
        print('  epoch %i, training error spkr %f %%' % \
              (epoch, numpy.mean(zipped[1]) * 100.))

        # we check the validation loss on every epoch
        validation_losses = valid_scoref()
        this_phn_validation_loss = numpy.mean(zip(*validation_losses)[0])
        # TODO this is a mean of means (with different lengths)
        print('  epoch %i, validation error phn %f %%' % \
              (epoch, this_phn_validation_loss * 100.))
        # if we got the best validation score until now
        if this_phn_validation_loss < best_validation_loss_phn:
            with open(output_file_name + '.pickle', 'w') as f:
                cPickle.dump(dbn, f)
            # improve patience if loss improvement is good enough
            if (this_phn_validation_loss < best_validation_loss_phn *
                    improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss_phn = this_phn_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score_phn = numpy.mean(zip(*test_losses)[0])
            # TODO this is a mean of means (with different lengths)
            print(('  epoch %i, test error phn of '
                   'best model %f %%') %
                  (epoch, test_score_phn * 100.))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score phn of %f %%, '
           'with test performance phn %f %%') %
          (best_validation_loss_phn * 100., test_score_phn * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open(output_file_name + '.pickle', 'w') as f:
        cPickle.dump(dbn, f)

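
# The multi-task loop in test_DBN above interleaves two ADADELTA trainers
# stochastically: with probability 0.2 it takes a phone-classification step,
# otherwise a speaker-classification step. A sketch of that scheduling policy
# in isolation (the comments stand in for the real trainer calls; this is
# illustrative, not the original code):

import random

def interleaved_update_counts(n_steps, phn_ratio=0.2, seed=123):
    """Count how many updates each task receives under random interleaving."""
    rng = random.Random(seed)
    counts = {'phn': 0, 'spkr': 0}
    for _ in xrange(n_steps):
        if rng.random() < phn_ratio:
            counts['phn'] += 1   # here: train_fn_phn(x, y_phn)
        else:
            counts['spkr'] += 1  # here: train_fn_spkr(x, y_spkr)
    return counts

print interleaved_update_counts(10000)  # roughly {'phn': 2000, 'spkr': 8000}
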
def train_DBN(finetune_lr=0.01, pretraining_epochs=100,
              pretrain_lr=0.001, k=1, training_epochs=200,
              dataset=DATASET, batch_size=20, dbn_load_from=''):
    """
    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    print "loading dataset from", dataset
    ###datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=True, pca_whiten=True, cv_frac=0.0)
    datasets = load_data(dataset, nframes=N_FRAMES, unit=False, student=True,
                         pca_whiten=False, cv_frac=0.15,
                         dataset_name='TIMIT_std', speakers=SPEAKERS)
    # unit=False because we don't want the [0-1] binary RBM projection
    # normalize=True because we want the data to be 0 centered with 1 variance
    # pca_whiten=True because we want the data to be decorrelated

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    N_OUTS = 62 * 3  # #phones * #states
    if SPEAKERS:
        from collections import Counter
        c = Counter(train_set_y.eval())
        N_OUTS = len(c)
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]
    print "N_OUTS:", N_OUTS

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    print "train_set_x.shape.eval()", train_set_x.shape.eval()
    assert(train_set_x.shape[1].eval() == N_FRAMES * 39)  # check
    dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.shape[1].eval(),
              hidden_layers_sizes=[1248, 1248, 1248],
              n_outs=N_OUTS)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... evaluating on MFCC only, error rate of a LogisticRegression:'
    on_top_of_MFCC_fn = dbn.pretraining_eval_function(layer=-1,
                                            train_set_x=train_set_x,
                                            train_set_y=train_set_y,
                                            valid_set_x=valid_set_x,
                                            valid_set_y=valid_set_y,
                                            test_set_x=test_set_x,
                                            test_set_y=test_set_y,
                                            batch_size=batch_size)
    print 'error rate:', on_top_of_MFCC_fn()

    #dbn = None ### TOREMOVE
    #with open('dbn_analyze_timit__plr1.0E-03_pep100_flr1.0E-03_fep_10_k1_layer_1.pickle') as f: ### TOREMOVE
    #    dbn = cPickle.load(f) ### TOREMOVE

    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)
    pretraining_eval_fns = [dbn.pretraining_eval_function(layer=ii,
                                            train_set_x=train_set_x,
                                            train_set_y=train_set_y,
                                            valid_set_x=valid_set_x,
                                            valid_set_y=valid_set_y,
                                            test_set_x=test_set_x,
                                            test_set_y=test_set_y,
                                            batch_size=batch_size)
                            for ii in xrange(dbn.n_layers)]
    for i in xrange(dbn.n_layers):
        print i, pretraining_eval_fns[i]()

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        #######################
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                tmp_lr = pretrain_lr / (1. + 0.05 * batch_index)  # TODO
                if i == 0:
                    tmp_lr /= LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN
                c.append(pretraining_fns[i](index=batch_index, lr=tmp_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
            ##############################
            print('>>> (cross_val, test) error rates of LogisticRegression '
                  'on top of the hidden layer %d is' % i)
            print(pretraining_eval_fns[i]())
            # TODO stop pretraining when this error rate goes up (early stopping)
            ##############################
        with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f:
            cPickle.dump(dbn, f)
            print "dumped a partially pre-trained model"
        #######################

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################
    if dbn_load_from != '':
        with open(dbn_load_from) as f:
            dbn = cPickle.load(f)
        print 'loaded this dbn:', dbn_load_from
    #with open(output_file_name + '_layer_2.pickle') as f:
    #    dbn = cPickle.load(f)
    #datasets = load_data(dataset, nframes=N_FRAMES, unit=False, student=True, pca_whiten=False, cv_frac=0.2, dataset_name='TIMIT', speakers=SPEAKERS)
    #train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y = None, None, None, None, None, None
    #train_set_x, train_set_y = datasets[0]
    #valid_set_x, valid_set_y = datasets[1]
    #test_set_x, test_set_y = datasets[2]
    #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
                datasets=datasets, batch_size=batch_size,
                learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    print "number of training (fine-tuning) batches", n_train_batches

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                ##############################
                for layer_ind in xrange(dbn.n_layers):
                    print('>>> (cross-val, test) error rate of a '
                          'LogisticRegression on top of layer %d is'
                          % layer_ind)
                    print(pretraining_eval_fns[layer_ind]())
                ##############################

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    with open(output_file_name + '.pickle', 'w') as f:
                        cPickle.dump(dbn, f)
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('  epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

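
# Within each pre-training epoch of train_DBN, the learning rate is annealed
# per minibatch as pretrain_lr / (1 + 0.05 * batch_index), and further divided
# by a constant for the first (Gaussian-visible) RBM layer. A small sketch of
# that schedule (the value 10. is an illustrative stand-in for
# LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN, which is defined elsewhere):

def pretrain_lr_schedule(pretrain_lr, n_batches, gaussian_layer=False,
                         gaussian_denominator=10.):
    """Learning rate used at each minibatch index within one epoch."""
    lrs = []
    for batch_index in xrange(n_batches):
        tmp_lr = pretrain_lr / (1. + 0.05 * batch_index)
        if gaussian_layer:
            tmp_lr /= gaussian_denominator
        lrs.append(tmp_lr)
    return lrs

print pretrain_lr_schedule(0.001, 5)
# [0.001, 0.00095238..., 0.00090909..., 0.00086956..., 0.00083333...]
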
def test_DBN(finetune_lr=0.01, pretraining_epochs=0,
             pretrain_lr=0.01, k=1, training_epochs=200,  # TODO 100+
             dataset=DATASET, batch_size=100):
    """
    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    print "loading dataset from", dataset
    features = 'fbank'
    if features == 'MFCC':
        N_FEATURES = 39
    elif features == 'fbank':
        N_FEATURES = 40
    #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='normalize', cv_frac=0.2, speakers=False, numpy_array_only=True)
    #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='student', cv_frac='fixed', speakers=False, numpy_array_only=True)
    datasets = load_data(dataset, nframes=1, features=features,
                         scaling='student', cv_frac='fixed',
                         speakers=False, numpy_array_only=True)
    #datasets = load_data(dataset, nframes=1, features='fbank', scaling='student', cv_frac=0.2, speakers=False, numpy_array_only=True)

    train_set_x, train_set_y = datasets[0]  # if speakers, do test/test/test
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]
    print "phones in train", len(set(train_set_y))
    print "phones in valid", len(set(valid_set_y))
    print "phones in test", len(set(test_set_y))

    to_int = {}
    with open('timit_to_int_and_to_state_dicts_tuple.pickle') as f:  # TODO
        to_int, _ = cPickle.load(f)
    train_set_iterator = DatasetSentencesIterator(train_set_x, train_set_y,
            to_int, N_FRAMES)
    valid_set_iterator = DatasetSentencesIterator(valid_set_x, valid_set_y,
            to_int, N_FRAMES)
    test_set_iterator = DatasetSentencesIterator(test_set_x, test_set_y,
            to_int, N_FRAMES)

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=N_FRAMES * N_FEATURES,
              #hidden_layers_sizes=[2496, 2496, 2496],
              hidden_layers_sizes=[1024, 1024, 1024],
              n_outs=len(set(train_set_y)))  # 62 * 3

    #########################
    # PRETRAINING THE MODEL #
    #########################
    # print '... getting the pretraining functions'
    # pretraining_fns = dbn.pretraining_functions(k=k)
    #
    # print '... pre-training the model'
    # start_time = time.clock()
    # ## Pre-train layer-wise
    # #for i in xrange(dbn.n_layers):  # TODO
    # for i in xrange(1):
    #     # go through pretraining epochs
    #     for epoch in xrange(pretraining_epochs):
    #         # go through the training set
    #         c = []
    #         for batch_index, (batch_x, _) in enumerate(train_set_iterator):
    #             tmp_lr = pretrain_lr / (1. + 0.05 * batch_index)  # TODO
    #             if i == 0:
    #                 tmp_lr /= PRELEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN
    #             c.append(pretraining_fns[i](batch_x=batch_x, lr=tmp_lr))
    #         print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
    #         print numpy.mean(c)
    #     with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f:
    #         cPickle.dump(dbn, f)
    #         print "dumped a partially pre-trained model"
    #
    # end_time = time.clock()
    # print >> sys.stderr, ('The pretraining code for file ' +
    #                       os.path.split(__file__)[1] +
    #                       ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################
    #with open('dbn_Gaussian_gpu_layer_2.pickle') as f:
    #    dbn = cPickle.load(f)

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn = dbn.get_adadelta_trainer()
    train_scoref = dbn.score_classif(train_set_iterator)
    valid_scoref = dbn.score_classif(valid_set_iterator)
    test_scoref = dbn.score_classif(test_set_iterator)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        avg_costs = []
        for iteration, (x, y) in enumerate(train_set_iterator):
            avg_cost = train_fn(x, y)
            avg_costs.append(avg_cost)
            #print('  epoch %i, sentence %i, '
            #      'avg cost for this sentence %f' % \
            #      (epoch, iteration, avg_cost))
        print('  epoch %i, avg costs %f' % \
              (epoch, numpy.mean(avg_costs)))
        print('  epoch %i, training error %f %%' % \
              (epoch, numpy.mean(train_scoref()) * 100.))

        # we check the validation loss on every epoch
        validation_losses = valid_scoref()
        this_validation_loss = numpy.mean(validation_losses)
        # TODO this is a mean of means (with different lengths)
        print('  epoch %i, validation error %f %%' % \
              (epoch, this_validation_loss * 100.))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'w') as f:
                cPickle.dump(dbn, f)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                    improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score = numpy.mean(test_losses)
            # TODO this is a mean of means (with different lengths)
            print(('  epoch %i, test error of '
                   'best model %f %%') %
                  (epoch, test_score * 100.))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open(output_file_name + '.pickle', 'w') as f:
        cPickle.dump(dbn, f)

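
# get_adadelta_trainer is not defined in this file; it presumably implements
# Zeiler's ADADELTA (2012): per-parameter running averages of squared
# gradients and squared updates, with no global learning rate. A minimal
# numpy sketch of that update rule, under that assumption:

import numpy

def adadelta_step(param, grad, acc_grad, acc_delta, rho=0.95, eps=1e-6):
    """One ADADELTA update; acc_grad/acc_delta are persistent accumulators."""
    acc_grad = rho * acc_grad + (1. - rho) * grad ** 2
    delta = -numpy.sqrt(acc_delta + eps) / numpy.sqrt(acc_grad + eps) * grad
    acc_delta = rho * acc_delta + (1. - rho) * delta ** 2
    return param + delta, acc_grad, acc_delta

# e.g. minimize f(w) = w**2 (gradient 2w) starting from w = 5
w, ag, ad = 5., 0., 0.
for _ in xrange(1000):
    w, ag, ad = adadelta_step(w, 2. * w, ag, ad)
print w  # decreases toward 0
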
def test_DBN(finetune_lr=0.1, pretraining_epochs=42,  # TODO 1000
             pretrain_lr=0.01, k=1, training_epochs=42,  # TODO 1000
             dataset=DATASET, batch_size=10):
    """
    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    print "loading dataset from", dataset
    datasets = load_data(dataset, nframes=N_FRAMES, unit=True, normalize=False)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=39 * N_FRAMES,
              hidden_layers_sizes=[512, 512],
              n_outs=62 * 3)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         lr=pretrain_lr / (1. + 0.5 * batch_index)))  # TODO
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################
    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
                datasets=datasets, batch_size=batch_size,
                learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('  epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open('dbn_1.pickle', 'w') as f:
        cPickle.dump(dbn, f)

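
# pretraining_functions(..., k=k) builds one CD-k update per RBM layer; the
# RBM class itself is not shown in this file. For reference, a minimal numpy
# sketch of CD-1 for a binary-binary RBM (illustrative, not the actual
# Theano implementation; the Gaussian-visible first layer would differ):

import numpy

def sigmoid(x):
    return 1. / (1. + numpy.exp(-x))

def cd1_step(v0, W, b_vis, b_hid, lr, rng):
    """One CD-1 update on a minibatch v0 of shape (batch, n_visible)."""
    # positive phase
    h0_prob = sigmoid(numpy.dot(v0, W) + b_hid)
    h0 = (rng.uniform(size=h0_prob.shape) < h0_prob).astype(v0.dtype)
    # negative phase: one Gibbs step down and back up
    v1_prob = sigmoid(numpy.dot(h0, W.T) + b_vis)
    h1_prob = sigmoid(numpy.dot(v1_prob, W) + b_hid)
    # approximate gradient: <v h>_data - <v h>_model
    batch = v0.shape[0]
    W += lr * (numpy.dot(v0.T, h0_prob)
               - numpy.dot(v1_prob.T, h1_prob)) / batch
    b_vis += lr * (v0 - v1_prob).mean(axis=0)
    b_hid += lr * (h0_prob - h1_prob).mean(axis=0)
    return W, b_vis, b_hid
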
def test_RRNN(finetune_lr=0.0001, pretraining_epochs=0,
              pretrain_lr=0.01, k=1, training_epochs=500,  # TODO 100+
              dataset=DATASET, batch_size=100):
    """
    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    print "output file name:", output_file_name
    print "loading dataset from", dataset
    datasets = load_data(dataset, nframes=1, features='fbank',
                         scaling='normalize', cv_frac='fixed',
                         speakers=False, numpy_array_only=True)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]
    print "phones in train", len(set(train_set_y))
    print "phones in valid", len(set(valid_set_y))
    print "phones in test", len(set(test_set_y))

    to_int = {}
    with open('timit_to_int_and_to_state_dicts_tuple.pickle') as f:  # TODO
        to_int, _ = cPickle.load(f)
    train_set_iterator = DatasetSentencesIterator(train_set_x, train_set_y,
            to_int, N_FRAMES)
    valid_set_iterator = DatasetSentencesIterator(valid_set_x, valid_set_y,
            to_int, N_FRAMES)
    test_set_iterator = DatasetSentencesIterator(test_set_x, test_set_y,
            to_int, N_FRAMES)

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    nnet = RRNN(numpy_rng=numpy_rng, n_ins=N_FRAMES * N_FEATURES,
                relu_layers_sizes=[2400, 2400, 2400, 2400],
                recurrent_connections=[],
                n_outs=len(set(train_set_y)))

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn = nnet.get_adadelta_trainer()
    #train_fn = nnet.get_SGD_trainer()
    #train_fn = nnet.get_adagrad_trainer()
    train_scoref = nnet.score_classif(train_set_iterator)
    valid_scoref = nnet.score_classif(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    dataset_iterator = train_set_iterator

    if DEBUG_ON_TEST_ONLY:
        dataset_iterator = test_set_iterator
        train_scoref = test_scoref

    print '... finetuning the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        avg_costs = []
        for iteration, (x, y) in enumerate(dataset_iterator):
            avg_cost = train_fn(x, y)
            #avg_cost = train_fn(x, y, lr=finetune_lr)
            avg_costs.append(avg_cost)
            #print('  epoch %i, sentence %i, '
            #      'avg cost for this sentence %f' % \
            #      (epoch, iteration, avg_cost))
        #print('  epoch %i, avg costs %f, avg accudeltas %f' % \
        #      (epoch, numpy.mean(avg_costs),
        #       numpy.mean([T.mean(ad).eval() for ad in nnet._accudeltas])))
        print('  epoch %i, avg costs %f' % \
              (epoch, numpy.mean(avg_costs)))
        print('  epoch %i, training error %f %%' % \
              (epoch, numpy.mean(train_scoref()) * 100.))
        if DEBUG_ON_TEST_ONLY:
            continue

        # we check the validation loss on every epoch
        validation_losses = valid_scoref()
        this_validation_loss = numpy.mean(validation_losses)
        # TODO this is a mean of means (with different lengths)
        print('  epoch %i, validation error %f %%' % \
              (epoch, this_validation_loss * 100.))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'w') as f:
                cPickle.dump(nnet, f)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                    improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score = numpy.mean(test_losses)
            # TODO this is a mean of means (with different lengths)
            print(('  epoch %i, test error of '
                   'best model %f %%') %
                  (epoch, test_score * 100.))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open(output_file_name + '.pickle', 'w') as f:
        cPickle.dump(nnet, f)

print abnnet.layers[-2].output


def createLogisticRegression(n_ins, n_outs):
    numpy_rng = np.random.RandomState(123)
    ret = NeuralNet(numpy_rng=numpy_rng,
                    n_ins=n_ins,
                    layers_types=[LogisticRegression],
                    layers_sizes=[],
                    n_outs=n_outs,
                    debugprint=False)
    return ret


data = load_data(DATASET, nframes=NFRAMES, features='fbank', scaling='none',
                 cv_frac='fixed', speakers=False, numpy_array_only=True)
d = np.load("mean_std.npz")
mean = d['mean']
std = d['std']
mean = np.tile(mean, NFRAMES)
std = np.tile(std, NFRAMES)

train_set_x, train_set_y = data[0]
valid_set_x, valid_set_y = data[1]
test_set_x, test_set_y = data[2]
train_set_x = np.asarray((train_set_x - mean) / std, dtype='float32')
valid_set_x = np.asarray((valid_set_x - mean) / std, dtype='float32')
test_set_x = np.asarray((test_set_x - mean) / std, dtype='float32')

if REMOVE_ENTER_EXIT:
    to_int = {}

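
# The np.tile calls above rely on each row being NFRAMES consecutive frames
# concatenated into one vector, so per-coefficient mean/std computed on
# single frames are repeated NFRAMES times to align with the stacked layout.
# A sketch of that stacking and standardization (the stack_frames helper and
# its edge-padding are illustrative assumptions, not the actual iterator):

import numpy as np

def stack_frames(frames, nframes):
    """Stack a context window of `nframes` around each frame (edge-padded)."""
    n, d = frames.shape
    pad = nframes // 2
    padded = np.pad(frames, ((pad, pad), (0, 0)), mode='edge')
    return np.hstack([padded[i:i + n] for i in xrange(nframes)])

frames = np.random.randn(100, 40)       # 100 frames of 40 fbank coefficients
stacked = stack_frames(frames, 7)       # shape (100, 280)
mean = np.tile(frames.mean(axis=0), 7)  # per-coefficient stats, tiled
std = np.tile(frames.std(axis=0), 7)
stacked = (stacked - mean) / std
print stacked.shape                     # (100, 280)
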
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit',
        iterator_type=DatasetSentencesIterator, batch_size=100,
        nframes=13, features="fbank",
        init_lr=0.001, max_epochs=500,
        network_type="dropout_net", trainer_type="adadelta",
        layers_types=[Linear, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    FIXME TODO
    """

    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    print "output file name:", output_file_name

    n_ins = None
    n_outs = None
    print "loading dataset from", dataset_path
    data = load_data(dataset_path, nframes=1, features=features,
                     scaling='normalize', cv_frac='fixed',
                     speakers=False, numpy_array_only=True)

    train_set_x, train_set_y = data[0]
    valid_set_x, valid_set_y = data[1]
    test_set_x, test_set_y = data[2]
    assert train_set_x.shape[1] == valid_set_x.shape[1]
    assert test_set_x.shape[1] == valid_set_x.shape[1]

    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]
    print "phones in train", len(set(train_set_y))
    print "phones in valid", len(set(valid_set_y))
    print "phones in test", len(set(test_set_y))
    n_outs = len(set(train_set_y))

    to_int = {}
    with open(dataset_name + '_to_int_and_to_state_dicts_tuple.pickle') as f:
        to_int, _ = cPickle.load(f)
    print "nframes:", nframes
    train_set_iterator = iterator_type(train_set_x, train_set_y,
            to_int, nframes=nframes, batch_size=batch_size)
    valid_set_iterator = iterator_type(valid_set_x, valid_set_y,
            to_int, nframes=nframes, batch_size=batch_size)
    test_set_iterator = iterator_type(test_set_x, test_set_y,
            to_int, nframes=nframes, batch_size=batch_size)
    n_ins = test_set_x.shape[1] * nframes

    assert n_ins != None
    assert n_outs != None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'

    # TODO the proper network type other than just dropout or not
    nnet = None
    if "dropout" in network_type:
        nnet = DropoutNet(numpy_rng=numpy_rng,
                          n_ins=n_ins,
                          layers_types=layers_types,
                          layers_sizes=layers_sizes,
                          dropout_rates=dropout_rates,
                          n_outs=n_outs,
                          debugprint=debug_print)
    else:
        nnet = NeuralNet(numpy_rng=numpy_rng,
                         n_ins=n_ins,
                         layers_types=layers_types,
                         layers_sizes=layers_sizes,
                         n_outs=n_outs,
                         debugprint=debug_print)
    print "Created a neural net as:",
    print str(nnet)

    # get the training, validation and testing function for the model
    print '... getting the training functions'
    print trainer_type
    train_fn = None
    if debug_plot or debug_print:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer(debug=True)
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer(debug=True)
        else:
            train_fn = nnet.get_SGD_trainer(debug=True)
    else:
        if trainer_type == "adadelta":
            train_fn = nnet.get_adadelta_trainer()
        elif trainer_type == "adagrad":
            train_fn = nnet.get_adagrad_trainer()
        else:
            train_fn = nnet.get_SGD_trainer()

    train_scoref = nnet.score_classif(train_set_iterator)
    valid_scoref = nnet.score_classif(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        data_iterator = test_set_iterator
        train_scoref = test_scoref

    print '... training the model'
    # early-stopping parameters
    patience = 1000  # look at this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None

    if debug_plot:
        print_mean_weights_biases(nnet.params)
    #with open(output_file_name + 'epoch_0.pickle', 'wb') as f:
    #    cPickle.dump(nnet, f)

    while (epoch < max_epochs) and (not done_looping):
        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            avg_cost = 0.
            if "delta" in trainer_type:  # TODO remove need for this if
                avg_cost = train_fn(x, y)
            else:
                avg_cost = train_fn(x, y, lr)
            if type(avg_cost) == list:
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print('  epoch %i took %f seconds' % (epoch, time.time() - timer))
        print('  epoch %i, avg costs %f' % \
              (epoch, numpy.mean(avg_costs)))
        print('  epoch %i, training error %f' % \
              (epoch, numpy.mean(train_scoref())))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay
        #with open(output_file_name + 'epoch_' + str(epoch) + '.pickle', 'wb') as f:
        #    cPickle.dump(nnet, f)

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses = valid_scoref()
        this_validation_loss = numpy.mean(validation_losses)
        # TODO this is a mean of means (with different lengths)
        print('  epoch %i, validation error %f' % \
              (epoch, this_validation_loss))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                    improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            test_score = numpy.mean(test_losses)
            # TODO this is a mean of means (with different lengths)
            print(('  epoch %i, test error of best model %f') %
                  (epoch, test_score))
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') %
          (best_validation_loss, test_score))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f)

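
# When network_type contains "dropout", run() builds a DropoutNet with
# per-layer dropout_rates (0.2 on the input, 0.5 on each hidden layer);
# the class itself is defined elsewhere. A minimal numpy sketch of a dropout
# forward pass at train time, with inverted scaling so test time needs no
# rescaling (an assumption about DropoutNet's convention):

import numpy

def dropout_forward(x, p_drop, rng, train=True):
    """Zero units with probability p_drop; scale survivors by 1/(1 - p)."""
    if not train or p_drop <= 0.:
        return x
    mask = rng.uniform(size=x.shape) >= p_drop
    return x * mask / (1. - p_drop)

rng = numpy.random.RandomState(123)
h = numpy.ones((4, 10))
print dropout_forward(h, 0.5, rng)  # about half the entries 0, the rest 2.0
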
def test_DBN(finetune_lr=0.1, pretraining_epochs=42,  # TODO 1000
             pretrain_lr=0.01, k=1, training_epochs=42,  # TODO 1000
             dataset=DATASET, batch_size=10):
    """
    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
                            (fine-tuning epochs)

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    print "loading dataset from", dataset
    # reads the training set data and labels (aligned_train_xdata.npy,
    # aligned_train_ylabels.npy) and the test set data and labels
    # (aligned_test_xdata.npy, aligned_test_ylabels.npy) from the dataset
    # directory, and generates the validation set valid_set_x, valid_set_y
    datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=False)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print "dataset loaded!"
    print "train set size", train_set_x.shape[0]
    print "validation set size", valid_set_x.shape[0]
    print "test set size", test_set_x.shape[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print '... building the model'
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=DIMENSION * N_FRAMES,
              hidden_layers_sizes=[512, 512],
              n_outs=N_OUTS)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         lr=pretrain_lr / (1. + 0.5 * batch_index)))  # TODO
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################
    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
                datasets=datasets, batch_size=batch_size,
                learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        # go through each minibatch
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index

            # validate once every validation_frequency iterations
            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('  epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # stop looping once patience is exhausted
            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open('dbn_1.pickle', 'w') as f:
        cPickle.dump(dbn, f)

def train_DBN(finetune_lr=0.01, pretraining_epochs=100, pretrain_lr=0.001, k=1, training_epochs=200, dataset=DATASET, batch_size=100, dbn_load_from=''): """ :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print "loading dataset from", dataset ###datasets = load_data(dataset, nframes=N_FRAMES, features='MFCC', scaling='normalize', pca_whiten=False, cv_frac=0.2, dataset_name='TIMIT_wo_sa', speakers=SPEAKERS) datasets = load_data(dataset, nframes=N_FRAMES, features='MFCC', scaling='normalize', pca_whiten=False, cv_frac='fixed', dataset_name='TIMIT_train_dev_test', speakers=SPEAKERS) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] N_OUTS = 62 * 3 # #phones * #states if SPEAKERS: from collections import Counter c = Counter(train_set_y.eval()) N_OUTS = len(c) print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "N_OUTS:", N_OUTS # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network print "train_set_x.shape.eval()", train_set_x.shape.eval() assert (train_set_x.shape[1].eval() == N_FRAMES * 39) # check dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.shape[1].eval(), hidden_layers_sizes=[2496, 2496, 2496], n_outs=N_OUTS) ######################### # PRETRAINING THE MODEL # ######################### print '... evaluating on MFCC only, error rate of a LogisticRegression:' on_top_of_MFCC_fn = dbn.pretraining_eval_function(layer=-1, train_set_x=train_set_x, train_set_y=train_set_y, valid_set_x=valid_set_x, valid_set_y=valid_set_y, test_set_x=test_set_x, test_set_y=test_set_y, batch_size=batch_size) print 'error rate:', on_top_of_MFCC_fn() #dbn = None ### TOREMOVE #with open('dbn_analyze_timit__plr1.0E-03_pep100_flr1.0E-03_fep_10_k1_layer_1.pickle') as f: ### TOREMOVE # dbn = cPickle.load(f) ### TOREMOVE print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) pretraining_eval_fns = [ dbn.pretraining_eval_function(layer=ii, train_set_x=train_set_x, train_set_y=train_set_y, valid_set_x=valid_set_x, valid_set_y=valid_set_y, test_set_x=test_set_x, test_set_y=test_set_y, batch_size=batch_size) for ii in xrange(dbn.n_layers) ] for i in xrange(dbn.n_layers): print i, pretraining_eval_fns[i]() print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(dbn.n_layers): ####################### # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): tmp_lr = pretrain_lr / (1. 
+ 0.05 * batch_index) # TODO if i == 0: tmp_lr /= LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN c.append(pretraining_fns[i](index=batch_index, lr=tmp_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) ############################## print( '>>> (cross_val, test) error rates of LogisticRegression on top of the hidden layer %d is' % i) print(pretraining_eval_fns[i]()) # TODO stop pretraining when this error rate goes up (early stopping) ############################## with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f: cPickle.dump(dbn, f) print "dumped a partially pre-trained model" ####################### end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## if dbn_load_from != '': with open(dbn_load_from) as f: dbn = cPickle.load(f) print 'loaded this dbn:', dbn_load_from #with open(output_file_name + '_layer_2.pickle') as f: # dbn = cPickle.load(f) #datasets = load_data(dataset, nframes=N_FRAMES, unit=False, student=True, pca_whiten=False, cv_frac=0.2, dataset_name='TIMIT', speakers=SPEAKERS) #train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y = None, None, None, None, None, None #train_set_x, train_set_y = datasets[0] #valid_set_x, valid_set_y = datasets[1] #test_set_x, test_set_y = datasets[2] #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) print '... finetuning the model' # early-stopping parameters patience = 4 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 print "number of training (fine-tuning) batches", n_train_batches while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = epoch * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) ############################## for layer_ind in xrange(dbn.n_layers): print( '>>> (cross-val, test) error rate of a LogisticRegression on top of layer %d is' % layer_ind) print(pretraining_eval_fns[layer_ind]()) ############################## # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f) #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f)
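# The pre-training loop above decays the RBM learning rate per minibatch as
# lr(t) = pretrain_lr / (1 + 0.05 * t), dividing once more by
# LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN for the first, Gaussian-visible
# layer. A minimal standalone sketch of that schedule (the helper name and
# the default denominator value are assumptions, not part of the script):
def pretraining_learning_rate(pretrain_lr, batch_index, layer_index,
                              gaussian_denominator=2.):
    """Decayed per-minibatch learning rate for layer-wise RBM pre-training."""
    tmp_lr = pretrain_lr / (1. + 0.05 * batch_index)
    if layer_index == 0:  # Gaussian-Bernoulli first layer needs a smaller rate
        tmp_lr /= gaussian_denominator
    return tmp_lr
# e.g. pretraining_learning_rate(0.001, 100, 1) == 0.001 / 6.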
def test_SRNN(finetune_lr=0.01, pretraining_epochs=0, pretrain_lr=0.01, k=1, training_epochs=1000, # TODO 100+ dataset=DATASET, batch_size=100): """ :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print "loading dataset from", dataset #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='normalize', cv_frac=0.2, speakers=False, numpy_array_only=True) #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='student', cv_frac='fixed', speakers=False, numpy_array_only=True) datasets = load_data(dataset, nframes=1, features='fbank', scaling='student', cv_frac='fixed', speakers=False, numpy_array_only=True) #datasets = load_data(dataset, nframes=1, features='fbank', scaling='student', cv_frac=0.2, speakers=False, numpy_array_only=True) train_set_x, train_set_y = datasets[0] # if speakers, do test/test/test valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "phones in train", len(set(train_set_y)) print "phones in valid", len(set(valid_set_y)) print "phones in test", len(set(test_set_y)) to_int = {} with open('timit_to_int_and_to_state_dicts_tuple.pickle') as f: # TODO to_int, _ = cPickle.load(f) train_set_iterator = DatasetSentencesIterator(train_set_x, train_set_y, to_int, N_FRAMES) valid_set_iterator = DatasetSentencesIterator(valid_set_x, valid_set_y, to_int, N_FRAMES) test_set_iterator = DatasetSentencesIterator(test_set_x, test_set_y, to_int, N_FRAMES) # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' n_outs = len(set(train_set_y)) dbn = SRNN(numpy_rng=numpy_rng, n_ins=N_FRAMES * N_FEATURES, relu_layers_sizes=[1024, 1024, 1024], n_outs=n_outs) # get the training, validation and testing function for the model print '... getting the finetuning functions' first_pass, train_fn = dbn.get_stacked_adadelta_trainer() train_scoref = dbn.score_stacked_classif(train_set_iterator) valid_scoref = dbn.score_stacked_classif(valid_set_iterator) test_scoref = dbn.score_stacked_classif(test_set_iterator) print '... finetuning the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_error = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 avg_costs = [] for iteration, (x, y) in enumerate(train_set_iterator): #if best_validation_error < 0.5: # this is a hack: if epoch > 1: # this is a hack # TODO normally wait for total convergence and redo the # training this way (because doing 2 trainings would not # badly learn Ws and would reset Adadelta): p_y_init = numpy.zeros((x.shape[0], n_outs), dtype='float32') + 1./n_outs p_y = first_pass(x, p_y_init) if N_FRAMES_WINDOW > 0: p_y = numpy.concatenate([p_y_init[:N_FRAMES_WINDOW], p_y[:-N_FRAMES_WINDOW]]) avg_cost = train_fn(x, p_y, y) else: p_y_init = numpy.zeros((x.shape[0], n_outs), dtype='float32') avg_cost = train_fn(x, p_y_init, y) avg_costs.append(avg_cost) #print(' epoch %i, sentence %i, ' #'avg cost for this sentence %f' % \ # (epoch, iteration, avg_cost)) print(' epoch %i, avg costs %f' % \ (epoch, numpy.mean(avg_costs))) print(' epoch %i, training error %f %%' % \ (epoch, numpy.mean(train_scoref()) * 100.)) # we check the validation error on every epoch validation_errors = valid_scoref() this_validation_error = numpy.mean(validation_errors) # TODO this is a mean of means (with different lengths) print(' epoch %i, validation error %f %%' % \ (epoch, this_validation_error * 100.)) # if we got the best validation score until now if this_validation_error < best_validation_error: with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f) # improve patience if error improvement is good enough if (this_validation_error < best_validation_error * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_error = this_validation_error # test it on the test set test_errors = test_scoref() test_error = numpy.mean(test_errors) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of ' 'best model %f %%') % (epoch, test_error * 100.)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%, ' 'with test performance %f %%') % (best_validation_error * 100., test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f)
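# The stacked trainer above feeds the first pass's frame-wise posteriors
# back in, delayed by N_FRAMES_WINDOW frames and front-padded with the
# uniform init, so that frame t conditions on the posteriors of frame
# t - N_FRAMES_WINDOW. A self-contained numpy illustration under assumed
# toy shapes (not part of the original script):
import numpy
n_frames_window = 2                       # stands in for N_FRAMES_WINDOW
n_frames_demo, n_outs_demo = 5, 3
p_y_init = numpy.zeros((n_frames_demo, n_outs_demo),
                       dtype='float32') + 1. / n_outs_demo
p_y = numpy.arange(n_frames_demo * n_outs_demo,
                   dtype='float32').reshape((n_frames_demo, n_outs_demo))
shifted = numpy.concatenate([p_y_init[:n_frames_window],
                             p_y[:-n_frames_window]])
assert shifted.shape == p_y.shape
# rows 0..1 stay uniform; row t >= 2 holds the posteriors of frame t - 2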
def test_DBN( finetune_lr=0.005, pretraining_epochs=69, # TODO 100+ pretrain_lr=0.001, k=1, training_epochs=69, # TODO 100+ dataset=DATASET, batch_size=20): """ :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print "loading dataset from", dataset # datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=True, cv_frac=0.0) # # unit=False because we don't want the [0-1] binary RBM projection # # normalize=True because we want the data to be 0 centered with 1 variance. # train_set_x, train_set_y = datasets[0] # valid_set_x, valid_set_y = datasets[1] # test_set_x, test_set_y = datasets[2] # print "dataset loaded!" # print "train set size", train_set_x.shape[0] # print "validation set size", valid_set_x.shape[0] # print "test set size", test_set_x.shape[0] # # # compute number of minibatches for training, validation and testing # n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # # # numpy random generator # numpy_rng = numpy.random.RandomState(123) # print '... building the model' # # construct the Deep Belief Network # dbn = DBN(numpy_rng=numpy_rng, n_ins=39 * N_FRAMES, # hidden_layers_sizes=[960, 960, 960], # n_outs=62 * 3) # # ######################### # # PRETRAINING THE MODEL # # ######################### # print '... getting the pretraining functions' # pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, # batch_size=batch_size, # k=k) # # print '... pre-training the model' # start_time = time.clock() # ## Pre-train layer-wise # for i in xrange(dbn.n_layers): # # go through pretraining epochs # for epoch in xrange(pretraining_epochs): # # go through the training set # c = [] # for batch_index in xrange(n_train_batches): # tmp_lr = pretrain_lr / (1. + 0.5 * batch_index) # TODO # if i == 0: # tmp_lr /= LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN # c.append(pretraining_fns[i](index=batch_index, lr=tmp_lr)) # print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), # print numpy.mean(c) # with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f: # cPickle.dump(dbn, f) # print "dumped a partially pre-trained model" # # end_time = time.clock() # print >> sys.stderr, ('The pretraining code for file ' + # os.path.split(__file__)[1] + # ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## with open('dbn_Gaussian_gpu_layer_2.pickle') as f: dbn = cPickle.load(f) datasets = load_data(dataset, nframes=N_FRAMES, unit=False, normalize=True, cv_frac=0.2) # unit=False because we don't want the [0-1] binary RBM projection # normalize=True because we want the data to be 0 centered with 1 variance. 
    train_set_x, train_set_y, valid_set_x, valid_set_y, test_set_x, test_set_y = \
            None, None, None, None, None, None
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size,
        learning_rate=finetune_lr)

    print '... finetuning the model'
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.          # wait this much longer when a new best
                                    # is found
    improvement_threshold = 0.995   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                    # go through this many minibatches before
                                    # checking the network on the validation
                                    # set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    print "number of training (fine-tuning) batches", n_train_batches

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                            improvement_threshold):
                        patience = max(patience, iter * patience_increase)
                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('    epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%, '
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open(output_file_name + '.pickle', 'w') as f:
        cPickle.dump(dbn, f)
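# Every training loop in this file applies the same patience-based early
# stopping; extracted here as a self-contained sketch (the helper name is
# an assumption): a significant improvement, i.e. a relative factor below
# improvement_threshold, extends patience to iter * patience_increase, and
# looping stops once the iteration count reaches patience.
def update_patience(patience, iteration, this_loss, best_loss,
                    improvement_threshold=0.995, patience_increase=2.):
    """Return (patience, best_loss, stop) after one validation check."""
    if this_loss < best_loss:
        if this_loss < best_loss * improvement_threshold:
            patience = max(patience, iteration * patience_increase)
        best_loss = this_loss
    return patience, best_loss, patience <= iteration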
def test_DBN(finetune_lr=0.005, pretraining_epochs=10, # TODO 100+ pretrain_lr=0.01, k=1, training_epochs=100, # TODO 100+ dataset=DATASET, batch_size=200): """ :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print "loading dataset from", dataset datasets = load_data(dataset, nframes=N_FRAMES, features='MFCC', scaling='normalize', pca_whiten=False, cv_frac=0.2) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network print "train_set_x.shape.eval()", train_set_x.shape.eval() assert(train_set_x.shape[1].eval() == N_FRAMES * 39) # check dbn = DBN(numpy_rng=numpy_rng, n_ins=train_set_x.shape[1].eval(), hidden_layers_sizes=[2496, 2496, 2496], n_outs=62 * 3) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(dbn.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): tmp_lr = pretrain_lr / (1. + 0.05 * batch_index) # TODO if i == 0: tmp_lr /= LEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN c.append(pretraining_fns[i](index=batch_index, lr=tmp_lr)) print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f: cPickle.dump(dbn, f) print "dumped a partially pre-trained model" end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## #with open('dbn_Gaussian_gpu_layer_2.pickle') as f: # dbn = cPickle.load(f) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) print '... finetuning the model' # early-stopping parameters patience = 4 * n_train_batches # look as this many examples regardless patience_increase = 2. 
# wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatches before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 print "number of training (fine-tuning) batches", n_train_batches while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = epoch * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % \ (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f) #improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f)
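# Note on n_train_batches above: with Python 2 semantics,
# shape[0] / batch_size floor-divides, silently dropping the last partial
# minibatch. A small sketch of the explicit form (and what a Python 3 port
# would need); the sizes are hypothetical:
n_examples_demo = 46797
batch_size_demo = 200
n_batches_demo = n_examples_demo // batch_size_demo  # 233; 197 examples unused
assert n_examples_demo / batch_size_demo == n_batches_demo  # Python 2 ints only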
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit', iterator_type=DatasetSentencesIterator, batch_size=100, nframes=13, features="fbank", init_lr=0.001, max_epochs=500, network_type="dropout_net", trainer_type="adadelta", layers_types=[Linear, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], recurrent_connections=[], prefix_fname='', debug_on_test_only=False, debug_print=0, debug_time=False, debug_plot=0): """ FIXME TODO """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + features + str(nframes) output_file_name += "_" + network_type + "_" + trainer_type print "output file name:", output_file_name n_ins = None n_outs = None print "loading dataset from", dataset_path data = load_data(dataset_path, nframes=1, features=features, scaling='normalize', cv_frac='fixed', speakers=False, numpy_array_only=True) train_set_x, train_set_y = data[0] valid_set_x, valid_set_y = data[1] test_set_x, test_set_y = data[2] assert train_set_x.shape[1] == valid_set_x.shape[1] assert test_set_x.shape[1] == valid_set_x.shape[1] print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "phones in train", len(set(train_set_y)) print "phones in valid", len(set(valid_set_y)) print "phones in test", len(set(test_set_y)) n_outs = len(set(train_set_y)) to_int = {} with open(dataset_name + '_to_int_and_to_state_dicts_tuple.pickle') as f: to_int, _ = cPickle.load(f) print "nframes:", nframes train_set_iterator = iterator_type(train_set_x, train_set_y, to_int, nframes=nframes, batch_size=batch_size) valid_set_iterator = iterator_type(valid_set_x, valid_set_y, to_int, nframes=nframes, batch_size=batch_size) test_set_iterator = iterator_type(test_set_x, test_set_y, to_int, nframes=nframes, batch_size=batch_size) n_ins = test_set_x.shape[1]*nframes assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # TODO the proper network type other than just dropout or not nnet = None if "dropout" in network_type: nnet = DropoutNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, dropout_rates=dropout_rates, n_outs=n_outs, debugprint=debug_print) else: nnet = NeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref = nnet.score_classif(train_set_iterator) valid_scoref = nnet.score_classif(valid_set_iterator) test_scoref = nnet.score_classif(test_set_iterator) data_iterator = train_set_iterator if debug_on_test_only: data_iterator = test_set_iterator train_scoref = test_scoref print '... 
training the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f) while (epoch < max_epochs) and (not done_looping): epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): avg_cost = 0. if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x, y) else: avg_cost = train_fn(x, y, lr) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) print(' epoch %i, avg costs %f' % \ (epoch, numpy.mean(avg_costs))) print(' epoch %i, training error %f' % \ (epoch, numpy.mean(train_scoref()))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f) if debug_on_test_only: continue # we check the validation loss on every epoch validation_losses = valid_scoref() this_validation_loss = numpy.mean(validation_losses) # TODO this is a mean of means (with different lengths) print(' epoch %i, validation error %f' % \ (epoch, this_validation_loss)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'wb') as f: cPickle.dump(nnet, f) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score = numpy.mean(test_losses) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of best model %f') % (epoch, test_score)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f, ' 'with test performance %f') % (best_validation_loss, test_score)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '_final.pickle', 'wb') as f: cPickle.dump(nnet, f)
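# The "adadelta" trainer selected above needs no hand-tuned learning rate;
# for reference, a minimal numpy sketch of one Adadelta step (Zeiler, 2012),
# with rho and eps matching the defaults passed elsewhere in this file. This
# is an illustrative re-derivation, not the trainer's actual code:
import numpy
def adadelta_step(param, grad, acc_grad2, acc_dx2, rho=0.95, eps=1.E-6):
    """One Adadelta update; acc_* are running averages carried across steps."""
    acc_grad2 = rho * acc_grad2 + (1. - rho) * grad ** 2
    dx = -numpy.sqrt((acc_dx2 + eps) / (acc_grad2 + eps)) * grad
    acc_dx2 = rho * acc_dx2 + (1. - rho) * dx ** 2
    return param + dx, acc_grad2, acc_dx2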
def createLogisticRegression(n_ins, n_outs):
    numpy_rng = np.random.RandomState(123)
    ret = NeuralNet(numpy_rng=numpy_rng, n_ins=n_ins,
                    layers_types=[LogisticRegression], layers_sizes=[],
                    n_outs=n_outs, debugprint=False)
    return ret


data = load_data(DATASET, nframes=NFRAMES, features='fbank', scaling='none',
                 cv_frac='fixed', speakers=False, numpy_array_only=True)
# load the per-coefficient mean/std computed on the training set and tile
# them so that they cover all NFRAMES stacked frames of each input row
d = np.load("mean_std.npz")
mean = np.tile(d['mean'], NFRAMES)
std = np.tile(d['std'], NFRAMES)
train_set_x, train_set_y = data[0]
valid_set_x, valid_set_y = data[1]
test_set_x, test_set_y = data[2]
train_set_x = np.asarray((train_set_x - mean) / std, dtype='float32')
valid_set_x = np.asarray((valid_set_x - mean) / std, dtype='float32')
test_set_x = np.asarray((test_set_x - mean) / std, dtype='float32')
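# Why np.tile above: mean_std.npz stores per-coefficient statistics for a
# single frame (e.g. 40 fbank values), while each dataset row stacks
# NFRAMES consecutive frames, so the statistics must be repeated to line up
# with every frame copy. A quick shape check under assumed sizes:
import numpy as np
nframes_demo = 7                       # hypothetical context size
frame_mean = np.zeros(40)              # one frame's per-coefficient stats
row = np.ones(40 * nframes_demo)       # one stacked-frames input row
assert np.tile(frame_mean, nframes_demo).shape == row.shape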
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit', iterator_type=DatasetDTWIterator, batch_size=100, nframes=13, features="fbank", init_lr=0.001, max_epochs=500, network_type="dropout_net", trainer_type="adadelta", layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], recurrent_connections=[], prefix_fname='', debug_on_test_only=False, debug_print=0, debug_time=False, debug_plot=0): """ FIXME TODO """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + features + str(nframes) output_file_name += "_" + network_type + "_" + trainer_type output_file_name += "_emb_" + str(DIM_EMBEDDING) print "output file name:", output_file_name n_ins = None n_outs = None print "loading dataset from", dataset_path # TODO DO A FUNCTION if dataset_path[-7:] == '.joblib': if REDTW: data_same = joblib.load(dataset_path) shuffle(data_same) ten_percent = int(0.1 * len(data_same)) x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] mean = numpy.mean(x_arr_same, 0) std = numpy.std(x_arr_same, 0) numpy.savez("mean_std_3", mean=mean, std=std) print x_arr_same.shape print "mean:", mean print "std:", std marginf = 0 #(nframes-1)/2 # TODO train_set_iterator = iterator_type(data_same[:-ten_percent], mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf) valid_set_iterator = iterator_type(data_same[-ten_percent:], mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf) #test_dataset_path = dataset_path[:-7].replace("train", "test") + '.joblib' test_dataset_path = dataset_path[:-7].replace("train", "dev") + '.joblib' data_same = joblib.load(test_dataset_path) test_set_iterator = iterator_type(data_same, mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf, only_same=True) n_ins = mean.shape[0] * nframes n_outs = DIM_EMBEDDING else: data_same = joblib.load(dataset_path) #data_same = [(word_label, talker1, talker2, fbanks1, fbanks2, DTW_cost, DTW_1to2, DTW_2to1)] print "number of word paired:", len(data_same) if debug_print: # some stats on the DTW dtw_costs = zip(*data_same)[5] words_frames = numpy.asarray( [fb.shape[0] for fb in zip(*data_same)[3]]) print "mean DTW cost", numpy.mean( dtw_costs), "std dev", numpy.std(dtw_costs) print "mean word length in frames", numpy.mean( words_frames), "std dev", numpy.std(words_frames) print "mean DTW cost per frame", numpy.mean( dtw_costs / words_frames), "std dev", numpy.std( dtw_costs / words_frames) # generate data_diff: # spkr_words = {} same_spkr = 0 for i, tup in enumerate(data_same): # spkr_words[tup[1]].append((i, 0)) # spkr_words[tup[2]].append((i, 1)) if tup[1] == tup[2]: same_spkr += 1 # to_del = [] # for spkr, words in spkr_words.iteritems(): # if len(words) < 2: # to_del.append(spkr) # print "to del len:", len(to_del) # for td in to_del: # del spkr_words[td] ratio = same_spkr * 1. 
/ len(data_same) print "ratio same spkr / all for same:", ratio data_diff = [] # keys = spkr_words.keys() # lkeys = len(keys) - 1 ldata_same = len(data_same) - 1 same_spkr_diff = 0 for i in xrange(len(data_same)): word_1 = random.randint(0, ldata_same) word_1_type = data_same[word_1][0] word_2 = random.randint(0, ldata_same) while data_same[word_2][0] == word_1_type: word_2 = random.randint(0, ldata_same) wt1 = random.randint(0, 1) wt2 = random.randint(0, 1) if data_same[word_1][1 + wt1] == data_same[word_2][1 + wt2]: same_spkr_diff += 1 p1 = data_same[word_1][3 + wt1] p2 = data_same[word_2][3 + wt2] r1 = p1[:min(len(p1), len(p2))] r2 = p2[:min(len(p1), len(p2))] data_diff.append((r1, r2)) ratio = same_spkr_diff * 1. / len(data_diff) print "ratio same spkr / all for diff:", ratio x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] print x_arr_same.shape x_arr_diff = numpy.r_[numpy.concatenate([e[0] for e in data_diff]), numpy.concatenate([e[1] for e in data_diff])] print x_arr_diff.shape x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff]) mean = numpy.mean(x_arr_all, 0) std = numpy.std(x_arr_all, 0) numpy.savez("mean_std_3", mean=mean, std=std) x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)] x_diff = [((e[0] - mean) / std, (e[1] - mean) / std) for e in data_diff] #shuffle(x_diff) y_diff = [[0 for _ in xrange(len(e[0]))] for i, e in enumerate(x_diff)] y = [j for i in zip(y_same, y_diff) for j in i] x = [j for i in zip(x_same, x_diff) for j in i] x1, x2 = zip(*x) assert x1[0].shape[0] == x2[0].shape[0] assert x1[0].shape[1] == x2[0].shape[1] assert len(x1) == len(x2) assert len(x1) == len(y) ten_percent = int(0.1 * len(x1)) n_ins = x1[0].shape[1] * nframes n_outs = DIM_EMBEDDING print "nframes:", nframes marginf = (nframes - 1) / 2 # TODO train_set_iterator = iterator_type( x1[:-ten_percent], x2[:-ten_percent], y[:-ten_percent], # TODO nframes=nframes, batch_size=batch_size, marginf=marginf) valid_set_iterator = iterator_type( x1[-ten_percent:], x2[-ten_percent:], y[-ten_percent:], # TODO nframes=nframes, batch_size=batch_size, marginf=marginf) ### TEST SET test_dataset_path = dataset_path[:-7].replace("train", "dev") + '.joblib' data_same = joblib.load(test_dataset_path) # DO ONLY SAME x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] print x_arr_same.shape x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)] x = x_same y = y_same x1, x2 = zip(*x) test_set_iterator = iterator_type(x1, x2, y, nframes=nframes, batch_size=batch_size, marginf=marginf) else: data = load_data(dataset_path, nframes=1, features=features, scaling='normalize', cv_frac='fixed', speakers=False, numpy_array_only=True) train_set_x, train_set_y = data[0] valid_set_x, valid_set_y = data[1] test_set_x, test_set_y = data[2] assert train_set_x.shape[1] == valid_set_x.shape[1] assert test_set_x.shape[1] == valid_set_x.shape[1] print "dataset loaded!" 
print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "phones in train", len(set(train_set_y)) print "phones in valid", len(set(valid_set_y)) print "phones in test", len(set(test_set_y)) n_outs = len(set(train_set_y)) to_int = {} with open(dataset_name + '_to_int_and_to_state_dicts_tuple.pickle') as f: to_int, _ = cPickle.load(f) print "nframes:", nframes train_set_iterator = iterator_type(train_set_x, train_set_y, to_int, nframes=nframes, batch_size=batch_size) valid_set_iterator = iterator_type(valid_set_x, valid_set_y, to_int, nframes=nframes, batch_size=batch_size) test_set_iterator = iterator_type(test_set_x, test_set_y, to_int, nframes=nframes, batch_size=batch_size) n_ins = test_set_x.shape[1] * nframes assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # TODO the proper network type other than just dropout or not nnet = None fast_dropout = False if "fast_dropout" in network_type: fast_dropout = True if "ab_net" in network_type or "abnet" in network_type: if "dropout" in network_type: print "dropout ab net" nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='cos_cos2', rho=0.95, eps=1.E-6, max_norm=4., fast_drop=fast_dropout, debugprint=debug_print) else: print "ab net" nnet = ABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='dot_prod', rho=0.95, eps=1.E-6, max_norm=0., debugprint=debug_print) else: if "dropout" in network_type: nnet = DropoutNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, dropout_rates=dropout_rates, n_outs=n_outs, rho=0.95, eps=1.E-6, max_norm=0., fast_drop=fast_dropout, debugprint=debug_print) else: nnet = NeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, rho=0.92, eps=1.E-6, max_norm=0., debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator) valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator) test_scoref = nnet.score_classif(test_set_iterator) data_iterator = train_set_iterator if debug_on_test_only: data_iterator = test_set_iterator train_scoref = test_scoref print '... training the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f) while (epoch < max_epochs) and (not done_looping): if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0: print "recomputing DTW:" data_iterator.recompute_DTW(nnet.transform_x1()) epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): avg_cost = 0. if "ab_net" in network_type or "abnet" in network_type: # remove need for this if if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x[0], x[1], y) else: avg_cost = train_fn(x[0], x[1], y, lr) if debug_print >= 3: print "cost:", avg_cost[0] if debug_plot >= 2: plot_costs(avg_cost[0]) if not len(avg_params_gradients_updates): avg_params_gradients_updates = map( numpy.asarray, avg_cost[1:]) else: avg_params_gradients_updates = rolling_avg_pgu( iteration, avg_params_gradients_updates, map(numpy.asarray, avg_cost[1:])) if debug_plot >= 3: plot_params_gradients_updates(iteration, avg_cost[1:]) else: if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x, y) else: avg_cost = train_fn(x, y, lr) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) avg_cost = numpy.mean(avg_costs) if numpy.isnan(avg_cost): print("avg costs is NaN so we're stopping here!") break print(' epoch %i, avg costs %f' % \ (epoch, avg_cost)) tmp_train = zip(*train_scoref()) print(' epoch %i, training error same %f, diff %f' % \ (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1]))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f, protocol=-1) if debug_on_test_only: continue # we check the validation loss on every epoch validation_losses = zip(*valid_scoref()) #this_validation_loss = -numpy.mean(validation_losses[0]) # TODO this is a mean of means (with different lengths) this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\ 0.5*numpy.mean(validation_losses[1]) print(' epoch %i, valid error same %f, diff %f' % \ (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1]))) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'wb') as f: cPickle.dump(nnet, f, protocol=-1) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score_same = numpy.mean( test_losses[0] ) # TODO this is a mean of means (with different lengths) test_score_diff = numpy.mean( test_losses[1] ) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of best model same %f diff %f') % (epoch, test_score_same, test_score_diff)) if patience <= iteration: # TODO correct that done_looping = True break 
    end_time = time.clock()
    # NB: this ABnet run tracks test results as same/diff scores; reporting
    # the plain test_score here would only print its 0. initialization
    print(('Optimization complete with best validation score of %f, '
           'with test performance (same) %f (diff) %f') %
          (best_validation_loss, test_score_same, test_score_diff))
    print >> sys.stderr, ('The fine tuning code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
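# Sketch of the combined ABnet validation criterion computed above: the
# scorer yields per-batch (same, diff) scores where "same" is better high
# and "diff" is better low, folded with equal weights into one loss to
# minimize. The helper name is an assumption; the caveat about averaging
# per-sentence means of different lengths is the one flagged in the TODOs:
import numpy
def combined_abnet_loss(same_scores, diff_scores):
    """0.5 * (1 - mean(same)) + 0.5 * mean(diff)."""
    return (0.5 * (1. - numpy.mean(same_scores))
            + 0.5 * numpy.mean(diff_scores))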
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit', iterator_type=DatasetSentencesIterator, batch_size=100, nframes=13, features="fbank", init_lr=0.001, max_epochs=500, network_type="dropout_XXX", trainer_type="adadelta", layers_types=[Linear, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], recurrent_connections=[], prefix_fname='', debug_on_test_only=False, debug_print=0, debug_time=False, debug_plot=0): """ FIXME TODO """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + features + str(nframes) output_file_name += "_" + network_type + "_" + trainer_type print "output file name:", output_file_name n_ins = None n_outs = None print "loading dataset from", dataset_path # TODO DO A FUNCTION if dataset_path[-7:] == '.joblib': if REDTW: data_same = joblib.load(dataset_path) shuffle(data_same) ten_percent = int(0.1 * len(data_same)) x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] mean = numpy.mean(x_arr_same, 0) std = numpy.std(x_arr_same, 0) numpy.savez("mean_std_3", mean=mean, std=std) print x_arr_same.shape print "mean:", mean print "std:", std train_set_iterator = iterator_type(data_same[:-ten_percent], mean, std, nframes=nframes, batch_size=batch_size, marginf=3) valid_set_iterator = iterator_type(data_same[-ten_percent:], mean, std, nframes=nframes, batch_size=batch_size, marginf=3) test_dataset_path = "dtw_words_2_test.joblib" data_same = joblib.load(test_dataset_path) test_set_iterator = iterator_type(data_same, mean, std, nframes=nframes, batch_size=batch_size, marginf=3, only_same=True) n_ins = mean.shape[0] * nframes n_outs = 100 # TODO else: if OLD_DTW_DATA: data_same = joblib.load(dataset_path) #data_same = [(word_label, fbanks1, fbanks2, DTW_cost, DTW_1to2, DTW_2to1)] if debug_print: # some stats on the DTW dtw_costs = zip(*data_same)[3] words_frames = numpy.asarray([fb.shape[0] for fb in zip(*data_same)[1]]) print "mean DTW cost", numpy.mean(dtw_costs), "std dev", numpy.std(dtw_costs) print "mean word length in frames", numpy.mean(words_frames), "std dev", numpy.std(words_frames) print "mean DTW cost per frame", numpy.mean(dtw_costs/words_frames), "std dev", numpy.std(dtw_costs/words_frames) # /some stats on the DTW # TODO maybe ceil on the DTW cost to be considered "same" x_arr_same = numpy.r_[numpy.concatenate([e[1] for e in data_same]), numpy.concatenate([e[2] for e in data_same])] print x_arr_same.shape # we need about as much negative examples as positive ones # TODO wrap this in try except or if tmp = dataset_path.split('/') neg_data_path = "/".join(tmp[:-1]) + "/neg" + tmp[-1][3:] data_diff = joblib.load(neg_data_path) x_arr_diff = numpy.r_[numpy.concatenate([e[0] for e in data_diff]), numpy.concatenate([e[1] for e in data_diff])] print x_arr_diff.shape x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff]) mean = numpy.mean(x_arr_all, 0) std = numpy.std(x_arr_all, 0) numpy.savez("mean_std", mean=mean, std=std) x_same = [((e[1][e[-2]] - mean) / std, (e[2][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)] x_diff = [((e[0] - mean) / std, (e[1] - mean) / std) for e in data_diff] shuffle(x_diff) y_diff = [[0 for _ in xrange(len(e[0]))] for i, e in enumerate(x_diff)] y = [j for i in zip(y_same, y_diff) for j in i] x = [j for i in zip(x_same, x_diff) for j in i] 
else: data_same = joblib.load(dataset_path) #data_same = [(word_label, talker1, talker2, fbanks1, fbanks2, DTW_cost, DTW_1to2, DTW_2to1)] print "number of word paired:", len(data_same) if debug_print: # some stats on the DTW dtw_costs = zip(*data_same)[5] words_frames = numpy.asarray([fb.shape[0] for fb in zip(*data_same)[3]]) print "mean DTW cost", numpy.mean(dtw_costs), "std dev", numpy.std(dtw_costs) print "mean word length in frames", numpy.mean(words_frames), "std dev", numpy.std(words_frames) print "mean DTW cost per frame", numpy.mean(dtw_costs/words_frames), "std dev", numpy.std(dtw_costs/words_frames) # generate data_diff: # spkr_words = {} same_spkr = 0 for i, tup in enumerate(data_same): print "TUP:", tup print "TUP 1:", tup[1] print "TUP 2:", tup[2] # spkr_words[tup[1]].append((i, 0)) # spkr_words[tup[2]].append((i, 1)) if tup[1] == tup[2]: same_spkr += 1 # to_del = [] # for spkr, words in spkr_words.iteritems(): # if len(words) < 2: # to_del.append(spkr) # print "to del len:", len(to_del) # for td in to_del: # del spkr_words[td] ratio = same_spkr * 1. / len(data_same) print "ratio same spkr / all for same:", ratio data_diff = [] # keys = spkr_words.keys() # lkeys = len(keys) - 1 ldata_same = len(data_same)-1 same_spkr_diff = 0 for i in xrange(len(data_same)): word_1 = random.randint(0, ldata_same) word_1_type = data_same[word_1][0] word_2 = random.randint(0, ldata_same) while data_same[word_2][0] == word_1_type: word_2 = random.randint(0, ldata_same) wt1 = random.randint(0, 1) wt2 = random.randint(0, 1) if data_same[word_1][1+wt1] == data_same[word_2][1+wt2]: same_spkr_diff += 1 p1 = data_same[word_1][3+wt1] p2 = data_same[word_2][3+wt2] r1 = p1[:min(len(p1), len(p2))] r2 = p2[:min(len(p1), len(p2))] data_diff.append((r1, r2)) ratio = same_spkr_diff * 1. 
/ len(data_diff) print "ratio same spkr / all for diff:", ratio x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] print x_arr_same.shape x_arr_diff = numpy.r_[numpy.concatenate([e[0] for e in data_diff]), numpy.concatenate([e[1] for e in data_diff])] print x_arr_diff.shape x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff]) mean = numpy.mean(x_arr_all, 0) std = numpy.std(x_arr_all, 0) numpy.savez("mean_std_2", mean=mean, std=std) x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)] x_diff = [((e[0] - mean) / std, (e[1] - mean) / std) for e in data_diff] #shuffle(x_diff) y_diff = [[0 for _ in xrange(len(e[0]))] for i, e in enumerate(x_diff)] y = [j for i in zip(y_same, y_diff) for j in i] x = [j for i in zip(x_same, x_diff) for j in i] x1, x2 = zip(*x) assert x1[0].shape[0] == x2[0].shape[0] assert x1[0].shape[1] == x2[0].shape[1] assert len(x1) == len(x2) assert len(x1) == len(y) ten_percent = int(0.1 * len(x1)) n_ins = x1[0].shape[1] * nframes n_outs = 100 # TODO print "nframes:", nframes train_set_iterator = iterator_type(x1[:-ten_percent], x2[:-ten_percent], y[:-ten_percent], # TODO nframes=nframes, batch_size=batch_size, marginf=3) # TODO margin pass this 3 along before valid_set_iterator = iterator_type(x1[-ten_percent:], x2[-ten_percent:], y[-ten_percent:], # TODO nframes=nframes, batch_size=batch_size, marginf=3) ### TEST SET if OLD_DTW_DATA: test_dataset_path = "/fhgfs/bootphon/scratch/gsynnaeve/TIMIT/train_dev_test_split/dtw_words_test.joblib" data_same = joblib.load(test_dataset_path) x_arr_same = numpy.r_[numpy.concatenate([e[1] for e in data_same]), numpy.concatenate([e[2] for e in data_same])] print x_arr_same.shape tmp = test_dataset_path.split('/') neg_data_path = "/".join(tmp[:-1]) + "/neg" + tmp[-1][3:] data_diff = joblib.load(neg_data_path) x_arr_diff = numpy.r_[numpy.concatenate([e[0] for e in data_diff]), numpy.concatenate([e[1] for e in data_diff])] print x_arr_diff.shape x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff]) mean = numpy.mean(x_arr_all, 0) std = numpy.std(x_arr_all, 0) x_same = [((e[1][e[-2]] - mean) / std, (e[2][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)] x_diff = [((e[0] - mean) / std, (e[1] - mean) / std) for e in data_diff] shuffle(x_diff) y_diff = [[0 for _ in xrange(len(e[0]))] for i, e in enumerate(x_diff)] y = [j for i in zip(y_same, y_diff) for j in i] x = [j for i in zip(x_same, x_diff) for j in i] else: test_dataset_path = "./dtw_words_2_dev.joblib" data_same = joblib.load(test_dataset_path) # DO ONLY SAME x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] print x_arr_same.shape x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)] x = x_same y = y_same x1, x2 = zip(*x) test_set_iterator = iterator_type(x1, x2, y, nframes=nframes, batch_size=batch_size, marginf=3) else: data = load_data(dataset_path, nframes=1, features=features, scaling='normalize', cv_frac='fixed', speakers=False, numpy_array_only=True) train_set_x, train_set_y = data[0] valid_set_x, valid_set_y = data[1] test_set_x, test_set_y = data[2] assert train_set_x.shape[1] == 
valid_set_x.shape[1] assert test_set_x.shape[1] == valid_set_x.shape[1] print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "phones in train", len(set(train_set_y)) print "phones in valid", len(set(valid_set_y)) print "phones in test", len(set(test_set_y)) n_outs = len(set(train_set_y)) to_int = {} with open(dataset_name + '_to_int_and_to_state_dicts_tuple.pickle') as f: to_int, _ = cPickle.load(f) print "nframes:", nframes train_set_iterator = iterator_type(train_set_x, train_set_y, to_int, nframes=nframes, batch_size=batch_size) valid_set_iterator = iterator_type(valid_set_x, valid_set_y, to_int, nframes=nframes, batch_size=batch_size) test_set_iterator = iterator_type(test_set_x, test_set_y, to_int, nframes=nframes, batch_size=batch_size) n_ins = test_set_x.shape[1]*nframes assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # TODO the proper network type other than just dropout or not nnet = None if "dropout" in network_type: nnet = DropoutNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, dropout_rates=dropout_rates, n_outs=n_outs, debugprint=debug_print) elif "ab_net" in network_type: nnet = ABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, debugprint=debug_print) else: nnet = NeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref = nnet.score_classif(train_set_iterator) valid_scoref = nnet.score_classif(valid_set_iterator) test_scoref = nnet.score_classif(test_set_iterator) data_iterator = train_set_iterator if debug_on_test_only: data_iterator = test_set_iterator train_scoref = test_scoref print '... training the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f) while (epoch < max_epochs) and (not done_looping): if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0: #abs(best_validation_loss) < 0.1: # TODO print "recomputing DTW:" data_iterator.recompute_DTW(nnet.transform_x1()) epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): avg_cost = 0. 
if "ab_net" in network_type: # remove need for this if if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x[0], x[1], y) else: avg_cost = train_fn(x[0], x[1], y, lr) if debug_print >= 3: print "cost:", avg_cost[0] if debug_plot >= 2: plot_costs(avg_cost[0]) if not len(avg_params_gradients_updates): avg_params_gradients_updates = avg_cost[1:] else: avg_params_gradients_updates = rolling_avg_pgu( iteration, avg_params_gradients_updates, avg_cost[1:]) if debug_plot >= 3: plot_params_gradients_updates(iteration, avg_cost[1:]) else: if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x, y) else: avg_cost = train_fn(x, y, lr) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) print(' epoch %i, avg costs %f' % \ (epoch, numpy.mean(avg_costs))) print(' epoch %i, training error %f' % \ (epoch, numpy.mean(train_scoref()))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f) if debug_on_test_only: continue # we check the validation loss on every epoch validation_losses = valid_scoref() this_validation_loss = numpy.mean(validation_losses) # TODO this is a mean of means (with different lengths) print(' epoch %i, validation error %f' % \ (epoch, this_validation_loss)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'wb') as f: cPickle.dump(nnet, f) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score = numpy.mean(test_losses) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of best model %f') % (epoch, test_score)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f, ' 'with test performance %f') % (best_validation_loss, test_score)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '_final.pickle', 'wb') as f: cPickle.dump(nnet, f)
def test_DBN( finetune_lr=0.01, pretraining_epochs=0, pretrain_lr=0.01, k=1, training_epochs=200, # TODO 100+ dataset=DATASET, batch_size=100): """ :type learning_rate: float :param learning_rate: learning rate used in the finetune stage :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type k: int :param k: number of Gibbs steps in CD/PCD :type training_epochs: int :param training_epochs: maximal number of iterations ot run the optimizer :type dataset: string :param dataset: path the the pickled dataset :type batch_size: int :param batch_size: the size of a minibatch """ print "loading dataset from", dataset features = 'fbank' if features == 'MFCC': N_FEATURES = 39 elif features == 'fbank': N_FEATURES = 40 #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='normalize', cv_frac=0.2, speakers=False, numpy_array_only=True) #datasets = load_data(dataset, nframes=N_FRAMES, features='fbank', scaling='student', cv_frac='fixed', speakers=False, numpy_array_only=True) datasets = load_data(dataset, nframes=1, features=features, scaling='student', cv_frac='fixed', speakers=False, numpy_array_only=True) #datasets = load_data(dataset, nframes=1, features='fbank', scaling='student', cv_frac=0.2, speakers=False, numpy_array_only=True) train_set_x, train_set_y = datasets[0] # if speakers, do test/test/test valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "phones in train", len(set(train_set_y)) print "phones in valid", len(set(valid_set_y)) print "phones in test", len(set(test_set_y)) to_int = {} with open('timit_to_int_and_to_state_dicts_tuple.pickle') as f: # TODO to_int, _ = cPickle.load(f) train_set_iterator = DatasetSentencesIterator(train_set_x, train_set_y, to_int, N_FRAMES) valid_set_iterator = DatasetSentencesIterator(valid_set_x, valid_set_y, to_int, N_FRAMES) test_set_iterator = DatasetSentencesIterator(test_set_x, test_set_y, to_int, N_FRAMES) # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... building the model' # construct the Deep Belief Network dbn = DBN( numpy_rng=numpy_rng, n_ins=N_FRAMES * N_FEATURES, #hidden_layers_sizes=[2496, 2496, 2496], hidden_layers_sizes=[1024, 1024, 1024], n_outs=len(set(train_set_y))) #62 * 3) ######################### # PRETRAINING THE MODEL # ######################### # print '... getting the pretraining functions' # pretraining_fns = dbn.pretraining_functions(k=k) # # print '... pre-training the model' # start_time = time.clock() # ## Pre-train layer-wise # #for i in xrange(dbn.n_layers): # TODO # for i in xrange(1): # # go through pretraining epochs # for epoch in xrange(pretraining_epochs): # # go through the training set # c = [] # for batch_index, (batch_x, _) in enumerate(train_set_iterator): # tmp_lr = pretrain_lr / (1. 
+ 0.05 * batch_index) # TODO # if i == 0: # tmp_lr /= PRELEARNING_RATE_DENOMINATOR_FOR_GAUSSIAN # c.append(pretraining_fns[i](batch_x=batch_x, lr=tmp_lr)) # print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), # print numpy.mean(c) # with open(output_file_name + '_layer_' + str(i) + '.pickle', 'w') as f: # cPickle.dump(dbn, f) # print "dumped a partially pre-trained model" # # end_time = time.clock() # print >> sys.stderr, ('The pretraining code for file ' + # os.path.split(__file__)[1] + # ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## #with open('dbn_Gaussian_gpu_layer_2.pickle') as f: # dbn = cPickle.load(f) # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn = dbn.get_adadelta_trainer() train_scoref = dbn.score_classif(train_set_iterator) valid_scoref = dbn.score_classif(valid_set_iterator) test_scoref = dbn.score_classif(test_set_iterator) print '... finetuning the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 avg_costs = [] for iteration, (x, y) in enumerate(train_set_iterator): avg_cost = train_fn(x, y) avg_costs.append(avg_cost) #print(' epoch %i, sentence %i, ' #'avg cost for this sentence %f' % \ # (epoch, iteration, avg_cost)) print(' epoch %i, avg costs %f' % \ (epoch, numpy.mean(avg_costs))) print(' epoch %i, training error %f %%' % \ (epoch, numpy.mean(train_scoref()) * 100.)) # we check the validation loss on every epoch validation_losses = valid_scoref() this_validation_loss = numpy.mean( validation_losses ) # TODO this is a mean of means (with different lengths) print(' epoch %i, validation error %f %%' % \ (epoch, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score = numpy.mean( test_losses ) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of ' 'best model %f %%') % (epoch, test_score * 100.)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f %%, ' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '.pickle', 'w') as f: cPickle.dump(dbn, f)
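# Several "TODO this is a mean of means (with different lengths)" notes in
# this file point at the same issue: averaging per-sentence error rates
# weighs short and long sentences equally. A sketch of the frame-weighted
# alternative those TODOs call for (helper name assumed):
import numpy
def weighted_error(per_sentence_errors, per_sentence_lengths):
    """Frame-weighted error rate instead of a plain mean of sentence means."""
    errors = numpy.asarray(per_sentence_errors, dtype='float64')
    lengths = numpy.asarray(per_sentence_lengths, dtype='float64')
    return numpy.sum(errors * lengths) / numpy.sum(lengths)
# e.g. sentences of 10 and 1000 frames with errors 0.5 and 0.1:
# plain mean = 0.3, frame-weighted = (5. + 100.) / 1010. ~= 0.104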