def run_SAE_experiment(pretrain_lr=0.1, pretraining_epochs=3300, finetune_lr=0.1, training_epochs=4e5, L1_reg=0.0, L2_reg=1e-4, dataset='KSC.pkl', split_proportions=[6, 2, 2], hidden_layers_sizes=[20], corruption_levels=[0.], batch_size=20, log_file='log', restart=False, use_rate_schedule=True, load_pretrained_weights=False): """ Reproduce the paper... """ assert not (restart and load_pretrained_weights) assert not (load_pretrained_weights and len(hidden_layers_sizes) != 5) assert len(hidden_layers_sizes)==len(corruption_levels), \ "Error: hidden_layers_sizes and corruption_levels need to be of equal length" pretrain_rate_decay = (type(pretrain_lr) == tuple) train_rate_decay = (type(finetune_lr) == tuple) assert pretrain_rate_decay or type(pretrain_lr) == float assert train_rate_decay or type(finetune_lr) == float assert not (use_rate_schedule and train_rate_decay), ( 'Error:', 'Can not use adaptive rate schedule and linear rate schedule together') #cast number of epochsto int pretraining_epochs = int(pretraining_epochs) training_epochs = int(training_epochs) #check for linear rate schedules if pretrain_rate_decay: linear_pretrain_rates = True pretrain_rates = numpy.linspace(pretrain_lr[0], pretrain_lr[1], pretraining_epochs) else: pretrain_rates = [pretrain_lr] * pretraining_epochs if train_rate_decay: linear_train_rates = True train_rates = numpy.linspace(finetune_lr[0], finetune_lr[1], training_epochs) else: train_rates = [finetune_lr] * training_epochs #create a log object logger = Logger(log_file) #log run params if restart: logger.log("Restarting run using old best_model") logger.log("Running SAE Experiment...") logger.add_newline() logger.log("Runtime params:") logger.log("pretrain_lr=%s" % str(pretrain_lr)) logger.log("pretraining_epochs=%d" % pretraining_epochs) logger.log("finetune_lr=%s" % str(finetune_lr)) logger.log("training_epochs=%d" % training_epochs) logger.log("L1_reg=%f" % L1_reg) logger.log("L2_reg=%f" % L2_reg) logger.log("dataset=%s" % 
dataset) logger.log("split_proportions=%s" % str(split_proportions)) logger.log("hidden_layers_sizes=%s" % str(hidden_layers_sizes)) logger.log("corruption_levels=%s" % str(corruption_levels)) logger.log("batch_size=%d" % batch_size) logger.log("use_rate_schedule=%s" % use_rate_schedule) logger.log("load_pretrained_weights=%s" % load_pretrained_weights) logger.add_newline() datasets = load_data(dataset, split_proportions, logger) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) logger.log('... building the model') # construct the stacked denoising autoencoder class #since labels were cast to int32 need to do this to get the shared variable shared_train_set_y = train_set_y.owner.inputs[0] if not restart: sda = SdA(numpy_rng=numpy_rng, n_ins=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=numpy.unique( shared_train_set_y.get_value(borrow=True)).size, L1_reg=L1_reg, L2_reg=L2_reg) elif restart: logger.log("loading model from best_model.pkl") sda = cPickle.load(open('best_model.pkl', 'r')) elif load_pretrained_weights: logger.log("loading model from pretrained_model.pkl") sda = cPickle.load(open('pretrained_model.pkl', 'r')) #create dictionary to store training stat accumulation arrays for easy pickling train_stat_dict = {} ######################### # PRETRAINING THE MODEL # ######################### pretrainig_costs = [[] for i in xrange(sda.n_layers) ] # average pretrainig cost at each epoch train_stat_dict['pretrainig_costs'] = pretrainig_costs if not (restart or load_pretrained_weights or SKIP_PRETRAINING): logger.log('... 
getting the pretraining functions') pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) logger.log('... pre-training the model') start_time = timeit.default_timer() ## Pre-train layer-wise for i in xrange(sda.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i]( index=batch_index, corruption=corruption_levels[i], lr=pretrain_rates[epoch])) logger.log('Pre-training layer %i, epoch %d, cost ' % (i, epoch)) logger.log(str(numpy.mean(c))) pretrainig_costs[i].append(numpy.mean(c)) end_time = timeit.default_timer() #save the pretrained model with open('pretrained_model.pkl', 'w') as f: cPickle.dump(sda, f) logger.log('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) else: logger.log("skipping pretraining") ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model logger.log('... getting the finetuning functions') (train_fn, validate_model_NLL, validate_model_zero_one, test_model) = sda.build_finetune_functions(datasets=datasets, batch_size=batch_size) logger.log('... finetunning the model') # early-stopping parameters patience = 100 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 iter = 0 # global minibatch iteration minibatch_avg_NLL = [] #array to accumulate NLL cost over over minibatches training_NLL = [ ] # average training NLL cost at each epoch (really after val_freq iters) validation_NLL = [] # average validation NLL cost at each epoch validation_zero_one = [ ] # average zero one cost at each epoch (% misclassified) train_stat_dict['training_NLL'] = training_NLL train_stat_dict['validation_NLL'] = validation_NLL train_stat_dict['validation_zero_one'] = validation_zero_one while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter += 1 minibatch_avg_NLL.append( train_fn(minibatch_index, lr=train_rates[epoch - 1])) if iter % validation_frequency == 0: """validation zero one loss """ validation_zero_one_losses = validate_model_zero_one() validation_zero_one.append( numpy.mean(validation_zero_one_losses)) #validation NLL cost validation_NLL_losses = validate_model_NLL() validation_NLL.append(numpy.mean(validation_NLL_losses)) #training NLL cost training_NLL.append(numpy.mean(minibatch_avg_NLL)) minibatch_avg_NLL = [] #reset the NLL accumulator logger.log('epoch %i, minibatch %i/%i:' % (epoch, minibatch_index + 1, n_train_batches)) logger.log('\ttraining NLL loss: %f ' % training_NLL[-1]) logger.log('\tvalidation NLL loss: %f ' % validation_NLL[-1]) logger.log('\tvalidation zero one loss: %f %%' % (validation_zero_one[-1] * 100.)) # if we got the best validation score until now if validation_zero_one[-1] < best_validation_loss: #improve patience if loss improvement is good enough if (validation_zero_one[-1] < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) else: print "improvemnt not good enough: %f" % ( validation_zero_one[-1] / best_validation_loss) # save best validation score and iteration number best_validation_loss = validation_zero_one[-1] best_iter 
= iter # test it on the test set test_zero_one_losses = test_model() test_score = numpy.mean(test_zero_one_losses) print '\t\ttest zero one loss of best model %f %%' % ( test_score * 100.) #save the best model with open('best_model.pkl', 'w') as f: cPickle.dump(sda, f) if patience <= iter: pass #done_looping = True #break if use_rate_schedule and epoch % 100 == 0: if validation_NLL[epoch - 100] - validation_NLL[epoch - 1] < 1e-4: finetune_lr = max(finetune_lr / 2., 1e-6) train_rates = [finetune_lr] * training_epochs logger.log("Reducing learning rate. new rate: %f" % finetune_lr) #save train_stat_dict to a .mat file sio.savemat('train_stats.mat', train_stat_dict) #with open('train_stat_dict.pkl','w') as f: # cPickle.dump(train_stat_dict,f) end_time = timeit.default_timer() logger.log(('Optimization complete with best validation score of %f %%, ' 'on iteration %i, ' 'with test performance %f %%') % (best_validation_loss * 100., best_iter, test_score * 100.)) logger.log('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) logger.close()
def run_SAE_experiment(pretrain_lr=0.1,pretraining_epochs=3300, finetune_lr=0.1,training_epochs=4e5, L1_reg=0.0,L2_reg=1e-4, dataset='KSC.pkl', split_proportions = [6,2,2], hidden_layers_sizes=[20], corruption_levels=[0.], batch_size=20, log_file='log', restart = False, use_rate_schedule=True, load_pretrained_weights=False): """ Reproduce the paper... """ assert not(restart and load_pretrained_weights) assert not(load_pretrained_weights and len(hidden_layers_sizes)!=5) assert len(hidden_layers_sizes)==len(corruption_levels), \ "Error: hidden_layers_sizes and corruption_levels need to be of equal length" pretrain_rate_decay = (type(pretrain_lr)==tuple) train_rate_decay = (type(finetune_lr)==tuple) assert pretrain_rate_decay or type(pretrain_lr)==float assert train_rate_decay or type(finetune_lr)==float assert not (use_rate_schedule and train_rate_decay), ('Error:', 'Can not use adaptive rate schedule and linear rate schedule together' ) #cast number of epochsto int pretraining_epochs = int(pretraining_epochs) training_epochs = int(training_epochs) #check for linear rate schedules if pretrain_rate_decay: linear_pretrain_rates = True pretrain_rates = numpy.linspace(pretrain_lr[0],pretrain_lr[1],pretraining_epochs) else: pretrain_rates = [pretrain_lr]*pretraining_epochs if train_rate_decay: linear_train_rates = True train_rates = numpy.linspace(finetune_lr[0],finetune_lr[1],training_epochs) else: train_rates = [finetune_lr]*training_epochs #create a log object logger = Logger(log_file) #log run params if restart: logger.log("Restarting run using old best_model") logger.log("Running SAE Experiment...") logger.add_newline() logger.log("Runtime params:") logger.log("pretrain_lr=%s" % str(pretrain_lr)) logger.log("pretraining_epochs=%d" % pretraining_epochs) logger.log("finetune_lr=%s" % str(finetune_lr)) logger.log("training_epochs=%d" % training_epochs) logger.log("L1_reg=%f" % L1_reg) logger.log("L2_reg=%f" % L2_reg) logger.log("dataset=%s" % dataset) 
logger.log("split_proportions=%s" % str(split_proportions)) logger.log("hidden_layers_sizes=%s" % str(hidden_layers_sizes)) logger.log("corruption_levels=%s" % str(corruption_levels)) logger.log("batch_size=%d" % batch_size) logger.log("use_rate_schedule=%s" % use_rate_schedule) logger.log("load_pretrained_weights=%s" % load_pretrained_weights) logger.add_newline() datasets = load_data(dataset,split_proportions,logger) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) logger.log( '... building the model') # construct the stacked denoising autoencoder class #since labels were cast to int32 need to do this to get the shared variable shared_train_set_y = train_set_y.owner.inputs[0] if not restart: sda = SdA( numpy_rng=numpy_rng, n_ins=train_set_x.get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=numpy.unique(shared_train_set_y.get_value(borrow=True)).size, L1_reg=L1_reg, L2_reg=L2_reg ) elif restart: logger.log("loading model from best_model.pkl") sda = cPickle.load(open('best_model.pkl','r')) elif load_pretrained_weights: logger.log("loading model from pretrained_model.pkl") sda = cPickle.load(open('pretrained_model.pkl','r')) #create dictionary to store training stat accumulation arrays for easy pickling train_stat_dict = {} ######################### # PRETRAINING THE MODEL # ######################### pretrainig_costs = [ [] for i in xrange(sda.n_layers) ] # average pretrainig cost at each epoch train_stat_dict['pretrainig_costs'] = pretrainig_costs if not (restart or load_pretrained_weights or SKIP_PRETRAINING): logger.log( '... 
getting the pretraining functions') pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) logger.log( '... pre-training the model') start_time = timeit.default_timer() ## Pre-train layer-wise for i in xrange(sda.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_rates[epoch])) logger.log('Pre-training layer %i, epoch %d, cost ' % (i, epoch) ) logger.log( str(numpy.mean(c)) ) pretrainig_costs[i].append(numpy.mean(c)) end_time = timeit.default_timer() #save the pretrained model with open('pretrained_model.pkl', 'w') as f: cPickle.dump(sda, f) logger.log( 'The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.) ) else: logger.log("skipping pretraining") ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model logger.log( '... getting the finetuning functions') ( train_fn, validate_model_NLL, validate_model_zero_one, test_model ) = sda.build_finetune_functions( datasets=datasets, batch_size=batch_size ) logger.log( '... finetunning the model') # early-stopping parameters patience = 100 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. 
start_time = timeit.default_timer() done_looping = False epoch = 0 iter = 0 # global minibatch iteration minibatch_avg_NLL = [] #array to accumulate NLL cost over over minibatches training_NLL = [] # average training NLL cost at each epoch (really after val_freq iters) validation_NLL = [] # average validation NLL cost at each epoch validation_zero_one = [] # average zero one cost at each epoch (% misclassified) train_stat_dict['training_NLL'] = training_NLL train_stat_dict['validation_NLL'] = validation_NLL train_stat_dict['validation_zero_one'] = validation_zero_one while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): iter += 1 minibatch_avg_NLL.append( train_fn(minibatch_index,lr= train_rates[epoch-1] ) ) if iter % validation_frequency == 0: """validation zero one loss """ validation_zero_one_losses = validate_model_zero_one() validation_zero_one.append( numpy.mean(validation_zero_one_losses) ) #validation NLL cost validation_NLL_losses = validate_model_NLL() validation_NLL.append( numpy.mean(validation_NLL_losses) ) #training NLL cost training_NLL.append( numpy.mean(minibatch_avg_NLL) ) minibatch_avg_NLL = [] #reset the NLL accumulator logger.log( 'epoch %i, minibatch %i/%i:' % (epoch, minibatch_index + 1, n_train_batches)) logger.log('\ttraining NLL loss: %f ' % training_NLL[-1]) logger.log('\tvalidation NLL loss: %f ' % validation_NLL[-1]) logger.log('\tvalidation zero one loss: %f %%' % (validation_zero_one[-1] * 100.)) # if we got the best validation score until now if validation_zero_one[-1] < best_validation_loss: #improve patience if loss improvement is good enough if ( validation_zero_one[-1] < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) else: print "improvemnt not good enough: %f" % (validation_zero_one[-1]/best_validation_loss) # save best validation score and iteration number best_validation_loss = validation_zero_one[-1] 
best_iter = iter # test it on the test set test_zero_one_losses = test_model() test_score = numpy.mean(test_zero_one_losses) print '\t\ttest zero one loss of best model %f %%' % (test_score * 100.) #save the best model with open('best_model.pkl', 'w') as f: cPickle.dump(sda, f) if patience <= iter: pass #done_looping = True #break if use_rate_schedule and epoch%100==0: if validation_NLL[epoch-100]-validation_NLL[epoch-1]<1e-4: finetune_lr = max(finetune_lr/2.,1e-6) train_rates = [finetune_lr]*training_epochs logger.log("Reducing learning rate. new rate: %f" % finetune_lr) #save train_stat_dict to a .mat file sio.savemat('train_stats.mat',train_stat_dict) #with open('train_stat_dict.pkl','w') as f: # cPickle.dump(train_stat_dict,f) end_time = timeit.default_timer() logger.log( ( 'Optimization complete with best validation score of %f %%, ' 'on iteration %i, ' 'with test performance %f %%' ) % (best_validation_loss * 100., best_iter, test_score * 100.) ) logger.log ('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.) ) logger.close()
def run_deepNet(dataset='KSC.pkl', split_proportions=[6, 2, 2],
                hidden_sizes=[20],
                hidden_nonlinearity=lasagne.nonlinearities.rectify,
                dropout_probs=[0.5], learning_rate=0.1, momentum=0.9,
                num_epochs=int(5e4), minibatch_size=64, log_file='log'):
    """Train a Lasagne network with Nesterov-momentum SGD and early stopping.

    The network is currently *loaded* from `best_model.pkl` (the
    build_network call is commented out), trained with a linearly decayed
    learning rate and linearly grown momentum, and the best model by
    validation zero-one loss is checkpointed back to `best_model.pkl`.

    :param dataset: pickled dataset filename for `load_data`.
    :param split_proportions: train/valid/test split ratios (read-only).
    :param hidden_sizes: layer widths (only used when building a network).
    :param hidden_nonlinearity: activation (only used when building).
    :param dropout_probs: dropout rates (only used when building).
    :param learning_rate: (start, end) pair decayed linearly over
        `num_epochs`; a single float is treated as a constant rate.
    :param momentum: (start, end) pair grown linearly over `num_epochs`;
        a single float is treated as constant.
    :param num_epochs: maximum number of training epochs.
    :param minibatch_size: minibatch size.
    :param log_file: path for the `Logger` output.
    """
    # create a log object
    logger = Logger(log_file)
    # log run params
    logger.log("Running run_deepNet Experiment...")
    logger.add_newline()
    logger.log("Runtime params:")
    logger.log("dataset=%s" % dataset)
    logger.log("split_proportions=%s" % str(split_proportions))
    logger.log("hidden_sizes=%s" % str(hidden_sizes))
    logger.log("hidden_nonlinearity=%s" % str(hidden_nonlinearity))
    logger.log("dropout_probs=%s" % str(dropout_probs))
    logger.log("learning_rate=%s" % str(learning_rate))
    logger.log("momentum=%s" % str(momentum))
    logger.log("num_epochs=%d" % num_epochs)
    logger.log("minibatch_size=%d" % minibatch_size)

    # BUGFIX: the scalar defaults (0.1 / 0.9) used to crash at
    # learning_rate[0] below; treat a scalar as a constant schedule.
    if not isinstance(learning_rate, (tuple, list)):
        learning_rate = (learning_rate, learning_rate)
    if not isinstance(momentum, (tuple, list)):
        momentum = (momentum, momentum)

    # Load the data
    train_set, val_set, test_set = load_data(dataset, split_proportions,
                                             logger, shared=False)
    x_train, y_train = train_set
    x_val, y_val = val_set
    x_test, y_test = test_set

    # normalize data to zero mean unit variance
    # (statistics taken from the training set only, to avoid leakage)
    x_mean = np.mean(x_train)
    x_std = np.std(x_train)
    x_train = (x_train - x_mean) / x_std
    x_val = (x_val - x_mean) / x_std
    x_test = (x_test - x_mean) / x_std

    # prepare theano variables for inputs and targets
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')

    # build the model
    logger.log('... building the model')
    input_size = x_train.shape[1]
    output_size = np.unique(y_train).size
    # NOTE(review): building a fresh network is disabled; the run resumes
    # from a previously saved model instead
    #net = build_network(input_var,
                        #input_size,
                        #hidden_sizes,
                        #hidden_nonlinearity,
                        #dropout_probs,
                        #output_size)
    with open('best_model.pkl', 'rb') as f:
        net = cPickle.load(f)
    # rebind input_var to the loaded network's own input variable
    layers = lasagne.layers.get_all_layers(net)
    input_var = layers[0].input_var

    # create loss expression for training
    logger.log('... building expressions and compiling train functions')
    prediction = lasagne.layers.get_output(net)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # create update expressions for training
    params = lasagne.layers.get_all_params(net, trainable=True)
    # linearly decay learning rate
    learning_rate = np.linspace(learning_rate[0], learning_rate[1],
                                num_epochs)
    lr = theano.shared(np.array(learning_rate[0], dtype=theano.config.floatX))
    # linearly grow momentum
    momentum = np.linspace(momentum[0], momentum[1], num_epochs)
    mom = theano.shared(np.array(momentum[0], dtype=theano.config.floatX))
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=lr,
                                                momentum=mom)

    # create loss expression for validation/testing; the deterministic pass
    # disables dropout
    test_prediction = lasagne.layers.get_output(net, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    # BUGFIX: this previously read `loss.mean()`, silently reporting the
    # stochastic training loss as the validation loss
    test_loss = test_loss.mean()
    # create an expression for the classification accuracy
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # compile the training function
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # compile a validation function for the validation loss and accuracy
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # train the model
    logger.log('... training the model')
    start_time = timeit.default_timer()
    best_validation_loss = np.inf
    training_NLL = []  # average training NLL cost at each epoch
    validation_NLL = []  # average validation NLL cost at each epoch
    validation_zero_one = []  # fraction misclassified at each epoch
    train_stat_dict = {}
    train_stat_dict['training_NLL'] = training_NLL
    train_stat_dict['validation_NLL'] = validation_NLL
    train_stat_dict['validation_zero_one'] = validation_zero_one

    for epoch in xrange(num_epochs):
        # do a pass over the training data
        lr.set_value(learning_rate[epoch])
        mom.set_value(momentum[epoch])
        train_err = 0
        train_batches = 0
        for batch in iterate_minibatches(x_train, y_train, minibatch_size,
                                         shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # do a pass over the validation data
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(x_val, y_val, minibatch_size,
                                         shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # record results
        training_NLL.append(train_err / train_batches)
        validation_NLL.append(val_err / val_batches)
        validation_zero_one.append(1 - (val_acc / val_batches))
        logger.log('epoch %i:' % (epoch))
        logger.log('\ttraining NLL loss: %f ' % training_NLL[-1])
        logger.log('\tvalidation NLL loss: %f ' % validation_NLL[-1])
        logger.log('\tvalidation zero one loss: %f %%' %
                   (validation_zero_one[-1] * 100.))

        # if we got the best validation score until now
        if validation_zero_one[-1] < best_validation_loss:
            # save best validation score and iteration number
            best_validation_loss = validation_zero_one[-1]
            best_epoch = epoch
            # save the best model (binary mode for pickle)
            with open('best_model.pkl', 'wb') as f:
                cPickle.dump(net, f)

        # update the best model in a sliding window looking back 50 epochs
        # (left disabled)
        #window_start = max(len(validation_zero_one)-50,0)
        #window_end = len(validation_zero_one)
        #if validation_zero_one[-1] == min(validation_zero_one[window_start:window_end]):
            ## save best validation score and iteration number
            #best_window_validation_loss = validation_zero_one[-1]
            #best_window_epoch = epoch
            ##save the best model
            #with open('best_window_model.pkl', 'w') as f:
                #cPickle.dump(net,f)

        # early stopping: give up after 10000 epochs with no improvement
        if (epoch - best_epoch) > 1e4:
            logger.log("Early stopping...")
            break

    ######post training#######
    # save train_stat_dict to a .mat file
    sio.savemat('train_stats.mat', train_stat_dict)

    # After training, we compute and print the test error using the best
    # model checkpoint
    logger.log("loading model from best_model.pkl")
    with open('best_model.pkl', 'rb') as f:
        net = cPickle.load(f)
    #logger.log("loading model from best_window_model.pkl")
    #window_net = cPickle.load(open('best_window_model.pkl','r'))
    test_err, test_acc = predict(net, x_test, y_test)
    test_score = 1 - test_acc
    #test_err, test_acc = predict(window_net,x_test,y_test)
    #window_test_score = 1 - test_acc

    end_time = timeit.default_timer()
    logger.log(('Optimization complete with best validation score of %f %%, '
                'on epoch %i, '
                'with test performance %f %%') %
               (best_validation_loss * 100., best_epoch, test_score * 100.))
    logger.log('The training code for file ' + os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.))
    logger.close()
def run_deepNet(dataset='KSC.pkl', split_proportions=[6, 2, 2],
                hidden_sizes=[20],
                hidden_nonlinearity=lasagne.nonlinearities.rectify,
                dropout_probs=[0.5], learning_rate=0.1, momentum=0.9,
                num_epochs=int(5e4), minibatch_size=64, log_file='log'):
    """Train a Lasagne network with Nesterov-momentum SGD and early stopping.

    NOTE(review): this is a duplicate definition of run_deepNet — at import
    time it replaces the earlier copy in this file.  Consider removing one
    of the two.

    The network is currently *loaded* from `best_model.pkl` (the
    build_network call is commented out), trained with a linearly decayed
    learning rate and linearly grown momentum, and the best model by
    validation zero-one loss is checkpointed back to `best_model.pkl`.

    :param dataset: pickled dataset filename for `load_data`.
    :param split_proportions: train/valid/test split ratios (read-only).
    :param hidden_sizes: layer widths (only used when building a network).
    :param hidden_nonlinearity: activation (only used when building).
    :param dropout_probs: dropout rates (only used when building).
    :param learning_rate: (start, end) pair decayed linearly over
        `num_epochs`; a single float is treated as a constant rate.
    :param momentum: (start, end) pair grown linearly over `num_epochs`;
        a single float is treated as constant.
    :param num_epochs: maximum number of training epochs.
    :param minibatch_size: minibatch size.
    :param log_file: path for the `Logger` output.
    """
    # create a log object
    logger = Logger(log_file)
    # log run params
    logger.log("Running run_deepNet Experiment...")
    logger.add_newline()
    logger.log("Runtime params:")
    logger.log("dataset=%s" % dataset)
    logger.log("split_proportions=%s" % str(split_proportions))
    logger.log("hidden_sizes=%s" % str(hidden_sizes))
    logger.log("hidden_nonlinearity=%s" % str(hidden_nonlinearity))
    logger.log("dropout_probs=%s" % str(dropout_probs))
    logger.log("learning_rate=%s" % str(learning_rate))
    logger.log("momentum=%s" % str(momentum))
    logger.log("num_epochs=%d" % num_epochs)
    logger.log("minibatch_size=%d" % minibatch_size)

    # BUGFIX: the scalar defaults (0.1 / 0.9) used to crash at
    # learning_rate[0] below; treat a scalar as a constant schedule.
    if not isinstance(learning_rate, (tuple, list)):
        learning_rate = (learning_rate, learning_rate)
    if not isinstance(momentum, (tuple, list)):
        momentum = (momentum, momentum)

    # Load the data
    train_set, val_set, test_set = load_data(dataset, split_proportions,
                                             logger, shared=False)
    x_train, y_train = train_set
    x_val, y_val = val_set
    x_test, y_test = test_set

    # normalize data to zero mean unit variance
    # (statistics taken from the training set only, to avoid leakage)
    x_mean = np.mean(x_train)
    x_std = np.std(x_train)
    x_train = (x_train - x_mean) / x_std
    x_val = (x_val - x_mean) / x_std
    x_test = (x_test - x_mean) / x_std

    # prepare theano variables for inputs and targets
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')

    # build the model
    logger.log('... building the model')
    input_size = x_train.shape[1]
    output_size = np.unique(y_train).size
    # NOTE(review): building a fresh network is disabled; the run resumes
    # from a previously saved model instead
    #net = build_network(input_var,
                        #input_size,
                        #hidden_sizes,
                        #hidden_nonlinearity,
                        #dropout_probs,
                        #output_size)
    with open('best_model.pkl', 'rb') as f:
        net = cPickle.load(f)
    # rebind input_var to the loaded network's own input variable
    layers = lasagne.layers.get_all_layers(net)
    input_var = layers[0].input_var

    # create loss expression for training
    logger.log('... building expressions and compiling train functions')
    prediction = lasagne.layers.get_output(net)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()

    # create update expressions for training
    params = lasagne.layers.get_all_params(net, trainable=True)
    # linearly decay learning rate
    learning_rate = np.linspace(learning_rate[0], learning_rate[1],
                                num_epochs)
    lr = theano.shared(np.array(learning_rate[0], dtype=theano.config.floatX))
    # linearly grow momentum
    momentum = np.linspace(momentum[0], momentum[1], num_epochs)
    mom = theano.shared(np.array(momentum[0], dtype=theano.config.floatX))
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=lr,
                                                momentum=mom)

    # create loss expression for validation/testing; the deterministic pass
    # disables dropout
    test_prediction = lasagne.layers.get_output(net, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    # BUGFIX: this previously read `loss.mean()`, silently reporting the
    # stochastic training loss as the validation loss
    test_loss = test_loss.mean()
    # create an expression for the classification accuracy
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # compile the training function
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # compile a validation function for the validation loss and accuracy
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # train the model
    logger.log('... training the model')
    start_time = timeit.default_timer()
    best_validation_loss = np.inf
    training_NLL = []  # average training NLL cost at each epoch
    validation_NLL = []  # average validation NLL cost at each epoch
    validation_zero_one = []  # fraction misclassified at each epoch
    train_stat_dict = {}
    train_stat_dict['training_NLL'] = training_NLL
    train_stat_dict['validation_NLL'] = validation_NLL
    train_stat_dict['validation_zero_one'] = validation_zero_one

    for epoch in xrange(num_epochs):
        # do a pass over the training data
        lr.set_value(learning_rate[epoch])
        mom.set_value(momentum[epoch])
        train_err = 0
        train_batches = 0
        for batch in iterate_minibatches(x_train, y_train, minibatch_size,
                                         shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # do a pass over the validation data
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(x_val, y_val, minibatch_size,
                                         shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # record results
        training_NLL.append(train_err / train_batches)
        validation_NLL.append(val_err / val_batches)
        validation_zero_one.append(1 - (val_acc / val_batches))
        logger.log('epoch %i:' % (epoch))
        logger.log('\ttraining NLL loss: %f ' % training_NLL[-1])
        logger.log('\tvalidation NLL loss: %f ' % validation_NLL[-1])
        logger.log('\tvalidation zero one loss: %f %%' %
                   (validation_zero_one[-1] * 100.))

        # if we got the best validation score until now
        if validation_zero_one[-1] < best_validation_loss:
            # save best validation score and iteration number
            best_validation_loss = validation_zero_one[-1]
            best_epoch = epoch
            # save the best model (binary mode for pickle)
            with open('best_model.pkl', 'wb') as f:
                cPickle.dump(net, f)

        # update the best model in a sliding window looking back 50 epochs
        # (left disabled)
        #window_start = max(len(validation_zero_one)-50,0)
        #window_end = len(validation_zero_one)
        #if validation_zero_one[-1] == min(validation_zero_one[window_start:window_end]):
            ## save best validation score and iteration number
            #best_window_validation_loss = validation_zero_one[-1]
            #best_window_epoch = epoch
            ##save the best model
            #with open('best_window_model.pkl', 'w') as f:
                #cPickle.dump(net,f)

        # early stopping: give up after 10000 epochs with no improvement
        if (epoch - best_epoch) > 1e4:
            logger.log("Early stopping...")
            break

    ######post training#######
    # save train_stat_dict to a .mat file
    sio.savemat('train_stats.mat', train_stat_dict)

    # After training, we compute and print the test error using the best
    # model checkpoint
    logger.log("loading model from best_model.pkl")
    with open('best_model.pkl', 'rb') as f:
        net = cPickle.load(f)
    #logger.log("loading model from best_window_model.pkl")
    #window_net = cPickle.load(open('best_window_model.pkl','r'))
    test_err, test_acc = predict(net, x_test, y_test)
    test_score = 1 - test_acc
    #test_err, test_acc = predict(window_net,x_test,y_test)
    #window_test_score = 1 - test_acc

    end_time = timeit.default_timer()
    logger.log(('Optimization complete with best validation score of %f %%, '
                'on epoch %i, '
                'with test performance %f %%') %
               (best_validation_loss * 100., best_epoch, test_score * 100.))
    logger.log('The training code for file ' + os.path.split(__file__)[1] +
               ' ran for %.2fm' % ((end_time - start_time) / 60.))
    logger.close()