class SdaSgdOptimizer:
    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
                 input_divider=1.0, series_mux=None):
        self.dataset = dataset
        self.hp = hyperparameters
        self.n_ins = n_ins
        self.n_outs = n_outs
        self.input_divider = input_divider

        if not series_mux:
            series_mux = DummyMux()
            print "No series multiplexer set"
        self.series_mux = series_mux

        self.rng = numpy.random.RandomState(1234)

        self.init_datasets()
        self.init_classifier()

        sys.stdout.flush()

    def init_datasets(self):
        print "init_datasets"
        sys.stdout.flush()

        # shared_dataset (defined elsewhere in the repository) loads the numpy
        # arrays into Theano shared variables; see the sketch after this class
        train_set, valid_set, test_set = self.dataset
        self.test_set_x, self.test_set_y = shared_dataset(test_set)
        self.valid_set_x, self.valid_set_y = shared_dataset(valid_set)
        self.train_set_x, self.train_set_y = shared_dataset(train_set)

        # compute number of minibatches for training, validation and testing
        self.n_train_batches = self.train_set_x.value.shape[0] / self.hp.minibatch_size
        self.n_valid_batches = self.valid_set_x.value.shape[0] / self.hp.minibatch_size
        # remove last batch in case it's incomplete
        self.n_test_batches = (self.test_set_x.value.shape[0] / self.hp.minibatch_size) - 1

    def init_classifier(self):
        print "Constructing classifier"

        # we don't want to save arrays in DD objects, so
        # we recreate those arrays here
        nhl = self.hp.num_hidden_layers
        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
        corruption_levels = [self.hp.corruption_levels] * nhl

        # construct the stacked denoising autoencoder class
        self.classifier = SdA(
            train_set_x=self.train_set_x,
            train_set_y=self.train_set_y,
            batch_size=self.hp.minibatch_size,
            n_ins=self.n_ins,
            hidden_layers_sizes=layers_sizes,
            n_outs=self.n_outs,
            corruption_levels=corruption_levels,
            rng=self.rng,
            pretrain_lr=self.hp.pretraining_lr,
            finetune_lr=self.hp.finetuning_lr,
            input_divider=self.input_divider)

        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")

        sys.stdout.flush()

    def train(self):
        self.pretrain()
        self.finetune()

    def pretrain(self):
        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
        sys.stdout.flush()

        #time_acc_func = 0.0
        #time_acc_total = 0.0

        start_time = time.clock()
        ## Pre-train layer-wise
        for i in xrange(self.classifier.n_layers):
            # go through pretraining epochs
            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                # go through the training set
                for batch_index in xrange(self.n_train_batches):
                    #t1 = time.clock()
                    c = self.classifier.pretrain_functions[i](batch_index)
                    #t2 = time.clock()

                    #time_acc_func += t2 - t1

                    #if batch_index % 500 == 0:
                    #    print "acc / total", time_acc_func / (t2 - start_time), time_acc_func

                    self.series_mux.append("reconstruction_error", c)

                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), c
                sys.stdout.flush()

            self.series_mux.append("params", self.classifier.all_params)

        end_time = time.clock()

        print ('Pretraining took %f minutes' % ((end_time - start_time) / 60.))
        self.hp.update({'pretraining_time': end_time - start_time})

        sys.stdout.flush()

    def finetune(self):
        print "STARTING FINETUNING, time = ", datetime.datetime.now()

        index = T.lscalar()  # index to a [mini]batch
        minibatch_size = self.hp.minibatch_size

        # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
        shared_divider = theano.shared(numpy.asarray(self.input_divider, dtype=theano.config.floatX))
        test_model = theano.function([index], self.classifier.errors,
            givens={
                self.classifier.x: self.test_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
                self.classifier.y: self.test_set_y[index*minibatch_size:(index+1)*minibatch_size]})

        validate_model = theano.function([index], self.classifier.errors,
            givens={
                self.classifier.x: self.valid_set_x[index*minibatch_size:(index+1)*minibatch_size] / shared_divider,
                self.classifier.y: self.valid_set_y[index*minibatch_size:(index+1)*minibatch_size]})

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2.  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch
        validation_frequency = min(self.n_train_batches, patience / 2)

        best_params = None
        best_validation_loss = float('inf')
        test_score = 0.
        start_time = time.clock()

        done_looping = False
        epoch = 0

        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(self.n_train_batches):

                cost_ij = self.classifier.finetune(minibatch_index)
                iter = epoch * self.n_train_batches + minibatch_index

                self.series_mux.append("training_error", cost_ij)

                if (iter + 1) % validation_frequency == 0:

                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    self.series_mux.append("validation_error", this_validation_loss)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, self.n_train_batches,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        # test it on the test set
                        test_losses = [test_model(i) for i in xrange(self.n_test_batches)]
                        test_score = numpy.mean(test_losses)
                        self.series_mux.append("test_error", test_score)
                        print(('     epoch %i, minibatch %i/%i, test error of best '
                               'model %f %%') %
                              (epoch, minibatch_index + 1, self.n_train_batches,
                               test_score * 100.))

                    sys.stdout.flush()

            self.series_mux.append("params", self.classifier.all_params)

            if patience <= iter:
                done_looping = True
                break

        end_time = time.clock()
        self.hp.update({'finetuning_time': end_time - start_time,
                        'best_validation_error': best_validation_loss,
                        'test_score': test_score,
                        'num_finetuning_epochs': epoch})
        print(('Optimization complete with best validation score of %f %%, '
               'with test performance %f %%') %
              (best_validation_loss * 100., test_score * 100.))

        print ('The finetuning ran for %f minutes' % ((end_time - start_time) / 60.))
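# The helper `shared_dataset` used by init_datasets above is defined elsewhere
# in the repository. A minimal sketch of what it presumably does, following the
# Theano tutorial convention of the time (names and the int32 cast here are
# assumptions, not this repository's exact code):

import numpy
import theano
import theano.tensor as T

def shared_dataset(data_xy):
    """Load an (x, y) pair of numpy arrays into Theano shared variables.

    Keeping the whole dataset in shared variables lets Theano copy it to the
    GPU once instead of transferring each minibatch separately.
    """
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX))
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX))
    # labels are used as indices, so hand back an integer view of y
    return shared_x, T.cast(shared_y, 'int32')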
class SdaSgdOptimizer:
    def __init__(self, dataset_name, dataset, hyperparameters, n_ins, n_outs,
                 examples_per_epoch, series=default_series, max_minibatches=None):
        self.dataset_name = dataset_name
        self.dataset = dataset
        self.hp = hyperparameters
        self.n_ins = n_ins
        self.n_outs = n_outs
        self.parameters_pre = []

        if self.dataset_name == "upper":
            self.class_offset = 10
        elif self.dataset_name == "lower":
            self.class_offset = 36
        else:
            self.class_offset = 0

        self.max_minibatches = max_minibatches
        print "SdaSgdOptimizer, max_minibatches =", max_minibatches

        self.ex_per_epoch = examples_per_epoch
        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size

        self.series = series

        self.rng = numpy.random.RandomState(1234)

        self.init_classifier()

        sys.stdout.flush()

    def init_classifier(self):
        print "Constructing classifier"

        # we don't want to save arrays in DD objects, so
        # we recreate those arrays here
        nhl = self.hp.num_hidden_layers
        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
        corruption_levels = [self.hp.corruption_levels] * nhl

        # construct the stacked denoising autoencoder class
        self.classifier = SdA(
            batch_size=self.hp.minibatch_size,
            n_ins=self.n_ins,
            hidden_layers_sizes=layers_sizes,
            n_outs=self.n_outs,
            corruption_levels=corruption_levels,
            rng=self.rng,
            pretrain_lr=self.hp.pretraining_lr,
            finetune_lr=self.hp.finetuning_lr)

        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")

        sys.stdout.flush()

    def train(self):
        self.pretrain(self.dataset)
        self.finetune(self.dataset)

    def pretrain(self, dataset, decrease=0):
        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
        sys.stdout.flush()

        un_fichier = int(819200.0 / self.hp.minibatch_size)  # number of batches in a P07 file

        start_time = time.clock()

        # Set the decreasing rate of the learning rate: we want the final
        # learning rate to be 5% of the original one, and the decrease is
        # linear.  The 0.95 factor (passed in as `decrease`) and the 800000
        # figure below are hardcoded and can be changed at will.
        decreasing = (decrease * self.hp.pretraining_lr) / float(
            self.hp.pretraining_epochs_per_layer * 800000 / self.hp.minibatch_size)

        ## Pre-train layer-wise
        for i in xrange(self.classifier.n_layers):
            # go through pretraining epochs

            # reset the learning rate to its original value
            learning_rate = self.hp.pretraining_lr
            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                # go through the training set
                batch_index = 0
                count = 0
                num_files = 0
                for x, y in dataset.train(self.hp.minibatch_size):
                    y = y - self.class_offset
                    c = self.classifier.pretrain_functions[i](x, learning_rate)
                    count += 1

                    self.series["reconstruction_error"].append((epoch, batch_index), c)
                    batch_index += 1

                    # decrease the learning rate if requested for the pretrain
                    if decrease != 0:
                        learning_rate -= decreasing

                    # useful when doing tests
                    if self.max_minibatches and batch_index >= self.max_minibatches:
                        break

                    # When we pass through the data only once (the case with P07).
                    # There are approximately 800*1024 = 819200 examples per file
                    # (1k per example and files are 800M).
                    if self.hp.pretraining_epochs_per_layer == 1 and count % un_fichier == 0:
                        print 'Pre-training layer %i, epoch %d, cost ' % (i, num_files), c
                        num_files += 1
                        sys.stdout.flush()
                        self.series['params'].append((num_files,), self.classifier.all_params)

                # When NIST is used
                if self.hp.pretraining_epochs_per_layer > 1:
                    print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), c
                    sys.stdout.flush()
                    self.series['params'].append((epoch,), self.classifier.all_params)

        end_time = time.clock()

        print ('Pretraining took %f minutes' % ((end_time - start_time) / 60.))
        self.hp.update({'pretraining_time': end_time - start_time})

        sys.stdout.flush()

        # save the parameters so we can reload them later for tests on finetune
        self.parameters_pre = [copy(x.value) for x in self.classifier.params]
        f = open('params_pretrain.txt', 'w')
        cPickle.dump(self.parameters_pre, f, protocol=-1)
        f.close()

    def finetune(self, dataset, dataset_test, num_finetune, ind_test, special=0, decrease=0):

        if special != 0 and special != 1:
            sys.exit('Bad value for variable special. Must be in {0,1}')
        print "STARTING FINETUNING, time = ", datetime.datetime.now()

        minibatch_size = self.hp.minibatch_size
        if ind_test == 0 or ind_test == 20:
            nom_test = "NIST"
            nom_train = "P07"
        else:
            nom_test = "P07"
            nom_train = "NIST"

        # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
        test_model = theano.function([self.classifier.x, self.classifier.y],
                                     self.classifier.errors)
        #         givens = {
        #           self.classifier.x: ensemble_x,
        #           self.classifier.y: ensemble_y]})

        validate_model = theano.function([self.classifier.x, self.classifier.y],
                                         self.classifier.errors)
        #        givens = {
        #           self.classifier.x: ,
        #           self.classifier.y: ]})

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2.  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch
        validation_frequency = min(self.mb_per_epoch, patience / 2)
        if self.max_minibatches and validation_frequency > self.max_minibatches:
            validation_frequency = self.max_minibatches / 2

        best_params = None
        best_validation_loss = float('inf')
        test_score = 0.
        start_time = time.clock()

        done_looping = False
        epoch = 0

        total_mb_index = 0
        minibatch_index = 0
        parameters_finetune = []

        if ind_test == 21:
            learning_rate = self.hp.finetuning_lr / 10.0
        else:
            learning_rate = self.hp.finetuning_lr  # the initial finetune lr

        while (epoch < num_finetune) and (not done_looping):
            epoch = epoch + 1

            # buffersize is a module-level constant defined elsewhere in the repository
            for x, y in dataset.train(minibatch_size, bufsize=buffersize):
                minibatch_index += 1

                y = y - self.class_offset

                if special == 0:
                    cost_ij = self.classifier.finetune(x, y, learning_rate)
                elif special == 1:
                    cost_ij = self.classifier.finetune2(x, y)
                total_mb_index += 1

                self.series["training_error"].append((epoch, minibatch_index), cost_ij)

                if (total_mb_index + 1) % validation_frequency == 0:
                    #minibatch_index += 1
                    # The validation set is always NIST (we want the model to
                    # be good on NIST).
                    if ind_test == 0 or ind_test == 20:
                        iter = dataset_test.valid(minibatch_size, bufsize=buffersize)
                    else:
                        iter = dataset.valid(minibatch_size, bufsize=buffersize)

                    # itermax caps an iterator; see the sketch after this class
                    if self.max_minibatches:
                        iter = itermax(iter, self.max_minibatches)

                    validation_losses = [validate_model(x, y - self.class_offset) for x, y in iter]
                    this_validation_loss = numpy.mean(validation_losses)

                    self.series["validation_error"].\
                        append((epoch, minibatch_index), this_validation_loss * 100.)

                    print('epoch %i, minibatch %i, validation error on NIST : %f %%' %
                          (epoch, minibatch_index + 1, this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, total_mb_index * patience_increase)

                        # save best validation score, iteration number and parameters
                        best_validation_loss = this_validation_loss
                        best_iter = total_mb_index
                        parameters_finetune = [copy(x.value) for x in self.classifier.params]

                        # test it on the test set
                        iter = dataset.test(minibatch_size, bufsize=buffersize)
                        if self.max_minibatches:
                            iter = itermax(iter, self.max_minibatches)
                        test_losses = [test_model(x, y - self.class_offset) for x, y in iter]
                        test_score = numpy.mean(test_losses)

                        # test it on the second test set
                        iter2 = dataset_test.test(minibatch_size, bufsize=buffersize)
                        if self.max_minibatches:
                            iter2 = itermax(iter2, self.max_minibatches)
                        test_losses2 = [test_model(x, y - self.class_offset) for x, y in iter2]
                        test_score2 = numpy.mean(test_losses2)

                        self.series["test_error"].\
                            append((epoch, minibatch_index), test_score * 100.)

                        print(('     epoch %i, minibatch %i, test error on dataset %s (train data) of best '
                               'model %f %%') %
                              (epoch, minibatch_index + 1, nom_train, test_score * 100.))

                        print(('     epoch %i, minibatch %i, test error on dataset %s of best '
                               'model %f %%') %
                              (epoch, minibatch_index + 1, nom_test, test_score2 * 100.))

                    if patience <= total_mb_index:
                        done_looping = True
                        break  # to exit the FOR loop

                    sys.stdout.flush()

                # useful when doing tests
                if self.max_minibatches and minibatch_index >= self.max_minibatches:
                    break

            if decrease == 1:
                if (ind_test == 21 and epoch % 100 == 0) or ind_test == 20 or \
                   (ind_test == 1 and epoch % 100 == 0):
                    # divide the learning rate by 2 for each new epoch of P07
                    # (or every 100 epochs of NIST)
                    learning_rate /= 2

            self.series['params'].append((epoch,), self.classifier.all_params)

            if done_looping == True:  # to exit the fine-tuning completely
                break  # to exit the WHILE loop

        end_time = time.clock()
        self.hp.update({'finetuning_time': end_time - start_time,
                        'best_validation_error': best_validation_loss,
                        'test_score': test_score,
                        'num_finetuning_epochs': epoch})

        print(('\nOptimization complete with best validation score of %f %%, '
               'with test performance %f %% on dataset %s ') %
              (best_validation_loss * 100., test_score * 100., nom_train))
        print(('The test score on the %s dataset is %f') % (nom_test, test_score2 * 100.))

        print ('The finetuning ran for %f minutes' % ((end_time - start_time) / 60.))

        sys.stdout.flush()

        # Save a copy of the parameters in a file to be able to get them in the future
        if special == 1:
            # to keep track of the value of the parameters
            f = open('params_finetune_stanford.txt', 'w')
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 0 or ind_test == 20:
            # to keep track of the value of the parameters
            f = open('params_finetune_P07.txt', 'w')
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 1:
            # for the run with 2 finetunes; it will be faster
            f = open('params_finetune_NIST.txt', 'w')
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 21:
            # to keep track of the value of the parameters
            f = open('params_finetune_P07_then_NIST.txt', 'w')
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()

    # Set parameters as they were right after pre-train or finetune
    def reload_parameters(self, which):

        #self.parameters_pre=pickle.load('params_pretrain.txt')
        f = open(which)
        self.parameters_pre = cPickle.load(f)
        f.close()
        for idx, x in enumerate(self.parameters_pre):
            if x.dtype == 'float64':
                self.classifier.params[idx].value = theano._asarray(copy(x), dtype=theano.config.floatX)
            else:
                self.classifier.params[idx].value = copy(x)

    def training_error(self, dataset):
        # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
        test_model = theano.function([self.classifier.x, self.classifier.y],
                                     self.classifier.errors)

        iter2 = dataset.train(self.hp.minibatch_size, bufsize=buffersize)
        train_losses2 = [test_model(x, y - self.class_offset) for x, y in iter2]
        train_score2 = numpy.mean(train_losses2)
        print "Training error is: " + str(train_score2)
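# `itermax` is used above to cap the validation/test iterators during quick
# tests, but it is defined elsewhere in the repository. A minimal sketch of
# the behaviour the calls above rely on (yield at most n items), an assumption
# rather than the repository's exact helper:

import itertools

def itermax(it, n):
    # stop after n items, even if the underlying iterator has more
    return itertools.islice(it, n)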
class SdaSgdOptimizer:
    def __init__(
        self,
        dataset,
        hyperparameters,
        n_ins,
        n_outs,
        examples_per_epoch,
        series=default_series,
        max_minibatches=None,
    ):
        self.dataset = dataset
        self.hp = hyperparameters
        self.n_ins = n_ins
        self.n_outs = n_outs
        self.parameters_pre = []

        self.max_minibatches = max_minibatches
        print "SdaSgdOptimizer, max_minibatches =", max_minibatches

        self.ex_per_epoch = examples_per_epoch
        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size

        self.series = series

        self.rng = numpy.random.RandomState(1234)

        self.init_classifier()

        sys.stdout.flush()

    def init_classifier(self):
        print "Constructing classifier"

        # we don't want to save arrays in DD objects, so
        # we recreate those arrays here
        nhl = self.hp.num_hidden_layers
        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
        corruption_levels = [self.hp.corruption_levels] * nhl

        # construct the stacked denoising autoencoder class
        self.classifier = SdA(
            batch_size=self.hp.minibatch_size,
            n_ins=self.n_ins,
            hidden_layers_sizes=layers_sizes,
            n_outs=self.n_outs,
            corruption_levels=corruption_levels,
            rng=self.rng,
            pretrain_lr=self.hp.pretraining_lr,
            finetune_lr=self.hp.finetuning_lr,
        )

        # theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")

        sys.stdout.flush()

    def train(self):
        self.pretrain(self.dataset)
        self.finetune(self.dataset)

    def pretrain(self, dataset, decrease=0):
        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
        sys.stdout.flush()

        un_fichier = int(819200.0 / self.hp.minibatch_size)  # number of batches in a P07 file

        start_time = time.clock()

        # Set the decreasing rate of the learning rate: we want the final
        # learning rate to be 5% of the original one, and the decrease is
        # linear.  The 0.95 factor (passed in as `decrease`) and the 800000
        # figure below are hardcoded and can be changed at will.
        decreasing = (decrease * self.hp.pretraining_lr) / float(
            self.hp.pretraining_epochs_per_layer * 800000 / self.hp.minibatch_size
        )

        ## Pre-train layer-wise
        for i in xrange(self.classifier.n_layers):
            # go through pretraining epochs

            # reset the learning rate to its original value
            learning_rate = self.hp.pretraining_lr
            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                # go through the training set
                batch_index = 0
                count = 0
                num_files = 0
                for x, y in dataset.train(self.hp.minibatch_size):
                    c = self.classifier.pretrain_functions[i](x, learning_rate)
                    count += 1

                    self.series["reconstruction_error"].append((epoch, batch_index), c)
                    batch_index += 1

                    # decrease the learning rate if requested for the pretrain
                    if decrease != 0:
                        learning_rate -= decreasing

                    # useful when doing tests
                    if self.max_minibatches and batch_index >= self.max_minibatches:
                        break

                    # When we pass through the data only once (the case with P07).
                    # There are approximately 800*1024 = 819200 examples per file
                    # (1k per example and files are 800M).
                    if self.hp.pretraining_epochs_per_layer == 1 and count % un_fichier == 0:
                        print "Pre-training layer %i, epoch %d, cost " % (i, num_files), c
                        num_files += 1
                        sys.stdout.flush()
                        self.series["params"].append((num_files,), self.classifier.all_params)

                # When NIST is used
                if self.hp.pretraining_epochs_per_layer > 1:
                    print "Pre-training layer %i, epoch %d, cost " % (i, epoch), c
                    sys.stdout.flush()
                    self.series["params"].append((epoch,), self.classifier.all_params)

        end_time = time.clock()

        print ("Pretraining took %f minutes" % ((end_time - start_time) / 60.0))
        self.hp.update({"pretraining_time": end_time - start_time})

        sys.stdout.flush()

        # save the parameters so we can reload them later for tests on finetune
        self.parameters_pre = [copy(x.value) for x in self.classifier.params]
        f = open("params_pretrain.txt", "w")
        cPickle.dump(self.parameters_pre, f, protocol=-1)
        f.close()

    def finetune(self, dataset, dataset_test, num_finetune, ind_test, special=0, decrease=0, dataset_test2=None):

        if special != 0 and special != 1:
            sys.exit("Bad value for variable special. Must be in {0,1}")
        print "STARTING FINETUNING, time = ", datetime.datetime.now()

        minibatch_size = self.hp.minibatch_size
        # ind_test selects the train/test naming and which parameter file is
        # written at the end; see the usage sketch after this class
        if ind_test == 0 or ind_test == 20:
            nom_test = "NIST"
            nom_train = "P07"
        elif ind_test == 30:
            nom_train = "PNIST07"
            nom_test = "NIST"
            nom_test2 = "P07"
        elif ind_test == 31:
            nom_train = "NIST"
            nom_test = "PNIST07"
            nom_test2 = "P07"
        else:
            nom_test = "P07"
            nom_train = "NIST"

        # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
        test_model = theano.function(
            [self.classifier.x, self.classifier.y], self.classifier.errors
        )
        #         givens = {
        #           self.classifier.x: ensemble_x,
        #           self.classifier.y: ensemble_y]})

        validate_model = theano.function(
            [self.classifier.x, self.classifier.y], self.classifier.errors
        )
        #        givens = {
        #           self.classifier.x: ,
        #           self.classifier.y: ]})

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2.0  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch
        validation_frequency = min(self.mb_per_epoch, patience / 2)
        if self.max_minibatches and validation_frequency > self.max_minibatches:
            validation_frequency = self.max_minibatches / 2

        best_params = None
        best_validation_loss = float("inf")
        test_score = 0.0
        start_time = time.clock()

        done_looping = False
        epoch = 0

        total_mb_index = 0
        minibatch_index = 0
        parameters_finetune = []

        if ind_test == 21 or ind_test == 31:
            learning_rate = self.hp.finetuning_lr / 10.0
        else:
            learning_rate = self.hp.finetuning_lr  # the initial finetune lr

        while (epoch < num_finetune) and (not done_looping):
            epoch = epoch + 1

            for x, y in dataset.train(minibatch_size, bufsize=buffersize):
                minibatch_index += 1

                if special == 0:
                    cost_ij = self.classifier.finetune(x, y, learning_rate)
                elif special == 1:
                    cost_ij = self.classifier.finetune2(x, y)
                total_mb_index += 1

                self.series["training_error"].append((epoch, minibatch_index), cost_ij)

                if (total_mb_index + 1) % validation_frequency == 0:
                    # minibatch_index += 1
                    # The validation set is always NIST (we want the model to
                    # be good on NIST).
                    if ind_test == 0 or ind_test == 20 or ind_test == 30:
                        iter = dataset_test.valid(minibatch_size, bufsize=buffersize)
                    else:
                        iter = dataset.valid(minibatch_size, bufsize=buffersize)

                    if self.max_minibatches:
                        iter = itermax(iter, self.max_minibatches)

                    validation_losses = [validate_model(x, y) for x, y in iter]
                    this_validation_loss = numpy.mean(validation_losses)

                    self.series["validation_error"].append(
                        (epoch, minibatch_index), this_validation_loss * 100.0
                    )

                    print (
                        "epoch %i, minibatch %i, validation error on NIST : %f %%"
                        % (epoch, minibatch_index + 1, this_validation_loss * 100.0)
                    )

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, total_mb_index * patience_increase)

                        # save best validation score, iteration number and parameters
                        best_validation_loss = this_validation_loss
                        best_iter = total_mb_index
                        parameters_finetune = [copy(x.value) for x in self.classifier.params]

                        # test it on the test set
                        iter = dataset.test(minibatch_size, bufsize=buffersize)
                        if self.max_minibatches:
                            iter = itermax(iter, self.max_minibatches)
                        test_losses = [test_model(x, y) for x, y in iter]
                        test_score = numpy.mean(test_losses)

                        # test it on the second test set
                        iter2 = dataset_test.test(minibatch_size, bufsize=buffersize)
                        if self.max_minibatches:
                            iter2 = itermax(iter2, self.max_minibatches)
                        test_losses2 = [test_model(x, y) for x, y in iter2]
                        test_score2 = numpy.mean(test_losses2)

                        # test it on the third test set, if there is one
                        if dataset_test2 is not None:
                            iter3 = dataset_test2.test(minibatch_size, bufsize=buffersize)
                            if self.max_minibatches:
                                iter3 = itermax(iter3, self.max_minibatches)
                            test_losses3 = [test_model(x, y) for x, y in iter3]
                            test_score3 = numpy.mean(test_losses3)

                        self.series["test_error"].append(
                            (epoch, minibatch_index), test_score * 100.0
                        )

                        print (
                            ("     epoch %i, minibatch %i, test error on dataset %s (train data) of best "
                             "model %f %%")
                            % (epoch, minibatch_index + 1, nom_train, test_score * 100.0)
                        )

                        print (
                            ("     epoch %i, minibatch %i, test error on dataset %s of best "
                             "model %f %%")
                            % (epoch, minibatch_index + 1, nom_test, test_score2 * 100.0)
                        )

                        if dataset_test2 is not None:
                            print (
                                ("     epoch %i, minibatch %i, test error on dataset %s of best "
                                 "model %f %%")
                                % (epoch, minibatch_index + 1, nom_test2, test_score3 * 100.0)
                            )

                    if patience <= total_mb_index:
                        done_looping = True
                        break  # to exit the FOR loop

                    sys.stdout.flush()

                # useful when doing tests
                if self.max_minibatches and minibatch_index >= self.max_minibatches:
                    break

            if decrease == 1:
                if (
                    (ind_test == 21 and epoch % 100 == 0)
                    or ind_test == 20
                    or ind_test == 30
                    or (ind_test == 31 and epoch % 100 == 0)
                ):
                    # divide the learning rate by 2 for each new epoch of P07
                    # (or every 100 epochs of NIST)
                    learning_rate /= 2

            self.series["params"].append((epoch,), self.classifier.all_params)

            if done_looping == True:  # to exit the fine-tuning completely
                break  # to exit the WHILE loop

        end_time = time.clock()
        self.hp.update(
            {
                "finetuning_time": end_time - start_time,
                "best_validation_error": best_validation_loss,
                "test_score": test_score,
                "num_finetuning_epochs": epoch,
            }
        )

        print (
            ("\nOptimization complete with best validation score of %f %%, "
             "with test performance %f %% on dataset %s ")
            % (best_validation_loss * 100.0, test_score * 100.0, nom_train)
        )
        print (("The test score on the %s dataset is %f") % (nom_test, test_score2 * 100.0))
        if dataset_test2 is not None:
            print (("The test score on the %s dataset is %f") % (nom_test2, test_score3 * 100.0))

        print ("The finetuning ran for %f minutes" % ((end_time - start_time) / 60.0))

        sys.stdout.flush()

        # Save a copy of the parameters in a file to be able to get them in the future
        if special == 1:
            # to keep track of the value of the parameters
            f = open("params_finetune_stanford.txt", "w")
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 0 or ind_test == 20:
            # to keep track of the value of the parameters
            f = open("params_finetune_P07.txt", "w")
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 1:
            # for the run with 2 finetunes; it will be faster
            f = open("params_finetune_NIST.txt", "w")
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 21:
            # to keep track of the value of the parameters
            f = open("params_finetune_P07_then_NIST.txt", "w")
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 30:
            f = open("params_finetune_PNIST07.txt", "w")
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()
        elif ind_test == 31:
            f = open("params_finetune_PNIST07_then_NIST.txt", "w")
            cPickle.dump(parameters_finetune, f, protocol=-1)
            f.close()

    # Set parameters as they were right after pre-train or finetune
    def reload_parameters(self, which):

        # self.parameters_pre=pickle.load('params_pretrain.txt')
        f = open(which)
        self.parameters_pre = cPickle.load(f)
        f.close()
        for idx, x in enumerate(self.parameters_pre):
            if x.dtype == "float64":
                self.classifier.params[idx].value = theano._asarray(copy(x), dtype=theano.config.floatX)
            else:
                self.classifier.params[idx].value = copy(x)

    def training_error(self, dataset, part=0):
        import math

        # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
        test_model = theano.function(
            [self.classifier.x, self.classifier.y], self.classifier.errors
        )

        # train
        if part == 0:
            iter2 = dataset.train(self.hp.minibatch_size, bufsize=buffersize)
            name = "train"
        # validation
        if part == 1:
            iter2 = dataset.valid(self.hp.minibatch_size, bufsize=buffersize)
            name = "validation"
        if part == 2:
            iter2 = dataset.test(self.hp.minibatch_size, bufsize=buffersize)
            name = "test"

        train_losses2 = [test_model(x, y) for x, y in iter2]
        train_score2 = numpy.mean(train_losses2)
        print "On the " + name + " dataset"
        print (("\t the error is %f") % (train_score2 * 100.0))
        # print len(train_losses2)
        stderr = math.sqrt(train_score2 - train_score2 ** 2) / math.sqrt(
            len(train_losses2) * self.hp.minibatch_size
        )
        print (("\t the stderr is %f") % (stderr * 100.0))

    # To see the model's prediction, the real answer and the image, to judge the errors
    def see_error(self, dataset):
        import pylab

        # the function that computes the prediction
        test_model = theano.function(
            [self.classifier.x, self.classifier.y], self.classifier.logLayer.y_pred
        )

        user = []
        nb_total = 0  # total number of examples seen
        nb_error = 0  # total number of errors
        for x, y in dataset.test(1):
            nb_total += 1
            pred = self.translate(test_model(x, y))
            rep = self.translate(y)
            error = pred != rep
            print "prediction: " + str(pred) + "\t answer: " + str(rep) + "\t right: " + str(not error)
            pylab.imshow(x.reshape((32, 32)))
            pylab.draw()
            if error:
                nb_error += 1
                user.append(int(raw_input("1 = The error is normal, 0 = The error is not normal : ")))
                print "\t\t character is hard to distinguish: " + str(user[-1])
            else:
                time.sleep(3)
        print "\n Over the " + str(nb_total) + " examples, there are " + str(nb_error) + \
              " errors. \nThe percentage of errors is " + str(float(nb_error) / float(nb_total))
        print "The percentage of errors made by the model that a human would also make: " + str(numpy.mean(user))

    # To translate the numeric prediction into a character if necessary.
    # Labels 0-9 are the digits themselves, 10-35 map to 'A'-'Z' and
    # 36-61 map to 'a'-'z'.
    def translate(self, y):
        if y <= 9:
            return y[0]
        elif y <= 35:
            return chr(ord('A') + int(y) - 10)  # 10 -> 'A', ..., 35 -> 'Z'
        elif y <= 61:
            return chr(ord('a') + int(y) - 36)  # 36 -> 'a', ..., 61 -> 'z'
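# A sketch of how this revision of the optimizer can be driven for a two-stage
# "P07 then NIST" schedule, based on the ind_test conventions above (20 writes
# params_finetune_P07.txt, 21 writes params_finetune_P07_then_NIST.txt and
# starts from a 10x smaller learning rate). The dataset objects, `num_finetune`
# value and overall schedule here are placeholders and assumptions, not the
# repository's actual experiment configuration:

def run_p07_then_nist(optimizer, p07, nist, num_finetune):
    # layer-wise pretraining on P07; also writes params_pretrain.txt
    optimizer.pretrain(p07)
    # finetune on P07, validating and testing on NIST
    optimizer.finetune(p07, nist, num_finetune, ind_test=20)
    # restart from the pretrained weights, then finetune again on NIST
    optimizer.reload_parameters('params_pretrain.txt')
    optimizer.finetune(nist, p07, num_finetune, ind_test=21)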
class SdaSgdOptimizer:
    def __init__(self, dataset, hyperparameters, n_ins, n_outs,
                 examples_per_epoch, series=default_series, save_params=False):
        self.dataset = dataset
        self.hp = hyperparameters
        self.n_ins = n_ins
        self.n_outs = n_outs

        self.save_params = save_params

        self.ex_per_epoch = examples_per_epoch
        self.mb_per_epoch = examples_per_epoch / self.hp.minibatch_size

        self.series = series

        self.rng = numpy.random.RandomState(1234)

        self.init_classifier()

        sys.stdout.flush()

    def init_classifier(self):
        print "Constructing classifier"

        # we don't want to save arrays in DD objects, so
        # we recreate those arrays here
        nhl = self.hp.num_hidden_layers
        layers_sizes = [self.hp.hidden_layers_sizes] * nhl
        corruption_levels = [self.hp.corruption_levels] * nhl

        # construct the stacked denoising autoencoder class
        self.classifier = SdA(
            batch_size=self.hp.minibatch_size,
            n_ins=self.n_ins,
            hidden_layers_sizes=layers_sizes,
            n_outs=self.n_outs,
            corruption_levels=corruption_levels,
            rng=self.rng,
            pretrain_lr=self.hp.pretraining_lr,
            finetune_lr=self.hp.finetuning_lr)

        #theano.printing.pydotprint(self.classifier.pretrain_functions[0], "function.graph")

        sys.stdout.flush()

    def train(self):
        self.pretrain(self.dataset)
        self.finetune(self.dataset)

    def pretrain(self, dataset):
        print "STARTING PRETRAINING, time = ", datetime.datetime.now()
        sys.stdout.flush()

        start_time = time.clock()
        ## Pre-train layer-wise
        for i in xrange(self.classifier.n_layers):
            # go through pretraining epochs
            for epoch in xrange(self.hp.pretraining_epochs_per_layer):
                # go through the training set
                batch_index = 0
                for x, y in dataset.train(self.hp.minibatch_size):
                    c = self.classifier.pretrain_functions[i](x)

                    self.series["reconstruction_error"].append((epoch, batch_index), c)
                    batch_index += 1

                    #if batch_index % 100 == 0:
                    #    print "100 batches"

                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), c
                sys.stdout.flush()

                self.series['params'].append((epoch,), self.classifier.all_params)

        end_time = time.clock()

        print ('Pretraining took %f minutes' % ((end_time - start_time) / 60.))
        self.hp.update({'pretraining_time': end_time - start_time})

        sys.stdout.flush()

    def finetune(self, dataset):
        print "STARTING FINETUNING, time = ", datetime.datetime.now()

        minibatch_size = self.hp.minibatch_size

        # create a function to compute the mistakes that are made by the model
        # on the validation set, or testing set
        test_model = theano.function([self.classifier.x, self.classifier.y],
                                     self.classifier.errors)
        #         givens = {
        #           self.classifier.x: ensemble_x,
        #           self.classifier.y: ensemble_y]})

        validate_model = theano.function([self.classifier.x, self.classifier.y],
                                         self.classifier.errors)
        #        givens = {
        #           self.classifier.x: ,
        #           self.classifier.y: ]})

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2.  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch
        validation_frequency = min(self.mb_per_epoch, patience / 2)

        best_params = None
        best_validation_loss = float('inf')
        test_score = 0.
        start_time = time.clock()

        done_looping = False
        epoch = 0

        total_mb_index = 0

        while (epoch < self.hp.max_finetuning_epochs) and (not done_looping):
            epoch = epoch + 1
            minibatch_index = -1
            for x, y in dataset.train(minibatch_size):
                minibatch_index += 1
                cost_ij = self.classifier.finetune(x, y)
                total_mb_index += 1

                self.series["training_error"].append((epoch, minibatch_index), cost_ij)

                if (total_mb_index + 1) % validation_frequency == 0:

                    iter = dataset.valid(minibatch_size)
                    validation_losses = [validate_model(x, y) for x, y in iter]
                    this_validation_loss = numpy.mean(validation_losses)

                    self.series["validation_error"].\
                        append((epoch, minibatch_index), this_validation_loss * 100.)

                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, self.mb_per_epoch,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, total_mb_index * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = total_mb_index

                        # test it on the test set
                        iter = dataset.test(minibatch_size)
                        test_losses = [test_model(x, y) for x, y in iter]
                        test_score = numpy.mean(test_losses)

                        self.series["test_error"].\
                            append((epoch, minibatch_index), test_score * 100.)

                        print(('     epoch %i, minibatch %i/%i, test error of best '
                               'model %f %%') %
                              (epoch, minibatch_index + 1, self.mb_per_epoch,
                               test_score * 100.))

                    sys.stdout.flush()

            self.series['params'].append((epoch,), self.classifier.all_params)

            if patience <= total_mb_index:
                done_looping = True
                break

        end_time = time.clock()
        self.hp.update({'finetuning_time': end_time - start_time,
                        'best_validation_error': best_validation_loss,
                        'test_score': test_score,
                        'num_finetuning_epochs': epoch})

        if self.save_params:
            # save_params is defined elsewhere; see the sketch after this class
            save_params(self.classifier.all_params, "weights.dat")

        print(('Optimization complete with best validation score of %f %%, '
               'with test performance %f %%') %
              (best_validation_loss * 100., test_score * 100.))

        print ('The finetuning ran for %f minutes' % ((end_time - start_time) / 60.))
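# The `save_params` helper called at the end of finetune above is defined
# elsewhere in the repository. A minimal sketch consistent with the parameter
# pickling done by the other revisions in this file; the exact format is an
# assumption, not the actual helper:

import cPickle

def save_params(all_params, filename):
    # pickle the current values of the model parameters to a file
    f = open(filename, 'wb')
    cPickle.dump([x.value for x in all_params], f, protocol=-1)
    f.close()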