def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False): log.maybeLog(self.logger, "\nTraining---------\n") if train_X is None: log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n") train_X = self.train_X train_Y = self.train_Y if train_X is None: log.maybeLog(self.logger, "\nPlease provide a training dataset!\n") raise AssertionError("Please provide a training dataset!") else: log.maybeLog(self.logger, "Training using data provided to training function.\n") if valid_X is None: valid_X = self.valid_X valid_Y = self.valid_Y if test_X is None: test_X = self.test_X test_Y = self.test_Y ########################################################## # Train the GSN first to get good weights initialization # ########################################################## if self.train_gsn_first: log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n") init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger) init_gsn.train() ############################# # Save the model parameters # ############################# def save_params_to_file(name, n, gsn_params): pass print 'saving parameters...' 
save_path = self.outdir+name+'_params_epoch_'+str(n)+'.pkl' f = open(save_path, 'wb') try: cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL) finally: f.close() def save_params(params): values = [param.get_value(borrow=True) for param in params] return values def restore_params(params, values): for i in range(len(params)): params[i].set_value(values[i]) ######################################### # If we are using Hessian-free training # ######################################### if self.hessian_free: pass # gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000) # cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000) # valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000) # # s = x_samples # costs = [cost, show_cost] # hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset) ################################ # If we are using SGD training # ################################ else: log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n") # TRAINING STOP = False counter = 0 if not continue_training: self.learning_rate.set_value(self.init_learn_rate) # learning rate times = [] best_cost = float('inf') best_params = None patience = 0 log.maybeLog(self.logger, ['train X size:',str(train_X.shape.eval())]) if valid_X is not None: log.maybeLog(self.logger, ['valid X size:',str(valid_X.shape.eval())]) if test_X is not None: log.maybeLog(self.logger, ['test X size:',str(test_X.shape.eval())]) if self.vis_init: self.bias_list[0].set_value(logit(numpy.clip(0.9,0.001,train_X.get_value().mean(axis=0)))) while not STOP: counter += 1 t = time.time() log.maybeAppend(self.logger, [counter,'\t']) if is_artificial: data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, artificial_sequence, rng) #train train_costs = 
data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size) # record it log.maybeAppend(self.logger, ['Train:',trunc(train_costs),'\t']) #valid valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size) # record it log.maybeAppend(self.logger, ['Valid:',trunc(valid_costs), '\t']) #test test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size) # record it log.maybeAppend(self.logger, ['Test:',trunc(test_costs), '\t']) #check for early stopping cost = numpy.sum(valid_costs) if cost < best_cost*self.early_stop_threshold: patience = 0 best_cost = cost # save the parameters that made it the best best_params = save_params(self.params) else: patience += 1 if counter >= self.n_epoch or patience >= self.early_stop_length: STOP = True if best_params is not None: restore_params(self.params, best_params) save_params_to_file('all', counter, self.params) timing = time.time() - t times.append(timing) log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t') log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times))) if (counter % self.save_frequency) == 0 or STOP is True: n_examples = 100 nums = test_X.get_value(borrow=True)[range(n_examples)] noisy_nums = self.f_noise(test_X.get_value(borrow=True)[range(n_examples)]) reconstructions = [] for i in xrange(0, len(noisy_nums)): recon = self.f_recon(noisy_nums[max(0,(i+1)-self.batch_size):i+1]) reconstructions.append(recon) reconstructed = numpy.array(reconstructions) # Concatenate stuff stacked = numpy.vstack([numpy.vstack([nums[i*10 : (i+1)*10], noisy_nums[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)]) number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.root_N_input,self.root_N_input), (10,30))) number_reconstruction.save(self.outdir+'rnngsn_number_reconstruction_epoch_'+str(counter)+'.png') #save params save_params_to_file('all', counter, 
self.params) # ANNEAL! new_lr = self.learning_rate.get_value() * self.annealing self.learning_rate.set_value(new_lr)
def train(self, train_X=None, valid_X=None, test_X=None, continue_training=False):
    """Train the GSN with SGD and early stopping.

    Falls back to the datasets given at initialization when none are passed
    in. Each epoch learns on `train_X`, evaluates the optional validation and
    test sets, early-stops on the validation cost (training cost when there
    is no validation set), periodically saves reconstruction images and the
    parameters, and anneals the learning rate and noise levels.

    :param train_X: training inputs (Theano shared variable or list of them).
    :param valid_X: optional validation inputs.
    :param test_X: optional test inputs.
    :param continue_training: if True, keep the current learning rate instead
        of resetting it to `self.init_learn_rate`.
    """
    log.maybeLog(self.logger, "\nTraining---------\n")
    if train_X is None:
        log.maybeLog(self.logger, "Training using data given during initialization of GSN.\n")
        train_X = self.train_X
        if train_X is None:
            log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
            raise AssertionError("Please provide a training dataset!")
    else:
        log.maybeLog(self.logger, "Training using data provided to training function.\n")
    if valid_X is None:
        valid_X = self.valid_X
    if test_X is None:
        test_X = self.test_X

    # Normalize every dataset to a list of shared variables.
    train_X = raise_data_to_list(train_X)
    valid_X = raise_data_to_list(valid_X)
    test_X = raise_data_to_list(test_X)

    ############
    # TRAINING #
    ############
    log.maybeLog(self.logger, "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(self.n_epoch))
    STOP = False
    counter = 0
    if not continue_training:
        self.learning_rate.set_value(self.init_learn_rate)  # learning rate
    times = []
    best_cost = float('inf')
    best_params = None
    patience = 0

    log.maybeLog(self.logger, ['train X size:', str(train_X[0].shape.eval())])
    if valid_X is not None:
        log.maybeLog(self.logger, ['valid X size:', str(valid_X[0].shape.eval())])
    if test_X is not None:
        log.maybeLog(self.logger, ['test X size:', str(test_X[0].shape.eval())])

    if self.vis_init:
        # Initialize the visible bias from the training-data mean, clipped
        # away from 0/1 so logit() stays finite.
        # (fixed: numpy.clip signature is (a, a_min, a_max))
        self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value().mean(axis=0), 0.001, 0.9)))

    while not STOP:
        counter += 1
        t = time.time()
        log.maybeAppend(self.logger, [counter, '\t'])

        # train
        train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
        log.maybeAppend(self.logger, ['Train:', trunc(numpy.mean(train_costs)), '\t'])

        # valid
        if valid_X is not None:
            valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
            log.maybeAppend(self.logger, ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

        # test
        if test_X is not None:
            test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
            log.maybeAppend(self.logger, ['Test:', trunc(numpy.mean(test_costs)), '\t'])

        # check for early stopping
        if valid_X is not None:
            cost = numpy.sum(valid_costs)
        else:
            cost = numpy.sum(train_costs)
        if cost < best_cost*self.early_stop_threshold:
            patience = 0
            best_cost = cost
            # save the parameters that made it the best
            best_params = save_params(self.params)
        else:
            patience += 1

        if counter >= self.n_epoch or patience >= self.early_stop_length:
            STOP = True
            if best_params is not None:
                restore_params(self.params, best_params)
            save_params_to_file(counter, self.params, self.outdir, self.logger)

        timing = time.time() - t
        times.append(timing)
        log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
        log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

        if (counter % self.save_frequency) == 0 or STOP is True:
            if self.is_image and test_X is not None:
                # Save a 10x30 grid of (original, noisy, reconstruction) rows.
                n_examples = 100
                # (fixed: test_X was raised to a *list* of shared variables
                # above, so index [0] before calling get_value())
                tests = test_X[0].get_value()[0:n_examples]
                noisy_tests = self.f_noise(test_X[0].get_value()[0:n_examples])
                _, reconstructed = self.f_recon(noisy_tests)
                # Concatenate stuff if it is an image
                stacked = numpy.vstack([numpy.vstack([tests[i*10 : (i+1)*10], noisy_tests[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10, 30)))
                number_reconstruction.save(self.outdir+'gsn_image_reconstruction_epoch_'+str(counter)+'.png')
            # save gsn_params
            save_params_to_file(counter, self.params, self.outdir, self.logger)

        # ANNEAL!
        new_lr = self.learning_rate.get_value() * self.annealing
        self.learning_rate.set_value(new_lr)
        new_hidden_sigma = self.hidden_add_noise_sigma.get_value() * self.noise_annealing
        self.hidden_add_noise_sigma.set_value(new_hidden_sigma)
        new_salt_pepper = self.input_salt_and_pepper.get_value() * self.noise_annealing
        self.input_salt_and_pepper.set_value(new_salt_pepper)
def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False): log.maybeLog(self.logger, "\nTraining---------\n") if train_X is None: log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n") train_X = self.train_X train_Y = self.train_Y if train_X is None: log.maybeLog(self.logger, "\nPlease provide a training dataset!\n") raise AssertionError("Please provide a training dataset!") else: log.maybeLog(self.logger, "Training using data provided to training function.\n") if valid_X is None: valid_X = self.valid_X valid_Y = self.valid_Y if test_X is None: test_X = self.test_X test_Y = self.test_Y # Input data - make sure it is a list of shared datasets train_X = raise_to_list(train_X) train_Y = raise_to_list(train_Y) valid_X = raise_to_list(valid_X) valid_Y = raise_to_list(valid_Y) test_X = raise_to_list(test_X) test_Y = raise_to_list(test_Y) ########################################################## # Train the GSN first to get good weights initialization # ########################################################## if self.train_gsn_first: log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n") # init_gsn = GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, state=self.gsn_args, logger=self.logger) # init_gsn.train() print "NOT IMPLEMENTED" ######################################### # If we are using Hessian-free training # ######################################### if self.hessian_free: pass # gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000) # cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000) # valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000) # # s = x_samples # costs = [cost, show_cost] # hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, 
cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset) ################################ # If we are using SGD training # ################################ else: log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n") # TRAINING STOP = False counter = 0 if not continue_training: self.learning_rate.set_value(self.init_learn_rate) # learning rate times = [] best_cost = float('inf') best_params = None patience = 0 log.maybeLog(self.logger, ['train X size:',str(train_X[0].get_value(borrow=True).shape)]) if valid_X is not None: log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].get_value(borrow=True).shape)]) if test_X is not None: log.maybeLog(self.logger, ['test X size:',str(test_X[0].get_value(borrow=True).shape)]) if self.vis_init: self.bias_list[0].set_value(logit(numpy.clip(0.9,0.001,train_X[0].get_value(borrow=True).mean(axis=0)))) start_time = time.time() while not STOP: counter += 1 t = time.time() log.maybeAppend(self.logger, [counter,'\t']) # if is_artificial: # data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng) #train train_costs = [] train_errors = [] for train_data in train_X: costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size) train_costs.extend([cost for (cost, error) in costs_and_errors]) train_errors.extend([error for (cost, error) in costs_and_errors]) log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)),trunc(numpy.mean(train_errors)),'\t']) #valid if valid_X is not None: valid_costs = [] for valid_data in valid_X: cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size) valid_costs.extend([c for c,e in cs]) log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t']) #test if test_X is not None: test_costs = [] test_errors = [] for test_data in test_X: costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, 
self.batch_size) test_costs.extend([cost for (cost, error) in costs_and_errors]) test_errors.extend([error for (cost, error) in costs_and_errors]) log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)),trunc(numpy.mean(test_errors)), '\t']) #check for early stopping if valid_X is not None: cost = numpy.sum(valid_costs) else: cost = numpy.sum(train_costs) if cost < best_cost*self.early_stop_threshold: patience = 0 best_cost = cost # save the parameters that made it the best best_params = copy_params(self.params) else: patience += 1 if counter >= self.n_epoch or patience >= self.early_stop_length: STOP = True if best_params is not None: restore_params(self.params, best_params) self.save_params('all', counter, self.params) timing = time.time() - t times.append(timing) log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t') log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times))) if (counter % self.save_frequency) == 0 or STOP is True: n_examples = 100 xs_test = test_X[0].get_value(borrow=True)[range(n_examples)] noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)]) reconstructions = [] for i in xrange(0, len(noisy_xs_test)): recon, recon_cost = self.f_recon(noisy_xs_test[max(0,(i+1)-self.batch_size):i+1]) reconstructions.append(recon[-1]) reconstructed = numpy.array(reconstructions) if (self.is_image): # Concatenate stuff stacked = numpy.vstack([numpy.vstack([xs_test[i*10 : (i+1)*10], noisy_xs_test[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)]) number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10,30))) number_reconstruction.save(self.outdir+'rnngsn_reconstruction_epoch_'+str(counter)+'.png') #sample_numbers(counter, 'seven') # plot_samples(counter, 'rnngsn') #save params self.save_params('all', counter, self.params) # ANNEAL! 
new_lr = self.learning_rate.get_value() * self.annealing self.learning_rate.set_value(new_lr) new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing self.input_salt_and_pepper.set_value(new_noise) log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(make_time_units_string(time.time()-start_time)))
def train(self, train_X=None, valid_X=None, test_X=None, continue_training=False):
    """Train the GSN with SGD and early stopping.

    Falls back to the datasets given at initialization when none are passed
    in. Each epoch learns on `train_X`, evaluates the optional validation and
    test sets, early-stops on the validation cost (training cost when there
    is no validation set), periodically saves reconstruction images and the
    parameters, and anneals the learning rate and noise levels.

    :param train_X: training inputs (Theano shared variable or list of them).
    :param valid_X: optional validation inputs.
    :param test_X: optional test inputs.
    :param continue_training: if True, keep the current learning rate instead
        of resetting it to `self.init_learn_rate`.
    """
    log.maybeLog(self.logger, "\nTraining---------\n")
    if train_X is None:
        log.maybeLog(self.logger, "Training using data given during initialization of GSN.\n")
        train_X = self.train_X
        if train_X is None:
            log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
            raise AssertionError("Please provide a training dataset!")
    else:
        log.maybeLog(self.logger, "Training using data provided to training function.\n")
    if valid_X is None:
        valid_X = self.valid_X
    if test_X is None:
        test_X = self.test_X

    # Normalize every dataset to a list of shared variables.
    train_X = raise_data_to_list(train_X)
    valid_X = raise_data_to_list(valid_X)
    test_X = raise_data_to_list(test_X)

    ############
    # TRAINING #
    ############
    log.maybeLog(self.logger, "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(self.n_epoch))
    STOP = False
    counter = 0
    if not continue_training:
        self.learning_rate.set_value(self.init_learn_rate)  # learning rate
    times = []
    best_cost = float('inf')
    best_params = None
    patience = 0

    log.maybeLog(self.logger, ['train X size:', str(train_X[0].shape.eval())])
    if valid_X is not None:
        log.maybeLog(self.logger, ['valid X size:', str(valid_X[0].shape.eval())])
    if test_X is not None:
        log.maybeLog(self.logger, ['test X size:', str(test_X[0].shape.eval())])

    if self.vis_init:
        # Initialize the visible bias from the training-data mean, clipped
        # away from 0/1 so logit() stays finite.
        # (fixed: numpy.clip signature is (a, a_min, a_max))
        self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value().mean(axis=0), 0.001, 0.9)))

    while not STOP:
        counter += 1
        t = time.time()
        log.maybeAppend(self.logger, [counter, '\t'])

        # train
        train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
        log.maybeAppend(self.logger, ['Train:', trunc(numpy.mean(train_costs)), '\t'])

        # valid
        if valid_X is not None:
            valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
            log.maybeAppend(self.logger, ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

        # test
        if test_X is not None:
            test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
            log.maybeAppend(self.logger, ['Test:', trunc(numpy.mean(test_costs)), '\t'])

        # check for early stopping
        if valid_X is not None:
            cost = numpy.sum(valid_costs)
        else:
            cost = numpy.sum(train_costs)
        if cost < best_cost * self.early_stop_threshold:
            patience = 0
            best_cost = cost
            # save the parameters that made it the best
            best_params = save_params(self.params)
        else:
            patience += 1

        if counter >= self.n_epoch or patience >= self.early_stop_length:
            STOP = True
            if best_params is not None:
                restore_params(self.params, best_params)
            save_params_to_file(counter, self.params, self.outdir, self.logger)

        timing = time.time() - t
        times.append(timing)
        log.maybeAppend(self.logger, 'time: ' + make_time_units_string(timing) + '\t')
        log.maybeLog(self.logger, 'remaining: ' + make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

        if (counter % self.save_frequency) == 0 or STOP is True:
            if self.is_image and test_X is not None:
                # Save a 10x30 grid of (original, noisy, reconstruction) rows.
                n_examples = 100
                # (fixed: test_X was raised to a *list* of shared variables
                # above, so index [0] before calling get_value())
                tests = test_X[0].get_value()[0:n_examples]
                noisy_tests = self.f_noise(test_X[0].get_value()[0:n_examples])
                _, reconstructed = self.f_recon(noisy_tests)
                # Concatenate stuff if it is an image
                stacked = numpy.vstack([numpy.vstack([tests[i * 10:(i + 1) * 10], noisy_tests[i * 10:(i + 1) * 10], reconstructed[i * 10:(i + 1) * 10]]) for i in range(10)])
                number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10, 30)))
                number_reconstruction.save(self.outdir + 'gsn_image_reconstruction_epoch_' + str(counter) + '.png')
            # save gsn_params
            save_params_to_file(counter, self.params, self.outdir, self.logger)

        # ANNEAL!
        new_lr = self.learning_rate.get_value() * self.annealing
        self.learning_rate.set_value(new_lr)
        new_hidden_sigma = self.hidden_add_noise_sigma.get_value() * self.noise_annealing
        self.hidden_add_noise_sigma.set_value(new_hidden_sigma)
        new_salt_pepper = self.input_salt_and_pepper.get_value() * self.noise_annealing
        self.input_salt_and_pepper.set_value(new_salt_pepper)
def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
    """Train the RNN-GSN with SGD and early stopping (list-dataset variant).

    Falls back to the datasets given at initialization when none are passed
    in; every dataset is normalized to a list of shared variables. Each epoch
    learns on all training sets (tracking cost and error), evaluates the
    optional validation/test sets, early-stops on the validation cost
    (training cost when there is no validation set), and anneals the learning
    rate and input noise. GSN pre-training, artificial re-sequencing, and
    image saving are disabled in this variant.

    :param train_X/train_Y: training inputs/labels.
    :param valid_X/valid_Y: optional validation inputs/labels.
    :param test_X/test_Y: optional test inputs/labels.
    :param is_artificial/artificial_sequence: unused here (feature disabled).
    :param continue_training: if True, keep the current learning rate instead
        of resetting it to `self.init_learn_rate`.
    """
    log.maybeLog(self.logger, "\nTraining---------\n")
    if train_X is None:
        log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
        train_X = self.train_X
        train_Y = self.train_Y
        if train_X is None:
            log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
            raise AssertionError("Please provide a training dataset!")
    else:
        log.maybeLog(self.logger, "Training using data provided to training function.\n")
    if valid_X is None:
        valid_X = self.valid_X
        valid_Y = self.valid_Y
    if test_X is None:
        test_X = self.test_X
        test_Y = self.test_Y

    # Input data - make sure it is a list of shared datasets
    train_X = raise_to_list(train_X)
    train_Y = raise_to_list(train_Y)
    valid_X = raise_to_list(valid_X)
    valid_Y = raise_to_list(valid_Y)
    test_X = raise_to_list(test_X)
    test_Y = raise_to_list(test_Y)

    ##########################################################
    # Train the GSN first to get good weights initialization #
    ##########################################################
    # NOTE: GSN pre-training is disabled in this variant.

    #########################################
    # If we are using Hessian-free training #
    #########################################
    if self.hessian_free:
        pass  # Hessian-free training is not implemented.
    ################################
    # If we are using SGD training #
    ################################
    else:
        log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
        # TRAINING
        STOP = False
        counter = 0
        if not continue_training:
            self.learning_rate.set_value(self.init_learn_rate)  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        log.maybeLog(self.logger, ['train X size:', str(train_X[0].get_value(borrow=True).shape)])
        if valid_X is not None:
            log.maybeLog(self.logger, ['valid X size:', str(valid_X[0].get_value(borrow=True).shape)])
        if test_X is not None:
            log.maybeLog(self.logger, ['test X size:', str(test_X[0].get_value(borrow=True).shape)])

        if self.vis_init:
            # Initialize the visible bias from the training-data mean, clipped
            # away from 0/1 so logit() stays finite.
            # (fixed: numpy.clip signature is (a, a_min, a_max))
            self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value(borrow=True).mean(axis=0), 0.001, 0.9)))

        start_time = time.time()

        while not STOP:
            counter += 1
            t = time.time()
            log.maybeAppend(self.logger, [counter, '\t'])

            # NOTE: artificial MNIST re-sequencing per epoch is disabled here.

            # train — f_learn returns (cost, error) pairs per minibatch
            train_costs = []
            train_errors = []
            for train_data in train_X:
                costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                train_costs.extend([cost for (cost, error) in costs_and_errors])
                train_errors.extend([error for (cost, error) in costs_and_errors])
            log.maybeAppend(self.logger, ['Train:', trunc(numpy.mean(train_costs)), trunc(numpy.mean(train_errors)), '\t'])

            # valid — only the cost is tracked
            if valid_X is not None:
                valid_costs = []
                for valid_data in valid_X:
                    cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                    valid_costs.extend([c for c, e in cs])
                log.maybeAppend(self.logger, ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

            # test
            if test_X is not None:
                test_costs = []
                test_errors = []
                for test_data in test_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                    test_costs.extend([cost for (cost, error) in costs_and_errors])
                    test_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Test:', trunc(numpy.mean(test_costs)), trunc(numpy.mean(test_errors)), '\t'])

            # check for early stopping
            if valid_X is not None:
                cost = numpy.sum(valid_costs)
            else:
                cost = numpy.sum(train_costs)
            if cost < best_cost*self.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = copy_params(self.params)
            else:
                patience += 1

            if counter >= self.n_epoch or patience >= self.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(self.params, best_params)
                self.save_params('all', counter, self.params)

            timing = time.time() - t
            times.append(timing)
            log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

            if (counter % self.save_frequency) == 0 or STOP is True:
                n_examples = 100
                xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                reconstructions = []
                for i in xrange(0, len(noisy_xs_test)):
                    # Reconstruct from a sliding window of at most batch_size frames.
                    recon, recon_cost = self.f_recon(noisy_xs_test[max(0, (i+1)-self.batch_size):i+1])
                    reconstructions.append(recon)
                reconstructed = numpy.array(reconstructions)
                if (self.is_image):
                    # NOTE: writing the reconstruction image grid is disabled
                    # in this variant.
                    pass
                # save params
                self.save_params('all', counter, self.params)

            # ANNEAL!
            new_lr = self.learning_rate.get_value() * self.annealing
            self.learning_rate.set_value(new_lr)
            new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
            self.input_salt_and_pepper.set_value(new_noise)

        log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(make_time_units_string(time.time()-start_time)))
def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False): log.maybeLog(self.logger, "\nTraining---------\n") if train_X is None: log.maybeLog( self.logger, "Training using data given during initialization of RNN-GSN.\n" ) train_X = self.train_X train_Y = self.train_Y if train_X is None: log.maybeLog(self.logger, "\nPlease provide a training dataset!\n") raise AssertionError("Please provide a training dataset!") else: log.maybeLog( self.logger, "Training using data provided to training function.\n") if valid_X is None: valid_X = self.valid_X valid_Y = self.valid_Y if test_X is None: test_X = self.test_X test_Y = self.test_Y ########################################################## # Train the GSN first to get good weights initialization # ########################################################## if self.train_gsn_first: log.maybeLog( self.logger, "\n\n----------Initially training the GSN---------\n\n") init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger) init_gsn.train() ############################# # Save the model parameters # ############################# def save_params_to_file(name, n, gsn_params): pass print 'saving parameters...' 
save_path = self.outdir + name + '_params_epoch_' + str(n) + '.pkl' f = open(save_path, 'wb') try: cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL) finally: f.close() def save_params(params): values = [param.get_value(borrow=True) for param in params] return values def restore_params(params, values): for i in range(len(params)): params[i].set_value(values[i]) ######################################### # If we are using Hessian-free training # ######################################### if self.hessian_free: pass # gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000) # cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000) # valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000) # # s = x_samples # costs = [cost, show_cost] # hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset) ################################ # If we are using SGD training # ################################ else: log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n") # TRAINING STOP = False counter = 0 if not continue_training: self.learning_rate.set_value( self.init_learn_rate) # learning rate times = [] best_cost = float('inf') best_params = None patience = 0 log.maybeLog( self.logger, ['train X size:', str(train_X.shape.eval())]) if valid_X is not None: log.maybeLog(self.logger, ['valid X size:', str(valid_X.shape.eval())]) if test_X is not None: log.maybeLog( self.logger, ['test X size:', str(test_X.shape.eval())]) if self.vis_init: self.bias_list[0].set_value( logit( numpy.clip(0.9, 0.001, train_X.get_value().mean(axis=0)))) while not STOP: counter += 1 t = time.time() log.maybeAppend(self.logger, [counter, '\t']) if is_artificial: data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, artificial_sequence, rng) #train 
train_costs = data.apply_cost_function_to_dataset( self.f_learn, train_X, self.batch_size) # record it log.maybeAppend(self.logger, ['Train:', trunc(train_costs), '\t']) #valid valid_costs = data.apply_cost_function_to_dataset( self.f_cost, valid_X, self.batch_size) # record it log.maybeAppend(self.logger, ['Valid:', trunc(valid_costs), '\t']) #test test_costs = data.apply_cost_function_to_dataset( self.f_cost, test_X, self.batch_size) # record it log.maybeAppend(self.logger, ['Test:', trunc(test_costs), '\t']) #check for early stopping cost = numpy.sum(valid_costs) if cost < best_cost * self.early_stop_threshold: patience = 0 best_cost = cost # save the parameters that made it the best best_params = save_params(self.params) else: patience += 1 if counter >= self.n_epoch or patience >= self.early_stop_length: STOP = True if best_params is not None: restore_params(self.params, best_params) save_params_to_file('all', counter, self.params) timing = time.time() - t times.append(timing) log.maybeAppend( self.logger, 'time: ' + make_time_units_string(timing) + '\t') log.maybeLog( self.logger, 'remaining: ' + make_time_units_string( (self.n_epoch - counter) * numpy.mean(times))) if (counter % self.save_frequency) == 0 or STOP is True: n_examples = 100 nums = test_X.get_value(borrow=True)[range(n_examples)] noisy_nums = self.f_noise( test_X.get_value(borrow=True)[range(n_examples)]) reconstructions = [] for i in xrange(0, len(noisy_nums)): recon = self.f_recon( noisy_nums[max(0, (i + 1) - self.batch_size):i + 1]) reconstructions.append(recon) reconstructed = numpy.array(reconstructions) # Concatenate stuff stacked = numpy.vstack([ numpy.vstack([ nums[i * 10:(i + 1) * 10], noisy_nums[i * 10:(i + 1) * 10], reconstructed[i * 10:(i + 1) * 10] ]) for i in range(10) ]) number_reconstruction = PIL.Image.fromarray( tile_raster_images( stacked, (self.root_N_input, self.root_N_input), (10, 30))) number_reconstruction.save( self.outdir + 'rnngsn_number_reconstruction_epoch_' + 
str(counter) + '.png') #save params save_params_to_file('all', counter, self.params) # ANNEAL! new_lr = self.learning_rate.get_value() * self.annealing self.learning_rate.set_value(new_lr)