def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None,
          is_artificial=False, artificial_sequence=1, continue_training=False):
    log.maybeLog(self.logger, "\nTraining---------\n")
    if train_X is None:
        log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
        train_X = self.train_X
        train_Y = self.train_Y
        if train_X is None:
            log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
            raise AssertionError("Please provide a training dataset!")
    else:
        log.maybeLog(self.logger, "Training using data provided to training function.\n")
    if valid_X is None:
        valid_X = self.valid_X
        valid_Y = self.valid_Y
    if test_X is None:
        test_X = self.test_X
        test_Y = self.test_Y

    ##########################################################
    # Train the GSN first to get good weights initialization #
    ##########################################################
    if self.train_gsn_first:
        log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
        init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X,
                                                     args=self.gsn_args, logger=self.logger)
        init_gsn.train()

    #############################
    # Save the model parameters #
    #############################
    def save_params_to_file(name, n, gsn_params):
        print 'saving parameters...'
        save_path = self.outdir + name + '_params_epoch_' + str(n) + '.pkl'
        f = open(save_path, 'wb')
        try:
            cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL)
        finally:
            f.close()

    def save_params(params):
        values = [param.get_value(borrow=True) for param in params]
        return values

    def restore_params(params, values):
        for i in range(len(params)):
            params[i].set_value(values[i])

    #########################################
    # If we are using Hessian-free training #
    #########################################
    if self.hessian_free:
        pass
        # gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
        # cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
        # valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
        #
        # s = x_samples
        # costs = [cost, show_cost]
        # hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)

    ################################
    # If we are using SGD training #
    ################################
    else:
        log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
        # TRAINING
        STOP = False
        counter = 0
        if not continue_training:
            self.learning_rate.set_value(self.init_learn_rate)  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        log.maybeLog(self.logger, ['train X size:', str(train_X.shape.eval())])
        if valid_X is not None:
            log.maybeLog(self.logger, ['valid X size:', str(valid_X.shape.eval())])
        if test_X is not None:
            log.maybeLog(self.logger, ['test X size:', str(test_X.shape.eval())])

        if self.vis_init:
            # numpy.clip expects (a, a_min, a_max): clip the mean activations into
            # [0.001, 0.9], not the constant 0.9 (argument order fixed here)
            self.bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))

        while not STOP:
            counter += 1
            t = time.time()
            log.maybeAppend(self.logger, [counter, '\t'])

            if is_artificial:
                data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y,
                                         artificial_sequence, rng)

            # train
            train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
            # record it
            log.maybeAppend(self.logger, ['Train:', trunc(train_costs), '\t'])

            # valid
            valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
            # record it
            log.maybeAppend(self.logger, ['Valid:', trunc(valid_costs), '\t'])

            # test
            test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
            # record it
            log.maybeAppend(self.logger, ['Test:', trunc(test_costs), '\t'])

            # check for early stopping
            cost = numpy.sum(valid_costs)
            if cost < best_cost * self.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = save_params(self.params)
            else:
                patience += 1

            if counter >= self.n_epoch or patience >= self.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(self.params, best_params)
                save_params_to_file('all', counter, self.params)

            timing = time.time() - t
            times.append(timing)
            log.maybeAppend(self.logger, 'time: ' + make_time_units_string(timing) + '\t')
            log.maybeLog(self.logger, 'remaining: ' + make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

            if (counter % self.save_frequency) == 0 or STOP is True:
                n_examples = 100
                nums = test_X.get_value(borrow=True)[range(n_examples)]
                noisy_nums = self.f_noise(test_X.get_value(borrow=True)[range(n_examples)])
                reconstructions = []
                for i in xrange(0, len(noisy_nums)):
                    recon = self.f_recon(noisy_nums[max(0, (i + 1) - self.batch_size):i + 1])
                    reconstructions.append(recon)
                reconstructed = numpy.array(reconstructions)

                # Concatenate stuff
                stacked = numpy.vstack(
                    [numpy.vstack([nums[i * 10:(i + 1) * 10],
                                   noisy_nums[i * 10:(i + 1) * 10],
                                   reconstructed[i * 10:(i + 1) * 10]]) for i in range(10)])
                number_reconstruction = PIL.Image.fromarray(
                    tile_raster_images(stacked, (self.root_N_input, self.root_N_input), (10, 30)))
                number_reconstruction.save(self.outdir + 'rnngsn_number_reconstruction_epoch_' + str(counter) + '.png')

                # save params
                save_params_to_file('all', counter, self.params)

            # ANNEAL!
            new_lr = self.learning_rate.get_value() * self.annealing
            self.learning_rate.set_value(new_lr)
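
# ---------------------------------------------------------------------------
# The training loop above leans on `data.apply_cost_function_to_dataset` to
# run a compiled Theano function over a shared dataset in minibatches. Its
# implementation is not shown in this file; the following is a minimal sketch
# of what it is assumed to do (one call of `fn` per minibatch, collecting
# whatever the function returns), not the actual library code:
def _apply_cost_function_to_dataset_sketch(fn, dataset, batch_size):
    # dataset: a Theano shared variable wrapping a 2D numpy array
    values = dataset.get_value(borrow=True)
    n_batches = len(values) / batch_size  # Python 2 integer division
    return [fn(values[i * batch_size:(i + 1) * batch_size]) for i in range(n_batches)]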
def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None,
          is_artificial=False, artificial_sequence=1, continue_training=False):
    log.maybeLog(self.logger, "\nTraining---------\n")
    if train_X is None:
        log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
        train_X = self.train_X
        train_Y = self.train_Y
        if train_X is None:
            log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
            raise AssertionError("Please provide a training dataset!")
    else:
        log.maybeLog(self.logger, "Training using data provided to training function.\n")
    if valid_X is None:
        valid_X = self.valid_X
        valid_Y = self.valid_Y
    if test_X is None:
        test_X = self.test_X
        test_Y = self.test_Y

    # Input data - make sure it is a list of shared datasets
    train_X = raise_to_list(train_X)
    train_Y = raise_to_list(train_Y)
    valid_X = raise_to_list(valid_X)
    valid_Y = raise_to_list(valid_Y)
    test_X = raise_to_list(test_X)
    test_Y = raise_to_list(test_Y)

    ##########################################################
    # Train the GSN first to get good weights initialization #
    ##########################################################
    if self.train_gsn_first:
        log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
        # init_gsn = GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, state=self.gsn_args, logger=self.logger)
        # init_gsn.train()
        print "NOT IMPLEMENTED"

    #########################################
    # If we are using Hessian-free training #
    #########################################
    if self.hessian_free:
        pass
        # gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
        # cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
        # valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
        #
        # s = x_samples
        # costs = [cost, show_cost]
        # hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)

    ################################
    # If we are using SGD training #
    ################################
    else:
        log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
        # TRAINING
        STOP = False
        counter = 0
        if not continue_training:
            self.learning_rate.set_value(self.init_learn_rate)  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        log.maybeLog(self.logger, ['train X size:', str(train_X[0].get_value(borrow=True).shape)])
        if valid_X is not None:
            log.maybeLog(self.logger, ['valid X size:', str(valid_X[0].get_value(borrow=True).shape)])
        if test_X is not None:
            log.maybeLog(self.logger, ['test X size:', str(test_X[0].get_value(borrow=True).shape)])

        if self.vis_init:
            # numpy.clip expects (a, a_min, a_max): clip the mean activations into
            # [0.001, 0.9], not the constant 0.9 (argument order fixed here)
            self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value(borrow=True).mean(axis=0), 0.001, 0.9)))

        start_time = time.time()

        while not STOP:
            counter += 1
            t = time.time()
            log.maybeAppend(self.logger, [counter, '\t'])

            # if is_artificial:
            #     data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng)

            # train
            train_costs = []
            train_errors = []
            for train_data in train_X:
                costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                train_costs.extend([cost for (cost, error) in costs_and_errors])
                train_errors.extend([error for (cost, error) in costs_and_errors])
            log.maybeAppend(self.logger, ['Train:', trunc(numpy.mean(train_costs)), trunc(numpy.mean(train_errors)), '\t'])

            # valid
            if valid_X is not None:
                valid_costs = []
                for valid_data in valid_X:
                    cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                    valid_costs.extend([c for c, e in cs])
                log.maybeAppend(self.logger, ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

            # test
            if test_X is not None:
                test_costs = []
                test_errors = []
                for test_data in test_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                    test_costs.extend([cost for (cost, error) in costs_and_errors])
                    test_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Test:', trunc(numpy.mean(test_costs)), trunc(numpy.mean(test_errors)), '\t'])

            # check for early stopping
            if valid_X is not None:
                cost = numpy.sum(valid_costs)
            else:
                cost = numpy.sum(train_costs)
            if cost < best_cost * self.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = copy_params(self.params)
            else:
                patience += 1

            if counter >= self.n_epoch or patience >= self.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(self.params, best_params)
                self.save_params('all', counter, self.params)

            timing = time.time() - t
            times.append(timing)
            log.maybeAppend(self.logger, 'time: ' + make_time_units_string(timing) + '\t')
            log.maybeLog(self.logger, 'remaining: ' + make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

            if (counter % self.save_frequency) == 0 or STOP is True:
                n_examples = 100
                xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                reconstructions = []
                for i in xrange(0, len(noisy_xs_test)):
                    recon, recon_cost = self.f_recon(noisy_xs_test[max(0, (i + 1) - self.batch_size):i + 1])
                    reconstructions.append(recon[-1])
                reconstructed = numpy.array(reconstructions)

                if self.is_image:
                    # Concatenate stuff
                    stacked = numpy.vstack(
                        [numpy.vstack([xs_test[i * 10:(i + 1) * 10],
                                       noisy_xs_test[i * 10:(i + 1) * 10],
                                       reconstructed[i * 10:(i + 1) * 10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(stacked, (self.image_height, self.image_width), (10, 30)))
                    number_reconstruction.save(self.outdir + 'rnngsn_reconstruction_epoch_' + str(counter) + '.png')
                    # sample_numbers(counter, 'seven')
                    # plot_samples(counter, 'rnngsn')

                # save params
                self.save_params('all', counter, self.params)

            # ANNEAL!
            new_lr = self.learning_rate.get_value() * self.annealing
            self.learning_rate.set_value(new_lr)
            new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
            self.input_salt_and_pepper.set_value(new_noise)

        log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(
            make_time_units_string(time.time() - start_time)))
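
# ---------------------------------------------------------------------------
# `raise_to_list`, `copy_params`, and `restore_params` are used above but not
# defined in this file. The sketches below are assumptions: `raise_to_list` is
# inferred from the "make sure it is a list of shared datasets" comment, and
# `copy_params`/`restore_params` are assumed to mirror the nested
# save_params/restore_params helpers in the first train() revision above.
def raise_to_list(data):
    # wrap a single (shared) dataset in a list; leave None and lists alone
    if data is not None and not isinstance(data, list):
        return [data]
    return data

def copy_params(params):
    # snapshot the current values of Theano shared parameters
    # (borrow=False so the snapshot cannot alias the live parameters)
    return [param.get_value(borrow=False) for param in params]

def restore_params(params, values):
    # write a snapshot back into the shared parameters
    for param, value in zip(params, values):
        param.set_value(value)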
def train(self, train_X=None, valid_X=None, test_X=None, continue_training=False):
    log.maybeLog(self.logger, "\nTraining---------\n")
    if train_X is None:
        log.maybeLog(self.logger, "Training using data given during initialization of GSN.\n")
        train_X = self.train_X
        if train_X is None:
            log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
            raise AssertionError("Please provide a training dataset!")
    else:
        log.maybeLog(self.logger, "Training using data provided to training function.\n")
    if valid_X is None:
        valid_X = self.valid_X
    if test_X is None:
        test_X = self.test_X

    train_X = raise_data_to_list(train_X)
    valid_X = raise_data_to_list(valid_X)
    test_X = raise_data_to_list(test_X)

    ############
    # TRAINING #
    ############
    log.maybeLog(self.logger, "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(self.n_epoch))
    STOP = False
    counter = 0
    if not continue_training:
        self.learning_rate.set_value(self.init_learn_rate)  # learning rate
    times = []
    best_cost = float('inf')
    best_params = None
    patience = 0

    log.maybeLog(self.logger, ['train X size:', str(train_X[0].shape.eval())])
    if valid_X is not None:
        log.maybeLog(self.logger, ['valid X size:', str(valid_X[0].shape.eval())])
    if test_X is not None:
        log.maybeLog(self.logger, ['test X size:', str(test_X[0].shape.eval())])

    if self.vis_init:
        # numpy.clip expects (a, a_min, a_max): clip the mean activations into
        # [0.001, 0.9], not the constant 0.9 (argument order fixed here)
        self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value().mean(axis=0), 0.001, 0.9)))

    while not STOP:
        counter += 1
        t = time.time()
        log.maybeAppend(self.logger, [counter, '\t'])

        # train
        train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
        log.maybeAppend(self.logger, ['Train:', trunc(numpy.mean(train_costs)), '\t'])

        # valid
        if valid_X is not None:
            valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
            log.maybeAppend(self.logger, ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

        # test
        if test_X is not None:
            test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
            log.maybeAppend(self.logger, ['Test:', trunc(numpy.mean(test_costs)), '\t'])

        # check for early stopping
        if valid_X is not None:
            cost = numpy.sum(valid_costs)
        else:
            cost = numpy.sum(train_costs)
    if cost < best_cost * self.early_stop_threshold:
            patience = 0
            best_cost = cost
            # save the parameters that made it the best
            best_params = save_params(self.params)
        else:
            patience += 1

        if counter >= self.n_epoch or patience >= self.early_stop_length:
            STOP = True
            if best_params is not None:
                restore_params(self.params, best_params)
            save_params_to_file(counter, self.params, self.outdir, self.logger)

        timing = time.time() - t
        times.append(timing)
        log.maybeAppend(self.logger, 'time: ' + make_time_units_string(timing) + '\t')
        log.maybeLog(self.logger, 'remaining: ' + make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

        if (counter % self.save_frequency) == 0 or STOP is True:
            if self.is_image:
                n_examples = 100
                # test_X was raised to a list of shared variables above, so index
                # the first dataset before calling get_value (fixed here)
                tests = test_X[0].get_value()[0:n_examples]
                noisy_tests = self.f_noise(test_X[0].get_value()[0:n_examples])
                _, reconstructed = self.f_recon(noisy_tests)
                # Concatenate stuff if it is an image
                stacked = numpy.vstack(
                    [numpy.vstack([tests[i * 10:(i + 1) * 10],
                                   noisy_tests[i * 10:(i + 1) * 10],
                                   reconstructed[i * 10:(i + 1) * 10]]) for i in range(10)])
                number_reconstruction = PIL.Image.fromarray(
                    tile_raster_images(stacked, (self.image_height, self.image_width), (10, 30)))
                number_reconstruction.save(self.outdir + 'gsn_image_reconstruction_epoch_' + str(counter) + '.png')

            # save gsn_params
            save_params_to_file(counter, self.params, self.outdir, self.logger)

        # ANNEAL!
        new_lr = self.learning_rate.get_value() * self.annealing
        self.learning_rate.set_value(new_lr)
        new_hidden_sigma = self.hidden_add_noise_sigma.get_value() * self.noise_annealing
        self.hidden_add_noise_sigma.set_value(new_hidden_sigma)
        new_salt_pepper = self.input_salt_and_pepper.get_value() * self.noise_annealing
        self.input_salt_and_pepper.set_value(new_salt_pepper)
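
# ---------------------------------------------------------------------------
# The GSN train() above calls save_params_to_file(counter, params, outdir,
# logger) -- a different signature from the nested helper in the RNN-GSN
# version. A minimal sketch consistent with that call site (an assumption,
# not the original implementation):
def save_params_to_file(n, params, outdir, logger):
    log.maybeLog(logger, 'saving parameters...')
    save_path = outdir + 'gsn_params_epoch_' + str(n) + '.pkl'
    f = open(save_path, 'wb')
    try:
        # pickle the raw numpy values rather than the shared variables
        cPickle.dump([param.get_value(borrow=True) for param in params], f,
                     protocol=cPickle.HIGHEST_PROTOCOL)
    finally:
        f.close()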
def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None,
          is_artificial=False, artificial_sequence=1, continue_training=False):
    log.maybeLog(self.logger, "\nTraining---------\n")
    if train_X is None:
        log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
        train_X = self.train_X
        train_Y = self.train_Y
        if train_X is None:
            log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
            raise AssertionError("Please provide a training dataset!")
    else:
        log.maybeLog(self.logger, "Training using data provided to training function.\n")
    if valid_X is None:
        valid_X = self.valid_X
        valid_Y = self.valid_Y
    if test_X is None:
        test_X = self.test_X
        test_Y = self.test_Y

    # Input data - make sure it is a list of shared datasets
    train_X = raise_to_list(train_X)
    train_Y = raise_to_list(train_Y)
    valid_X = raise_to_list(valid_X)
    valid_Y = raise_to_list(valid_Y)
    test_X = raise_to_list(test_X)
    test_Y = raise_to_list(test_Y)

    ##########################################################
    # Train the GSN first to get good weights initialization #
    ##########################################################
    # if self.train_gsn_first:
    #     log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
    #     init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger)
    #     init_gsn.train()

    #########################################
    # If we are using Hessian-free training #
    #########################################
    if self.hessian_free:
        pass
        # gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
        # cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
        # valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
        #
        # s = x_samples
        # costs = [cost, show_cost]
        # hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)

    ################################
    # If we are using SGD training #
    ################################
    else:
        log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
        # TRAINING
        STOP = False
        counter = 0
        if not continue_training:
            self.learning_rate.set_value(self.init_learn_rate)  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        log.maybeLog(self.logger, ['train X size:', str(train_X[0].get_value(borrow=True).shape)])
        if valid_X is not None:
            log.maybeLog(self.logger, ['valid X size:', str(valid_X[0].get_value(borrow=True).shape)])
        if test_X is not None:
            log.maybeLog(self.logger, ['test X size:', str(test_X[0].get_value(borrow=True).shape)])

        if self.vis_init:
            # numpy.clip expects (a, a_min, a_max): clip the mean activations into
            # [0.001, 0.9], not the constant 0.9 (argument order fixed here)
            self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value(borrow=True).mean(axis=0), 0.001, 0.9)))

        start_time = time.time()

        while not STOP:
            counter += 1
            t = time.time()
            log.maybeAppend(self.logger, [counter, '\t'])

            # if is_artificial:
            #     data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng)

            # train
            train_costs = []
            train_errors = []
            for train_data in train_X:
                costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                train_costs.extend([cost for (cost, error) in costs_and_errors])
                train_errors.extend([error for (cost, error) in costs_and_errors])
            log.maybeAppend(self.logger, ['Train:', trunc(numpy.mean(train_costs)), trunc(numpy.mean(train_errors)), '\t'])

            # valid
            if valid_X is not None:
                valid_costs = []
                for valid_data in valid_X:
                    cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                    valid_costs.extend([c for c, e in cs])
                log.maybeAppend(self.logger, ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

            # test
            if test_X is not None:
                test_costs = []
                test_errors = []
                for test_data in test_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                    test_costs.extend([cost for (cost, error) in costs_and_errors])
                    test_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Test:', trunc(numpy.mean(test_costs)), trunc(numpy.mean(test_errors)), '\t'])

            # check for early stopping
            if valid_X is not None:
                cost = numpy.sum(valid_costs)
            else:
                cost = numpy.sum(train_costs)
            if cost < best_cost * self.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = copy_params(self.params)
            else:
                patience += 1

            if counter >= self.n_epoch or patience >= self.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(self.params, best_params)
                self.save_params('all', counter, self.params)

            timing = time.time() - t
            times.append(timing)
            log.maybeAppend(self.logger, 'time: ' + make_time_units_string(timing) + '\t')
            log.maybeLog(self.logger, 'remaining: ' + make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

            if (counter % self.save_frequency) == 0 or STOP is True:
                n_examples = 100
                xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                reconstructions = []
                for i in xrange(0, len(noisy_xs_test)):
                    recon, recon_cost = self.f_recon(noisy_xs_test[max(0, (i + 1) - self.batch_size):i + 1])
                    reconstructions.append(recon)
                reconstructed = numpy.array(reconstructions)

                if self.is_image:
                    # Concatenate stuff
                    # stacked = numpy.vstack([numpy.vstack([xs_test[i*10 : (i+1)*10], noisy_xs_test[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                    # number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10,30)))
                    # number_reconstruction.save(self.outdir+'rnngsn_reconstruction_epoch_'+str(counter)+'.png')
                    # sample_numbers(counter, 'seven')
                    # plot_samples(counter, 'rnngsn')
                    pass

                # save params
                self.save_params('all', counter, self.params)

            # ANNEAL!
            new_lr = self.learning_rate.get_value() * self.annealing
            self.learning_rate.set_value(new_lr)
            new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
            self.input_salt_and_pepper.set_value(new_noise)

        log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(
            make_time_units_string(time.time() - start_time)))
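
# ---------------------------------------------------------------------------
# `trunc` and `make_time_units_string` are logging helpers used throughout but
# not defined here. Sketches under the assumption that `trunc` shortens a
# value's string form for compact log columns and `make_time_units_string`
# renders a duration in a convenient unit:
def trunc(value, length=8):
    # truncate the string form of a number for compact logging
    return str(value)[:length]

def make_time_units_string(seconds):
    # express a duration in the largest convenient unit
    if seconds < 60:
        return trunc(seconds) + " seconds"
    elif seconds < 3600:
        return trunc(seconds / 60.0) + " minutes"
    else:
        return trunc(seconds / 3600.0) + " hours"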
def train_regression(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
    logger.log('-------------TRAINING REGRESSION FOR ITERATION {0!s}-------------'.format(iteration))

    # TRAINING
    n_epoch = state.n_epoch
    batch_size = state.batch_size
    STOP = False
    counter = 0
    best_cost = float('inf')
    best_params = None
    patience = 0
    if iteration == 0:
        regression_learning_rate.set_value(cast32(state.learning_rate))  # learning rate
    times = []

    logger.log(['learning rate:', regression_learning_rate.get_value()])
    logger.log(['train X size:', str(train_X.shape.eval())])
    logger.log(['valid X size:', str(valid_X.shape.eval())])
    logger.log(['test X size:', str(test_X.shape.eval())])

    if state.test_model:
        # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
        logger.log('Testing : skip training')
        STOP = True

    while not STOP:
        counter += 1
        t = time.time()
        logger.append([counter, '\t'])

        # shuffle the data
        # data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)

        # train
        train_costs = []
        train_errors = []
        for i in range(len(train_X.get_value(borrow=True)) / batch_size):
            xs = [train_X.get_value(borrow=True)[(i * batch_size) + sequence_idx:
                                                 ((i + 1) * batch_size) + sequence_idx]
                  for sequence_idx in range(len(Xs))]
            xs, _ = fix_input_size(xs)
            _ins = xs  # + [sequence_window_size]
            cost, error = regression_f_learn(*_ins)
            # print trunc(cost)
            # print [numpy.asarray(a) for a in f_check(*_ins)]
            train_costs.append(cost)
            train_errors.append(error)
        train_costs = numpy.mean(train_costs)
        train_errors = numpy.mean(train_errors)
        logger.append(['rTrain: ', trunc(train_costs), trunc(train_errors), '\t'])
        with open(regression_train_convergence, 'a') as f:
            f.write("{0!s},".format(train_costs))
            f.write("\n")

        # valid
        valid_costs = []
        for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
            xs = [valid_X.get_value(borrow=True)[(i * batch_size) + sequence_idx:
                                                 ((i + 1) * batch_size) + sequence_idx]
                  for sequence_idx in range(len(Xs))]
            xs, _ = fix_input_size(xs)
            _ins = xs  # + [sequence_window_size]
            cost, _ = regression_f_cost(*_ins)
            valid_costs.append(cost)
        valid_costs = numpy.mean(valid_costs)
        logger.append(['rValid: ', trunc(valid_costs), '\t'])
        with open(regression_valid_convergence, 'a') as f:
            f.write("{0!s},".format(valid_costs))
            f.write("\n")

        # test
        test_costs = []
        test_errors = []
        for i in range(len(test_X.get_value(borrow=True)) / batch_size):
            xs = [test_X.get_value(borrow=True)[(i * batch_size) + sequence_idx:
                                                ((i + 1) * batch_size) + sequence_idx]
                  for sequence_idx in range(len(Xs))]
            xs, _ = fix_input_size(xs)
            _ins = xs  # + [sequence_window_size]
            cost, error = regression_f_cost(*_ins)
            test_costs.append(cost)
            test_errors.append(error)
        test_costs = numpy.mean(test_costs)
        test_errors = numpy.mean(test_errors)
        logger.append(['rTest: ', trunc(test_costs), trunc(test_errors), '\t'])
        with open(regression_test_convergence, 'a') as f:
            f.write("{0!s},".format(test_costs))
            f.write("\n")

        # check for early stopping
        cost = numpy.sum(valid_costs)
        if cost < best_cost * state.early_stop_threshold:
            patience = 0
            best_cost = cost
            # keep the best params so far
            best_params = save_params(regression_params)
        else:
            patience += 1

        if counter >= n_epoch or patience >= state.early_stop_length:
            STOP = True
            if best_params is not None:
                restore_params(regression_params, best_params)
            save_params_to_file('regression', counter, regression_params, iteration)
            logger.log(["next learning rate should be", regression_learning_rate.get_value() * annealing])

        timing = time.time() - t
        times.append(timing)
        logger.append('time: ' + make_time_units_string(timing))
        logger.log('remaining: ' + make_time_units_string((n_epoch - counter) * numpy.mean(times)))

        if (counter % state.save_frequency) == 0 or STOP is True:
            n_examples = 100 + sequence_window_size
            # Checking reconstruction
            # grab 100 numbers in the sequence from the test set
            nums = test_X.get_value()[range(n_examples)]
            noisy_nums = f_noise(test_X.get_value()[range(n_examples)])
            reconstructed_prediction = []
            reconstructed = []
            for i in range(n_examples):
                if i >= sequence_window_size:
                    xs = [noisy_nums[i - x] for x in range(len(Xs))]
                    xs.reverse()
                    _ins = xs  # + [sequence_window_size]
                    _outs = f_recon(*_ins)
                    prediction = _outs[0]
                    reconstruction = _outs[1]
                    reconstructed_prediction.append(prediction)
                    reconstructed.append(reconstruction)
            nums = nums[sequence_window_size:]
            noisy_nums = noisy_nums[sequence_window_size:]
            reconstructed_prediction = numpy.array(reconstructed_prediction)
            reconstructed = numpy.array(reconstructed)

            # Concatenate stuff
            stacked = numpy.vstack(
                [numpy.vstack([nums[i * 10:(i + 1) * 10],
                               noisy_nums[i * 10:(i + 1) * 10],
                               reconstructed_prediction[i * 10:(i + 1) * 10],
                               reconstructed[i * 10:(i + 1) * 10]]) for i in range(10)])
            number_reconstruction = PIL.Image.fromarray(
                tile_raster_images(stacked, (root_N_input, root_N_input), (10, 40)))
            # epoch_number = reduce(lambda x,y : x + y, ['_'] * (4-len(str(counter)))) + str(counter)
            number_reconstruction.save(
                outdir + 'regression_number_reconstruction_iteration_' + str(iteration) + '_epoch_' + str(counter) + '.png')

            # save gsn_params
            save_params_to_file('regression', counter, regression_params, iteration)

        # ANNEAL!
        new_r_lr = regression_learning_rate.get_value() * annealing
        regression_learning_rate.set_value(new_r_lr)
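
# ---------------------------------------------------------------------------
# `fix_input_size(xs)` is called on the list of time-offset minibatch slices.
# Because each slice is shifted by sequence_idx, slices taken near the end of
# the dataset can come back with unequal lengths; the helper is assumed to
# trim them to a common length so the offsets line up frame-for-frame. A
# sketch under that assumption (the second return value, discarded at every
# call site above, is taken to be the common length):
def fix_input_size(xs):
    min_len = min([len(x) for x in xs])
    xs = [x[:min_len] for x in xs]
    return xs, min_len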
def train_GSN(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
    logger.log('----------------TRAINING GSN FOR ITERATION ' + str(iteration) + "--------------\n")

    # TRAINING
    n_epoch = state.n_epoch
    batch_size = state.batch_size
    STOP = False
    counter = 0
    if iteration == 0:
        learning_rate.set_value(cast32(state.learning_rate))  # learning rate
    times = []
    best_cost = float('inf')
    best_params = None
    patience = 0

    logger.log(['learning rate:', learning_rate.get_value()])
    logger.log(['train X size:', str(train_X.shape.eval())])
    logger.log(['valid X size:', str(valid_X.shape.eval())])
    logger.log(['test X size:', str(test_X.shape.eval())])

    if state.vis_init:
        # numpy.clip expects (a, a_min, a_max): clip the mean activations into
        # [0.001, 0.9], not the constant 0.9 (argument order fixed here)
        bias_list[0].set_value(logit(numpy.clip(train_X.get_value().mean(axis=0), 0.001, 0.9)))

    if state.test_model:
        # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
        logger.log('Testing : skip training')
        STOP = True

    while not STOP:
        counter += 1
        t = time.time()
        logger.append([counter, '\t'])

        # shuffle the data
        # data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)

        # train
        train_costs = []
        train_errors = []
        if iteration == 0:
            # iteration 0 trains the GSN alone on single frames
            for i in range(len(train_X.get_value(borrow=True)) / batch_size):
                x = train_X.get_value(borrow=True)[i * batch_size:(i + 1) * batch_size]
                cost, error = gsn_f_learn_init(x)
                train_costs.append([cost])
                train_errors.append([error])
        else:
            # later iterations train on windows of time-offset minibatches
            for i in range(len(train_X.get_value(borrow=True)) / batch_size):
                xs = [train_X.get_value(borrow=True)[(i * batch_size) + sequence_idx:
                                                     ((i + 1) * batch_size) + sequence_idx]
                      for sequence_idx in range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                cost, error = gsn_f_learn(*_ins)
                train_costs.append(cost)
                train_errors.append(error)
        train_costs = numpy.mean(train_costs)
        train_errors = numpy.mean(train_errors)
        logger.append(['Train: ', trunc(train_costs), trunc(train_errors), '\t'])
        with open(train_convergence, 'a') as f:
            f.write("{0!s},".format(train_costs))
            f.write("\n")

        # valid
        valid_costs = []
        if iteration == 0:
            for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
                x = valid_X.get_value(borrow=True)[i * batch_size:(i + 1) * batch_size]
                cost, _ = gsn_f_cost_init(x)
                valid_costs.append([cost])
        else:
            for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
                xs = [valid_X.get_value(borrow=True)[(i * batch_size) + sequence_idx:
                                                     ((i + 1) * batch_size) + sequence_idx]
                      for sequence_idx in range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                costs, _ = gsn_f_cost(*_ins)
                valid_costs.append(costs)
        valid_costs = numpy.mean(valid_costs)
        logger.append(['Valid: ', trunc(valid_costs), '\t'])
        with open(valid_convergence, 'a') as f:
            f.write("{0!s},".format(valid_costs))
            f.write("\n")

        # test
        test_costs = []
        test_errors = []
        if iteration == 0:
            for i in range(len(test_X.get_value(borrow=True)) / batch_size):
                x = test_X.get_value(borrow=True)[i * batch_size:(i + 1) * batch_size]
                cost, error = gsn_f_cost_init(x)
                test_costs.append([cost])
                test_errors.append([error])
        else:
            for i in range(len(test_X.get_value(borrow=True)) / batch_size):
                xs = [test_X.get_value(borrow=True)[(i * batch_size) + sequence_idx:
                                                    ((i + 1) * batch_size) + sequence_idx]
                      for sequence_idx in range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                costs, errors = gsn_f_cost(*_ins)
                test_costs.append(costs)
                test_errors.append(errors)
        test_costs = numpy.mean(test_costs)
        test_errors = numpy.mean(test_errors)
        logger.append(['Test: ', trunc(test_costs), trunc(test_errors), '\t'])
        with open(test_convergence, 'a') as f:
            f.write("{0!s},".format(test_costs))
            f.write("\n")

        # check for early stopping
        cost = numpy.sum(valid_costs)
        if cost < best_cost * state.early_stop_threshold:
            patience = 0
            best_cost = cost
            # save the parameters that made it the best
            best_params = save_params(gsn_params)
        else:
            patience += 1

        if counter >= n_epoch or patience >= state.early_stop_length:
            STOP = True
            if best_params is not None:
                restore_params(gsn_params, best_params)
            save_params_to_file('gsn', counter, gsn_params, iteration)
            logger.log(["next learning rate should be", learning_rate.get_value() * annealing])

        timing = time.time() - t
        times.append(timing)
        logger.append('time: ' + make_time_units_string(timing))
        logger.log('remaining: ' + make_time_units_string((n_epoch - counter) * numpy.mean(times)))

        if (counter % state.save_frequency) == 0 or STOP is True:
            n_examples = 100
            if iteration == 0:
                random_idx = numpy.array(R.sample(range(len(test_X.get_value())), n_examples))
                numbers = test_X.get_value()[random_idx]
                noisy_numbers = f_noise(test_X.get_value()[random_idx])
                reconstructed = f_recon_init(noisy_numbers)
                # Concatenate stuff
                stacked = numpy.vstack(
                    [numpy.vstack([numbers[i * 10:(i + 1) * 10],
                                   noisy_numbers[i * 10:(i + 1) * 10],
                                   reconstructed[i * 10:(i + 1) * 10]]) for i in range(10)])
                number_reconstruction = PIL.Image.fromarray(
                    tile_raster_images(stacked, (root_N_input, root_N_input), (10, 30)))
            else:
                n_examples = n_examples + sequence_window_size
                # Checking reconstruction
                # grab 100 numbers in the sequence from the test set
                nums = test_X.get_value()[range(n_examples)]
                noisy_nums = f_noise(test_X.get_value()[range(n_examples)])
                reconstructed_prediction = []
                reconstructed = []
                for i in range(n_examples):
                    if i >= sequence_window_size:
                        xs = [noisy_nums[i - x] for x in range(len(Xs))]
                        xs.reverse()
                        _ins = xs  # + [sequence_window_size]
                        _outs = f_recon(*_ins)
                        prediction = _outs[0]
                        reconstruction = _outs[1]
                        reconstructed_prediction.append(prediction)
                        reconstructed.append(reconstruction)
                nums = nums[sequence_window_size:]
                noisy_nums = noisy_nums[sequence_window_size:]
                reconstructed_prediction = numpy.array(reconstructed_prediction)
                reconstructed = numpy.array(reconstructed)

                # Concatenate stuff
                stacked = numpy.vstack(
                    [numpy.vstack([nums[i * 10:(i + 1) * 10],
                                   noisy_nums[i * 10:(i + 1) * 10],
                                   reconstructed_prediction[i * 10:(i + 1) * 10],
                                   reconstructed[i * 10:(i + 1) * 10]]) for i in range(10)])
                number_reconstruction = PIL.Image.fromarray(
                    tile_raster_images(stacked, (root_N_input, root_N_input), (10, 40)))

            # epoch_number = reduce(lambda x,y : x + y, ['_'] * (4-len(str(counter)))) + str(counter)
            number_reconstruction.save(
                outdir + 'gsn_number_reconstruction_iteration_' + str(iteration) + '_epoch_' + str(counter) + '.png')

            # sample_numbers(counter, 'seven')
            plot_samples(counter, iteration)

            # save gsn_params
            save_params_to_file('gsn', counter, gsn_params, iteration)

        # ANNEAL!
        new_lr = learning_rate.get_value() * annealing
        learning_rate.set_value(new_lr)

    # 10k samples
    logger.log('Generating 10,000 samples')
    samples, _ = sample_some_numbers(N=10000)
    f_samples = outdir + 'samples.npy'
    numpy.save(f_samples, samples)
    logger.log('saved digits')
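
# ---------------------------------------------------------------------------
# Hypothetical driver (not from the original source) showing how train_GSN and
# train_regression appear intended to be composed: iteration 0 trains the GSN
# on single frames via the *_init functions, and later iterations alternate
# GSN and regression training over sequence windows. `state.n_iterations` is
# an assumed name for the meta-iteration count.
# for iteration in range(state.n_iterations):
#     train_GSN(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y)
#     train_regression(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y)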