示例#1
0
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
            init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger)
            init_gsn.train()
    
        #############################
        # Save the model parameters #
        #############################
        def save_params_to_file(name, n, gsn_params):
            pass
            print 'saving parameters...'
            save_path = self.outdir+name+'_params_epoch_'+str(n)+'.pkl'
            f = open(save_path, 'wb')
            try:
                cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL)
            finally:
                f.close()
                
        def save_params(params):
            values = [param.get_value(borrow=True) for param in params]
            return values
        
        def restore_params(params, values):
            for i in range(len(params)):
                params[i].set_value(values[i])
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X.shape.eval())])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X.shape.eval())])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X.shape.eval())])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(0.9,0.001,train_X.get_value().mean(axis=0))))
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
                if is_artificial:
                    data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, artificial_sequence, rng)
                     
                #train
                train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger, ['Train:',trunc(train_costs),'\t'])
         
         
                #valid
                valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger, ['Valid:',trunc(valid_costs), '\t'])
         
         
                #test
                test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
                # record it 
                log.maybeAppend(self.logger, ['Test:',trunc(test_costs), '\t'])
                 
                 
                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    save_params_to_file('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = self.f_noise(test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_nums)):
                        recon = self.f_recon(noisy_nums[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack([nums[i*10 : (i+1)*10], noisy_nums[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.root_N_input,self.root_N_input), (10,30)))
                        
                    number_reconstruction.save(self.outdir+'rnngsn_number_reconstruction_epoch_'+str(counter)+'.png')
                    
                    #save params
                    save_params_to_file('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
示例#2
0
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        # Input data - make sure it is a list of shared datasets
        train_X = raise_to_list(train_X)
        train_Y = raise_to_list(train_Y)
        valid_X = raise_to_list(valid_X)
        valid_Y = raise_to_list(valid_Y)
        test_X  = raise_to_list(test_X)
        test_Y =  raise_to_list(test_Y)
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
            # init_gsn = GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, state=self.gsn_args, logger=self.logger)
            # init_gsn.train()
            print "NOT IMPLEMENTED"
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X[0].get_value(borrow=True).shape)])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].get_value(borrow=True).shape)])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X[0].get_value(borrow=True).shape)])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(0.9,0.001,train_X[0].get_value(borrow=True).mean(axis=0))))
                
            start_time = time.time()
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
#                 if is_artificial:
#                     data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng)
                     
                #train
                train_costs = []
                train_errors = []
                for train_data in train_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                    train_costs.extend([cost for (cost, error) in costs_and_errors])
                    train_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)),trunc(numpy.mean(train_errors)),'\t'])
         
         
                #valid
                if valid_X is not None:
                    valid_costs = []
                    for valid_data in valid_X:
                        cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                        valid_costs.extend([c for c,e in cs])
                    log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])
         
         
                #test
                if test_X is not None:
                    test_costs = []
                    test_errors = []
                    for test_data in test_X:
                        costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                        test_costs.extend([cost for (cost, error) in costs_and_errors])
                        test_errors.extend([error for (cost, error) in costs_and_errors])
                    log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)),trunc(numpy.mean(test_errors)), '\t'])
                
                 
                #check for early stopping
                if valid_X is not None:
                    cost = numpy.sum(valid_costs)
                else:
                    cost = numpy.sum(train_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = copy_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    self.save_params('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                    noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_xs_test)):
                        recon, recon_cost = self.f_recon(noisy_xs_test[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon[-1])
                    reconstructed = numpy.array(reconstructions)
                    if (self.is_image):
                        # Concatenate stuff
                        stacked = numpy.vstack([numpy.vstack([xs_test[i*10 : (i+1)*10], noisy_xs_test[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                        number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10,30)))
                            
                        number_reconstruction.save(self.outdir+'rnngsn_reconstruction_epoch_'+str(counter)+'.png')
            
                        #sample_numbers(counter, 'seven')
                        # plot_samples(counter, 'rnngsn')

            
                    #save params
                    self.save_params('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
                
                new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
                self.input_salt_and_pepper.set_value(new_noise)
                
            log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(make_time_units_string(time.time()-start_time)))
 def train(self, train_X=None, valid_X=None, test_X=None, continue_training=False):
     """
     Train the GSN with SGD, using early stopping on the validation cost
     (falling back to the training cost when no validation set is given) and
     saving parameters plus reconstruction images every `self.save_frequency`
     epochs.

     :param train_X: training inputs; defaults to the dataset given at init.
     :param valid_X: optional validation inputs (default: self.valid_X).
     :param test_X: optional test inputs (default: self.test_X).
     :param continue_training: when True, keep the current learning rate
         instead of resetting it to self.init_learn_rate.
     :raises AssertionError: if no training dataset is available.
     """
     log.maybeLog(self.logger, "\nTraining---------\n")
     if train_X is None:
         log.maybeLog(self.logger, "Training using data given during initialization of GSN.\n")
         train_X = self.train_X
         if train_X is None:
             log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
             raise AssertionError("Please provide a training dataset!")
     else:
         log.maybeLog(self.logger, "Training using data provided to training function.\n")
     if valid_X is None:
         valid_X = self.valid_X
     if test_X is None:
         test_X  = self.test_X

     # Normalize every dataset into a list of shared datasets.
     train_X = raise_data_to_list(train_X)
     valid_X = raise_data_to_list(valid_X)
     test_X  = raise_data_to_list(test_X)

     ############
     # TRAINING #
     ############
     log.maybeLog(self.logger, "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(self.n_epoch))
     STOP        = False
     counter     = 0
     if not continue_training:
         self.learning_rate.set_value(self.init_learn_rate)  # learning rate
     times       = []
     best_cost   = float('inf')
     best_params = None
     patience    = 0

     log.maybeLog(self.logger, ['train X size:',str(train_X[0].shape.eval())])
     if valid_X is not None:
         log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].shape.eval())])
     if test_X is not None:
         log.maybeLog(self.logger, ['test X size:',str(test_X[0].shape.eval())])

     if self.vis_init:
         # Initialize the visible bias from the mean of the training data,
         # clipped into (0.001, 0.9) so logit() stays finite.
         # BUGFIX: numpy.clip's signature is (a, a_min, a_max); the original
         # call clipped the constant 0.9 instead of clipping the data mean.
         self.bias_list[0].set_value(logit(numpy.clip(train_X[0].get_value().mean(axis=0), 0.001, 0.9)))

     while not STOP:
         counter += 1
         t = time.time()
         log.maybeAppend(self.logger, [counter,'\t'])

         #train
         train_costs = data.apply_cost_function_to_dataset(self.f_learn, train_X, self.batch_size)
         log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)), '\t'])

         #valid
         if valid_X is not None:
             valid_costs = data.apply_cost_function_to_dataset(self.f_cost, valid_X, self.batch_size)
             log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])

         #test
         if test_X is not None:
             test_costs = data.apply_cost_function_to_dataset(self.f_cost, test_X, self.batch_size)
             log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)), '\t'])

         # check for early stopping: use validation cost when available,
         # otherwise fall back to the training cost.
         if valid_X is not None:
             cost = numpy.sum(valid_costs)
         else:
             cost = numpy.sum(train_costs)
         if cost < best_cost*self.early_stop_threshold:
             patience = 0
             best_cost = cost
             # save the parameters that made it the best
             best_params = save_params(self.params)
         else:
             patience += 1

         if counter >= self.n_epoch or patience >= self.early_stop_length:
             STOP = True
             if best_params is not None:
                 restore_params(self.params, best_params)
             save_params_to_file(counter, self.params, self.outdir, self.logger)

         timing = time.time() - t
         times.append(timing)

         log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')

         log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))

         if (counter % self.save_frequency) == 0 or STOP is True:
             # ROBUSTNESS: also require a test set before trying to reconstruct.
             if self.is_image and test_X is not None:
                 n_examples = 100
                 # BUGFIX: test_X was converted to a *list* of shared datasets by
                 # raise_data_to_list above, so it must be indexed before calling
                 # get_value() (the original called test_X.get_value() and would
                 # raise AttributeError; compare the shape logging earlier which
                 # correctly uses test_X[0]).
                 tests = test_X[0].get_value()[0:n_examples]
                 noisy_tests = self.f_noise(test_X[0].get_value()[0:n_examples])
                 _, reconstructed = self.f_recon(noisy_tests)
                 # Concatenate stuff if it is an image
                 stacked = numpy.vstack([numpy.vstack([tests[i*10 : (i+1)*10], noisy_tests[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                 number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height,self.image_width), (10,30)))

                 number_reconstruction.save(self.outdir+'gsn_image_reconstruction_epoch_'+str(counter)+'.png')

             #save gsn_params
             save_params_to_file(counter, self.params, self.outdir, self.logger)

         # ANNEAL!
         new_lr = self.learning_rate.get_value() * self.annealing
         self.learning_rate.set_value(new_lr)

         new_hidden_sigma = self.hidden_add_noise_sigma.get_value() * self.noise_annealing
         self.hidden_add_noise_sigma.set_value(new_hidden_sigma)

         new_salt_pepper = self.input_salt_and_pepper.get_value() * self.noise_annealing
         self.input_salt_and_pepper.set_value(new_salt_pepper)
示例#4
0
    def train(self,
              train_X=None,
              valid_X=None,
              test_X=None,
              continue_training=False):
        """
        Train the GSN with SGD, using early stopping on the validation cost
        (falling back to the training cost when no validation set is given)
        and saving parameters plus reconstruction images every
        `self.save_frequency` epochs.

        :param train_X: training inputs; defaults to the dataset given at init.
        :param valid_X: optional validation inputs (default: self.valid_X).
        :param test_X: optional test inputs (default: self.test_X).
        :param continue_training: when True, keep the current learning rate
            instead of resetting it to self.init_learn_rate.
        :raises AssertionError: if no training dataset is available.
        """
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(
                self.logger,
                "Training using data given during initialization of GSN.\n")
            train_X = self.train_X
            if train_X is None:
                log.maybeLog(self.logger,
                             "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(
                self.logger,
                "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
        if test_X is None:
            test_X = self.test_X

        # Normalize every dataset into a list of shared datasets.
        train_X = raise_data_to_list(train_X)
        valid_X = raise_data_to_list(valid_X)
        test_X = raise_data_to_list(test_X)

        ############
        # TRAINING #
        ############
        log.maybeLog(
            self.logger,
            "-----------TRAINING GSN FOR {0!s} EPOCHS-----------".format(
                self.n_epoch))
        STOP = False
        counter = 0
        if not continue_training:
            self.learning_rate.set_value(self.init_learn_rate)  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        log.maybeLog(
            self.logger,
            ['train X size:', str(train_X[0].shape.eval())])
        if valid_X is not None:
            log.maybeLog(self.logger,
                         ['valid X size:',
                          str(valid_X[0].shape.eval())])
        if test_X is not None:
            log.maybeLog(
                self.logger,
                ['test X size:', str(test_X[0].shape.eval())])

        if self.vis_init:
            # Initialize the visible bias from the mean of the training data,
            # clipped into (0.001, 0.9) so logit() stays finite.
            # BUGFIX: numpy.clip's signature is (a, a_min, a_max); the original
            # call clipped the constant 0.9 instead of clipping the data mean.
            self.bias_list[0].set_value(
                logit(
                    numpy.clip(train_X[0].get_value().mean(axis=0),
                               0.001, 0.9)))

        while not STOP:
            counter += 1
            t = time.time()
            log.maybeAppend(self.logger, [counter, '\t'])

            #train
            train_costs = data.apply_cost_function_to_dataset(
                self.f_learn, train_X, self.batch_size)
            log.maybeAppend(
                self.logger,
                ['Train:', trunc(numpy.mean(train_costs)), '\t'])

            #valid
            if valid_X is not None:
                valid_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, valid_X, self.batch_size)
                log.maybeAppend(
                    self.logger,
                    ['Valid:', trunc(numpy.mean(valid_costs)), '\t'])

            #test
            if test_X is not None:
                test_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, test_X, self.batch_size)
                log.maybeAppend(
                    self.logger,
                    ['Test:', trunc(numpy.mean(test_costs)), '\t'])

            # check for early stopping: use validation cost when available,
            # otherwise fall back to the training cost.
            if valid_X is not None:
                cost = numpy.sum(valid_costs)
            else:
                cost = numpy.sum(train_costs)
            if cost < best_cost * self.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = save_params(self.params)
            else:
                patience += 1

            if counter >= self.n_epoch or patience >= self.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(self.params, best_params)
                save_params_to_file(counter, self.params, self.outdir,
                                    self.logger)

            timing = time.time() - t
            times.append(timing)

            log.maybeAppend(self.logger,
                            'time: ' + make_time_units_string(timing) + '\t')

            log.maybeLog(
                self.logger, 'remaining: ' + make_time_units_string(
                    (self.n_epoch - counter) * numpy.mean(times)))

            if (counter % self.save_frequency) == 0 or STOP is True:
                # ROBUSTNESS: also require a test set before reconstructing.
                if self.is_image and test_X is not None:
                    n_examples = 100
                    # BUGFIX: test_X was converted to a *list* of shared
                    # datasets by raise_data_to_list above, so it must be
                    # indexed before get_value() (the original called
                    # test_X.get_value() and would raise AttributeError;
                    # compare the shape logging earlier which correctly
                    # uses test_X[0]).
                    tests = test_X[0].get_value()[0:n_examples]
                    noisy_tests = self.f_noise(
                        test_X[0].get_value()[0:n_examples])
                    _, reconstructed = self.f_recon(noisy_tests)
                    # Concatenate stuff if it is an image
                    stacked = numpy.vstack([
                        numpy.vstack([
                            tests[i * 10:(i + 1) * 10],
                            noisy_tests[i * 10:(i + 1) * 10],
                            reconstructed[i * 10:(i + 1) * 10]
                        ]) for i in range(10)
                    ])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(
                            stacked, (self.image_height, self.image_width),
                            (10, 30)))

                    number_reconstruction.save(
                        self.outdir + 'gsn_image_reconstruction_epoch_' +
                        str(counter) + '.png')

                #save gsn_params
                save_params_to_file(counter, self.params, self.outdir,
                                    self.logger)

            # ANNEAL!
            new_lr = self.learning_rate.get_value() * self.annealing
            self.learning_rate.set_value(new_lr)

            new_hidden_sigma = self.hidden_add_noise_sigma.get_value(
            ) * self.noise_annealing
            self.hidden_add_noise_sigma.set_value(new_hidden_sigma)

            new_salt_pepper = self.input_salt_and_pepper.get_value(
            ) * self.noise_annealing
            self.input_salt_and_pepper.set_value(new_salt_pepper)
    def train(self, train_X=None, train_Y=None, valid_X=None, valid_Y=None, test_X=None, test_Y=None, is_artificial=False, artificial_sequence=1, continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(self.logger, "Training using data given during initialization of RNN-GSN.\n")
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger, "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(self.logger, "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X  = self.test_X
            test_Y  = self.test_Y
            
        # Input data - make sure it is a list of shared datasets
        train_X = raise_to_list(train_X)
        train_Y = raise_to_list(train_Y)
        valid_X = raise_to_list(valid_X)
        valid_Y = raise_to_list(valid_Y)
        test_X  = raise_to_list(test_X)
        test_Y =  raise_to_list(test_Y)
            
        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        # if self.train_gsn_first:
        #     log.maybeLog(self.logger, "\n\n----------Initially training the GSN---------\n\n")
        #     init_gsn = generative_stochastic_network.GSN(train_X=train_X, valid_X=valid_X, test_X=test_X, args=self.gsn_args, logger=self.logger)
        #     init_gsn.train()
    
        
        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#         
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)
        
        ################################
        # If we are using SGD training #
        ################################
        else:
            log.maybeLog(self.logger, "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP        =   False
            counter     =   0
            if not continue_training:
                self.learning_rate.set_value(self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0
                        
            log.maybeLog(self.logger, ['train X size:',str(train_X[0].get_value(borrow=True).shape)])
            if valid_X is not None:
                log.maybeLog(self.logger, ['valid X size:',str(valid_X[0].get_value(borrow=True).shape)])
            if test_X is not None:
                log.maybeLog(self.logger, ['test X size:',str(test_X[0].get_value(borrow=True).shape)])
            
            if self.vis_init:
                self.bias_list[0].set_value(logit(numpy.clip(0.9,0.001,train_X[0].get_value(borrow=True).mean(axis=0))))
                
            start_time = time.time()
        
            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter,'\t'])
                    
#                 if is_artificial:
#                     data.sequence_mnist_data(train_X[0], train_Y[0], valid_X[0], valid_Y[0], test_X[0], test_Y[0], artificial_sequence, rng)
                     
                #train
                train_costs = []
                train_errors = []
                for train_data in train_X:
                    costs_and_errors = data.apply_cost_function_to_dataset(self.f_learn, train_data, self.batch_size)
                    train_costs.extend([cost for (cost, error) in costs_and_errors])
                    train_errors.extend([error for (cost, error) in costs_and_errors])
                log.maybeAppend(self.logger, ['Train:',trunc(numpy.mean(train_costs)),trunc(numpy.mean(train_errors)),'\t'])
         
         
                #valid
                if valid_X is not None:
                    valid_costs = []
                    for valid_data in valid_X:
                        cs = data.apply_cost_function_to_dataset(self.f_cost, valid_data, self.batch_size)
                        valid_costs.extend([c for c,e in cs])
                    log.maybeAppend(self.logger, ['Valid:',trunc(numpy.mean(valid_costs)), '\t'])
         
         
                #test
                if test_X is not None:
                    test_costs = []
                    test_errors = []
                    for test_data in test_X:
                        costs_and_errors = data.apply_cost_function_to_dataset(self.f_cost, test_data, self.batch_size)
                        test_costs.extend([cost for (cost, error) in costs_and_errors])
                        test_errors.extend([error for (cost, error) in costs_and_errors])
                    log.maybeAppend(self.logger, ['Test:',trunc(numpy.mean(test_costs)),trunc(numpy.mean(test_errors)), '\t'])
                
                 
                #check for early stopping
                if valid_X is not None:
                    cost = numpy.sum(valid_costs)
                else:
                    cost = numpy.sum(train_costs)
                if cost < best_cost*self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = copy_params(self.params)
                else:
                    patience += 1
         
                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    self.save_params('all', counter, self.params)
         
                timing = time.time() - t
                times.append(timing)
         
                log.maybeAppend(self.logger, 'time: '+make_time_units_string(timing)+'\t')
            
                log.maybeLog(self.logger, 'remaining: '+make_time_units_string((self.n_epoch - counter) * numpy.mean(times)))
        
                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    xs_test = test_X[0].get_value(borrow=True)[range(n_examples)]
                    noisy_xs_test = self.f_noise(test_X[0].get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_xs_test)):
                        recon, recon_cost = self.f_recon(noisy_xs_test[max(0,(i+1)-self.batch_size):i+1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)
                    if (self.is_image):
                        # Concatenate stuff
                        # stacked = numpy.vstack([numpy.vstack([xs_test[i*10 : (i+1)*10], noisy_xs_test[i*10 : (i+1)*10], reconstructed[i*10 : (i+1)*10]]) for i in range(10)])
                        # number_reconstruction = PIL.Image.fromarray(tile_raster_images(stacked, (self.image_height, self.image_width), (10,30)))
                            
                        # number_reconstruction.save(self.outdir+'rnngsn_reconstruction_epoch_'+str(counter)+'.png')
            
                        #sample_numbers(counter, 'seven')
#                         plot_samples(counter, 'rnngsn')
                        pass
            
                    #save params
                    self.save_params('all', counter, self.params)
             
                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)
                
                new_noise = self.input_salt_and_pepper.get_value() * self.noise_annealing
                self.input_salt_and_pepper.set_value(new_noise)
                
            log.maybeLog(self.logger, "\n------------TOTAL RNN-GSN TRAIN TIME TOOK {0!s}---------".format(make_time_units_string(time.time()-start_time)))
示例#6
0
    def train_regression(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
        """Run one SGD training phase for the regression part of the model.

        Each epoch: trains ``regression_f_learn`` over sliding-window
        mini-batches of ``train_X``, logs mean train/valid/test cost, appends
        the costs to the convergence files, early-stops on the validation
        cost, periodically saves reconstruction images and parameters, and
        anneals the learning rate after every epoch.

        NOTE(review): relies on many names from the enclosing scope
        (``state``, ``logger``, ``Xs``, ``sequence_window_size``,
        ``fix_input_size``, ``regression_f_learn``, ``regression_f_cost``,
        ``f_noise``, ``f_recon``, ``regression_params``, ``save_params``,
        ``restore_params``, ``save_params_to_file``, ``annealing``,
        ``outdir``, the ``*_convergence`` file paths, ...) — confirm they are
        defined before this is called.

        iteration -- outer alternating-training iteration index; the learning
                     rate is only reset to ``state.learning_rate`` when it is 0.
        train_X/valid_X/test_X -- presumably Theano shared variables (sizes are
                     read via ``.shape.eval()`` and rows via ``.get_value()``)
                     — TODO confirm.
        train_Y/valid_Y/test_Y -- unused except by the commented-out shuffle
                     call; kept for signature symmetry with train_GSN.
        """
        logger.log('-------------TRAINING REGRESSION FOR ITERATION {0!s}-------------'.format(iteration))

        # TRAINING
        n_epoch = state.n_epoch
        batch_size = state.batch_size
        STOP = False
        counter = 0
        best_cost = float('inf')
        best_params = None
        patience = 0
        if iteration == 0:
            regression_learning_rate.set_value(cast32(state.learning_rate))  # learning rate
        times = []

        logger.log(['learning rate:', regression_learning_rate.get_value()])

        logger.log(['train X size:', str(train_X.shape.eval())])
        logger.log(['valid X size:', str(valid_X.shape.eval())])
        logger.log(['test X size:', str(test_X.shape.eval())])

        if state.test_model:
            # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
            logger.log('Testing : skip training')
            STOP = True

        while not STOP:
            counter += 1
            t = time.time()
            logger.append([counter, '\t'])

            # shuffle the data
            # data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)

            # train
            train_costs = []
            train_errors = []
            # NOTE: Python-2 integer division — '/' truncates, so a trailing
            # partial batch is silently dropped.
            for i in range(len(train_X.get_value(borrow=True)) / batch_size):
                # Build len(Xs) time-shifted views of the same batch: window k
                # is the batch offset by k rows (the model's input sequence).
                xs = [train_X.get_value(borrow=True)[
                      (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                      range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                cost, error = regression_f_learn(*_ins)
                # print trunc(cost)
                # print [numpy.asarray(a) for a in f_check(*_ins)]
                train_costs.append(cost)
                train_errors.append(error)

            train_costs = numpy.mean(train_costs)
            train_errors = numpy.mean(train_errors)
            logger.append(['rTrain: ', trunc(train_costs), trunc(train_errors), '\t'])
            with open(regression_train_convergence, 'a') as f:
                f.write("{0!s},".format(train_costs))
                f.write("\n")

            # valid
            valid_costs = []
            for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
                xs = [valid_X.get_value(borrow=True)[
                      (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                      range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                cost, _ = regression_f_cost(*_ins)
                valid_costs.append(cost)

            valid_costs = numpy.mean(valid_costs)
            logger.append(['rValid: ', trunc(valid_costs), '\t'])
            with open(regression_valid_convergence, 'a') as f:
                f.write("{0!s},".format(valid_costs))
                f.write("\n")

            # test
            test_costs = []
            test_errors = []
            for i in range(len(test_X.get_value(borrow=True)) / batch_size):
                xs = [test_X.get_value(borrow=True)[
                      (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                      range(len(Xs))]
                xs, _ = fix_input_size(xs)
                _ins = xs  # + [sequence_window_size]
                cost, error = regression_f_cost(*_ins)
                test_costs.append(cost)
                test_errors.append(error)

            test_costs = numpy.mean(test_costs)
            test_errors = numpy.mean(test_errors)
            logger.append(['rTest: ', trunc(test_costs), trunc(test_errors), '\t'])
            with open(regression_test_convergence, 'a') as f:
                f.write("{0!s},".format(test_costs))
                f.write("\n")

            # check for early stopping
            # valid_costs is already a scalar mean here, so numpy.sum is a
            # no-op kept for uniformity with other training loops in the file.
            cost = numpy.sum(valid_costs)
            if cost < best_cost * state.early_stop_threshold:
                patience = 0
                best_cost = cost
                # keep the best params so far
                best_params = save_params(regression_params)
            else:
                patience += 1

            # stop after n_epoch epochs or when validation stopped improving;
            # restore the best-seen parameters before saving.
            if counter >= n_epoch or patience >= state.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(regression_params, best_params)
                save_params_to_file('regression', counter, regression_params, iteration)
                logger.log(["next learning rate should be", regression_learning_rate.get_value() * annealing])

            timing = time.time() - t
            times.append(timing)

            logger.append('time: ' + make_time_units_string(timing))

            logger.log('remaining: ' + make_time_units_string((n_epoch - counter) * numpy.mean(times)))

            # every save_frequency epochs (and on the last one) dump a
            # reconstruction image and the current parameters
            if (counter % state.save_frequency) == 0 or STOP is True:
                # extra rows so the first sequence_window_size inputs (which
                # lack full history) can be discarded below
                n_examples = 100 + sequence_window_size
                # Checking reconstruction
                # grab 100 numbers in the sequence from the test set
                nums = test_X.get_value()[range(n_examples)]
                noisy_nums = f_noise(test_X.get_value()[range(n_examples)])

                reconstructed_prediction = []
                reconstructed = []
                for i in range(n_examples):
                    if i >= sequence_window_size:
                        # feed the window of noisy inputs ending at i,
                        # oldest first
                        xs = [noisy_nums[i - x] for x in range(len(Xs))]
                        xs.reverse()
                        _ins = xs  # + [sequence_window_size]
                        _outs = f_recon(*_ins)
                        prediction = _outs[0]
                        reconstruction = _outs[1]
                        reconstructed_prediction.append(prediction)
                        reconstructed.append(reconstruction)
                nums = nums[sequence_window_size:]
                noisy_nums = noisy_nums[sequence_window_size:]
                reconstructed_prediction = numpy.array(reconstructed_prediction)
                reconstructed = numpy.array(reconstructed)

                # Concatenate stuff
                stacked = numpy.vstack([numpy.vstack([nums[i * 10: (i + 1) * 10], noisy_nums[i * 10: (i + 1) * 10],
                                                      reconstructed_prediction[i * 10: (i + 1) * 10],
                                                      reconstructed[i * 10: (i + 1) * 10]]) for i in range(10)])

                number_reconstruction = PIL.Image.fromarray(
                    tile_raster_images(stacked, (root_N_input, root_N_input), (10, 40)))
                # epoch_number    =   reduce(lambda x,y : x + y, ['_'] * (4-len(str(counter)))) + str(counter)
                number_reconstruction.save(
                    outdir + 'regression_number_reconstruction_iteration_' + str(iteration) + '_epoch_' + str(
                        counter) + '.png')

                # save gsn_params
                save_params_to_file('regression', counter, regression_params, iteration)

            # ANNEAL!
            new_r_lr = regression_learning_rate.get_value() * annealing
            regression_learning_rate.set_value(new_r_lr)
示例#7
0
    def train_GSN(iteration, train_X, train_Y, valid_X, valid_Y, test_X, test_Y):
        """Run one SGD training phase for the GSN part of the model.

        On iteration 0 the GSN is trained on plain contiguous batches with the
        ``*_init`` Theano functions; on later iterations it is trained on
        sliding-window sequence batches with ``gsn_f_learn``/``gsn_f_cost``.
        Each epoch logs mean train/valid/test cost, appends the costs to the
        convergence files, early-stops on the validation cost, periodically
        saves reconstruction images / samples / parameters, and anneals the
        learning rate.  After the loop, 10,000 samples are generated and saved.

        NOTE(review): relies on many names from the enclosing scope
        (``state``, ``logger``, ``Xs``, ``sequence_window_size``,
        ``fix_input_size``, ``gsn_f_learn(_init)``, ``gsn_f_cost(_init)``,
        ``f_noise``, ``f_recon(_init)``, ``gsn_params``, ``bias_list``,
        ``learning_rate``, ``annealing``, ``R``, ``outdir``,
        ``plot_samples``, ``sample_some_numbers``, the ``*_convergence``
        paths, ...) — confirm they are defined before this is called.

        iteration -- outer alternating-training iteration index; selects the
                     init vs. sequence code paths and whether the learning
                     rate is reset.
        train_X/valid_X/test_X -- presumably Theano shared variables (sizes
                     read via ``.shape.eval()``, rows via ``.get_value()``)
                     — TODO confirm.
        train_Y/valid_Y/test_Y -- unused except by the commented-out shuffle
                     call.
        """
        logger.log('----------------TRAINING GSN FOR ITERATION ' + str(iteration) + "--------------\n")

        # TRAINING
        n_epoch = state.n_epoch
        batch_size = state.batch_size
        STOP = False
        counter = 0
        if iteration == 0:
            learning_rate.set_value(cast32(state.learning_rate))  # learning rate
        times = []
        best_cost = float('inf')
        best_params = None
        patience = 0

        logger.log(['learning rate:', learning_rate.get_value()])

        logger.log(['train X size:', str(train_X.shape.eval())])
        logger.log(['valid X size:', str(valid_X.shape.eval())])
        logger.log(['test X size:', str(test_X.shape.eval())])

        if state.vis_init:
            # initialize the visible bias from the mean training activation
            bias_list[0].set_value(logit(numpy.clip(0.9, 0.001, train_X.get_value().mean(axis=0))))

        if state.test_model:
            # If testing, do not train and go directly to generating samples, parzen window estimation, and inpainting
            logger.log('Testing : skip training')
            STOP = True

        while not STOP:
            counter += 1
            t = time.time()
            logger.append([counter, '\t'])

            # shuffle the data
            # data.sequence_mnist_data(train_X, train_Y, valid_X, valid_Y, test_X, test_Y, dataset, rng)

            # train
            train_costs = []
            train_errors = []
            # NOTE: Python-2 integer division — '/' truncates, so a trailing
            # partial batch is silently dropped in all the loops below.
            if iteration == 0:
                # plain (non-sequence) batches for the initial GSN training
                for i in range(len(train_X.get_value(borrow=True)) / batch_size):
                    x = train_X.get_value(borrow=True)[i * batch_size: (i + 1) * batch_size]
                    cost, error = gsn_f_learn_init(x)
                    # wrapped in 1-element lists; numpy.mean flattens these
                    # the same as the scalars appended in the else-branch
                    train_costs.append([cost])
                    train_errors.append([error])
            else:
                for i in range(len(train_X.get_value(borrow=True)) / batch_size):
                    # len(Xs) time-shifted views of the batch: window k is the
                    # batch offset by k rows (the model's input sequence)
                    xs = [train_X.get_value(borrow=True)[
                          (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                          range(len(Xs))]
                    xs, _ = fix_input_size(xs)
                    _ins = xs  # + [sequence_window_size]
                    cost, error = gsn_f_learn(*_ins)
                    train_costs.append(cost)
                    train_errors.append(error)

            train_costs = numpy.mean(train_costs)
            train_errors = numpy.mean(train_errors)
            logger.append(['Train: ', trunc(train_costs), trunc(train_errors), '\t'])
            with open(train_convergence, 'a') as f:
                f.write("{0!s},".format(train_costs))
                f.write("\n")

            # valid
            valid_costs = []
            if iteration == 0:
                for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
                    x = valid_X.get_value(borrow=True)[i * batch_size: (i + 1) * batch_size]
                    cost, _ = gsn_f_cost_init(x)
                    valid_costs.append([cost])
            else:
                for i in range(len(valid_X.get_value(borrow=True)) / batch_size):
                    xs = [valid_X.get_value(borrow=True)[
                          (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                          range(len(Xs))]
                    xs, _ = fix_input_size(xs)
                    _ins = xs  # + [sequence_window_size]
                    costs, _ = gsn_f_cost(*_ins)
                    valid_costs.append(costs)

            valid_costs = numpy.mean(valid_costs)
            logger.append(['Valid: ', trunc(valid_costs), '\t'])
            with open(valid_convergence, 'a') as f:
                f.write("{0!s},".format(valid_costs))
                f.write("\n")

            # test
            test_costs = []
            test_errors = []
            if iteration == 0:
                for i in range(len(test_X.get_value(borrow=True)) / batch_size):
                    x = test_X.get_value(borrow=True)[i * batch_size: (i + 1) * batch_size]
                    cost, error = gsn_f_cost_init(x)
                    test_costs.append([cost])
                    test_errors.append([error])
            else:
                for i in range(len(test_X.get_value(borrow=True)) / batch_size):
                    xs = [test_X.get_value(borrow=True)[
                          (i * batch_size) + sequence_idx: ((i + 1) * batch_size) + sequence_idx] for sequence_idx in
                          range(len(Xs))]
                    xs, _ = fix_input_size(xs)
                    _ins = xs  # + [sequence_window_size]
                    costs, errors = gsn_f_cost(*_ins)
                    test_costs.append(costs)
                    test_errors.append(errors)

            test_costs = numpy.mean(test_costs)
            test_errors = numpy.mean(test_errors)
            logger.append(['Test: ', trunc(test_costs), trunc(test_errors), '\t'])
            with open(test_convergence, 'a') as f:
                f.write("{0!s},".format(test_costs))
                f.write("\n")

            # check for early stopping
            # valid_costs is already a scalar mean here, so numpy.sum is a
            # no-op kept for uniformity with other training loops in the file.
            cost = numpy.sum(valid_costs)
            if cost < best_cost * state.early_stop_threshold:
                patience = 0
                best_cost = cost
                # save the parameters that made it the best
                best_params = save_params(gsn_params)
            else:
                patience += 1

            # stop after n_epoch epochs or when validation stopped improving;
            # restore the best-seen parameters before saving.
            if counter >= n_epoch or patience >= state.early_stop_length:
                STOP = True
                if best_params is not None:
                    restore_params(gsn_params, best_params)
                save_params_to_file('gsn', counter, gsn_params, iteration)
                logger.log(["next learning rate should be", learning_rate.get_value() * annealing])

            timing = time.time() - t
            times.append(timing)

            logger.append('time: ' + make_time_units_string(timing))

            logger.log('remaining: ' + make_time_units_string((n_epoch - counter) * numpy.mean(times)))

            # every save_frequency epochs (and on the last one) dump a
            # reconstruction image, samples, and the current parameters
            if (counter % state.save_frequency) == 0 or STOP is True:
                n_examples = 100
                if iteration == 0:
                    # random test digits; 3 rows per group: clean / noisy / recon
                    random_idx = numpy.array(R.sample(range(len(test_X.get_value())), n_examples))
                    numbers = test_X.get_value()[random_idx]
                    noisy_numbers = f_noise(test_X.get_value()[random_idx])
                    reconstructed = f_recon_init(noisy_numbers)
                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack(
                        [numbers[i * 10: (i + 1) * 10], noisy_numbers[i * 10: (i + 1) * 10],
                         reconstructed[i * 10: (i + 1) * 10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(stacked, (root_N_input, root_N_input), (10, 30)))
                else:
                    # extra rows so the first sequence_window_size inputs
                    # (which lack full history) can be discarded below
                    n_examples = n_examples + sequence_window_size
                    # Checking reconstruction
                    # grab 100 numbers in the sequence from the test set
                    nums = test_X.get_value()[range(n_examples)]
                    noisy_nums = f_noise(test_X.get_value()[range(n_examples)])

                    reconstructed_prediction = []
                    reconstructed = []
                    for i in range(n_examples):
                        if i >= sequence_window_size:
                            # feed the window of noisy inputs ending at i,
                            # oldest first
                            xs = [noisy_nums[i - x] for x in range(len(Xs))]
                            xs.reverse()
                            _ins = xs  # + [sequence_window_size]
                            _outs = f_recon(*_ins)
                            prediction = _outs[0]
                            reconstruction = _outs[1]
                            reconstructed_prediction.append(prediction)
                            reconstructed.append(reconstruction)
                    nums = nums[sequence_window_size:]
                    noisy_nums = noisy_nums[sequence_window_size:]
                    reconstructed_prediction = numpy.array(reconstructed_prediction)
                    reconstructed = numpy.array(reconstructed)

                    # Concatenate stuff
                    stacked = numpy.vstack([numpy.vstack([nums[i * 10: (i + 1) * 10], noisy_nums[i * 10: (i + 1) * 10],
                                                          reconstructed_prediction[i * 10: (i + 1) * 10],
                                                          reconstructed[i * 10: (i + 1) * 10]]) for i in range(10)])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(stacked, (root_N_input, root_N_input), (10, 40)))

                # epoch_number    =   reduce(lambda x,y : x + y, ['_'] * (4-len(str(counter)))) + str(counter)
                number_reconstruction.save(
                    outdir + 'gsn_number_reconstruction_iteration_' + str(iteration) + '_epoch_' + str(
                        counter) + '.png')

                # sample_numbers(counter, 'seven')
                plot_samples(counter, iteration)

                # save gsn_params
                save_params_to_file('gsn', counter, gsn_params, iteration)

            # ANNEAL!
            new_lr = learning_rate.get_value() * annealing
            learning_rate.set_value(new_lr)

        # 10k samples
        logger.log('Generating 10,000 samples')
        samples, _ = sample_some_numbers(N=10000)
        f_samples = outdir + 'samples.npy'
        numpy.save(f_samples, samples)
        logger.log('saved digits')
示例#8
0
    def train(self,
              train_X=None,
              train_Y=None,
              valid_X=None,
              valid_Y=None,
              test_X=None,
              test_Y=None,
              is_artificial=False,
              artificial_sequence=1,
              continue_training=False):
        log.maybeLog(self.logger, "\nTraining---------\n")
        if train_X is None:
            log.maybeLog(
                self.logger,
                "Training using data given during initialization of RNN-GSN.\n"
            )
            train_X = self.train_X
            train_Y = self.train_Y
            if train_X is None:
                log.maybeLog(self.logger,
                             "\nPlease provide a training dataset!\n")
                raise AssertionError("Please provide a training dataset!")
        else:
            log.maybeLog(
                self.logger,
                "Training using data provided to training function.\n")
        if valid_X is None:
            valid_X = self.valid_X
            valid_Y = self.valid_Y
        if test_X is None:
            test_X = self.test_X
            test_Y = self.test_Y

        ##########################################################
        # Train the GSN first to get good weights initialization #
        ##########################################################
        if self.train_gsn_first:
            log.maybeLog(
                self.logger,
                "\n\n----------Initially training the GSN---------\n\n")
            init_gsn = generative_stochastic_network.GSN(train_X=train_X,
                                                         valid_X=valid_X,
                                                         test_X=test_X,
                                                         args=self.gsn_args,
                                                         logger=self.logger)
            init_gsn.train()

        #############################
        # Save the model parameters #
        #############################
        def save_params_to_file(name, n, gsn_params):
            pass
            print 'saving parameters...'
            save_path = self.outdir + name + '_params_epoch_' + str(n) + '.pkl'
            f = open(save_path, 'wb')
            try:
                cPickle.dump(gsn_params, f, protocol=cPickle.HIGHEST_PROTOCOL)
            finally:
                f.close()

        def save_params(params):
            """Snapshot the current value of every parameter.

            params -- iterable of shared variables exposing ``get_value``.
            Returns a list of the raw parameter values, in the same order.
            """
            snapshot = []
            for param in params:
                snapshot.append(param.get_value(borrow=True))
            return snapshot

        def restore_params(params, values):
            """Write saved values back into the shared parameter variables.

            params -- list of shared variables exposing ``set_value``.
            values -- values previously captured by ``save_params``, aligned
                index-by-index with ``params``.
            """
            for idx, param in enumerate(params):
                param.set_value(values[idx])

        #########################################
        # If we are using Hessian-free training #
        #########################################
        if self.hessian_free:
            pass
#         gradient_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=5000)
#         cg_dataset = hf_sequence_dataset([train_X.get_value()], batch_size=None, number_batches=1000)
#         valid_dataset = hf_sequence_dataset([valid_X.get_value()], batch_size=None, number_batches=1000)
#
#         s = x_samples
#         costs = [cost, show_cost]
#         hf_optimizer(params, [Xs], s, costs, u, ua).train(gradient_dataset, cg_dataset, initial_lambda=1.0, preconditioner=True, validation=valid_dataset)

################################
# If we are using SGD training #
################################
        else:
            log.maybeLog(self.logger,
                         "\n-----------TRAINING RNN-GSN------------\n")
            # TRAINING
            STOP = False
            counter = 0
            if not continue_training:
                self.learning_rate.set_value(
                    self.init_learn_rate)  # learning rate
            times = []
            best_cost = float('inf')
            best_params = None
            patience = 0

            log.maybeLog(
                self.logger,
                ['train X size:', str(train_X.shape.eval())])
            if valid_X is not None:
                log.maybeLog(self.logger,
                             ['valid X size:',
                              str(valid_X.shape.eval())])
            if test_X is not None:
                log.maybeLog(
                    self.logger,
                    ['test X size:', str(test_X.shape.eval())])

            if self.vis_init:
                self.bias_list[0].set_value(
                    logit(
                        numpy.clip(0.9, 0.001,
                                   train_X.get_value().mean(axis=0))))

            while not STOP:
                counter += 1
                t = time.time()
                log.maybeAppend(self.logger, [counter, '\t'])

                if is_artificial:
                    data.sequence_mnist_data(train_X, train_Y, valid_X,
                                             valid_Y, test_X, test_Y,
                                             artificial_sequence, rng)

                #train
                train_costs = data.apply_cost_function_to_dataset(
                    self.f_learn, train_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Train:', trunc(train_costs), '\t'])

                #valid
                valid_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, valid_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Valid:', trunc(valid_costs), '\t'])

                #test
                test_costs = data.apply_cost_function_to_dataset(
                    self.f_cost, test_X, self.batch_size)
                # record it
                log.maybeAppend(self.logger,
                                ['Test:', trunc(test_costs), '\t'])

                #check for early stopping
                cost = numpy.sum(valid_costs)
                if cost < best_cost * self.early_stop_threshold:
                    patience = 0
                    best_cost = cost
                    # save the parameters that made it the best
                    best_params = save_params(self.params)
                else:
                    patience += 1

                if counter >= self.n_epoch or patience >= self.early_stop_length:
                    STOP = True
                    if best_params is not None:
                        restore_params(self.params, best_params)
                    save_params_to_file('all', counter, self.params)

                timing = time.time() - t
                times.append(timing)

                log.maybeAppend(
                    self.logger,
                    'time: ' + make_time_units_string(timing) + '\t')

                log.maybeLog(
                    self.logger, 'remaining: ' + make_time_units_string(
                        (self.n_epoch - counter) * numpy.mean(times)))

                if (counter % self.save_frequency) == 0 or STOP is True:
                    n_examples = 100
                    nums = test_X.get_value(borrow=True)[range(n_examples)]
                    noisy_nums = self.f_noise(
                        test_X.get_value(borrow=True)[range(n_examples)])
                    reconstructions = []
                    for i in xrange(0, len(noisy_nums)):
                        recon = self.f_recon(
                            noisy_nums[max(0, (i + 1) - self.batch_size):i +
                                       1])
                        reconstructions.append(recon)
                    reconstructed = numpy.array(reconstructions)

                    # Concatenate stuff
                    stacked = numpy.vstack([
                        numpy.vstack([
                            nums[i * 10:(i + 1) * 10],
                            noisy_nums[i * 10:(i + 1) * 10],
                            reconstructed[i * 10:(i + 1) * 10]
                        ]) for i in range(10)
                    ])
                    number_reconstruction = PIL.Image.fromarray(
                        tile_raster_images(
                            stacked, (self.root_N_input, self.root_N_input),
                            (10, 30)))

                    number_reconstruction.save(
                        self.outdir + 'rnngsn_number_reconstruction_epoch_' +
                        str(counter) + '.png')

                    #save params
                    save_params_to_file('all', counter, self.params)

                # ANNEAL!
                new_lr = self.learning_rate.get_value() * self.annealing
                self.learning_rate.set_value(new_lr)