Example #1
    def run(self):
        print(
            "Starting at %s the execution %s" %
            (time.strftime("%Y-%m-%d %H:%M"), self.options['execution-name']))
        begin_time = time.clock()

        self.datasets = self.process_and_load_dataset(
            self.options['dataset-config-file'], self.options['class-map'])

        print("... running experiments")
        for config in sorted(self.configs.keys()):
            for replication in range(self.options['replications']):
                begin_config_time = time.clock()
                experiment_config_name = "{0};{1};rep-{2}.{3}".format(
                    self.options['execution-name'], config, replication + 1,
                    self.options['replications'])

                print("...... config {0}".format(experiment_config_name))
                """
				pretrained_pickle_filename = self.params_pretrained_dir + \
												experiment_config_name + ".pickle"
				trained_pickle_filename    = self.params_trained_dir + \
												experiment_config_name + ".pickle"
				"""

                result = SdA.run_SdA(
                    datasets=self.datasets,
                    numpy_rng=self.options['numpy_rng'],
                    n_ins=self.options['input-size'],
                    n_outs=len(self.options['class-map']),
                    batch_size=self.options['batch-size'],
                    finetune_lr=self.options['learning-finetune'],
                    pretrain_lr=self.options['learning-pretrain'],
                    pretraining_epochs=self.options['pretrain-epochs'],
                    training_epochs=self.options['train-epochs'],
                    hidden_layers_sizes=self.configs[config].hidden_layers_sizes,
                    corruption_levels=self.configs[config].corruption_levels,
                    activation=self.options['activation_obj'],
                    autoencoder_in_activation=self.options['autoencoder-in-activation_obj'],
                    autoencoder_reconstruction_activation=self.options['autoencoder-reconstruction-activation_obj'],
                    save_pretrain_new_representation=self.options['save_pretrain_new_representation'],
                    save_train_new_representation=self.options['save_train_new_representation'],
                    save_valid_new_representation=self.options['save_valid_new_representation'],
                    save_test_new_representation=self.options['save_test_new_representation'])

                self.process_result_experiment_config(experiment_config_name,
                                                      self.configs[config],
                                                      result)
                print("...... config ran for %.2fm" %
                      ((time.clock() - begin_config_time) / 60.))
        print("==> Total Execution Time: %.2fm\n" %
              ((time.clock() - begin_time) / 60.))
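
The loop above reads hidden_layers_sizes and corruption_levels off each entry of self.configs. For context, here is a minimal sketch of a per-experiment config object with that shape; the class name and the sample values are assumptions, not taken from the original code.

from collections import namedtuple

# Hypothetical per-experiment configuration exposing the two attributes
# that run() reads: hidden_layers_sizes and corruption_levels.
ExperimentConfig = namedtuple('ExperimentConfig',
                              ['hidden_layers_sizes', 'corruption_levels'])

configs = {
    'sda-3x1000': ExperimentConfig(hidden_layers_sizes=[1000, 1000, 1000],
                                   corruption_levels=[0.1, 0.2, 0.3]),
    'sda-2x500': ExperimentConfig(hidden_layers_sizes=[500, 500],
                                  corruption_levels=[0.1, 0.1]),
}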
Example #2
	def buildFirstLayer(self, loaded):

		numpy_rng = numpy.random.RandomState(89677)

		self.logger.debug('... building the model')
		# construct the stacked denoising autoencoder class
		self.sda = SdA(
			numpy_rng=numpy_rng,
			n_ins=len(self.fuzzyDict),
			hidden_layers_sizes=[len(self.fuzzyDict)] * 2,
			n_outs=int(math.sqrt(len(self.fuzzyDict)))
		)
Example #3
def test_SdA():
    SdA.test_SdA(pretraining_epochs=1, training_epochs=1, batch_size=300)
Example #4
def test_SdA(finetune_lr=0.1, pretraining_epochs=1,
             pretrain_lr=0.001, training_epochs=1,
             b_patch_filename='b_10_Training_patches_norm.npy',
             b_groundtruth_filename='b_Training_labels_norm.npy',
             b_valid_filename='b_10_Validation_patches_norm.npy',
             b_validtruth_filename='b_Validation_labels_norm.npy',
             u_patch_filename='u_10_Training_patches_norm.npy',
             u_groundtruth_filename='u_Training_labels_norm.npy',
             u_valid_filename='u_10_Validation_patches_norm.npy',
             u_validtruth_filename='u_Validation_labels_norm.npy',
             batch_size=100, n_ins=605, n_outs=2,
             hidden_layers_sizes=[1000, 1000, 1000],
             prefix='11_11_3_G4_', corruption_levels=[0.2, 0.2, 0.2]):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type n_iter: int
    :param n_iter: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path the the pickled dataset

    """
   
    print '###########################'
    print 'Pretraining epochs: ', pretraining_epochs
    print 'Finetuning epochs: ', training_epochs
    print '###########################'
    
    W = []
    b = []
    
    #########################################################
    #########################################################
   
    resumeTraining = False
    
    #@@@@@@@@ Needs to be worked on @@@@@@@@@@@@@@@@@
    # Snippet to resume training if the program crashes halfway through #
    opts, args = getopt.getopt(sys.argv[1:], "rp:")
    for opt, arg in opts:
        if opt == '-r':
            resumeTraining = True   # resume training from the saved model
        elif opt == '-p':
            prefix = arg
            
    flagValue = 1    
    
    if resumeTraining:

        flagFile = open(prefix + 'flag.pkl', 'rb')
        
        try:
            flagValue = cPickle.load(flagFile)
        except:
            pass
        
        savedModel_preTraining = open(prefix + 'pre_training.pkl', 'rb')
        genVariables_preTraining = cPickle.load(savedModel_preTraining)
        layer_number, epochs_done_preTraining, mean_cost, pretrain_lr = genVariables_preTraining
        epoch_flag = 1
        print 'Resuming training from saved pre-training state'
        no_of_layers = len(hidden_layers_sizes) + 1
        
        for i in xrange(no_of_layers):
            try:
                W.append(cPickle.load(savedModel_preTraining))
                b.append(cPickle.load(savedModel_preTraining))
            except:
                W.append(None)
                b.append(None)
                    
        if flagValue == 2:
            epochFlag_fineTuning = 1
            iterFlag = 1
            savedModel_fineTuning = open(prefix + 'fine_tuning.pkl', 'rb')
            hidden_layers_sizes = cPickle.load(savedModel_fineTuning)
            genVariables_fineTuning = cPickle.load(savedModel_fineTuning)
            epochs_done_fineTuning,best_validation_loss,finetune_lr,patience,iters_done = genVariables_fineTuning
    
   
    else:
        
        layer_number, epochs_done, mean_cost, pretrain_lr = [0,0,0,pretrain_lr]
        epoch_flag = 0
        epochFlag_fineTuning = 0
        iterFlag = 0
        W = None
        b = None
                
    ##############################################################
    ##############################################################

                    
    datasets = load_data(b_patch_filename,b_groundtruth_filename,b_valid_filename,b_validtruth_filename)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    
#    print 'W: ', W
#    print 'b: ', b
    
    ################################################################
    ################CONSTRUCTION OF SdA CLASS#######################
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=n_ins,
        hidden_layers_sizes=hidden_layers_sizes,
        n_outs=n_outs, W = W, b=b)
        
    print 'SdA constructed'
    ################################################################
    ################################################################
    if flagValue == 1:
    ################################################################
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    
        flag = open(prefix+'flag.pkl','wb')
        cPickle.dump(1,flag, protocol = cPickle.HIGHEST_PROTOCOL)
        flag.close()
            
        print '... getting the pretraining functions'
        pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,batch_size=batch_size)
        print 'Length of pretraining function: ', len(pretraining_fns)

        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        log_pretrain_cost = []
        #corruption_levels = [.001, .001, .001]
        for i in xrange(sda.n_layers):
        
            if i < layer_number:
                i = layer_number
                #print i
                # go through pretraining epochs
        
            for epoch in xrange(pretraining_epochs):
                ##########################################            
                if epoch_flag == 1 and epoch < epochs_done_preTraining:
                    epoch = epochs_done_preTraining
                    epoch_flag = 0
                    ##########################################
                    # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    #sprint batch_index
                    c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)
                log_pretrain_cost.append(numpy.mean(c))

            

                save_valid = open(prefix+'pre_training.pkl', 'wb')
                #print 'YO! i=',i,' epoch=',epoch,' cost=',numpy.mean(c) 
                #print pretrain_lr
                genVariables = [i, epoch, numpy.mean(c), pretrain_lr]
                cPickle.dump(genVariables,save_valid,protocol = cPickle.HIGHEST_PROTOCOL)
                for j in xrange(len(sda.params)):
                    cPickle.dump(sda.params[j].get_value(borrow=True), save_valid, protocol = cPickle.HIGHEST_PROTOCOL)
                save_valid.close()
        
        
        pretrain_log_file = open(prefix + 'log_pretrain_cost.txt', "a")
        for l in log_pretrain_cost:
            pretrain_log_file.write("%f\n"%l)
        pretrain_log_file.close()



        #print sda.params[0]
        end_time = time.clock()

        print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
                          
                          
                          
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    
    
    datasets = load_data(u_patch_filename,u_groundtruth_filename,u_valid_filename,u_validtruth_filename)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print '... finetunning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    flag = open(prefix+'flag.pkl','wb')
    cPickle.dump(2,flag, protocol = cPickle.HIGHEST_PROTOCOL)
    flag.close()
    
    log_valid_cost=[]

    while (epoch < training_epochs) and (not done_looping):
        
        if epochFlag_fineTuning == 1 and epoch < epochs_done_fineTuning:
            epoch = epochs_done_fineTuning
            epochFlag_fineTuning = 0
            
        epoch = epoch + 1
        
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            
            if iterFlag == 1 and iter < iters_done:
                iter = iters_done
                iterFlag = 0
                
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                log_valid_cost.append(this_validation_loss)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    
                    
                    print 'Saving the best validation network'
                    genVariables = [epoch,best_validation_loss,finetune_lr,patience,iter]
                    save_file = open(prefix+'fine_tuning.pkl','wb')
                    cPickle.dump(hidden_layers_sizes, save_file)
                    cPickle.dump(genVariables, save_file)
                    for j in xrange(len(sda.params)):
                        cPickle.dump(sda.params[j].get_value(borrow=True), save_file, protocol = cPickle.HIGHEST_PROTOCOL)
                    save_file.close()
                    valid_file = open('log_valid_cost.txt', "a")
                    for l in log_valid_cost:
                        valid_file.write("%f\n" % l)
                    valid_file.close()
                    log_valid_cost = []
                    
                
                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

                else:
                    print 'validation loss not decreasing, hence reducing lr'
                    finetune_lr = 0.8 * finetune_lr


            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
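
load_data is not part of this listing; the calls to train_set_x.get_value(borrow=True) above imply it returns Theano shared variables. Below is a minimal sketch of such a loader for the .npy patch/label pairs, written under that assumption; the helper names and the reuse of the validation split as a stand-in test split are assumptions, not the original implementation.

import numpy
import theano
import theano.tensor as T


def load_data(patch_file, label_file, valid_patch_file, valid_label_file):
    """Hypothetical loader: wraps .npy arrays in Theano shared variables."""
    def shared_dataset(data_x, data_y, borrow=True):
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        # labels are used as indices, so expose them as int32
        return shared_x, T.cast(shared_y, 'int32')

    train = shared_dataset(numpy.load(patch_file), numpy.load(label_file))
    valid = shared_dataset(numpy.load(valid_patch_file), numpy.load(valid_label_file))
    # the callers also index datasets[2]; here the validation split is
    # simply reused as a stand-in test set
    return [train, valid, valid]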
Example #5
def test_SdA():
    SdA.test_SdA(pretraining_epochs=1, training_epochs=1, batch_size=300)
Example #6
filename = data_dir + "GM12878_200bp_Data_3Cl_l2normalized_TestSet.txt"
test_set_x_org = numpy.loadtxt(filename, delimiter='\t', dtype='float32')
filename = data_dir + "GM12878_200bp_Classes_3Cl_l2normalized_TestSet.txt"
test_set_y_org = numpy.loadtxt(filename, delimiter='\t', dtype=object)
prev, test_set_y_org = cl.change_class_labels(test_set_y_org)

filename = data_dir + "GM12878_Features_Unique.txt"
features = numpy.loadtxt(filename, delimiter='\t', dtype=object)

rng=numpy.random.RandomState(1000)

# train
classifier, training_time = SdA.train_model(
    train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org,
    valid_set_x_org=valid_set_x_org, valid_set_y_org=valid_set_y_org,
    pretrain_lr=0.1, finetune_lr=0.1, alpha=0.01,
    lambda_reg=0.00005, alpha_reg=0.5,
    n_hidden=[64, 64, 32], corruption_levels=[0.01, 0.01, 0.01],
    pretraining_epochs=5, training_epochs=1000,
    batch_size=200, rng=rng)
                        
# test
test_set_y_pred, test_set_y_pred_prob, test_time = SdA.test_model(classifier, test_set_x_org, batch_size=200)
print test_set_y_pred[0:20]
print test_set_y_pred_prob[0:20]
print test_time

# evaluate classification performance
perf, conf_mat = cl.perform(test_set_y_org, test_set_y_pred, numpy.unique(train_set_y_org))
print perf
print conf_mat
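
cl.perform is not shown in this listing; judging by how it is used here, it returns a performance summary and a confusion matrix. Below is a minimal numpy sketch of that kind of evaluation; the function name and the choice of accuracy as the summary metric are assumptions, not the original code.

import numpy


def evaluate_predictions(y_true, y_pred, classes):
    """Hypothetical stand-in for cl.perform: accuracy plus a confusion matrix."""
    classes = list(classes)
    conf_mat = numpy.zeros((len(classes), len(classes)), dtype=int)
    for t, p in zip(y_true, y_pred):
        conf_mat[classes.index(t), classes.index(p)] += 1
    accuracy = numpy.trace(conf_mat) / float(conf_mat.sum())
    return accuracy, conf_mat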
Example #7
def test_SdA():
    t0 = time.time()
    SdA.test_SdA(pretraining_epochs=2, training_epochs=3, batch_size=300)
    print >> sys.stderr, "test_SdA took %.3fs expected 971s in our buildbot" % (
        time.time() - t0)
Example #8
def test_SdA():
    t0 = time.time()
    SdA.test_SdA(pretraining_epochs=2, training_epochs=3, batch_size=300)
    print >> sys.stderr, "test_SdA took %.3fs expected 971s in our buildbot" % (time.time() - t0)
Example #9
def test_SdA(finetune_lr=0.1, pretraining_epochs=1,
             pretrain_lr=0.001, training_epochs=1,
             b_patch_filename='b_Training_patches_norm.npy',
             b_groundtruth_filename='b_Training_labels_norm.npy',
             b_valid_filename='b_Validation_patches_norm.npy',
             b_validtruth_filename='b_Validation_labels_norm.npy',
             u_patch_filename='u_Training_patches_norm.npy',
             u_groundtruth_filename='u_Training_labels_norm.npy',
             u_valid_filename='u_Validation_patches_norm.npy',
             u_validtruth_filename='u_Validation_labels_norm.npy',
             batch_size=100, n_ins=605, n_outs=5,
             hidden_layers_sizes=[1000, 1000, 1000],
             prefix='11_11_3_G4_', corruption_levels=[0.2, 0.2, 0.2],
             resumeTraining=False, StopAtPretraining=False):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type n_iter: int
    :param n_iter: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path the the pickled dataset

    """
   
    print '###########################'
    print 'Pretraining epochs: ', pretraining_epochs
    print 'Finetuning epochs: ', training_epochs
    print '###########################'
    
    W = []
    b = []
    
    #########################################################
    #########################################################
    
    #@@@@@@@@ Needs to be worked on @@@@@@@@@@@@@@@@@
    # Snippet to resume training if the program crashes halfway through #
    opts, args = getopt.getopt(sys.argv[1:], "rp:")
    for opt, arg in opts:
        if opt == '-r':
            resumeTraining = True   # resume training from the saved model
        elif opt == '-p':
            prefix = arg
            
    flag = 0
    
    if resumeTraining:

        flag = 1

        path = '/media/brain/1A34723D34721BC7/BRATS/codes/results/test_255_9x9x3/9x9x3pre_training.pkl'

        savedModel_preTraining = open(path, 'rb')
        genVariables_preTraining = cPickle.load(savedModel_preTraining)
        layer_number, epochs_done_preTraining, mean_cost, pretrain_lr = genVariables_preTraining
        epoch_flag = 1
        print 'Resuming training from saved pre-training state'
        no_of_layers = len(hidden_layers_sizes) + 1
        
        for i in xrange(no_of_layers):
            W.append(cPickle.load(savedModel_preTraining))
            b.append(cPickle.load(savedModel_preTraining))    
   
              
    ##############################################################
    ##############################################################

    if flag == 0:
                
        datasets = load_data(b_patch_filename,b_groundtruth_filename,b_valid_filename,b_validtruth_filename)
    
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        
    
        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
    
        # numpy random generator
        # start-snippet-3
        numpy_rng = numpy.random.RandomState(89677)
        print '... building the model'
        
    #    print 'W: ', W
    #    print 'b: ', b
        
        ################################################################
        ################CONSTRUCTION OF SdA CLASS#######################
        sda = SdA(
            numpy_rng=numpy_rng,
            n_ins=n_ins,
            hidden_layers_sizes=hidden_layers_sizes,
            n_outs=n_outs)
            
        print 'SdA constructed'
        ################################################################
        ################################################################
        
        ################################################################
        # end-snippet-3 start-snippet-4
        #########################
        # PRETRAINING THE MODEL #
        #########################
    
        flag = open(prefix+'flag.pkl','wb')
        cPickle.dump(1,flag, protocol = cPickle.HIGHEST_PROTOCOL)
        flag.close()
            
        print '... getting the pretraining functions'
        pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,batch_size=batch_size)
        print 'Length of pretraining function: ', len(pretraining_fns)

        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        log_pretrain_cost = []

        

        shapeimg = [(33,44),(50,60), (25,40), (50,10)]

        #corruption_levels = [.001, .001, .001]
        for i in xrange(sda.n_layers):
            
            # if i < layer_number:
            #     i = layer_number
                #print i
                # go through pretraining epochs
            best_cost = numpy.inf
            adapt_counter = 0
            learning_rate = pretrain_lr

            # the same number of pre-training epochs is used for every layer
            num_of_epochs = pretraining_epochs
            for epoch in xrange(num_of_epochs):


                ##########################################            
                # if epoch_flag is 1 and epoch < epochs_done_preTraining:
                #     epoch = epochs_done_preTraining
                #     epoch_flag = 0
                    ##########################################
                    # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    #sprint batch_index
                    c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=learning_rate))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)
                current_cost = numpy.mean(c)
                log_pretrain_cost.append(numpy.mean(c))
                if current_cost < best_cost:
                    best_cost = current_cost
                if current_cost > best_cost:
                    adapt_counter = adapt_counter + 1
                # if adapt_counter>25:
                itr = epoch + 1
                learning_rate = learning_rate / (1 + itr * 5e-05)
                # print 'Reducing learning rate', learning_rate
                adapt_counter = 0

            
                previous_cost = current_cost
                

                if (epoch % 50 == 0 and epoch != 0) or epoch == 399 or epoch == 199:
                    image = Image.fromarray(tile_raster_images(
                        X=sda.params[2*i].get_value(borrow=True).T,
                        img_shape=shapeimg[i], tile_shape=(40,hidden_layers_sizes[i]/20),
                        tile_spacing=(1, 1)))
                    image.save(prefix+str(i) + '_' + str(epoch)+'.png')
        

            save_valid = open(prefix+'pre_training.pkl', 'wb')
            genVariables = ['gen']
            cPickle.dump(genVariables,save_valid,protocol = cPickle.HIGHEST_PROTOCOL)
            for j in xrange(len(sda.params)):
                cPickle.dump(sda.params[j].get_value(borrow=True), save_valid, protocol = cPickle.HIGHEST_PROTOCOL)
            save_valid.close()


        pretrain_log_file = open(prefix + 'log_pretrain_cost.txt', "a")
        for l in log_pretrain_cost:
            pretrain_log_file.write("%f\n"%l)
        pretrain_log_file.close()
        
        # for k in [0,2,4,6]:
        #     print k
        #     image = Image.fromarray(tile_raster_images(
        #        X=sda.params[k].get_value(borrow=True).T,
        #        img_shape=shapeimg[k/2], tile_shape=(40,hidden_layers_sizes[k/2]/20),
        #        tile_spacing=(1, 1)))
        #     image.save(prefix+str(k/2)+'.png')


        #print sda.params[0]
        end_time = time.clock()

        print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
                          
                     
    print '###################'
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model   


    if flag == 1:
    
        datasets = load_data(u_patch_filename,u_groundtruth_filename,u_valid_filename,u_validtruth_filename)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        
        n_train_batches /= batch_size
        
        numpy_rng = numpy.random.RandomState(89677)
        print '... building the model'
        
    #    print 'W: ', W
    #    print 'b: ', b
        
        ################################################################
        ################CONSTRUCTION OF SdA CLASS#######################
        sda = SdA(
            numpy_rng=numpy_rng,
            n_ins=n_ins,
            hidden_layers_sizes=hidden_layers_sizes,
            n_outs=n_outs, W = W, b = b)
        
        print 'SdA constructed'
        
    if not StopAtPretraining:
        
        print '... getting the finetuning functions'
        train_fn, validate_model, test_model = sda.build_finetune_functions(datasets=datasets,batch_size=batch_size)
        print batch_size

        print '... finetunning the model'
        ########################confusion matrix Block 1##########################    
        prediction = sda.get_prediction(train_set_x,batch_size)
        y_truth = np.load(u_groundtruth_filename)
        y_truth = y_truth[0:(len(y_truth)-(len(y_truth)%batch_size))]
        cnf_freq = 1
        ##################################################################  
        # early-stopping parameters
        patience = 40 * n_train_batches  # look as this many examples regardless
        patience_increase = 10.  # wait this much longer when a new best is
                                # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                      # go through this many
                                      # minibatche before checking the network
                                      # on the validation set; in this case we
                                      # check every epoch

        best_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()

        finetune_lr_initial = finetune_lr

        done_looping = False
        epoch = 0
        flag = open(prefix+'flag.pkl','wb')
        cPickle.dump(2,flag, protocol = cPickle.HIGHEST_PROTOCOL)
        flag.close()
        
        log_valid_cost=[]
        adapt_counter = 0
        while (epoch < training_epochs) and (not done_looping):
            
    #        if epochFlag_fineTuning is 1 and epoch < epochs_done_fineTuning:
    #            epoch = epochs_done_fineTuning
    #            epochFlag_fineTuning = 0
                
            epoch = epoch + 1
            ################################confusion matrix block 2#################
            if epoch%cnf_freq==0:
                pred_c = np.array([])
                for minibatch_index in xrange(n_train_batches):
                    pred_c = np.concatenate([pred_c,np.array(prediction(minibatch_index))])
            
                cnf_matrix = confusion_matrix(y_truth, pred_c)
                print cnf_matrix
            ##########################################################################
            c = []
            for minibatch_index in xrange(n_train_batches):
                minibatch_avg_cost = train_fn(index=minibatch_index,lr=finetune_lr)
                c.append(minibatch_avg_cost)
    #            if iterFlag is 1 and iter < iters_done:
    #                iter = iters_done
    #                iterFlag = 0
                        
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    validation_losses = validate_model()
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))
                    log_valid_cost.append(this_validation_loss)

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        #improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter
                        
                        
                        print 'Saving the best validation network'
                        genVariables = [epoch,best_validation_loss,finetune_lr,patience,iter]
                        save_file = open(prefix+'fine_tuning.pkl','wb')
                        cPickle.dump(hidden_layers_sizes, save_file)
                        cPickle.dump(genVariables, save_file)
                        for j in xrange(len(sda.params)):
                            cPickle.dump(sda.params[j].get_value(borrow=True), save_file, protocol = cPickle.HIGHEST_PROTOCOL)
                        save_file.close()
                        
                        
                    
                        # test it on the test set
                        test_losses = test_model()
                        test_score = numpy.mean(test_losses)
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
                               
                        print 'Training cost: ', np.mean(c)
                    else:
                        adapt_counter = adapt_counter+1
                    if adapt_counter>20:
                        adapt_counter=0
                        finetune_lr = 0.8*finetune_lr
                        print 'Reduced learning rate : ', finetune_lr

                    else:
                        finetune_lr = finetune_lr_initial / (1 + epoch * 5e-05)
                        
                #if patience <= iter:
                #    done_looping = True
                #    break

        end_time = time.clock()
        print(
            (
                'Optimization complete with best validation score of %f %%, '
                'on iteration %i, '
                'with test performance %f %%'
            )
            % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
        )
        print >> sys.stderr, ('The training code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

        valid_file = open(prefix+'log_valid_error.txt', 'w')
        valid_file.write('Best validation error: '+str(best_validation_loss*100))
        valid_file.write('\nBest test error: '+str(test_score*100))
        valid_file.close()
        finetune_log_file = open(prefix + 'log_finetune_cost.txt', "a")
        for l in log_valid_cost:
            finetune_log_file.write("%f\n"%l)
        finetune_log_file.close()
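
The finetuning loops in Examples #4 and #9 both follow the tutorial-style patience scheme. Here is a condensed sketch of just that bookkeeping, separated from the Theano specifics; train_one_minibatch and validate are placeholder callables, not functions from the original code.

import numpy


def finetune_with_patience(train_one_minibatch, validate, n_train_batches,
                           training_epochs, patience, patience_increase=2.0,
                           improvement_threshold=0.995):
    """Early stopping: give up once `patience` iterations pass without a
    sufficiently large improvement of the validation loss."""
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    done_looping = False
    epoch = 0
    while epoch < training_epochs and not done_looping:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            train_one_minibatch(minibatch_index)
            it = (epoch - 1) * n_train_batches + minibatch_index
            if (it + 1) % validation_frequency == 0:
                this_loss = numpy.mean(validate())
                if this_loss < best_validation_loss:
                    # a significant improvement buys more patience
                    if this_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, it * patience_increase)
                    best_validation_loss = this_loss
            if patience <= it:
                done_looping = True
                break
    return best_validation_loss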
Example #10
prev, test_set_y_org = cl.change_class_labels(test_set_y_org)

filename = data_dir + "GM12878_Features_Unique.txt"
features = numpy.loadtxt(filename, delimiter='\t', dtype=object)

rng = numpy.random.RandomState(1000)

# train
classifier, training_time = SdA.train_model(
    train_set_x_org=train_set_x_org,
    train_set_y_org=train_set_y_org,
    valid_set_x_org=valid_set_x_org,
    valid_set_y_org=valid_set_y_org,
    pretrain_lr=0.1,
    finetune_lr=0.1,
    alpha=0.01,
    lambda_reg=0.00005,
    alpha_reg=0.5,
    n_hidden=[64, 64, 32],
    corruption_levels=[0.01, 0.01, 0.01],
    pretraining_epochs=5,
    training_epochs=1000,
    batch_size=200,
    rng=rng)

# test
test_set_y_pred, test_set_y_pred_prob, test_time = SdA.test_model(
    classifier, test_set_x_org, batch_size=200)
print test_set_y_pred[0:20]
print test_set_y_pred_prob[0:20]
print test_time
Example #11
def test_SdA():
    SdA.test_SdA(pretraining_epochs=2, training_epochs=3)
Example #12
            pretrain_lr=0.1
            finetune_lr=0.1
            alpha=0.1
            lambda_reg=0.00005
            alpha_reg=0.5
            n_hidden=[256,128,64]
            corruption_levels=[0.01,0.01,0.01]
            pretraining_epochs=5
            training_epochs=1000
            batch_size=100

            # train, and extract features from training set
            classifier,training_time=SdA.train_model(train_set_x_org=train_set_x_org, train_set_y_org=train_set_y_org, 
                                                     valid_set_x_org=valid_set_x_org, valid_set_y_org=valid_set_y_org, 
                                                     pretrain_lr=pretrain_lr,finetune_lr=finetune_lr, alpha=alpha, 
                                                     lambda_reg=lambda_reg, alpha_reg=alpha_reg, 
                                                     n_hidden=n_hidden, corruption_levels=corruption_levels,
                                                     pretraining_epochs=pretraining_epochs, training_epochs=training_epochs,
                                                     batch_size=batch_size, rng=rng)
            
            # test the classifier
            test_set_y_pred, test_set_y_pred_prob, test_time = SdA.test_model(classifier, test_set_x_org, batch_size=200)

            # evaluate classification performance
            perf_i, conf_mat_i = cl.perform(test_set_y_org, test_set_y_pred, numpy.unique(train_set_y_org))
            print perf_i
            print conf_mat_i
            if i==0:
                perf=perf_i
                conf_mat=conf_mat_i
                training_times=training_time
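
The snippet above is cut off after the i == 0 branch, which apparently starts accumulating per-fold results. A small sketch of one way such per-fold outputs could be combined; the function and its behaviour are assumptions, not the original continuation.

import numpy


def average_fold_results(perfs, conf_mats):
    """Average per-fold performance vectors and sum per-fold confusion matrices."""
    perf_mean = numpy.mean(numpy.asarray(perfs, dtype=float), axis=0)
    conf_mat_total = numpy.sum(numpy.asarray(conf_mats, dtype=float), axis=0)
    return perf_mean, conf_mat_total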
Example #13
def test_SdA(finetune_lr=0.1, pretraining_epochs=1,
             pretrain_lr=0.001, training_epochs=1,
             b_patch_filename='b_Training_patches_norm.npy',
             b_groundtruth_filename='b_Training_labels_norm.npy',
             b_valid_filename='b_Validation_patches_norm.npy',
             b_validtruth_filename='b_Validation_labels_norm.npy',
             u_patch_filename='u_Training_patches_norm.npy',
             u_groundtruth_filename='u_Training_labels_norm.npy',
             u_valid_filename='u_Validation_patches_norm.npy',
             u_validtruth_filename='u_Validation_labels_norm.npy',
             batch_size=100, n_ins=605, n_outs=5,
             hidden_layers_sizes=[1000, 1000, 1000],
             prefix='11_11_3_G4_', corruption_levels=[0.2, 0.2, 0.2],
             resumeTraining=False, StopAtPretraining=False):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type n_iter: int
    :param n_iter: maximal number of iterations to run the optimizer

    :type dataset: string
    :param dataset: path the the pickled dataset

    """
   
    print '###########################'
    print 'Pretraining epochs: ', pretraining_epochs
    print 'Finetuning epochs: ', training_epochs
    print '###########################'
    
    W = []
    b = []
    
    #########################################################
    #########################################################
    
    #@@@@@@@@ Needs to be worked on @@@@@@@@@@@@@@@@@
    # Snippet to resume training if the program crashes halfway through #
    opts, args = getopt.getopt(sys.argv[1:], "rp:")
    for opt, arg in opts:
        if opt == '-r':
            resumeTraining = True   # resume training from the saved model
        elif opt == '-p':
            prefix = arg
            
    flag = 0
    
    if resumeTraining:

        flag = 1

        path = '/media/brain/1A34723D34721BC7/BRATS/codes/results/test_255_9x9x3/9x9x3pre_training.pkl'

        savedModel_preTraining = open(path, 'rb')
        genVariables_preTraining = cPickle.load(savedModel_preTraining)
        layer_number, epochs_done_preTraining, mean_cost, pretrain_lr = genVariables_preTraining
        epoch_flag = 1
        print 'Resuming training from saved pre-training state'
        no_of_layers = len(hidden_layers_sizes) + 1
        
        for i in xrange(no_of_layers):
            W.append(cPickle.load(savedModel_preTraining))
            b.append(cPickle.load(savedModel_preTraining))    
   
              
    ##############################################################
    ##############################################################

    if flag == 0:
                
        datasets = load_data(b_patch_filename,b_groundtruth_filename,b_valid_filename,b_validtruth_filename)
    
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        
    
        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
    
        # numpy random generator
        # start-snippet-3
        numpy_rng = numpy.random.RandomState(89677)
        print '... building the model'
        
    #    print 'W: ', W
    #    print 'b: ', b
        
        ################################################################
        ################CONSTRUCTION OF SdA CLASS#######################
        sda = SdA(
            numpy_rng=numpy_rng,
            n_ins=n_ins,
            hidden_layers_sizes=hidden_layers_sizes,
            n_outs=n_outs)
            
        print 'SdA constructed'
        ################################################################
        ################################################################
        
        ################################################################
        # end-snippet-3 start-snippet-4
        #########################
        # PRETRAINING THE MODEL #
        #########################
    
        flag = open(prefix+'flag.pkl','wb')
        cPickle.dump(1,flag, protocol = cPickle.HIGHEST_PROTOCOL)
        flag.close()
            
        print '... getting the pretraining functions'
        pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,batch_size=batch_size)
        print 'Length of pretraining function: ', len(pretraining_fns)

        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        log_pretrain_cost = []

        

        shapeimg = [(42,42),(50,50), (25,40), (50,10)]

        # shapeimg = [(33,44),(32,25), (20,20), (20,10)]
        #corruption_levels = [.001, .001, .001]
        for i in xrange(sda.n_layers):
            
            # if i < layer_number:
            #     i = layer_number
                #print i
                # go through pretraining epochs
            best_cost = numpy.inf
            adapt_counter = 0
            learning_rate = pretrain_lr
            learning_rate_0=pretrain_lr

            # the same number of pre-training epochs is used for every layer
            num_of_epochs = pretraining_epochs
            for epoch in xrange(num_of_epochs):


                ##########################################            
                # if epoch_flag is 1 and epoch < epochs_done_preTraining:
                #     epoch = epochs_done_preTraining
                #     epoch_flag = 0
                    ##########################################
                    # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    #sprint batch_index
                    c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=learning_rate))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)
                current_cost = numpy.mean(c)
                log_pretrain_cost.append(numpy.mean(c))
                if current_cost < best_cost:
                    best_cost = current_cost
                if current_cost > best_cost:
                    adapt_counter = adapt_counter + 1
                # if adapt_counter>25:
                itr = epoch + 1
                learning_rate = learning_rate_0 / (1 + itr * 0.005)  # TODO: tune this constant to anneal faster
                print 'Reducing learning rate', learning_rate
                # learning_rate=learning_rate_0
                adapt_counter = 0
                if itr%5 ==0:
                    print 'current learning_rate:',learning_rate

            
                previous_cost = current_cost
                

                if (epoch % 50 == 0 and epoch != 0) or epoch == 399 or epoch == 199:
                    image = Image.fromarray(tile_raster_images(
                        X=sda.params[2*i].get_value(borrow=True).T,
                        img_shape=shapeimg[i], tile_shape=(40,hidden_layers_sizes[i]/20),
                        tile_spacing=(1, 1)))
                    image.save(prefix+str(i) + '_' + str(epoch)+'.png')
        

            save_valid = open(prefix+'pre_training.pkl', 'wb')
            genVariables = ['gen']
            cPickle.dump(genVariables,save_valid,protocol = cPickle.HIGHEST_PROTOCOL)
            for j in xrange(len(sda.params)):
                cPickle.dump(sda.params[j].get_value(borrow=True), save_valid, protocol = cPickle.HIGHEST_PROTOCOL)
            save_valid.close()


        pretrain_log_file = open(prefix + 'log_pretrain_cost.txt', "a")
        for l in log_pretrain_cost:
            pretrain_log_file.write("%f\n"%l)
        pretrain_log_file.close()
        
        # for k in [0,2,4,6]:
        #     print k
        #     image = Image.fromarray(tile_raster_images(
        #        X=sda.params[k].get_value(borrow=True).T,
        #        img_shape=shapeimg[k/2], tile_shape=(40,hidden_layers_sizes[k/2]/20),
        #        tile_spacing=(1, 1)))
        #     image.save(prefix+str(k/2)+'.png')


        #print sda.params[0]
        end_time = time.clock()

        print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
                          
                     
    print '###################'
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model   


    if flag == 1:
    
        datasets = load_data(u_patch_filename,u_groundtruth_filename,u_valid_filename,u_validtruth_filename)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        
        n_train_batches /= batch_size
        
        numpy_rng = numpy.random.RandomState(89677)
        print '... building the model'
        
    #    print 'W: ', W
    #    print 'b: ', b
        
        ################################################################
        ################CONSTRUCTION OF SdA CLASS#######################
        sda = SdA(
            numpy_rng=numpy_rng,
            n_ins=n_ins,
            hidden_layers_sizes=hidden_layers_sizes,
            n_outs=n_outs, W = W, b = b)
        
        print 'SdA constructed'
        
    if not StopAtPretraining:
        
        print '... getting the finetuning functions'
        train_fn, validate_model, test_model = sda.build_finetune_functions(datasets=datasets,batch_size=batch_size)
        print batch_size

        print '... finetunning the model'
        ########################confusion matrix Block 1##########################    
        prediction = sda.get_prediction(train_set_x,batch_size)
        y_truth = np.load(u_groundtruth_filename)
        y_truth = y_truth[0:(len(y_truth)-(len(y_truth)%batch_size))]
        cnf_freq = 1
        ##################################################################  
        # early-stopping parameters
        patience = 40 * n_train_batches  # look as this many examples regardless
        patience_increase = 10.  # wait this much longer when a new best is
                                # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                      # go through this many
                                      # minibatche before checking the network
                                      # on the validation set; in this case we
                                      # check every epoch

        best_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()

        finetune_lr_initial = finetune_lr

        done_looping = False
        epoch = 0
        flag = open(prefix+'flag.pkl','wb')
        cPickle.dump(2,flag, protocol = cPickle.HIGHEST_PROTOCOL)
        flag.close()
        
        log_valid_cost=[]
        adapt_counter = 0
        while (epoch < training_epochs) and (not done_looping):
            
    #        if epochFlag_fineTuning is 1 and epoch < epochs_done_fineTuning:
    #            epoch = epochs_done_fineTuning
    #            epochFlag_fineTuning = 0
                
            epoch = epoch + 1
            ################################confusion matrix block 2#################
            if epoch%cnf_freq==0:
                pred_c = np.array([])
                for minibatch_index in xrange(n_train_batches):
                    pred_c = np.concatenate([pred_c,np.array(prediction(minibatch_index))])
            
                cnf_matrix = confusion_matrix(y_truth, pred_c)
                print cnf_matrix
            ##########################################################################
            c = []
            for minibatch_index in xrange(n_train_batches):
                minibatch_avg_cost = train_fn(index=minibatch_index,lr=finetune_lr)
                c.append(minibatch_avg_cost)
    #            if iterFlag is 1 and iter < iters_done:
    #                iter = iters_done
    #                iterFlag = 0
                        
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    validation_losses = validate_model()
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))
                    log_valid_cost.append(this_validation_loss)

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        #improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter
                        
                        
                        print 'Saving the best validation network'
                        genVariables = [epoch,best_validation_loss,finetune_lr,patience,iter]
                        save_file = open(prefix+'fine_tuning.pkl','wb')
                        cPickle.dump(hidden_layers_sizes, save_file)
                        cPickle.dump(genVariables, save_file)
                        for j in xrange(len(sda.params)):
                            cPickle.dump(sda.params[j].get_value(borrow=True), save_file, protocol = cPickle.HIGHEST_PROTOCOL)
                        save_file.close()
                        
                        
                    
                        # test it on the test set
                        test_losses = test_model()
                        test_score = numpy.mean(test_losses)
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))
                               
                        print 'Training cost: ', np.mean(c)
                    else:
                        adapt_counter = adapt_counter+1
                    if adapt_counter>20:
                        adapt_counter=0
                        finetune_lr = 0.8*finetune_lr
                        print 'Reduced learning rate : ', finetune_lr

                    else:
                        finetune_lr = finetune_lr_initial / (1 + epoch * 5e-05)
                        
                #if patience <= iter:
                #    done_looping = True
                #    break

        end_time = time.clock()
        print(
            (
                'Optimization complete with best validation score of %f %%, '
                'on iteration %i, '
                'with test performance %f %%'
            )
            % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
        )
        print >> sys.stderr, ('The training code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

        valid_file = open(prefix+'log_valid_error.txt', 'w')
        valid_file.write('Best validation error: '+str(best_validation_loss*100))
        valid_file.write('\nBest test error: '+str(test_score*100))
        valid_file.close()
        finetune_log_file = open(prefix + 'log_finetune_cost.txt', "a")
        for l in log_valid_cost:
            finetune_log_file.write("%f\n"%l)
        finetune_log_file.close()
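
Both pre-training loops above anneal the learning rate with an inverse-time schedule, lr_t = lr_0 / (1 + k * t). A tiny standalone sketch of that schedule follows; the decay constant and the printed example are only illustrative.

def inverse_time_decay(lr0, step, decay=0.005):
    """Inverse-time learning-rate schedule: lr0 / (1 + decay * step)."""
    return lr0 / (1.0 + decay * step)


# e.g. the rate used at pre-training epoch `epoch` (1-based), mirroring
# learning_rate = learning_rate_0 / (1 + itr * 0.005) above
lr0 = 0.001
for epoch in range(5):
    print(inverse_time_decay(lr0, epoch + 1))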
Example #14
class Tardy(object):
	
	def __init__(self):
		"""
			Analyse lateness data and make predictions
		"""

		self.Loaded = namedtuple('Loaded', 'columnMap data')
		# self.log = Log("Tardy", None, None)
		self.logger = logging.getLogger('Tardy')

	def loadData(self, filename):
		new_path = os.path.join(
			os.path.split(__file__)[0],
			"..", "..",
			"data",
			filename
		)

		extract = ["fragmentOrderProduct", "earlyByHours"]

		with open(new_path, 'rb') as csvfile:
			reader = csv.reader(csvfile)
			firstLine = reader.next()
			columnMap = dict(zip(iter(firstLine),itertools.count()))

			data = []
			for i in reader:
				row = []
				for j in extract:
					row.append(i[columnMap[j]])
				data.append(row)

		return self.Loaded(columnMap, data)

	def vectoriseData(self, loaded):

		f = FuzzyStringDict()

		# Identify ahead to make vectors same size
		for i in loaded.data:
			f.identify(i[0])

		# Transform
		self.data = [[f.toVector(i[0]),i[1]] for i in loaded.data]

		self.logger.debug("Loaded %d training items" % (len(self.data)))

		self.train_set_x = tensor.as_tensor_variable([i[0] for i in self.data], name='train_x')

		self.fuzzyDict = f
		return self.Loaded(loaded.columnMap, self.data)

	def buildFirstLayer(self, loaded):

		numpy_rng = numpy.random.RandomState(89677)

		self.logger.debug('... building the model')
		# construct the stacked denoising autoencoder class
		self.sda = SdA(
			numpy_rng=numpy_rng,
			n_ins=len(self.fuzzyDict),
			hidden_layers_sizes=[len(self.fuzzyDict)] * 2,
			n_outs=int(math.sqrt(len(self.fuzzyDict)))
		)
		

	def trainFirstLayer(self):

		batch_size = 1
		
		self.logger.debug('... getting the pretraining functions')
		pretraining_fns = self.sda.pretraining_functions(train_set_x=self.train_set_x,
														batch_size=batch_size)
		
		self.logger.debug('... pre-training the model')
		start_time = timeit.default_timer()

		## Pre-train layer-wise
		corruption_levels = [.2] * 6
		pretraining_epochs = 3
		pretrain_lr = 0.1

		# compute number of minibatches for training, validation and testing
		n_train_batches = len(self.data) #self.train_set_x.get_value(borrow=True).shape[0]
		n_train_batches /= batch_size

		for i in xrange(self.sda.n_layers):
			# go through pretraining epochs
			for epoch in xrange(pretraining_epochs):
				# go through the training set
				c = []
				for batch_index in xrange(n_train_batches):
					c.append(pretraining_fns[i](index=batch_index,
							 corruption=corruption_levels[i],
							 lr=pretrain_lr))
				self.logger.debug('Pre-training layer %i with %i batches, epoch %d, cost ' % (i, n_train_batches, epoch))
				self.logger.debug(numpy.mean(c))
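
FuzzyStringDict is not included in this listing. From its use in vectoriseData and buildFirstLayer it needs identify(), toVector() and __len__. Below is a minimal one-hot stand-in with that interface, purely to make the example self-contained; the real class is presumably fuzzier than this.

class FuzzyStringDict(object):
    """Hypothetical stand-in: maps each registered string to a one-hot vector."""

    def __init__(self):
        self.index = {}

    def identify(self, s):
        # register the string so every later vector has the same length
        if s not in self.index:
            self.index[s] = len(self.index)

    def toVector(self, s):
        vec = [0.0] * len(self.index)
        if s in self.index:
            vec[self.index[s]] = 1.0
        return vec

    def __len__(self):
        return len(self.index)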
Example #15
            alpha_reg = 0.5
            n_hidden = [256, 128, 64]
            corruption_levels = [0.01, 0.01, 0.01]
            pretraining_epochs = 5
            training_epochs = 1000
            batch_size = 100

            # train, and extract features from training set
            classifier, training_time = SdA.train_model(
                train_set_x_org=train_set_x_org,
                train_set_y_org=train_set_y_org,
                valid_set_x_org=valid_set_x_org,
                valid_set_y_org=valid_set_y_org,
                pretrain_lr=pretrain_lr,
                finetune_lr=finetune_lr,
                alpha=alpha,
                lambda_reg=lambda_reg,
                alpha_reg=alpha_reg,
                n_hidden=n_hidden,
                corruption_levels=corruption_levels,
                pretraining_epochs=pretraining_epochs,
                training_epochs=training_epochs,
                batch_size=batch_size,
                rng=rng)

            # test the classifier
            test_set_y_pred, test_set_y_pred_prob, test_time = SdA.test_model(
                classifier, test_set_x_org, batch_size=200)

            # evaluate classification performance
            perf_i, conf_mat_i = cl.perform(test_set_y_org, test_set_y_pred,
                                            numpy.unique(train_set_y_org))