Exemplo n.º 1
0
def predict():
    """
    Load the pickled MLP and print its predictions for ten test examples.

    The classifier is unpickled from ``best_mlp_model.pkl`` (relative to the
    current working directory), a Theano function mapping the classifier's
    input to ``logRegressionLayer.y_pred`` is compiled, and that function is
    applied to the first ten rows of the test split returned by
    ``load_data('mnist.pkl.gz')``.  Nothing is returned; results are printed.
    """
    print("Michael Holt\nAssignment 4")
    print("-------------------------------------------------------------------------\n\
                                Part 2 \n\
-------------------------------------------------------------------------\n" )
    print("... loading the saved mlp model")
    # Load the saved model.  The context manager closes the file handle even
    # when unpickling raises.
    # NOTE: unpickling executes code from the file; only load trusted models.
    with open('best_mlp_model.pkl', 'rb') as model_file:
        classifier = pickle.load(model_file)

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.y_pred)

    # We can test it on some examples from the test set
    dataset = 'mnist.pkl.gz'

    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print("Predicted values for the first 10 examples in test set:")
    print(predicted_values)
Exemplo n.º 2
0
def build_confusion_matrix():
    """
    Builds and prints a confusion matrix from a saved model and the test set.

    The classifier is unpickled from ``best_logistic_model.pkl``, run over
    the test split returned by ``lsgd.load_data('mnist.pkl.gz')``, and every
    (true label, predicted label) pair increments one cell of a 10x10 matrix:
    the row is the true label, the column is the prediction.  The matrix is
    printed, not returned.
    """

    confusionMatrix = np.zeros((10, 10))

    # Load the saved model.  Pickle data is binary, so the file is opened in
    # 'rb' mode, and the context manager guarantees the handle is closed.
    # NOTE: unpickling executes code from the file; only load trusted models.
    with open('best_logistic_model.pkl', 'rb') as model_file:
        classifier = pickle.load(model_file)

    # predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    dataset = 'mnist.pkl.gz'
    datasets = lsgd.load_data(dataset)

    test_set_x, test_set_y = datasets[2]

    # Materialise both symbolic splits as concrete arrays.
    test_set_x = test_set_x.eval()
    test_set_y = test_set_y.eval()

    predictions = predict_model(test_set_x)

    print("Confusion Matrix for digit classification: \n")

    # Walk the labels and predictions in lockstep instead of by index.
    for actual, predicted in zip(test_set_y, predictions):
        confusionMatrix[actual, predicted] += 1

    # Print plain decimals rather than scientific notation.
    np.set_printoptions(suppress=True)
    print(confusionMatrix)
Exemplo n.º 3
0
def minifyDataset(input='data/mnist.pkl.gz', output='data/minified.pkl.gz'):
    """
    Write a small subset of a dataset to a gzipped pickle.

    The dataset at ``input`` is read with ``load_data``; the first 500
    training, 100 validation and 100 test examples (inputs and labels) are
    converted with ``theanoTensor2NumpyArray`` and dumped to ``output`` as a
    tuple of three ``(x, y)`` pairs.

    :type input: string
    :param input: path of the dataset to read

    :type output: string
    :param output: path of the gzipped pickle to write
    """
    print('read ' + input)
    datasets = load_data(input)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    train_len = 500
    valid_len = 100
    test_len = 100

    minified_train_x = theanoTensor2NumpyArray(train_set_x[0:train_len:1])
    minified_valid_x = theanoTensor2NumpyArray(valid_set_x[0:valid_len:1])
    minified_test_x = theanoTensor2NumpyArray(test_set_x[0:test_len:1])

    # Each label subset must come from its own split; slicing all three from
    # the training labels would pair validation/test inputs with wrong labels.
    minified_train_y = theanoTensor2NumpyArray(train_set_y[0:train_len:1])
    minified_valid_y = theanoTensor2NumpyArray(valid_set_y[0:valid_len:1])
    minified_test_y = theanoTensor2NumpyArray(test_set_y[0:test_len:1])

    data = ((minified_train_x, minified_train_y),
            (minified_valid_x, minified_valid_y),
            (minified_test_x, minified_test_y))

    # The context manager closes (and thereby flushes) the archive even if
    # pickling fails.  Protocol -1 selects the highest available protocol.
    with gzip.open(output, 'wb') as f:
        cPickle.dump(data, f, -1)

    print('output to ' + output)
Exemplo n.º 4
0
def mlp_test(test_set, Model, n_input=2030, n_output=150, n_hidden=50):
    """
    Run an MLP built from ``Model`` on the first example of ``test_set``.

    ``test_set`` is handed to ``load_data`` for all three of its arguments
    and only the first returned split is used.  An ``MLP`` is constructed
    with a fixed random seed (1234) and the given layer sizes, then its
    ``predictAll`` output is evaluated for row 0 of the inputs.

    Returns the result of that evaluation converted with ``.tolist()``.
    """
    splits = load_data(test_set, test_set, test_set)
    features, _labels = splits[0]

    row = T.lscalar()       # which example to feed to the network
    sample = T.vector('x')  # a single input vector

    # How ``Model`` is applied is decided inside the MLP constructor.
    network = MLP(
        rng=numpy.random.RandomState(1234),
        input=sample,
        n_in=n_input,
        n_hidden=n_hidden,
        n_out=n_output,
        Model=Model
    )

    run_network = theano.function(
        inputs=[row],
        outputs=network.predictAll,
        givens={sample: features[row]}
    )

    return run_network(0).tolist()
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='or.pkl.gz', batch_size = 3, n_hidden=500):
    # inputs [[1,0,0], [1,0,1], [1,1,0], [1,1,1], [0,1,1], [0,1,0], [0,0,1], [0,0,0]]
    # outputs [[1], [1], [1], [1], [1], [1], [1], [0]]
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]

    print train_set_x

    print '...building the model'
    index = T.lscalar()
    x = T.ivector('x')
    y = T.ivector('y')
    rng = numpy.random.RandomState(1234)

    classifier = MLP(rng=rng, input=x, n_in=3, n_hidden=3, n_out=1)

    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    #index = 0
    test_model = theano.function(
        inputs=[index], 
        outputs=classifier.errors(y),
        givens={
    		x: train_set_x[index * batch_size: (index + 1) * batch_size],
    		y: train_set_y[index * batch_size: (index + 1) * batch_size]
    	}
    )
Exemplo n.º 6
0
def train_autos_l2(corruption=0):
#load data
    dataset='mnist.pkl.gz'
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

#train dA's
    dAs = []
    n_hiddens = [10,25,50,100]
    x = T.matrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    for n_hidden in n_hiddens:
        path_da = '../data/dA_l2/dA_l2_nhid'+str(n_hidden)+'_corr'+str(corruption)+'.p'
        if not os.path.isfile(path_da):
            print 'defining for n_hidden = ',n_hidden
            da = dA(
                numpy_rng=rng,
                theano_rng=theano_rng,
                input=x,
                n_visible=28 * 28,
                n_hidden=n_hidden
                )
            print 'training for n_hidden = ',n_hidden
            train_dA(da,train_set_x = train_set_x, train_set_y = train_set_y, corruption=corruption)
        
            print 'storing dA to file'
            da.dump(open(path_da,'w'))
def test_mlp(dataset = 'mnist.pkl.gz', batch_size = 128):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for testing
    n_test = test_set_x.get_value(borrow=True).shape[0]
    n_test_batches = n_test / batch_size

    ######################
    # LOAD ACTUAL MODEL #
    ######################
    print '... loading the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    # construct the MLP class
    classifier = MLP(
        input=x,
        n_in=28 * 28,
        n_hidden1=394,
        n_hidden2=196,
        n_out=10
    )

    f = open('/home/dmitry/Projects/DNN-develop/theano/results/mlp',"rb")
    classifier.hiddenLayer1.W.set_value(cPickle.load(f), borrow=True)
    classifier.hiddenLayer1.b.set_value(cPickle.load(f), borrow=True)

    classifier.hiddenLayer2.W.set_value(cPickle.load(f), borrow=True)
    classifier.hiddenLayer2.b.set_value(cPickle.load(f), borrow=True)

    classifier.logRegressionLayer.W.set_value(cPickle.load(f), borrow=True)
    classifier.logRegressionLayer.b.set_value(cPickle.load(f), borrow=True)

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    print('batch_size ' + str(batch_size))
    print('n_test ' + str(n_test))
    print('n_test_batches ' + str(n_test_batches))
    wtime = time.clock()
    test_losses = [test_model(i) for i in xrange(n_test_batches)]
    test_score = numpy.mean(test_losses)

    wtime = (time.clock() - wtime) / n_test * 1000.;
    print('for 1 sample needed ' + str(wtime) + ' msec')
    print('test score ' + str(test_score * 100.))
Exemplo n.º 8
0
def test(dataset = 'mnist.pkl.gz', output_folder = 'plots'):
    
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    
    input = T.matrix('input')
    
    output = get_corrupted_input_gaussian(theano_rng = theano_rng, input = input)
    
    corrupt = theano.function([input], output)
    
    
    mnist_noise = corrupt(train_set_x.get_value(borrow = True))
    mnist_noise = theano.shared(value=mnist_noise, name='mnist_noise', borrow = True)
#     print train_set_x.get_value(borrow=True)[0]
#     print mnist_noise.get_value(borrow=True)[0]
    
    image_clean = Image.fromarray(tile_raster_images(X = train_set_x.get_value(borrow = True),
                                               img_shape=(28, 28), tile_shape=(1, 6),
                                               tile_spacing=(1,1)))
    image_clean.save('clean_6.png')
    
    image_noise = Image.fromarray(tile_raster_images(X = mnist_noise.get_value(borrow = True),
                                               img_shape=(28, 28), tile_shape=(1, 6),
                                               tile_spacing=(1,1)))
    image_noise.save('noise_6.png')
    
    print 'Done!'
Exemplo n.º 9
0
def test_autos_l2(corruption=0):
    """
    Reconstruct the validation set with each stored auto-encoder.

    For each hidden size in ``[10, 25, 50, 100]`` a ``dA`` is built, its
    state is read with ``da.load`` from
    ``../data/dA_l2/dA_l2_nhid<size>_corr<corruption>.p``, the validation
    inputs are reconstructed and a 10x10 tile image of the result is saved
    under ``../data/dA_l2/pics/``.  A tile image of the original validation
    inputs is saved last as ``original.png``.

    :param corruption: value embedded in the model and image file names
    """
    # load data
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]

    # test against validation set
    n_hiddens = [10, 25, 50, 100]
    x = T.matrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    for n_hidden in n_hiddens:
        # load model
        da = dA(numpy_rng = rng,
                theano_rng = theano_rng,
                input=x,
                n_visible=28*28,
                n_hidden=n_hidden)
        # Read the stored model through a context manager so the handle is
        # closed as soon as loading finishes (or fails).
        with open('../data/dA_l2/dA_l2_nhid'+str(n_hidden)+'_corr'+str(corruption)+'.p','r') as model_file:
            da.load(model_file)

        reconstructed = da.get_reconstructed_input(input=valid_set_x)
        image = Image.fromarray(tile_raster_images(X=reconstructed.eval(),img_shape=(28, 28), tile_shape=(10, 10),tile_spacing=(1, 1)))
        image.save('../data/dA_l2/pics/dAs_reconstructed_nhid'+str(da.n_hidden)+'_corr'+str(corruption)+'.png')

    image = Image.fromarray(tile_raster_images(X=valid_set_x.get_value(),img_shape=(28, 28), tile_shape=(10, 10),tile_spacing=(1, 1)))
    image.save('../data/dA_l2/pics/original.png')
	def setData(self, datasetPath):
		"""
		Load a dataset and distribute its three splits to this object.

		``load_data(datasetPath)`` is stored on ``self._datasets``; its
		elements 0, 1 and 2 are then passed to ``_setTrainingData``,
		``_setValidateData`` and ``_setTestData`` respectively.
		"""
		self._datasets = load_data( datasetPath )
		# Splits are forwarded in the fixed order train, validate, test.
		self._setTrainingData( self._datasets[0] )
		self._setValidateData( self._datasets[1] )
		self._setTestData( self._datasets[2] )

		print( "... Triple DataSet in position. \n" )
def test_stack_machine():
    """
    Compare a classifier trained on raw data with one trained on data
    passed through a saved stacked auto-encoder.

    The auto-encoder is loaded from ``Saving/Stack_AE_theano/10-06_3L1000``,
    its reconstruction error on the MNIST test inputs is printed, and the
    train and test inputs are encoded and decoded through the whole stack.
    Two classifiers are fitted with ``stack_AE.supervized_classification``
    (one on the raw training inputs, one on their reconstructions) and
    their train/test scores are printed.

    Returns the loaded stacked auto-encoder.
    """
    # Locate and load the saved model.
    model_dir = os.path.join("Saving/Stack_AE_theano", "10-06_3L1000")
    stack_AE = SAE.load(model_dir)

    # Dataset: only the train and test splits are needed.
    print("Loading dataset...")
    splits = load_data('mnist.pkl.gz')
    train_x, train_y = splits[0]
    test_x, test_y = splits[2]

    _rebuilt_test, error = stack_AE.reconstruct(test_x)

    print("The error of reconstruction is:  {0}".format(error.eval()), "%")

    # Feed-forward pass through every layer: encode, then decode again.
    train_code = stack_AE.forward_encoding(
        train_x, 0, stack_AE.architecture.shape[0])
    train_rebuilt = stack_AE.forward_decoding(
        train_code, 0, stack_AE.architecture.shape[0])

    test_code = stack_AE.forward_encoding(
        test_x, 0, stack_AE.architecture.shape[0])
    test_rebuilt = stack_AE.forward_decoding(
        test_code, 0, stack_AE.architecture.shape[0])

    # Fit one classifier per representation.
    classifier = 'AdaBoostClassifier'
    print("Classifier used: ", classifier)
    print("Learning the logistic regression without stack...")
    raw_model = stack_AE.supervized_classification(
        train_x.eval(), train_y.eval(),
        classification_method=classifier)
    print("Learning the logistic regression with stack...")
    stacked_model = stack_AE.supervized_classification(
        train_rebuilt.eval(), train_y.eval(),
        classification_method=classifier)

    # Report accuracy of both classifiers on both splits.
    print("Without Stack_AE:")
    print("Accuracy training set:",
          raw_model.score(train_x.eval(), train_y.eval()))
    print("Accuracy test set:",
          raw_model.score(test_x.eval(), test_y.eval()))

    print("With Stack_AE:")
    print("Accuracy training set:",
          stacked_model.score(train_rebuilt.eval(), train_y.eval()))

    print("Accuracy test set:",
          stacked_model.score(test_rebuilt.eval(), test_y.eval()))

    return stack_AE
Exemplo n.º 12
0
def predict(dataset):
    """
    Example of loading saved parameters and running the model.

    The test split of ``dataset`` is loaded, six parameter objects are
    unpickled from ``BEST_PICKLEJAR`` and passed to the ``MLP`` constructor
    as the weights and biases of its two hidden layers and its logistic
    regression layer, and the class probabilities, predicted labels and
    actual labels of the first 50 test examples are printed.

    :type dataset: string
    :param dataset: path of the dataset handed to ``load_data``
    """
    # test on some examples from the test set
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    # Pickle data is binary, so open in 'rb' mode; the context manager
    # releases the handle once the parameters are read.
    # NOTE: unpickling executes code from the file; only load trusted files.
    with open(BEST_PICKLEJAR, 'rb') as jar:
        pars = cPickle.load(jar)

    # sanity-check the loaded weights and bias vectors
    for i, p in enumerate(pars):
        print("Checking loaded parameter %i type and shape..." % i)
        print(type(p))
        print(p.eval().shape)

    # symbolic vars for the data
    x = T.matrix('x')        # rasterized image data
    rng = numpy.random.RandomState(1234)

    # use our loaded params to init the model
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=NINPUT,
        n_hidden1=NHIDDEN1,
        n_hidden2=NHIDDEN2,
        n_out=NOUT,
        W_hidden1=pars[0],
        b_hidden1=pars[1],
        W_hidden2=pars[2],
        b_hidden2=pars[3],
        W_logreg=pars[4],
        b_logreg=pars[5]
    )

    # compile two predictor fns: one returns the predicted labels, the other
    # the full p_y_given_x array they are derived from
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.y_pred
    )
    predict_model_show_probs = theano.function(
        inputs=[classifier.input],
        outputs=classifier.logRegressionLayer.p_y_given_x
    )

    show_size = 50

    predicted_values_probs = predict_model_show_probs(test_set_x[:show_size])
    predicted_values = predict_model(test_set_x[:show_size])
    print("Predicted values (probs) for the first %d:" % (show_size))
    print(predicted_values_probs)
    print("Predicted values for the first %d:" % (show_size))
    print(predicted_values)
    print("Actual values:")
    print(T.cast(test_set_y, 'int32').eval()[:show_size])
def main():
    dataset = '../subimages/cache.pkl.gz'
    datasets = load_data(dataset)
    epochs = 200
    batch_size=500
    rvals = [0.1, 0.05, 0.01, 0.005]
    kerns1vals = [10, 20]
    kerns2vals = [20, 50]
    hypers = list(itertools.product(rvals, kerns1vals, kerns2vals))
    names = ['r', 'k1', 'k2']
    trainerWrapper = lambda dim_in, r, k1, k2: Trainer(r, k1, k2, batch_size)

    xdata, ydata = datasets[0]
    xverify, yverify = datasets[1]
    xtest, ytest = datasets[2]

    xdata_ref = xdata.get_value(borrow=True)
    ydata_ref = ydata.eval()
    xverify_ref = xverify.get_value(borrow=True)
    yverify_ref = yverify.eval()
    xtest_ref = xtest.get_value(borrow=True)
    ytest_ref = ytest.eval()

    print 'Learning a convolutional neural network (CNN)'
    print '  dataset:            ', dataset
    print '  training size:      ', xdata_ref.shape
    print '  verify size:        ', xverify_ref.shape
    print '  test size:          ', xtest_ref.shape

    k = 3
    cross_epochs = 5
    print '... cross-validating'
    print '  k:                    ', k
    print '  cross-val epochs:     ', cross_epochs
    print '  batch size:           ', batch_size
    print '  params:               ', names
    for i in xrange(len(names)):
        print '  {0:3s} values:         '.format(names[i]), sorted(set([x[i] for x in hypers]))
    #best = crossvalidate(trainerWrapper, xdata_ref, ydata_ref, k, cross_epochs, batch_size, hypers, names)
    print 'Skipping cross-validation.  Using best value from previous run'
    best = [0.05, 20, 50]
    print '  best hyper-params:    ', names, ' = ', best

    classifier = trainerWrapper(0, *best)
    print '... training'
    print '  batch size:           ', batch_size
    print '  epochs:               ', epochs
    print '  hyper-params:         ', names, ' = ', best
    classifier.train(xdata_ref, ydata_ref, epochs, batch_size, valid_set_x=xverify_ref,
                     valid_set_y=yverify_ref, quiet=False)
    print '... testing'
    print '  training accuracy:    ', 1 - classifier.errors(xdata_ref, ydata_ref)
    print '  validation accuracy:  ', 1 - classifier.errors(xverify_ref, yverify_ref)
    print '  testing accuracy:     ', 1 - classifier.errors(xtest_ref, ytest_ref)
    print '... pickling'
    print '  classifier:           ', classifier
    filename = pickleSafely(classifier, 'classifier', '.pkl.gz')
    print '  saved to:             ', filename
Exemplo n.º 14
0
def main(dataset = 'mnist.pkl.gz',no_model = None, no_hid = None):
    """
    Set up an ensemble of small MLPs over ``dataset``.

    The three splits are loaded, per-instance weights and per-model alpha
    coefficients are initialised, and ``no_model`` MLPs with ``no_hid``
    hidden units each are constructed.  The training loop is still a
    placeholder, so no model is trained and nothing is returned.

    :type dataset: string
    :param dataset: path of the dataset handed to ``load_data``

    :type no_model: int or None
    :param no_model: number of networks to build; ``None`` means 10

    :type no_hid: int or None
    :param no_hid: hidden units per network; ``None`` means 5
    """
    # initiate hyper-parameters
    if no_model is None:
        no_model = 10
    if no_hid is None:
        no_hid = 5

    # data loading
    print('loading data ...')
    datasets = load_data(dataset)
    x_tr, y_tr = datasets[0]
    x_va, y_va = datasets[1]
    x_te, y_te = datasets[2]

    x = T.matrix('x')
    y = T.ivector('y')

    no_train = x_tr.get_value(borrow=True).shape[0]
    dim = x_tr.get_value(borrow=True).shape[1]
    K = y_tr.eval().max()  # largest label value; the output layer gets K+1 units

    # global variables
    weight = np.tile(1.0/no_train, (no_train,))  # uniform weight per training instance
    alpha = np.zeros((no_model,))
    # model
    nets = []
    rng = np.random.RandomState(1234)

    for i in xrange(no_model):
        net = MLP(
            rng=rng,
            input=x,
            n_in=dim,
            n_hidden=no_hid,
            n_out=K+1
        )
        print('adding network %i into nets list' % i)
        nets.append(net)

    # train model
    for i in xrange(no_model):
        # TODO: train model based on current weights
        # TODO: make prediction, compute weighted error/alpha
        # TODO: update weights
        pass  # a loop body is required; the comments alone do not parse

    return

def train_mlp():
    """Placeholder for the MLP training routine; currently does nothing."""
    # TODO: implement.  An explicit body is needed for the module to parse.
    pass







# Run the entry point only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Exemplo n.º 15
0
def test_dA(learning_rate=0.1, training_epochs=15,
			dataset='./data/mnist.pkl.gz',
			batch_size=20, output_folder='dA_plots'):
	
	datasets = load_data(dataset)
	train_set_x, train_set_y = datasets[0]

	n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

	index = T.lscalar()
	x = T.matrix('x')

	if not os.path.isdir(output_folder):
		os.makedirs(output_folder)
	os.chdir(output_folder)

	rng = numpy.random.RandomState(123)
	theano_rng = RandomStreams(rng.randint(2 ** 30))

	da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
			n_visible=28*28, n_hidden=500)

	cost, updates = da.get_cost_updates(corruption_level=0.,
										learning_rate=learning_rate)

	train_da = theano.function([index], cost, updates=updates,
		givens={x: train_set_x[index * batch_size:
							   (index + 1) * batch_size]})

	start_time = time.clock();

	# training
	for epoch in xrange(training_epochs):
		c = []
		for batch_index in xrange(n_train_batches):
			c.append(train_da(batch_index))

		print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

	end_time = time.clock()

	training_time = (end_time - start_time)

	print >> sys.stderr, ('The no corruption code for file ' +
						  os.path.split(__file__)[1] + 
						  ' ran for %.2fm' % ((training_time) / 60.))

	image = PIL.Image.fromarray(
		tile_raster_images(X=da.W.get_value(borrow=True).T,
						   img_shape=(28, 28), tile_shape=(10, 10),
						   tile_spacing=(1, 1)))

	image.save('filters_corruption_0.png')

	# training with corruption_level is 30% ......

	os.chdir('../')
def test_DimentionalReduction(learning_rate=0.1, training_epochs=15, dataset="mnist.pkl.gz", batch_size=20):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=2)

    cost, updates = da.get_cost_updates(corruption_level=0.0, learning_rate=learning_rate)

    train_da = theano.function(
        [index], cost, updates=updates, givens={x: train_set_x[index * batch_size : (index + 1) * batch_size]}
    )

    for epoch in xrange(training_epochs):
        # go through trainng set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print "Training epoch %d, cost " % epoch, numpy.mean(c)

    x = T.matrix("x")
    hidden_values_function = da.get_hidden_values2(x)

    result_function = theano.function(inputs=[x], outputs=hidden_values_function)

    colors = ["b", "g", "r", "c", "m", "y", "k", "w"]
    n = 0
    for x, y in zip(result_function(train_set_x.get_value()), train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1], c=colors[y])
            n += 1

        if n > 2000:
            break

    plt.show()
    n = 0
    pca = PCA(n_components=2)
    for x, y in zip(pca.fit_transform(train_set_x.get_value()), train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1], c=colors[y])
            n += 1

        if n > 2000:
            break

    plt.show()
Exemplo n.º 17
0
def test_stacked_autoencoder(
    finetune_lr=0.1,
    pretraining_epochs=15,
    pretrain_lr=0.001,
    training_epochs=100,
    dataset="mnist.pkl.gz",
    batch_size=1,
    hidden_layers_sizes=[1000, 1000, 1000],
    corruption_levels=[0.1, 0.2, 0.3],
    pretrain_flag=True,
    testerr_file="test_error.txt",
):
    datasets = load_data("../data/mnist.pkl.gz")
    train_set_x = datasets[0][0]
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    numpy_rng = np.random.RandomState(89677)

    print "building the model ..."

    sda = StackedDenoisingAutoencoder(numpy_rng, 28 * 28, hidden_layers_sizes, 10, corruption_levels)

    # Pre-training
    if pretrain_flag:
        print "getting the pre-training functions ..."
        pretraining_functions = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size)

        print "pre-training the model ..."
        for i in xrange(sda.n_layers):
            for epoch in xrange(pretraining_epochs):
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(
                        pretraining_functions[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)
                    )
                print "Pre-training layer %i, epoch %d, cost %f" % (i, epoch, np.mean(c))

    # Fine-tuning
    print "getting the fine-tuning functions ..."
    train_model, _, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr
    )

    print "fine-tuning the model ..."

    epoch = 0
    fp = open(testerr_file, "w")
    while epoch < training_epochs:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
        test_losses = test_model()
        test_score = np.mean(test_losses)
        print "Fine-tuning, epoch %d, test error %f" % (epoch, test_score * 100)
        fp.write("%d\t%f\n" % (epoch, test_score * 100))
    fp.close()
Exemplo n.º 18
0
def my_test():
    """
    Save the first ten training digits as a one-row strip, ``samples.png``.

    The training inputs are read from ``../data/mnist.pkl.gz``, the first
    ten rows are tiled side by side with ``tile_raster_images`` and the
    result is written to ``samples.png`` in the current directory.
    """
    datasets = load_data('../data/mnist.pkl.gz')
    train_set_x, train_set_y = datasets[0]
    # The tiled array is handed straight to PIL.  Copying it into a
    # preallocated 2-D buffer through a three-axis slice ([:, :, :]) raises
    # IndexError, because NumPy rejects more indices than dimensions.
    image_data = tile_raster_images(X = train_set_x.get_value(borrow=True)[0:10],
                                    img_shape = (28,28),
                                    tile_shape = (1, 10),
                                    tile_spacing = (1, 1))
    image = PIL.Image.fromarray(image_data)
    image.save('samples.png')
Exemplo n.º 19
0
 def load2d(dataset='ISH.pkl.gz', toShuffleInput = False , withZeroMeaning = False):
     print 'loading data...'   
 
     datasets = load_data(dataset, toShuffleInput, withZeroMeaning)
 
     train_set_x, train_set_y = datasets[0]
 #     valid_set_x, valid_set_y = datasets[1]
 #     test_set_x, test_set_y = datasets[2]
     
     train_set_x = train_set_x.reshape(-1, 1, input_width, input_height)
     print(train_set_x.shape[0], 'train samples')
     return train_set_x, train_set_y
Exemplo n.º 20
0
def showDataset(dataset='data/mnist.pkl.gz'):
    """
    Print a summary of every split of a dataset.

    For the first input row and for the label array of each split
    (train, valid, test) the value, its length, maximum and minimum are
    printed.  A warning is printed for any split whose inputs and labels
    differ in length.

    :type dataset: string
    :param dataset: path to the pickled dataset
    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    #type(train_set_x) => <class 'theano.tensor.sharedvar.TensorSharedVariable'>
    np_train_x = train_set_x.get_value(borrow=True)
    np_valid_x = valid_set_x.get_value(borrow=True)
    np_test_x = test_set_x.get_value(borrow=True)

    #type(train_set_y) => <class 'theano.tensor.basic.TensorVariable'>
    np_train_y = theanoTensor2NumpyArray(train_set_y)
    np_valid_y = theanoTensor2NumpyArray(valid_set_y)
    np_test_y = theanoTensor2NumpyArray(test_set_y)


    # First input row of each split; only these rows are printed, not the
    # whole input matrices.
    np_train_x0 = np_train_x[0]
    np_valid_x0 = np_valid_x[0]
    np_test_x0 = np_test_x[0]

    # Earlier, eval()-based version of the report below, kept for reference:
    #varNames = "np_train_x[0] np_train_y np_valid_x[0] np_valid_y np_test_x[0] np_test_y".split(" ")
    #for varName in varNames:
    #    var = eval(varName)
    #    print(varName)
    #    print("len:" + str(len(var)))
    #    pprint(var)
    #
    #for varName in varNames: #print again for convenience
    #    var = eval(varName)
    #    print(varName + " len:" + str(len(var)))

    # NOTE: ``vars`` shadows the builtin of the same name inside this
    # function.
    vars = [np_train_x0, np_train_y, np_valid_x0, np_valid_y, np_test_x0, np_test_y]
    # The local namespace is handed to getVarNames, presumably so it can
    # recover the name bound to each object -- confirm against getVarNames.
    # Renaming locals here would therefore change the printed labels.
    var_names = locals()
    for var in vars:
        name = getVarNames(var, var_names)
        print(str(name) + ' start')
        pprint(var)
        print("len:%d max:%f min:%f"%(len(var), numpy.max(var), numpy.min(var)))
        print(str(name) + ' end\n')

    # Consistency check: inputs and labels of a split must have equal length.
    pairs = [(np_train_x, np_train_y), (np_valid_x, np_valid_y), (np_test_x, np_test_y)]
    for pair in pairs:
        if not (len(pair[0]) == len(pair[1])):
            name_x = getVarNames(pair[0], locals())
            name_y = getVarNames(pair[1], locals())
            print("WARNING: the lengths of %s & %s are different" % (name_x, name_y))
def test_DimentionalReduction(dataset='mnist.pkl.gz', pretraining_epochs=50, pretrain_lr=0.01, batch_size=5):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    numpy_rng = numpy.random.RandomState(89677)
    
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[300, 50, 2],
        n_outs=2
    )
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)
    corruption_levels = [0., 0., 0.]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
    target = train_set_x.get_value()
    for dA_layer in sda.dA_layers:
        hidden_values_function = dA_layer.get_hidden_values2(sda.x)
        result_function = theano.function(inputs=[sda.x],outputs=hidden_values_function)
        target = result_function(target)
        print target

    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k','w']
    n = 0
    for x,y in zip(target,train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1],c=colors[y])
            n += 1
            
        if n > 2000:
            break
        
    plt.show()
Exemplo n.º 22
0
def test_columns(exclude_mode, models, valid_test='V'):
    dataset='mnist.pkl.gz'
    print '... Starting to test %i columns' % len(models)
    # create data hash that will be filled with data from different normalizations
    all_datasets = {}
    # instantiate multiple columns
    columns = []
    for model in models:
        # load model params
        f = open('./models/'+model)
        params = cPickle.load(f)
        nkerns, batch_size, normalized_width, distortion = cPickle.load(f)
        if all_datasets.get(normalized_width):
            datasets = all_datasets[normalized_width]
        else:
            datasets = load_data(dataset, normalized_width, 29)
            all_datasets[normalized_width] = datasets
        # no distortion during testing
        columns.append(DNNColumn(datasets, nkerns, batch_size, normalized_width, 0, params))
    print '... Forward propagating %i columns' % len(models)
    # call test on all of them recieving 10 outputs
    if valid_test=='V':
        model_outputs = [column.valid_outputs() for column in columns] 
        position_ds   = 1 
    else:
        model_outputs = [column.test_outputs() for column in columns]      
        position_ds   = 2
    # average 10 outputs
    avg_output = numpy.mean(model_outputs, axis=0)
    # argmax over them
    predictions = numpy.argmax(avg_output, axis=1)
    # compare predictions with true labels
    pred = T.ivector('pred')

    all_true_labels_length = theano.function([], all_datasets.values()[0][position_ds][1].shape)
    remainder = all_true_labels_length() - len(predictions)
    if exclude_mode and remainder:
        print '... Excluding FIRST %i points' % remainder
        true_labels = all_datasets.values()[0][position_ds][1][remainder:]
    elif remainder: # TODO: remove this, doesn't seem to make sense since the predictions would be misaligned
        print '... Excluding LAST %i points' % remainder
        true_labels = all_datasets.values()[0][position_ds][1][:len(predictions)]
    else:
        true_labels = all_datasets.values()[0][position_ds][1][:]

    error = theano.function([pred], T.mean(T.neq(pred, true_labels)))
    acc = error(predictions.astype(dtype=numpy.int32))
    print '....'
    print 'Error across %i columns: %f %%' % (len(models), 100*acc)
    return [predictions, acc]
Exemplo n.º 23
0
def load2d(test=False, cols=None):
    """Load the training split of ``ISH.pkl.gz`` reshaped to 4-D.

    ``test`` and ``cols`` are accepted but not used by this implementation.

    :return: ``(train_set_x, train_set_y)`` where ``train_set_x`` has been
             reshaped to ``(-1, 1, 300, 140)``
    """
    # Single-argument parenthesised print emits the same text on Python 2 and 3.
    print('loading data...')

    datasets = load_data('ISH.pkl.gz', withZeroMeaning=False)

    # Only the training split (index 0) is returned, so the validation and
    # test splits are no longer unpacked into unused locals.
    train_set_x, train_set_y = datasets[0]

    input_width = 300
    input_height = 140
    train_set_x = train_set_x.reshape(-1, 1, input_width, input_height)
    return train_set_x, train_set_y
Exemplo n.º 24
0
def run():
    """Train an ``MLPClassifier`` on the first split of ``mnist.pkl.gz`` and
    then call ``fit`` with the third split.
    """
    splits = load_data('mnist.pkl.gz')

    train_inputs, train_targets = splits[0]

    # Positional arguments are 28 * 28 and 10; presumably the input and
    # output sizes -- confirm against MLPClassifier.
    classifier = MLPClassifier(28 * 28, 10, n_epochs=10)

    # train() is given concrete values: get_value() on the inputs and
    # eval() on the targets.
    classifier.train(train_inputs.get_value(), train_targets.eval())

    test_inputs, test_targets = splits[2]

    # NOTE(review): unlike train(), fit() receives the split exactly as
    # load_data returned it (no get_value()/eval()) -- confirm this is what
    # MLPClassifier.fit expects.
    classifier.fit(test_inputs, test_targets)
Exemplo n.º 25
0
def fromPickledData(zipName):
    """Combine the inputs of the three splits returned by ``load_data``.

    :param zipName: dataset path forwarded to ``load_data``
    :return: ``train_x + valid_x + test_x`` (labels are discarded)
    """
    splits = load_data(zipName)
    train_x, _train_y = splits[0]
    valid_x, _valid_y = splits[1]
    test_x, _test_y = splits[2]

    # NOTE(review): the meaning of ``+`` depends on what load_data returns.
    # For Python lists it concatenates; for array-like or Theano shared
    # variables it is element-wise addition -- confirm which is intended.
    return train_x + valid_x + test_x
Exemplo n.º 26
0
def test_dA(learning_rate=0.1,training_epochs=15,
           dataset='mnist.pkl.gz',
           batch_size=20,
           output_folder='dA_plots'):
    """
    Build and train the model twice: first with a corruption level of 0,
    then with a corruption level of 0.3.

    :type learning_rate: float
    :param learning_rate: learning rate forwarded to ``build_mdl``

    :type training_epochs: int
    :param training_epochs: number of epochs forwarded to ``train_mdl``

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: minibatch size used to derive the number of batches

    :type output_folder: string
    :param output_folder: directory created if missing and forwarded to
                          ``train_mdl``
    """
    train_set_x, _train_set_y = load_data(dataset)[0]

    # number of whole minibatches in the training split
    n_examples = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches = n_examples // batch_size

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Same build-then-train sequence for each corruption level, in order:
    # no corruption first, then 30%.
    for corruption_level in (0., 0.3):
        train_da, da = build_mdl(n_train_batches, batch_size,
                                 corruption_level,
                                 learning_rate, train_set_x)
        train_mdl(train_da, da, training_epochs,
                  n_train_batches, corruption_level,
                  output_folder)
Exemplo n.º 27
0
def train_mcdnn_column(normalized_width=0, n_epochs=800, trail=0):
    """Train one ``DNNColumn`` on MNIST and save it under a timestamped name.

    :param normalized_width: digit width normalisation forwarded to
                             ``load_data`` and ``DNNColumn``
    :param n_epochs: number of epochs forwarded to ``train_column``
    :param trail: run index, used only in the log line and the file name
    """
    print('... train %i column of normalization %i' % (trail, normalized_width))
    print('... num_epochs %i' % (n_epochs))

    # The data is requested with an output image size of 29 and the given
    # width normalisation, then handed to the column as-is.
    padded_data = load_data(dataset='mnist.pkl.gz',
                            digit_normalized_width=normalized_width,
                            digit_out_image_size=29)

    dnn_column = DNNColumn(ds=padded_data, normalized_width=normalized_width)
    dnn_column.train_column(n_epochs=n_epochs, init_learning_rate=0.1)

    # The file name encodes the normalisation, the run index and the current
    # Unix time.
    timestamp = int(time.time())
    snapshot_name = 'mcdnn_nm%i_trail%i_Layers_time_%i' % (normalized_width, trail, timestamp)
    dnn_column.save(snapshot_name)
Exemplo n.º 28
0
def load2d(num_labels,outputFile=None, input_width=300, input_height=140,end_index=16351,MULTI_POSITIVES=20,dropout_percent=0.1, dataset='ISH.pkl.gz', toShuffleInput = False , withZeroMeaning = False):
    """Load the train and test splits and reshape their inputs to 4-D.

    :param num_labels: forwarded to ``load_data`` as ``labelset``
    :param outputFile: optional writable object; when given, the number of
                       training examples is written to it
    :param input_width: third dimension of the reshaped inputs
    :param input_height: fourth dimension of the reshaped inputs
    :param end_index: forwarded to ``load_data``
    :param MULTI_POSITIVES: forwarded to ``load_data``
    :param dropout_percent: forwarded to ``load_data``
    :param dataset: dataset path forwarded to ``load_data``
    :param toShuffleInput: forwarded to ``load_data``
    :param withZeroMeaning: forwarded to ``load_data``
    :return: ``(train_set_x, train_set_y, test_set_x, test_set_y)`` with both
             input arrays reshaped to ``(-1, 1, input_width, input_height)``
    """
    print('loading data...')

    datasets = load_data(dataset, toShuffleInput=toShuffleInput, withZeroMeaning=withZeroMeaning,end_index=end_index,MULTI_POSITIVES=MULTI_POSITIVES,dropout_percent=dropout_percent, labelset=num_labels)

    # The validation split (index 1) is intentionally not used here.
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    train_set_x = train_set_x.reshape(-1, 1, input_width, input_height)
    test_set_x = test_set_x.reshape(-1, 1, input_width, input_height)

    n_train = train_set_x.shape[0]
    # Formatted as one string: ``print(a, b)`` under Python 2 would print the
    # repr of a tuple instead of a readable message.
    print('%s train samples' % n_train)
    if outputFile is not None:
        outputFile.write("Number of training examples: "+str(n_train) + "\n\n")
    return train_set_x, train_set_y, test_set_x, test_set_y
def evaluate_lenet5(learning_rate=0.1, n_epochs=2,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Build, train and report on a LeNet model for the given dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: minibatch size forwarded to ``build_models``
    """
    # Imported locally so this block does not rely on a file-level import.
    import timeit

    datasets = load_data(dataset)

    # ``nkerns`` has a mutable default; a copy is handed downstream so the
    # shared default list can never be modified across calls.
    (n_train_batches, n_valid_batches, n_test_batches,
     train_model, validate_model, test_model) = \
        build_models(learning_rate, datasets, list(nkerns), batch_size)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # Wall-clock timer: time.clock() measures CPU time on Unix and was
    # removed in Python 3.8, while the message below reports elapsed time.
    start_time = timeit.default_timer()
    (best_validation_loss, best_iter, test_score) = \
        train_models(n_epochs, n_train_batches, n_valid_batches, n_test_batches,
                       train_model, validate_model, test_model)
    end_time = timeit.default_timer()

    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    elapsed_minutes = (end_time - start_time) / 60.
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % elapsed_minutes + '\n')
Exemplo n.º 30
0
def minifyDataset(input, output, minify_rate=0.01):
    """Write a truncated copy of a pickled dataset to a gzip file.

    For each of the three splits returned by ``load_data`` the first
    ``floor(len(split) * minify_rate)`` examples and labels are kept.

    :param input: path of the dataset to read; must exist
    :param output: path of the gzip file to create; must not exist yet
    :param minify_rate: fraction of each split to keep
    :return: ``True`` once the file has been written, ``False`` when an
             argument is missing, the input does not exist or the output
             already exists (a message is printed in each case)
    """
    usage = 'usage: minify_dataset.py <input_file> <output_file>'
    if not input:
        print(usage)
        return False
    if not os.path.exists(input):
        print('file ' + input + ' not found')
        return False
    if not output:
        print(usage)
        return False
    if os.path.exists(output):
        print('output file ' + output + ' already exists')
        return False
    print('read ' + input)
    datasets = load_data(input)

    # Same truncation for the train, validation and test splits, in order.
    minified = []
    for set_x, set_y in (datasets[0], datasets[1], datasets[2]):
        orig_len = len(set_x.get_value(borrow=True))
        mini_len = int(math.floor(orig_len * minify_rate))
        minified.append((theanoTensor2NumpyArray(set_x[0:mini_len]),
                         theanoTensor2NumpyArray(set_y[0:mini_len])))
    data = tuple(minified)

    # The context manager closes the gzip stream even if pickling fails.
    with gzip.open(output, 'wb') as f:
        cPickle.dump(data, f, -1)

    print('output to ' + output)
    return True
Exemplo n.º 31
0
Arquivo: rbm.py Projeto: vd114/galatea
def test_rbm(learning_rate=0.1,
             training_epochs=15,
             dataset='../data/mnist.pkl.gz',
             batch_size=20,
             n_chains=20,
             n_samples=10,
             output_folder='rbm_plots',
             n_hidden=500):
    """
    Train an RBM with Theano and afterwards sample from it.

    Filter images (one per epoch) and a final ``samples.png`` are written
    into ``output_folder``; the current working directory is left untouched.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param dataset: path to the pickled dataset

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    :param output_folder: directory (created if missing) receiving the images

    :param n_hidden: number of hidden units of the RBM

    """
    # Imported locally so this block does not rely on a file-level import.
    import timeit

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # number of whole minibatches in the training set; floor division keeps
    # this an integer regardless of the division semantics in effect
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    # initialize storage for the persistent chain (state = hidden layer of chain)
    persistent_chain = theano.shared(
        numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX))

    # construct the RBM class
    rbm = RBM(input=x, n_visible=28 * 28,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # cost and parameter updates for k=15 Gibbs steps using the persistent
    # chain allocated above
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain,
                                         k=15)

    #################################
    #     Training the RBM          #
    #################################

    # Images are saved through explicit paths instead of os.chdir(): a
    # chdir into the folder followed by chdir('../') does not return to the
    # starting directory for nested or absolute folders, nor after an error.
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # train_rbm returns the cost of one minibatch and applies the updates
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]},
        name='train_rbm')

    plotting_time = 0.
    # Wall-clock timer; time.clock() is CPU time on Unix and was removed in
    # Python 3.8.
    start_time = timeit.default_timer()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print('Training epoch %d, cost is  %s' % (epoch, numpy.mean(mean_cost)))

        # Plot filters after each training epoch
        plotting_start = timeit.default_timer()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(
            tile_raster_images(X=rbm.W.get_value(borrow=True).T,
                               img_shape=(28, 28),
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save(os.path.join(output_folder,
                                'filters_at_epoch_%i.png' % epoch))
        plotting_stop = timeit.default_timer()
        plotting_time += (plotting_stop - plotting_start)

    end_time = timeit.default_timer()

    # time spent plotting is excluded from the reported training time
    pretraining_time = (end_time - start_time) - plotting_time

    print('Training took %f minutes' % (pretraining_time / 60.))

    #################################
    #     Sampling from the RBM     #
    #################################

    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick a random window of n_chains consecutive test examples with which
    # to initialize the persistent visible chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        numpy.asarray(test_set_x.get_value(borrow=True)[test_idx:test_idx +
                                                        n_chains],
                      dtype=theano.config.floatX))

    plot_every = 1000
    # run `plot_every` Gibbs steps (rbm.gibbs_vhv) per call before returning
    # the sample used for plotting (mf = mean-field)
    [presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates = \
        theano.scan(rbm.gibbs_vhv,
                    outputs_info=[None, None, None, None, None, persistent_vis_chain],
                    n_steps=plot_every)

    # add to updates the shared variable that takes care of our persistent
    # chain
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                updates=updates,
                                name='sample_fn')

    # create a space to store the image for plotting ( we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                             dtype='uint8')
    for idx in xrange(n_samples):
        # each call advances the chain by `plot_every` steps; the
        # intermediate samples are discarded because successive samples in
        # the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print(' ... plotting sample  %s' % idx)
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1))

    # construct image
    image = PIL.Image.fromarray(image_data)
    image.save(os.path.join(output_folder, 'samples.png'))
Exemplo n.º 32
0
def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
    """Demonstrate conjugate gradient optimization of a log-linear model

    This is demonstrated on MNIST.

    :type n_epochs: int
    :param n_epochs: number of epochs to run the optimizer (passed to
                     ``scipy.optimize.fmin_cg`` as ``maxiter``)

    :type mnist_pkl_gz: string
    :param mnist_pkl_gz: the path of the mnist training file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    #############
    # LOAD DATA #
    #############
    datasets = load_data(mnist_pkl_gz)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    batch_size = 600  # size of the minibatch

    # Floor division: these counts feed xrange() and must stay integers
    # regardless of the division semantics in effect.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    n_in = 28 * 28  # number of input units
    n_out = 10  # number of output units

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
    x = T.matrix()  # the data is presented as rasterized images
    y = T.ivector()  # the labels are presented as 1D vector of
    # [int] labels

    # construct the logistic regression class; the sizes come from
    # n_in / n_out so they cannot drift from the size of x0 below
    classifier = LogisticRegression(input=x, n_in=n_in, n_out=n_out)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y).mean()

    def minibatch_givens(set_x, set_y):
        # substitute x / y by the minibatch starting at minibatch_offset
        return {
            x: set_x[minibatch_offset:minibatch_offset + batch_size],
            y: set_y[minibatch_offset:minibatch_offset + batch_size]
        }

    # compile a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        [minibatch_offset],
        classifier.errors(y),
        givens=minibatch_givens(test_set_x, test_set_y),
        name="test")

    validate_model = theano.function(
        [minibatch_offset],
        classifier.errors(y),
        givens=minibatch_givens(valid_set_x, valid_set_y),
        name="validate")

    #  compile a theano function that returns the cost of a minibatch
    batch_cost = theano.function(
        [minibatch_offset],
        cost,
        givens=minibatch_givens(train_set_x, train_set_y),
        name="batch_cost")

    # compile a theano function that returns the gradient of the minibatch
    # with respect to theta
    batch_grad = theano.function(
        [minibatch_offset],
        T.grad(cost, classifier.theta),
        givens=minibatch_givens(train_set_x, train_set_y),
        name="batch_grad")

    # creates a function that computes the average cost on the training set
    def train_fn(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        train_losses = [
            batch_cost(i * batch_size) for i in xrange(n_train_batches)
        ]
        return numpy.mean(train_losses)

    # creates a function that computes the average gradient of cost with
    # respect to theta
    def train_fn_grad(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        grad = batch_grad(0)
        for i in xrange(1, n_train_batches):
            grad += batch_grad(i * batch_size)
        return grad / n_train_batches

    # [best validation loss so far, test loss measured at that point];
    # a list so the nested callback can update it in place
    validation_scores = [numpy.inf, 0]

    # creates the validation function
    def callback(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        # compute the validation loss
        validation_losses = [
            validate_model(i * batch_size) for i in xrange(n_valid_batches)
        ]
        this_validation_loss = numpy.mean(validation_losses)
        print('validation error %f %%' % (this_validation_loss * 100., ))

        # check if it is better than the best validation score got until now
        if this_validation_loss < validation_scores[0]:
            # if so, replace the old one, and compute the score on the
            # testing dataset
            validation_scores[0] = this_validation_loss
            test_losses = [
                test_model(i * batch_size) for i in xrange(n_test_batches)
            ]
            validation_scores[1] = numpy.mean(test_losses)

    ###############
    # TRAIN MODEL #
    ###############

    # using scipy conjugate gradient optimizer
    import scipy.optimize
    print("Optimizing using scipy.optimize.fmin_cg...")
    start_time = timeit.default_timer()
    # The optimum returned by fmin_cg is not used afterwards; the scores are
    # collected by `callback`.
    scipy.optimize.fmin_cg(f=train_fn,
                           x0=numpy.zeros((n_in + 1) * n_out,
                                          dtype=x.dtype),
                           fprime=train_fn_grad,
                           callback=callback,
                           disp=0,
                           maxiter=n_epochs)
    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, with '
           'test performance %f %%') %
          (validation_scores[0] * 100., validation_scores[1] * 100.))

    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.1fs' % ((end_time - start_time)) + '\n')
Exemplo n.º 33
0
def test_ssDA(finetune_lr=0.1, pretraining_epochs=15,
             pretrain_lr=0.001, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1):
    """
    Pre-train an ``ssDA`` model layer by layer, then fine-tune it, writing
    snapshots to disk along the way.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: minibatch size used for pre-training and fine-tuning

    :return: the ``ssDA`` instance after fine-tuning
    """
    # Imported locally so this block does not rely on a file-level import.
    import timeit

    xtropy_fraction = 1
    dir_pretrained = '../data/train_snapshots/stacked_sda/'
    path_finetuned_pre = '../data/train_snapshots/stacked_sda/stackedSDA_pretrainedxtropy2.p'
    path_finetuned_post = '../data/train_snapshots/stacked_sda/stackedSDA_finetunedxtropy2.p'

    datasets = load_data(dataset)

    # Only the training inputs are needed directly; the full `datasets`
    # object is handed to build_finetune_functions below.
    train_set_x, train_set_y = datasets[0]

    # number of whole minibatches in the training set (floor division keeps
    # the count an integer for xrange)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    # construct the stacked denoising autoencoder class
    # NOTE(review): this handle is passed to the constructor and never closed
    # here; it is left as-is because it is not visible whether ssDA reads it
    # eagerly -- confirm and close it if so.
    ssda = ssDA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[1000, 1000, 1000, 15],
        f_load_SDA=open('../data/Stacked_DA_params.p', 'r'),
        xtropy_fraction=xtropy_fraction
    )
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = ssda.pretraining_functions(train_set_x=train_set_x,
                                                 batch_size=batch_size)

    print('... pre-training the model')
    # Wall-clock timer; time.clock() is CPU time on Unix and was removed in
    # Python 3.8.
    start_time = timeit.default_timer()
    # Pre-train layer-wise, one corruption level per layer
    corruption_levels = [.1, .2, .3, .3]

    for i in xrange(ssda.n_layers):
        layerpath = dir_pretrained + 'layer' + str(i) + '_snapshot_stacked_sda2.p'

        # A snapshot for this layer already exists: load it and skip the
        # pre-training of this layer.
        if os.path.isfile(layerpath):
            with open(layerpath, 'r') as snapshot:
                ssda.load(snapshot)
            continue

        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):

            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
            print('Pre-training layer %i, epoch %d, cost  %s'
                  % (i, epoch, numpy.mean(c)))

        # copy the pre-trained dA parameters over to the hidden sigmoid
        # layers and to the mirrored output sigmoid layers
        ssda.sigmoid_layers[i].W.set_value(ssda.dA_layers[i].W.eval())
        ssda.sigmoid_layers[i].b.set_value(ssda.dA_layers[i].b.eval())
        ssda.out_sigmoid_layers[-i-1].W.set_value(ssda.dA_layers[i].W.T.eval())
        ssda.out_sigmoid_layers[-i-1].b.set_value(ssda.dA_layers[i].b_prime.get_value())

        # dump snapshot; the context manager flushes and closes the file
        with open(layerpath, 'w') as snapshot:
            ssda.dump(snapshot)

    end_time = timeit.default_timer()

    sys.stderr.write('The pretraining code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')

    ########################
    # FINETUNING THE MODEL
    ########################
    # pre-load partially finetuned version, if it exists
    if os.path.isfile(path_finetuned_pre):
        with open(path_finetuned_pre, 'r') as snapshot:
            ssda.load(snapshot)

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model, valid_xtropy_logloss = ssda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetuning the model')

    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Initialised up front so the final report cannot hit an unbound name
    # when no validation pass ever improves (e.g. training_epochs=0).
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_fn(minibatch_index)
            # global minibatch counter (named to avoid shadowing iter())
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f  %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100., ))
                # snapshot after every validation pass
                with open(path_finetuned_post, 'w') as snapshot:
                    ssda.dump(snapshot)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    sys.stderr.write('The training code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
    with open(path_finetuned_post, 'w') as snapshot:
        ssda.dump(snapshot)

    return ssda
Exemplo n.º 34
0
def test_ssDA_nopretraining(finetune_lr=0.1, pretraining_epochs=15,
             pretrain_lr=0.001, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1,
             data_dir = '../data/'):
    """
    Fine-tune an ``ssDA`` model without a layer-wise pre-training phase,
    writing snapshots to disk along the way.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: accepted but not used by this function

    :type pretrain_lr: float
    :param pretrain_lr: accepted but not used by this function

    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs

    :type dataset: string
    :param dataset: file name of the pickled dataset inside ``data_dir``

    :type batch_size: int
    :param batch_size: minibatch size used for fine-tuning

    :type data_dir: string
    :param data_dir: directory holding the dataset, the stacked-DA
                     parameters and the snapshot files

    :return: the ``ssDA`` instance after fine-tuning
    """
    # Imported locally so this block does not rely on a file-level import.
    import timeit

    xtropy_fraction = 1
    path_finetuned_pre = os.path.join(data_dir,'train_snapshots/stacked_sda/stackedSDA_nopretrained_ReLU.p')
    path_finetuned_post = os.path.join(data_dir,'train_snapshots/stacked_sda/stackedSDA_nopretrained_ReLU_post.p')
    path_stacked_da = os.path.join(data_dir,'Stacked_DA_params.p')

    datasets = load_data(os.path.join(data_dir,dataset))

    # Only the training inputs are needed directly; the full `datasets`
    # object is handed to build_finetune_functions below.
    train_set_x, train_set_y = datasets[0]

    # number of whole minibatches in the training set (floor division keeps
    # the count an integer for xrange)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    # construct the stacked denoising autoencoder class
    # NOTE(review): this handle is passed to the constructor and never closed
    # here; it is left as-is because it is not visible whether ssDA reads it
    # eagerly -- confirm and close it if so.
    ssda = ssDA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[1000, 1000, 1000, 15],
        f_load_SDA=open(path_stacked_da, 'r'),
        xtropy_fraction=xtropy_fraction
    )

    # finetune training
    # pre-load partially finetuned version, if it exists
    if os.path.isfile(path_finetuned_pre):
        with open(path_finetuned_pre, 'r') as snapshot:
            ssda.load(snapshot)

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model, valid_xtropy_logloss = ssda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetuning the model')

    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Initialised up front so the final report cannot hit an unbound name
    # when no validation pass ever improves (e.g. training_epochs=0).
    best_iter = 0
    test_score = 0.
    # Wall-clock timer; time.clock() is CPU time on Unix and was removed in
    # Python 3.8.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_fn(minibatch_index)
            # global minibatch counter (named to avoid shadowing iter())
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f  %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100., ))
                # snapshot after every validation pass
                with open(path_finetuned_post, 'w') as snapshot:
                    ssda.dump(snapshot)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    sys.stderr.write('The training code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
    with open(path_finetuned_post, 'w') as snapshot:
        ssda.dump(snapshot)

    return ssda
Exemplo n.º 35
0
def test_rbm(dataset,learning_rate=0.1, training_epochs=5, batch_size=4,n_chains=4,
             n_hidden=7):
    """
    Train an RBM with Theano and project ``dataset`` through its weights.

    The function trains the RBM on the training split returned by
    ``load_data(dataset)``, prints the learned weights and biases, writes
    the projected feature matrix to ``enhancedFMatrix.csv`` (space
    separated, no header, no index, 4 decimals) and returns it.

    :param dataset: numpy array; it is handed to ``load_data`` and is also
                    multiplied directly with the learned weight matrix
                    (NOTE(review): what ``load_data`` accepts is not visible
                    here -- confirm)

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of consecutive test rows used to initialise the
                     visible state of the sampling chain

    :param n_hidden: number of hidden units; also passed as ``n_visible``
                     and used as the width of the persistent training chain

    :return: ``numpy.dot(dataset, rbm.W)`` evaluated with the trained weights

    """
    datasets = load_data(dataset)

    # TODO(review): the original author flagged the split selection below
    # as something to change.  The label halves are never used here.
    train_set_x, train_set_y = datasets[0]

    test_set_x, test_set_y = datasets[2]

    # number of whole minibatches in the training split (remainder rows
    # are dropped by the floor division)
    n_train_batches = train_set_x.shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # symbolic minibatch fed to the RBM
    shared_x = theano.shared(train_set_x)
    # fixed seed; the same generator seeds the Theano stream, is handed to
    # the RBM, and later picks the test rows, so call order matters
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    # NOTE(review): n_visible is set to n_hidden rather than derived from
    # the data width; this presumably requires the data to have exactly
    # n_hidden columns -- confirm against the intended input.
    rbm = RBM(input=x, n_visible=n_hidden,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the parameter updates for k=15 Gibbs steps, using
    # the persistent chain allocated above
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################

    # start-snippet-5
    # train_rbm returns the cost for minibatch `index` and applies the
    # parameter updates as a side effect; `givens` substitutes the
    # matching slice of the shared training data for x
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: shared_x[index * batch_size: (index + 1) * batch_size]
        },
        name='train_rbm'
    )

    start_time = timeit.default_timer()

    # TODO(review): the original author flagged this training loop as
    # something to change.
    # go through training epochs
    for epoch in range(training_epochs):

        # go through the training set, collecting one cost per minibatch
        mean_cost = []
        for batch_index in range(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost))


    end_time = timeit.default_timer()

    pretraining_time = (end_time - start_time)

    print ('Training took %f minutes' % (pretraining_time / 60.))
    # end-snippet-5 start-snippet-6
    #################################
    #     Sampling from the RBM     #
    #################################
    # TODO(review): the original author flagged this section as something
    # to change.
    # find out the number of test samples
    number_of_test_samples = test_set_x.shape[0]
    print(number_of_test_samples)

    # pick a random window of n_chains consecutive test examples, with
    # which to initialize the persistent visible chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        numpy.asarray(
            test_set_x[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )
    # end-snippet-6 start-snippet-7
    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field) define a
    # function that does `plot_every` steps before returning the
    # sample
    (
        [
            presig_hids,
            hid_mfs,
            hid_samples,
            presig_vis,
            vis_mfs,
            vis_samples
        ],
        updates
    ) = theano.scan(
        rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every,
        name="gibbs_vhv"
    )

    # add to updates the shared variable that takes care of our persistent
    # chain, so each call continues from the last visible sample
    # TODO(review): the original author flagged this line as something to
    # change.
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain; it
    # returns the last mean-field activations and the last visible samples.
    # NOTE(review): sample_fn is compiled but never called in this function.
    sample_fn = theano.function(
        [],
        [
            vis_mfs[-1],
            vis_samples[-1]
        ],
        updates=updates,
        name='sample_fn'
    )
    # transposed views of the learned weights and biases, printed for
    # inspection
    W =rbm.W.get_value(borrow=True).T
    H =rbm.hbias.get_value(borrow=True).T
    V = rbm.vbias.get_value(borrow=True).T
    print(W)
    print(H)
    print(V)

    print("\n\n\nEnhanced Feature Matrix: ")
    # W is already rbm.W transposed, so transposing it again multiplies
    # the raw dataset by rbm.W itself
    temp = numpy.dot(dataset, numpy.transpose(W))
    print(temp)
    dataframe = pd.DataFrame(data=temp.astype(float))
    dataframe.to_csv('enhancedFMatrix.csv', sep=' ', header=False, float_format='%.4f', index=False)
    return temp
Exemplo n.º 36
0
    import cPickle
    n_ins, hidden_layers_sizes, n_outs, corruption_levels, params = cPickle.load(open(filename, 'rb'))
    nsda = SdA(n_ins = n_ins, 
               hidden_layers_sizes = hidden_layers_sizes, 
               n_outs = n_outs, 
               corruption_levels = corruption_levels)
    for ele, data in zip(nsda.params, params):
        ele.set_value(data)
    print nsda
    return nsda 

# Script entry point: manual smoke test for raw_dump / raw_load.
if __name__ == '__main__':
    
    # Manual switch: set to 1 to fit a small SdA and round-trip it
    # through raw_dump / raw_load.
    train_ = 0
    if train_: 
        datasets = load_data('../../../Data/mnist/mnist.pkl.gz', 1000)

        sda = SdA(n_ins=28 * 28,
                  hidden_layers_sizes=[10, 10, 10],
                  n_outs=10)

        sda.fit(datasets)

        # dump the fitted model, then immediately reload it from the
        # same file
        raw_dump(sda, 'testraw.dat')
        raw_load('testraw.dat')

    # Manual switch: set to 1 to reload the previously dumped model.
    test_ = 1
    if test_:
        sda = raw_load('testraw.dat')
        # NOTE(review): the statement below is unfinished (attribute
        # access with no attribute name); the snippet ends here.
        sda.
#here
Exemplo n.º 37
0
def test_DBN(finetune_lr=0.1,pretraining_epochs=100,
             pretrain_lr=0.01,k=1,training_epoch=1000,
             dataset='../data/mnist.pkl.gz',batch_size=10):
    """
    Train and test a deep belief network (DBN).

    The network is pretrained layer by layer, then fine-tuned with early
    stopping driven by the validation loss.  Progress is printed to stdout
    and the run times to stderr.

    :param finetune_lr: float   learning rate used in the fine-tuning stage
    :param pretraining_epochs: int number of epochs used for pretraining
    :param pretrain_lr: float learning rate used in the pretraining stage
    :param k: forwarded to ``dbn.pretraining_functions`` (presumably the
              number of Gibbs steps per update -- confirm against DBN)
    :param training_epoch: int maximum number of fine-tuning epochs
    :param dataset: str path to the dataset
    :param batch_size: int size of a minibatch
    :return: None
    """
    ##########################
    #  Model initialization  #
    ##########################
    datasets=load_data(dataset)
    train_set_x,train_set_y=datasets[0]
    valid_set_x,valid_set_y=datasets[1]
    test_set_x,test_set_y=datasets[2]

    # compute the number of minibatches
    n_train_batches=train_set_x.get_value(borrow=True).shape[0]/batch_size
    # numpy random number generator with a fixed seed
    numpy_rng=numpy.random.RandomState(123)
    print '...building the model'
    # instantiate the DBN with three hidden layers
    dbn=DBN(numpy_rng,n_ins=28*28,hidden_layers_sizes=[1000,1000,1000],n_outs=10)

    ##########################
    #      Pretraining       #
    ##########################
    print "...getting the pretraining functions"
    pretraining_fns=dbn.pretraining_functions(train_set_x=train_set_x,batch_size=batch_size,k=k)

    print "...pretraining the model"
    start_time=time.clock()
    # pretrain layer by layer
    for i in xrange(dbn.n_layers):
        # go through the pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through every minibatch
            c=[] # list collecting the cost returned for each minibatch
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,lr=pretrain_lr))
            print "pretraining layer %i, epoch %i,cost " %(i,epoch),
            print numpy.mean(c)
    end_time=time.clock()
    print >>sys.stderr,("The pretraining code for file "+
                        os.path.split(__file__)[1]+
                        " ran for %0.2fm")%((end_time-start_time)/60.)

    ##########################
    #      Fine-tuning       #
    ##########################
    # build the training, validation and test functions used for fine-tuning
    print "...getting finetuning functions"
    train_fn,valid_model,test_model=dbn.build_finetune_function(datasets=datasets,
                                    batch_size=batch_size,learning_rate=finetune_lr)

    print "...finetuning the model"
    # early-stopping parameters
    patience=4*n_train_batches
    patience_increase=2.
    improvement_threshold=0.995 # relative improvement that counts as significant
    # number of minibatches between two checks on the validation set;
    # with patience = 4 * n_train_batches this is once per epoch
    validation_frequency=min(n_train_batches,patience/2)
    # initial state for the fine-tuning loop
    best_params=None
    best_validation_loss=numpy.inf
    test_score=0.
    start_time=time.clock()
    done_looping=False
    epoch=0

    # stop when the epoch limit is reached or done_looping is set
    while(epoch<training_epoch)and(not done_looping):
        epoch+=1
        for minibath_index in xrange(n_train_batches):
            minibath_avg_lost=train_fn(minibath_index)
            iter=(epoch-1)*n_train_batches+minibath_index # global index of the current minibatch

            # check whether a validation pass is due
            if (iter+1)%validation_frequency==0:
                validation_losses=valid_model()
                this_validation_loss=numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%') %\
                     (epoch,minibath_index+1,n_train_batches,this_validation_loss*100)

                # if the current validation loss beats the best one so far
                if this_validation_loss<best_validation_loss:
                    # extend patience when the improvement is significant
                    if this_validation_loss<best_validation_loss*improvement_threshold:
                        patience=max(patience,iter*patience_increase)

                    # save the best validation loss and its minibatch index
                    best_validation_loss=this_validation_loss
                    best_iter=iter

                    # evaluate on the test set
                    test_losses=test_model()
                    test_score=numpy.mean(test_losses)
                    print "epoch %i, minibath %i/%i, test error of best model %f %%" %\
                    (epoch,minibath_index+1,n_train_batches,test_score*100)

            # out of patience: leave both loops
            if patience<=iter:
                done_looping=True
                break
    end_time=time.clock()
    print "Optimizaiton complete with best validation score of %f %%,"\
          "with best performance %f %%"%(best_validation_loss*100.,test_score*100.)

    print >> sys.stderr, ("The fine tuning code for file "+
                          os.path.split(__file__)[1]+
                          " ran for %.2fm")%((end_time-start_time)/60.)
Exemplo n.º 38
0
def test_dA(learning_rate=0.1,
            training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20,
            output_folder='dA_plots'):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the DeNosing
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the picked dataset

    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # start-snippet-2
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    # end-snippet-2

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    da = dA(numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=28 * 28,
            n_hidden=500)

    cost, updates = da.get_cost_updates(corruption_level=0.,
                                        learning_rate=learning_rate)

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through trainng set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((training_time) / 60.))
    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    # start-snippet-3
    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    da = dA(numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=28 * 28,
            n_hidden=500)

    cost, updates = da.get_cost_updates(corruption_level=0.3,
                                        learning_rate=learning_rate)

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through trainng set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The 30% corruption code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          (training_time / 60.))
    # end-snippet-3

    # start-snippet-4
    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')
    # end-snippet-4

    os.chdir('../')
Exemplo n.º 39
0
Arquivo: dA.py Projeto: slykid/Python3
def test_dA(learning_rate=0.1,
            training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20,
            output_folder='dA_plots'):
    """
    Train two denoising autoencoders (0% and 30% input corruption) and
    save their learned filters as images.

    The images ``filters_corruption_0.png`` and
    ``filters_corruption_30.png`` are written into ``output_folder``,
    which is created if missing.  The working directory is switched to
    ``output_folder`` while the models run and is always restored to the
    caller's directory afterwards, even if training raises.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: number of training rows per minibatch

    :type output_folder: string
    :param output_folder: directory that receives the filter images
    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # number of whole minibatches; floor division keeps this an int so it
    # can be fed to range() directly
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # symbolic minibatch

    def _train_and_save_filters(corruption_level, label, image_name):
        # Build, train and render one autoencoder for the given corruption.
        # The generators are re-seeded so both models start identically.
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2**30))

        da = dA(numpy_rng=rng,
                theano_rng=theano_rng,
                input=x,
                n_visible=28 * 28,
                n_hidden=500)

        cost, updates = da.get_cost_updates(corruption_level=corruption_level,
                                            learning_rate=learning_rate)

        # `givens` substitutes the selected slice of the training data for x
        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

        start_time = timeit.default_timer()

        for epoch in range(training_epochs):
            # one cost value per minibatch
            c = [train_da(batch_index)
                 for batch_index in range(n_train_batches)]

            print('Training epoch %d, cost ' % epoch, numpy.mean(c))

        training_time = timeit.default_timer() - start_time

        # only the last literal is %-formatted, so the '%' inside `label`
        # is never interpreted as a format specifier
        print(('The ' + label + ' code for file ' +
               os.path.split(__file__)[1] +
               ' ran for %.2fm' % (training_time / 60.)),
              file=sys.stderr)

        image = Image.fromarray(
            tile_raster_images(X=da.W.get_value(borrow=True).T,
                               img_shape=(28, 28),
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save(image_name)

    # remember where we started: '../' would only be correct for a
    # single-level relative output folder
    original_cwd = os.getcwd()
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    try:
        _train_and_save_filters(0., 'no corruption',
                                'filters_corruption_0.png')
        _train_and_save_filters(0.3, '30% corruption',
                                'filters_corruption_30.png')
    finally:
        os.chdir(original_cwd)
Exemplo n.º 40
0
def test_dA(learning_rate=0.1,
            training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20,
            output_folder='dA_plots'):
    """
    Train two denoising autoencoders (0% and 30% input corruption), save
    their learned filters as images and plot a histogram of each weight
    matrix.

    The images ``filters_corruption_0.png`` and
    ``filters_corruption_30.png`` are written into ``output_folder``,
    which is created if missing.  The histograms are drawn on matplotlib
    figures 2 and 3 and shown once both models are trained.  The working
    directory is switched to ``output_folder`` while the models run and is
    always restored to the caller's directory afterwards, even if training
    raises.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the DeNosing
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the picked dataset

    :type batch_size: int
    :param batch_size: number of training rows per minibatch

    :type output_folder: string
    :param output_folder: directory that receives the filter images

    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # number of whole minibatches in the training set
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    def _train_plot_and_save(corruption_level, label, image_name, figure_id):
        # Build, train and render one autoencoder for the given corruption.
        # The generators are re-seeded so both models start identically.
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2**30))

        da = dA(numpy_rng=rng,
                theano_rng=theano_rng,
                input=x,
                n_visible=28 * 28,
                n_hidden=500)

        cost, updates = da.get_cost_updates(corruption_level=corruption_level,
                                            learning_rate=learning_rate)

        # The 'givens' below picks out one slice of the input matrix and
        # substitutes it for x in the graph, so the compiled function only
        # takes 'index' as an input.
        train_da = theano.function(
            [index],
            cost,
            updates=updates,
            givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

        start_time = timeit.default_timer()

        # go through training epochs
        for epoch in range(training_epochs):
            # go through the training set, one cost per minibatch
            c = [train_da(batch_index)
                 for batch_index in range(n_train_batches)]

            print('Training epoch %d, cost ' % epoch, numpy.mean(c))

        training_time = timeit.default_timer() - start_time

        # only the last literal is %-formatted, so the '%' inside `label`
        # is never interpreted as a format specifier
        print(('The ' + label + ' code for file ' +
               os.path.split(__file__)[1] +
               ' ran for %.2fm' % (training_time / 60.)),
              file=sys.stderr)

        image = Image.fromarray(
            tile_raster_images(X=da.W.get_value(borrow=True).T,
                               img_shape=(28, 28),
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save(image_name)

        # normalized histogram of all learned weights; `density` replaces
        # the `normed` keyword that newer matplotlib releases removed
        tempW = da.W.get_value()
        plt.figure(figure_id)
        plt.hist(numpy.reshape(tempW, tempW.size),
                 100,
                 density=True,
                 facecolor='green')

    # remember where we started: '../' would only be correct for a
    # single-level relative output folder
    original_cwd = os.getcwd()
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    try:
        _train_plot_and_save(0., 'no corruption',
                             'filters_corruption_0.png', 2)
        _train_plot_and_save(0.3, '30% corruption',
                             'filters_corruption_30.png', 3)
        plt.show()
    finally:
        os.chdir(original_cwd)
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=200,
                    dataset=DataSet,
                    nkerns=[cls1, cls2],
                    batch_size=100):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    print(type(train_set_x))

    #train_set_x.set_value(train_set_x.get_value(borrow=True)[:,:540])
    #valid_set_x.set_value(valid_set_x.get_value(borrow=True)[:,:540])
    #test_set_x.set_value(test_set_x.get_value(borrow=True)[:,:540])

    #train_set_x = train_set_x / 100
    #valid_set_x = valid_set_x / 100
    #test_set_x = test_set_x / 100

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    #n_test_batches = (n_test_batches/batch_size) + (n_test_batches % batch_size > 0)

    print(n_test_batches)
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    Alr = T.scalar('Alr')
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ishape = (nFB, nFs)  # this is the size of MNIST images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size,28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    dFeatureV = iFMs * nFB * nFs
    xinp = x[:, :dFeatureV]

    #    print (x.shahpe)

    layer0_input = xinp.reshape((batch_size, iFMs, nFB, nFs))
    layer1H_input = x[:, dFeatureV:]
    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1,28-5+1)=(24,24)
    # maxpooling reduces this further to (24/2,24/2) = (12,12)
    # 4D output tensor is thus of shape (batch_size,nkerns[0],12,12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, iFMs, nFB, nFs),
                                filter_shape=(nkerns[0], iFMs, fsx, fsy),
                                poolsize=(p, p))
    cl2x = (nFB - fsx + 1) / p
    cl2y = (nFs - fsy + 1) / p
    layer1H = HiddenLayer(rng,
                          input=layer1H_input,
                          n_in=14,
                          n_out=nhus,
                          activation=T.tanh)
    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1,12-5+1)=(8,8)
    # maxpooling reduces this further to (8/2,8/2) = (4,4)
    # 4D output tensor is thus of shape (nkerns[0],nkerns[1],4,4)

    #layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
    #        image_shape=(batch_size, nkerns[0], cl2x, cl2y),
    #        filter_shape=(nkerns[1], nkerns[0], fsx, 1), poolsize=(p2, 1))
    #hl1 = (cl2x - fsx + 1)/p2
    hl1 = cl2x * cl2y
    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size,num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (20,32*4*4) = (20,512)
    layer2_input = layer0.output.flatten(2)
    #layer2_inputT = T.concatenate([layer2_input,x[:,dFeatureV:]],axis = 1)
    layer2_inputT = T.concatenate([layer2_input, layer1H.output], axis=1)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_inputT,
                         n_in=(nkerns[0] * hl1 * 1) + nhus,
                         n_out=nhu1,
                         activation=T.tanh)

    #layer22 = HiddenLayer(rng, input=layer2.output, n_in=nhu1,
    #                     n_out=nhu1, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=nhu1, n_out=n_out)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)
    #yPred = layer3.ypred(layer2.output)
    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index], [layer3.errors(y), layer3.y_pred],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    #params = layer3.params + layer22.params + layer2.params + layer1.params + layer0.params
    params = layer3.params + layer2.params + layer1H.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i],grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        #updates.append((param_i, param_i - learning_rate * grad_i))
        updates.append((param_i, param_i - Alr * grad_i))

    train_model = theano.function(
        [index, Alr],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size][:],
            y: train_set_y[index * batch_size:(index + 1) * batch_size][:]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    #best_params = None
    best_params = []
    best_validation_loss = numpy.inf
    prev_validation_loss = 200

    best_iter = 0
    test_score = 0.
    start_time = time.clock()
    Alrc = 0.2
    AlrE = 0.00001
    epochC = 0
    epoch = 0
    done_looping = False
    for param in params:
        best_params.append(param.get_value())
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        epochC = epochC + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, Alrc)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                lossratio = (this_validation_loss -
                             prev_validation_loss) / (prev_validation_loss + 1)
                print(lossratio)
                print('epoch %i, minibatch %i/%i, validation error %f, lr %f %%' % \
                      (epoch, minibatch_index + 1, n_train_batches, \
                       this_validation_loss * 100., Alrc))

                # if we got the best validation score until now
                #if this_validation_loss < best_validation_loss:
                if lossratio <= 0.0:
                    for i in range(len(params)):
                        best_params[i] = params[i].get_value()
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    prev_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    #tm =  test_model(0)

                    yP = numpy.asarray([])
                    test_losses = [
                        test_model(i)[0] for i in xrange(n_test_batches)
                    ]
                    for i in xrange(n_test_batches):
                        yP = numpy.concatenate((yP, test_model(i)[1]))
                    print(yP.shape)
                    test_score = numpy.mean(test_losses)

                    #yP = yPred#yPred(layer2.output.owner.inputs[0].get_value())
                    y = test_set_y.owner.inputs[0].get_value()[:3000]

                    print(yP.shape)
                    print(y.shape)
                    I1 = numpy.nonzero(y == 0.0)
                    I2 = numpy.nonzero(y == 1.0)
                    I3 = numpy.nonzero(y == 2.0)
                    I4 = numpy.nonzero(y == 3.0)
                    print(I1[0].shape)
                    print(I2[0].shape)
                    print(I3[0].shape)
                    print(I4[0].shape)
                    I11 = numpy.nonzero(yP[I1[0]] == 0)
                    I12 = numpy.nonzero(yP[I1[0]] == 1)
                    I13 = numpy.nonzero(yP[I1[0]] == 2)
                    I14 = numpy.nonzero(yP[I1[0]] == 3)
                    I21 = numpy.nonzero(yP[I2[0]] == 0)
                    I22 = numpy.nonzero(yP[I2[0]] == 1)
                    I23 = numpy.nonzero(yP[I2[0]] == 2)
                    I24 = numpy.nonzero(yP[I2[0]] == 3)
                    I31 = numpy.nonzero(yP[I3[0]] == 0)
                    I32 = numpy.nonzero(yP[I3[0]] == 1)
                    I33 = numpy.nonzero(yP[I3[0]] == 2)
                    I34 = numpy.nonzero(yP[I3[0]] == 3)
                    I41 = numpy.nonzero(yP[I4[0]] == 0)
                    I42 = numpy.nonzero(yP[I4[0]] == 1)
                    I43 = numpy.nonzero(yP[I4[0]] == 2)
                    I44 = numpy.nonzero(yP[I4[0]] == 3)

                    acc1 = float(float(I11[0].size) / float(I1[0].size))
                    acc2 = float(float(I22[0].size) / float(I2[0].size))
                    if n_out == 3:
                        acc3 = float(float(I33[0].size) / float(I3[0].size))
                    elif n_out == 4:
                        acc3 = float(float(I33[0].size) / float(I3[0].size))
                        acc4 = float(float(I44[0].size) / float(I4[0].size))
                    else:
                        acc3 = 0
                        acc4 = 0
                    print((
                        '     epoch %i, minibatch %i/%i, test error of '
                        'best model %f, acc1 = %f, acc2 = %f, acc3 = %f, acc4 = %f, I11 = %i, I12 = %i, I13 = %i, I14 = %i, I21 = %i, I22 = %i, I23 = %i, I24 = %i, I31 = %i, I32 = %i, I33 = %i, I34 = %i, I41 = %i, I42 = %i, I43 = %i, I44 = %i %%'
                    ) % (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100., acc1 * 100., acc2 * 100.,
                         acc3 * 100, acc4 * 100, I11[0].size, I12[0].size,
                         I13[0].size, I14[0].size, I21[0].size, I22[0].size,
                         I23[0].size, I24[0].size, I31[0].size, I32[0].size,
                         I33[0].size, I34[0].size, I41[0].size, I42[0].size,
                         I43[0].size, I44[0].size))

                    #print(('     epoch %i, minibatch %i/%i, test error of best '
                    #       'model %f %%') %
                    #      (epoch, minibatch_index + 1, n_train_batches,
                    #       test_score * 100.))
                else:
                    if Alrc <= AlrE:
                        done_looping = True
                        break
                    elif epochC > 40:
                        Alrc = Alrc / 2
                        for param, best_param in zip(params, best_params):
                            param.set_value(best_param)
                        epochC = 0
            #if patience <= iter:
            #    done_looping = True
            #    break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i,'\
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    #print >> sys.stderr, ('The code for file ' +
    #                      os.path.split(__file__)[1] +
    #                      ' ran for %.2fm' % ((end_time - start_time) / 60.))
    OF = open(outFile, 'a')
    print(DataSet,
          n_out,
          fsx,
          fsy,
          p,
          cls1,
          cls2,
          nhu1,
          nFB,
          nFs,
          iFMs,
          nhus,
          batch_size,
          test_score * 100.,
          acc1 * 100.,
          acc2 * 100.,
          acc3 * 100,
          acc4 * 100,
          I11[0].size,
          I12[0].size,
          I13[0].size,
          I14[0].size,
          I21[0].size,
          I22[0].size,
          I23[0].size,
          I24[0].size,
          I31[0].size,
          I32[0].size,
          I33[0].size,
          I34[0].size,
          I41[0].size,
          I42[0].size,
          I43[0].size,
          I44[0].size,
          file=OF)

    OF.close()
Exemplo n.º 42
0
def mlp_run(train_set,
            valid_set,
            test_set,
            learning_rate=0.01,
            L1_reg=0.00,
            L2_reg=0.0001,
            n_epochs=1000,
            batch_size=20,
            n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model
    """
    print 'loading ', train_set, ' for train'
    train_set_x, train_set_y = load_data(train_set)
    print 'loading ', valid_set, ' for valid'
    valid_set_x, valid_set_y = load_data(valid_set)
    print 'loading ', test_set, ' for test'
    if test_set != valid_set:
        test_set_x, test_set_y = load_data(test_set)
    else:
        test_set_x, test_set_y = valid_set_x, valid_set_y

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print "train_set_x size:", train_set_x.get_value(borrow=True).shape[0]
    print "batch_size:", batch_size
    print "n_train_batches:", n_train_batches
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    total_dim = train_set_x.get_value(borrow=True).shape[1]

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=total_dim,
                     n_hidden=n_hidden,
                     n_out=2)

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)
    # end-snippet-4

    # start-snippet-5
    # compute the gradient of cost with respect to theta (sotred in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_auc = theano.function(inputs=[],
                                   outputs=classifier.auc(y),
                                   givens={
                                       x: valid_set_x,
                                       y: valid_set_y
                                   })

    test_auc = theano.function(inputs=[],
                               outputs=classifier.auc(y),
                               givens={
                                   x: test_set_x,
                                   y: test_set_y
                               })

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = FLAGS.iter  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    print "n_train_batches:", n_train_batches
    print "validation_frequency:", validation_frequency

    best_validation_loss = numpy.inf
    best_auc = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        if epoch == 10:
            learning_rate *= 0.8
        if epoch == 20:
            learning_rate *= 0.5
        if epoch == 30:
            learning_rate = 0.01

        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                auc_values = [validate_auc()]
                auc = numpy.mean(auc_values)
                print "current valid auc: ", auc, " best auc: ", best_auc, " imporve: ", auc - best_auc, " significant?: ", auc - best_auc > FLAGS.min_improvement
                #print validate_auc(0)

                if auc > best_auc:
                    if auc - best_auc > FLAGS.min_improvement:
                        print 'before patience:', patience, ' iter:', iter
                        patience = max(patience, iter * patience_increase)
                        print 'after patience:', patience
                    best_auc = auc
                    auc_values = [test_auc()]
                    testauc = numpy.mean(auc_values)
                    print "test auc: ", testauc
                    #cPickle.dump(classifier, open('best_model.pkl', 'wb'))

            if patience <= iter:
                done_looping = True
                print "patience:", patience, "iter:", iter, "done_looping:", done_looping
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print 'best valid auc is ', best_auc
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
Exemplo n.º 43
0
def test_SdA(finetune_lr=0.1,
             pretraining_epochs=15,
             pretrain_lr=0.001,
             training_epochs=1000,
             dataset='mnist.pkl.gz',
             batch_size=1):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type n_iter: int
    :param n_iter: maximal number of iterations ot run the optimizer

    :type dataset: string
    :param dataset: path the the pickled dataset

    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng,
              n_ins=28 * 28,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print '... finetunning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] + ' ran for %.2fm' %
                          ((end_time - start_time) / 60.))
Exemplo n.º 44
0
def test_rbm(learning_rate=0.1,
             training_epochs=5,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_chains=20,
             n_samples=10,
             output_folder='newRBM_plots',
             n_hidden=625,
             n_output=10):
    """
    Demonstrate how to train and afterwards sample from it using Theano.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param dataset: path the the pickled dataset

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    """
    datasets = load_data(dataset, 1)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # train_set_y = convert_yval(train_set_y)
    # test_set_y = convert_yval(train_set_y)

    # print type(train_set_x)

    # print 'sadfsdf'
    # print train_set_x[0,].eval()
    # print test_set_y[0:10].eval()

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]
    print number_of_test_samples

    # print n_train_batches

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.matrix('y', dtype='int32')
    test_x = T.matrix('test_x')
    test_y = T.matrix('test_y', dtype='int32')

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = classRBM(inputx=x,
                   inputy=y,
                   testx=test_x,
                   testy=test_y,
                   n_visible=28 * 28,
                   n_hidden=n_hidden,
                   n_output=n_output,
                   numpy_rng=rng,
                   theano_rng=theano_rng,
                   batch_size=batch_size,
                   n_samples=number_of_test_samples)

    # get the cost and the gradient corresponding to one step of CD-15
    # cost, updates = rbm.get_cost_updates(lr=learning_rate, k=15)
    cost, updates = rbm.get_cost_updates(lr=learning_rate, k=1)

    # energy = theano.function([],rbm.free_energy1())
    # print 'Energy = ',energy

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # start-snippet-5
    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        },
        name='train_rbm')

    plotting_time = 0.
    start_time = time.clock()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        print 'number of batches = ', n_train_batches
        print 'Epoch = ', epoch
        for batch_index in xrange(n_train_batches):
            if (batch_index % 500 == 0):
                print batch_index
            mean_cost += [train_rbm(batch_index)]
            # train_rbm(batch_index)
            # print 'epoch 1######'

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)
        # print 'Training epoch %d, cost is ' % epoch

        # num_correct = get_num_correct(rbm.W.get_value(borrow=True),
        # rbm.U.get_value(borrow=True),rbm.hbias.get_value(borrow=True),rbm.outbias.get_value(borrow=True),
        # test_set_x.eval(),test_set_y.eval(),number_of_test_samples,n_output)
        # print 'Accuracy = ', float(num_correct)/number_of_test_samples

        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = Image.fromarray(
            tile_raster_images(X=rbm.W.get_value(borrow=True).T,
                               img_shape=(28, 28),
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)

        image = Image.fromarray(
            tile_raster_images(X=rbm.U.get_value(borrow=True).T,
                               img_shape=(25, 25),
                               tile_shape=(1, 10),
                               tile_spacing=(1, 1)))
        image.save('U_filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()

    pretraining_time = (end_time - start_time) - plotting_time

    print('Training took %f minutes' % (pretraining_time / 60.))
Exemplo n.º 45
0
def test_static_activations(rng,
                            batch_size=1,
                            learning_rate=0.01,
                            n_epochs=1000,
                            L1_reg=0.0,
                            L2_reg=0.0001):
    print "Loading data"
    print "... MNIST"
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]

    x_size = train_set_x.shape[1].eval()

    temp = train_set_x.get_value(borrow=True, return_internal_type=True)
    train_set_x = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                         borrow=True,
                         name='train_set_x')

    valid_set_x, valid_set_y = datasets[1]

    temp = valid_set_x.get_value(borrow=True, return_internal_type=True)
    valid_set_x = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                         borrow=True,
                         name='valid_set_x')

    test_set_x, test_set_y = datasets[2]

    temp = test_set_x.get_value(borrow=True, return_internal_type=True)
    test_set_x = shared(temp.reshape((temp.shape[0], 1, temp.shape[1])),
                        borrow=True,
                        name='valid_set_x')

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    n_in = x_size
    n_units_per = 32
    n_out = 500

    dims = []

    print "... Activation patterns"
    ins, outs = cnx.load('ins_and_outs.pkl')

    in_idxs = []
    out_idxs = []
    for i in range(len(ins)):
        dims.append((ins[i].shape[0], outs[i].shape[0]))
        in_idxs.append(
            shared(cnx.repeat(ins[i], batch_size), name='in_idxs_%i' % i))
        out_idxs.append(
            shared(cnx.repeat(outs[i], batch_size), name='out_idxs_%i' % i))

    print "Building model"
    index = T.lscalar('index')
    x = T.tensor3('x', dtype=config.floatX)
    y = T.ivector('y')

    layers = []

    n_in = 1
    n_out = 500
    layers.append(
        HiddenBlockLayer((n_in, x_size), (n_out, n_units_per),
                         in_idxs[0],
                         out_idxs[0],
                         batch_size,
                         activation=T.tanh,
                         name='layer_' + str(len(layers))))

    n_in = n_out
    n_out = 10
    layers.append(
        HiddenBlockLayer((n_in, n_units_per), (n_out, n_units_per),
                         in_idxs[1],
                         out_idxs[1],
                         batch_size,
                         None,
                         name='layer_' + str(len(layers))))
    layers[-1].W.set_value(0 * layers[-1].W.get_value())

    print "... Building cost and error equations"
    activation = x
    for layer in layers:
        activation = layer.output(activation)
    activation = T.nnet.softmax(T.mean(activation, axis=2))
    cost = add_regularization(layers, layers[-1].cost(activation, y), L1_reg,
                              L2_reg)
    error = layers[-1].error(activation, y)

    print "... Building parameter updates"
    param_updates = []
    for layer in layers:
        for param in layer.params:
            gparam = T.grad(cost, param)
            param_updates.append((param, param - learning_rate * gparam))

    print "... Compiling train function"
    train_model = function(
        [index],
        cost,
        updates=param_updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling test function"
    test_model = function(
        [index],
        error,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    print "... Compiling validate function"
    validate_model = function(
        [index],
        error,
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 100  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_params = None
    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False
    accum = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            accum = accum + minibatch_avg_cost
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                accum = accum / validation_frequency
                print "minibatch_avg_cost: ", accum

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Exemplo n.º 46
0
    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, '
           'with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The fine tuning code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.),
          file=sys.stderr)
    return train_prob, test_prob, valid_prob, dbn.rbm_layers


if __name__ == '__main__':
    dataset = 'mnist.pkl.gz'

    datasets = load_data(dataset)
    # train_set_x  shape is 500000 *784
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]
    numpy_rng = numpy.random.RandomState(123)
    train_prob, test_prob, valid_prob, rbm_layers = test_DBN(
        finetune_lr=0.1,
        pretraining_epochs=10,
        pretrain_lr=0.01,
        k=1,
        training_epochs=10,
        train_set_x=train_set_x,
        train_set_y=train_set_y,
        valid_set_x=valid_set_x,
        valid_set_y=valid_set_y,
Exemplo n.º 47
0
def test_dA_sanity(learning_rate=0.001,
                   training_epochs=500,
                   dataset='../datasets/da_sanity.pkl.gz',
                   batch_size=15):
    """
    Train a denoising autoencoder (100 visible / 80 hidden units, 30%
    corruption) on the da_sanity dataset, printing the mean minibatch cost
    after every epoch and the total run time on stderr.

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: number of training examples per minibatch

    """
    # Local import: time.clock() was removed in Python 3.8, and this keeps
    # the function independent of the module's import block.
    import timeit

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # Number of whole minibatches in the training set.  Floor division keeps
    # this an int under true division as well (a float would break range()).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # start-snippet-2
    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    # end-snippet-2

    #####################################
    # BUILDING THE MODEL 30% CORRUPTION #
    #####################################

    rng = numpy.random.RandomState(42)
    theano_rng = RandomStreams(rng.randint(2**30))

    da = dA(numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=100,
            n_hidden=80)

    cost, updates = da.get_cost_updates(corruption_level=0.3,
                                        learning_rate=learning_rate)

    # One call performs one update step on the minibatch selected by `index`
    # and returns that minibatch's cost.
    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in range(training_epochs):
        # one pass over the training set, collecting the per-minibatch costs
        c = [train_da(batch_index) for batch_index in range(n_train_batches)]

        # Single pre-formatted argument so the line prints identically with
        # the Python 2 print statement and the Python 3 print function.
        print('Training epoch %d, cost  %s' % (epoch, numpy.mean(c)))

    end_time = timeit.default_timer()

    training_time = (end_time - start_time)

    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((training_time) / 60.) + '\n')
Exemplo n.º 48
0
def evaluate_lenet5(
    learning_rate=0.1,
    n_epochs=200,
    dataset="mnist.pkl.gz",
    nkerns=[20, 50],
    batch_size=500,
):
    """ Demonstrates lenet on MNIST dataset

    Builds a two-stage convolution/pooling network followed by a tanh hidden
    layer and a logistic-regression output, trains it with minibatch SGD and
    early stopping, and pickles the layers of the best-validating model to
    ``../doc/data/best_model.pkl``.  A few training digits (before training)
    and a few test predictions (after training) are shown via
    ``display_some``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (only indexed, never
                   mutated, so the shared default list is safe)

    :type batch_size: int
    :param batch_size: number of examples per minibatch; also baked into the
                       layer shapes, so only whole minibatches are used

    :rtype: tuple of three lists
    :return: ``(err_train, err_valid, err_test)``: the training error of
             every minibatch, the mean validation error of every validation
             pass, and the mean test error recorded each time a new best
             validation score was reached
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # display some chars:
    display_some(train_set_x, train_set_y.eval(), n=5, title="label=")

    # compute number of (whole) minibatches for training, validation, testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2),
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2),
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # returns [error rate, predicted labels] for one test minibatch
    test_model = theano.function(
        [index],
        [layer3.errors(y), layer3.y_pred],
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    # returns the error rate for one validation minibatch
    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    # one SGD step on a training minibatch; returns [cost, error rate]
    train_model = theano.function(
        inputs=[index],
        outputs=[cost, layer3.errors(y)],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # The layers are pickled whenever validation improves, so the file on
    # disk holds the parameters of the best model rather than whatever the
    # last iteration left behind.
    model = [layer0, layer1, layer2, layer3]
    model_path = "../doc/data/best_model.pkl"
    best_model_saved = False

    # for error_curve plot
    err_train = []  # observe train err while training
    err_valid = []  # observe valid err while training
    err_test = []  # observe test  err while training

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # global minibatch counter across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print("training @ iter = ", iteration)
            train_outputs = train_model(minibatch_index)
            err_train.append(train_outputs[1])  # add error_train

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                err_valid.append(this_validation_loss)

                print("epoch %i, minibatch %i/%i, validation error %f %%" % (
                    epoch,
                    minibatch_index + 1,
                    n_train_batches,
                    this_validation_loss * 100.0,
                ))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)[0] for i in range(n_test_batches)
                    ]

                    test_score = numpy.mean(test_losses)

                    err_test.append(test_score)

                    print(("     epoch %i, minibatch %i/%i, test error of "
                           "best model %f %%") % (
                               epoch,
                               minibatch_index + 1,
                               n_train_batches,
                               test_score * 100.0,
                           ))

                    # save the best model
                    with open(model_path, "wb") as f:
                        pickle.dump(model, f)
                    best_model_saved = True

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print("Best validation score of %f %% obtained at iteration %i, "
          "with test performance %f %%" %
          (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0))
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" %
         ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    # No validation pass ever improved (e.g. n_epochs == 0): still leave a
    # model file behind, as callers may expect one to exist.
    if not best_model_saved:
        with open(model_path, "wb") as f:
            pickle.dump(model, f)

    # Predictions use the parameters as they are at the end of training.
    test_pred_y = test_model(0)[1]  # predict on first batch_size samples

    # display some chars using predict
    display_some(test_set_x, test_pred_y, n=5, title="pred=")  # n < batch_size
    return err_train, err_valid, err_test
Exemplo n.º 49
0
def test_DBN(finetune_lr=lr,
             pretraining_epochs=100,
             pretrain_lr=0.0025,
             k=2,
             training_epochs=NOfepoch,
             dataset=DataSet,
             batch_size=batchSize):
    """
    Pre-train and fine-tune a Deep Belief Network, then append one summary
    line (settings, test error, per-class accuracy and confusion counts) to
    the file named by the module-level ``outFile``.

    The network takes 552-dimensional inputs; its depth, layer width and
    number of outputs come from the module-level ``n_hls``, ``n_hus`` and
    ``n_out``.  Fine-tuning starts at a step size of 0.1, which is halved
    (and the best parameters restored) when validation has not improved and
    more than 40 epochs have passed since the last halving; training stops
    once the step size reaches 1e-5 or ``training_epochs`` is exhausted.

    :type finetune_lr: float
    :param finetune_lr: learning rate handed to
                        ``dbn.build_finetune_functions``
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of epochs to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    datasets = load_data(dataset)

    train_set_x, _ = datasets[0]
    _, test_set_y = datasets[2]

    # Number of whole training minibatches; floor division keeps it an int
    # under true division so it can drive range().
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng,
              n_ins=552,
              hidden_layers_sizes=[n_hus] * n_hls,
              n_outs=n_out)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    # Pre-train layer-wise
    for i in range(dbn.n_layers):
        # go through pretraining epochs
        for epoch in range(pretraining_epochs):
            # go through the training set
            c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            print('Pre-training layer ', i, ' epoch ', epoch, 'cost ',
                  numpy.mean(c))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model, gety_pred = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print('... finetunning the model')
    # Patience is only used to derive the validation frequency; stopping is
    # driven by the step-size schedule below, not by patience.
    patience = 4 * n_train_batches
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    prev_validation_loss = 200
    test_score = 0.
    current_lr = 0.1  # step size passed to train_fn; halved on stagnation
    min_lr = 0.00001  # stop once the step size has decayed to this value
    done_looping = False
    epoch = 0
    epochs_since_decay = 0
    # snapshot of the best parameters seen so far (initially the pre-trained
    # ones), restored whenever the step size is halved
    best_params = [param.get_value() for param in dbn.params]

    # Per-class accuracy (classes 0..3) and confusion counts
    # (counts[true][predicted]).  Initialised here so the final report is
    # well defined even if validation never improves.
    acc = [0, 0, 0, 0]
    counts = [[0] * 4 for _ in range(4)]

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        epochs_since_decay = epochs_since_decay + 1
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index, current_lr)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency != 0:
                continue

            validation_losses = validate_model()
            this_validation_loss = numpy.mean(validation_losses)
            # relative change with respect to the last accepted validation
            # loss; the +1 keeps the denominator away from zero
            lossratio = (this_validation_loss -
                         prev_validation_loss) / (prev_validation_loss + 1)
            print(lossratio)
            print('epoch %i, minibatch %i/%i, validation error %f, lr %f %%' % \
                  (epoch, minibatch_index + 1, n_train_batches,
                   this_validation_loss * 100., current_lr))

            if lossratio <= 0.0:
                # validation did not get worse: accept these parameters
                best_params = [param.get_value() for param in dbn.params]

                best_validation_loss = this_validation_loss
                prev_validation_loss = this_validation_loss

                # test it on the test set
                test_losses = test_model()
                test_score = numpy.mean(test_losses)
                yP = gety_pred()
                y = test_set_y.owner.inputs[0].get_value()
                print(yP.shape)
                print(y.shape)

                # positions of the test samples of each true class
                label_idx = [numpy.nonzero(y == label)[0]
                             for label in range(4)]
                for idx in label_idx:
                    print(idx.shape)

                # counts[t][p]: samples of true class t predicted as p
                counts = [[numpy.nonzero(yP[idx] == pred)[0].size
                           for pred in range(4)]
                          for idx in label_idx]

                def class_accuracy(label):
                    # fraction of class `label` predicted correctly;
                    # 0.0 when the class has no test samples
                    total = label_idx[label].size
                    if total == 0:
                        return 0.0
                    return float(counts[label][label]) / float(total)

                # classes beyond n_out are reported with accuracy 0
                acc = [class_accuracy(0), class_accuracy(1), 0, 0]
                if n_out == 3:
                    acc[2] = class_accuracy(2)
                elif n_out == 4:
                    acc[2] = class_accuracy(2)
                    acc[3] = class_accuracy(3)

                flat_counts = tuple(n for row in counts for n in row)
                print((
                    '     epoch %i, minibatch %i/%i, test error of '
                    'best model %f, acc1 = %f, acc2 = %f, acc3 = %f, acc4 = %f, I11 = %i, I12 = %i, I13 = %i, I14 = %i, I21 = %i, I22 = %i, I23 = %i, I24 = %i, I31 = %i, I32 = %i, I33 = %i, I34 = %i, I41 = %i, I42 = %i, I43 = %i, I44 = %i %%'
                ) % ((epoch, minibatch_index + 1, n_train_batches,
                      test_score * 100., acc[0] * 100., acc[1] * 100.,
                      acc[2] * 100, acc[3] * 100) + flat_counts))
            elif current_lr <= min_lr:
                # step size fully decayed: stop training
                done_looping = True
                break
            elif epochs_since_decay > 40:
                # stagnating: halve the step size and roll the model back
                # to the best parameters seen so far
                current_lr = current_lr / 2
                for param, best_param in zip(dbn.params, best_params):
                    param.set_value(best_param)
                epochs_since_decay = 0

    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%') %
          (best_validation_loss * 100., test_score * 100.))

    # NOTE(review): the summary records the module-level settings (DataSet,
    # lr, batchSize, NOfepoch), not the arguments actually passed in; they
    # only coincide when the defaults are used.
    fields = [DataSet, lr, n_hls, n_hus, n_out, batchSize, NOfepoch, outFile,
              test_score * 100., acc[0] * 100., acc[1] * 100.,
              acc[2] * 100, acc[3] * 100]
    fields.extend(n for row in counts for n in row)
    with open(outFile, 'a') as OF:
        print(*fields, file=OF)
Exemplo n.º 50
0
def test_mlp(learning_rate=0.05,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             split=0,
             batch_size=1,
             n_hidden=[100],
             rot=5,
             seuil=0.25):
    """
    Train an MLP whose output has the same width as its input, using
    minibatch SGD with patience-based early stopping, and score it with the
    ``jaccard`` helper on the train / validation / test sets.

    After each evaluation an image mosaic is written to
    ``images/<set><epoch>.png``.

    :type learning_rate: float
    :param learning_rate: SGD step size

    :type L1_reg: float
    :param L1_reg: weight of the L1 term added to the cost

    :type L2_reg: float
    :param L2_reg: weight of the squared-L2 term added to the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :param split: forwarded unchanged to ``load_data``

    :type batch_size: int
    :param batch_size: number of training examples per SGD step
                       (evaluation always runs one sample at a time)

    :param n_hidden: forwarded to ``MLP`` (never mutated here, so the shared
                     default list is safe)

    :param rot: forwarded to ``rotate_data`` at the start of every epoch
                (presumably a rotation amount -- confirm against that helper)

    :type seuil: float
    :param seuil: threshold ("seuil" in French) at or above which a masked
                  prediction counts as 1
    """
    # Local import: time.clock() was removed in Python 3.8, and this keeps
    # the function independent of the module's import block.
    import timeit

    datasets = load_data(split)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole training minibatches; floor division keeps it an int
    # under true division so it can drive range().
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.matrix('y')  # the targets are a matrix, one row per sample

    rng = numpy.random.RandomState(1234)
    shp = train_set_x.get_value().shape[1]
    # construct the MLP class; the output is as wide as the input
    classifier = MLP(rng=rng, input=x, n_in=shp, n_hidden=n_hidden, n_out=shp)

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)
    # end-snippet-4

    # Each predictor returns [prediction, target] for the single sample at
    # position `index` of the corresponding set.
    pred_test = theano.function(inputs=[index],
                                outputs=[classifier.y_pred, y],
                                givens={
                                    x: test_set_x[index:(index + 1)],
                                    y: test_set_y[index:(index + 1)]
                                })

    pred_train = theano.function(inputs=[index],
                                 outputs=[classifier.y_pred, y],
                                 givens={
                                     x: train_set_x[index:(index + 1)],
                                     y: train_set_y[index:(index + 1)]
                                 })

    pred_valid = theano.function(inputs=[index],
                                 outputs=[classifier.y_pred, y],
                                 givens={
                                     x: valid_set_x[index:(index + 1)],
                                     y: valid_set_y[index:(index + 1)]
                                 })

    def evaluation(fn, d, ens, epoch, seuil, plot):
        """Score predictor `fn` on every sample of shared variable `d`.

        Prints and returns the mean of the per-sample `jaccard` scores;
        when `plot` is true also saves a mosaic of input, raw prediction,
        masked prediction, thresholded prediction and target per sample.
        """
        x = d.get_value()
        n_samples = x.shape[0]
        if plot:
            bigpic = []
        acc = []

        for i in range(n_samples):

            pred, true = fn(i)
            # ignore predictions where the input is not positive
            pred_mask = pred * (x[i] > 0)

            # builtin int: the numpy.int alias was removed in NumPy 1.24
            pred_out = (pred_mask >= seuil).astype(int)
            true_out = true.astype(int)
            acc.append(jaccard(pred_out, true_out))

            if plot:
                bigpic += [x[i], pred, pred_mask, pred_out, true_out]

        this_acc = numpy.mean(acc)
        std_acc = numpy.std(acc)

        print('epoch %i, %s error %f +- %f %%' %
              (epoch, ens, this_acc * 100., std_acc * 100.))

        if plot:
            bigpic = numpy.vstack(bigpic)
            tile = tile_raster_images(bigpic, (311, 457),
                                      (n_samples // 4, 5 * 4),
                                      output_pixel_vals=True)
            Im.fromarray(tile).convert("RGB").save("images/" + ens +
                                                   str(epoch) + ".png")

        return this_acc

    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # one SGD step on a training minibatch; returns the minibatch cost
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    n_training_samples = train_set_x.get_value().shape[0]
    print('... training over %i training samples' % n_training_samples)

    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 1  # 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    # The tracked quantity is a score to maximise (higher is better), hence
    # the -inf start and the `>` comparisons below.
    best_validation_loss = -numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    evaluation(pred_train, train_set_x, "train", epoch, seuil, True)
    print("training started...")

    while (epoch < n_epochs) and (not done_looping):
        # presumably perturbs the training data in place -- the return value
        # is not used; confirm against rotate_data
        rotate_data((train_set_x, train_set_y), rot)
        epoch = epoch + 1
        minibatch_avg_cost = []
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost.append(train_model(minibatch_index))

            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Single pre-formatted argument so the line prints the same
                # with the Python 2 statement and the Python 3 function.
                print("mean avg cost over training ::  %s" %
                      numpy.mean(minibatch_avg_cost))

                evaluation(pred_train, train_set_x, "train", epoch, seuil,
                           True)
                val = evaluation(pred_valid, valid_set_x, "valid", epoch,
                                 seuil, True)

                # if we got the best validation score until now
                if val > best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if (val > best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = val
                    best_iter = iteration
                    # keep the test score of the best model so the final
                    # summary reports it instead of a constant 0
                    test_score = evaluation(pred_test, test_set_x, "test",
                                            epoch, seuil, True)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def test_stack_machine():
    """
    Load a saved stacked auto-encoder and compare a classifier trained on
    raw MNIST inputs with one trained on the auto-encoder's reconstructions.

    Prints the reconstruction error on the test set and the train / test
    scores of both classifiers, then returns the loaded auto-encoder.
    """
    # Restore the previously trained model from its save directory.
    model = SAE.load(os.path.join("Saving/Stack_AE_theano", "10-06_3L1000"))

    # Only the train and test splits of the dataset are needed.
    print("Loading dataset...")
    data = load_data('mnist.pkl.gz')
    train_x, train_y = data[0]
    test_x, test_y = data[2]

    _, rec_error = model.reconstruct(test_x)
    print("The error of reconstruction is:  {0}".format(rec_error.eval()), "%")

    # Encode then decode each split through the layer range
    # [0, architecture.shape[0]) -- presumably the full stack.
    depth = model.architecture.shape[0]

    train_code = model.forward_encoding(train_x, 0, depth)
    train_rec = model.forward_decoding(train_code, 0, depth)

    test_code = model.forward_encoding(test_x, 0, depth)
    test_rec = model.forward_decoding(test_code, 0, depth)

    # Fit one classifier on the raw inputs and one on the reconstructions.
    method = 'AdaBoostClassifier'
    print("Classifier used: ", method)

    print("Learning the logistic regression without stack...")
    raw_clf = model.supervized_classification(
        train_x.eval(), train_y.eval(), classification_method=method)

    print("Learning the logistic regression with stack...")
    rec_clf = model.supervized_classification(
        train_rec.eval(), train_y.eval(), classification_method=method)

    # Report the scores of both classifiers on the train and test splits.
    print("Without Stack_AE:")
    raw_train_score = raw_clf.score(train_x.eval(), train_y.eval())
    print("Accuracy training set:", raw_train_score)
    raw_test_score = raw_clf.score(test_x.eval(), test_y.eval())
    print("Accuracy test set:", raw_test_score)

    print("With Stack_AE:")
    rec_train_score = rec_clf.score(train_rec.eval(), train_y.eval())
    print("Accuracy training set:", rec_train_score)

    rec_test_score = rec_clf.score(test_rec.eval(), test_y.eval())
    print("Accuracy test set:", rec_test_score)

    return model
Exemplo n.º 52
0
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=1000,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden1=500,
             n_hidden2=100,
             n_hidden3=50):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron.

    This is demonstrated on MNIST.  The model is trained with minibatch SGD
    and patience-based early stopping; progress is printed to stdout and
    nothing is returned.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
                   regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
                   regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch; a trailing partial
                       minibatch is ignored

    :type n_hidden1: int
    :param n_hidden1: forwarded to ``MLP`` as ``n_hidden1`` (presumably the
                      size of the first hidden layer -- confirm against MLP)

    :type n_hidden2: int
    :param n_hidden2: forwarded to ``MLP`` as ``n_hidden2``

    :type n_hidden3: int
    :param n_hidden3: forwarded to ``MLP`` as ``n_hidden3``
    """
    # time.clock() was removed in Python 3.8; timeit.default_timer() is the
    # portable replacement.  NOTE: it measures wall-clock time, whereas
    # time.clock() reported CPU time on Unix.
    import timeit

    datasets = load_data(dataset)
    ada_lr = theano.shared(np.float32(learning_rate), name="ada_lr")

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole minibatches in each split.  Floor division keeps the
    # counts integral on Python 3 too (they are used with range()).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=28 * 28,
                     n_hidden1=n_hidden1,
                     n_hidden2=n_hidden2,
                     n_hidden3=n_hidden3,
                     n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # gradient of the cost with respect to every model parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # (variable, update expression) pairs for plain SGD.  The shared variable
    # ``ada_lr`` is used symbolically rather than through get_value(), which
    # would freeze the rate into the graph as a constant and make any later
    # change to ``ada_lr`` (set_value or an update rule) a silent no-op.
    updates = [(param, param - ada_lr * gparam)
               for param, gparam in zip(classifier.params, gparams)]
    # Learning-rate decay left disabled, as in the original:
    # updates.append((ada_lr, ada_lr - ada_lr**2.25 + 0.000002))

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # run at least this many minibatch iterations
    patience_increase = 25  # on a significant improvement, patience is
    # raised to at least iteration * patience_increase
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # Validate every `validation_frequency` minibatches: once per epoch
    # unless an epoch is longer than patience // 2 minibatches.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_model(minibatch_index)
            # global iteration number across epochs (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def evaluate_lenet5(sigma=0.01,
                    learning_rate=0.1,
                    n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50],
                    batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    Builds two convolution/pooling layers, a tanh hidden layer fed with
    additive Gaussian noise, and a logistic-regression output layer, then
    trains with minibatch SGD and patience-based early stopping.  Each time
    the validation error improves, the test error (in percent) is printed to
    stdout; the total run time is written to stderr.  Nothing is returned.

    :type sigma: float
    :param sigma: standard deviation in normal distribution (of the noise
                  added to the input of the fully-connected layer)

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer (two entries are read;
                   the list is never modified)

    :type batch_size: int
    :param batch_size: number of examples per minibatch; a trailing partial
                       minibatch is ignored
    """
    # time.clock() was removed in Python 3.8; timeit.default_timer() is the
    # portable replacement.  NOTE: it measures wall-clock time, whereas
    # time.clock() reported CPU time on Unix.
    import timeit

    rng = numpy.random.RandomState(930508)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole minibatches in each split.  Floor division keeps the
    # counts integral on Python 3 too (they are used with range()).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Side length of the (square) input images, derived from the number of
    # features per example.  A plain Python int is used on purpose: the
    # previous int8 cast overflowed for sides > 127 and made the products
    # computed from it below int8-typed as well.
    n_feature = train_set_x.get_value(borrow=True).shape[1]
    matrix_dim = int(numpy.sqrt(n_feature))

    # Reshape matrix of rasterized images of shape (batch_size, n_feature)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, matrix_dim, matrix_dim))

    # Construct the first convolutional pooling layer.  With 28x28 inputs:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, matrix_dim,
                                             matrix_dim),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer.  With 28x28 inputs:
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    temp1 = (matrix_dim - 5 + 1) // 2
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], temp1,
                                             temp1),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    temp2 = (temp1 - 5 + 1) // 2
    n_flat = nkerns[1] * temp2 * temp2  # features per example after flatten

    # Additive Gaussian noise on the flattened features.  One noise vector
    # of length n_flat is drawn and broadcast over the batch.  Its length
    # previously hard-coded 50 kernels, which broke for nkerns[1] != 50.
    srng = RandomStreams(seed=508)
    variation = srng.normal((n_flat, ), 0, sigma)
    layer2_input = layer2_input + variation

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=n_flat,
                         n_out=500,
                         activation=T.tanh)

    # Number of classes = span of the training labels (assumes labels are
    # contiguous integers -- confirm for datasets other than MNIST).  The
    # label vector is evaluated once instead of twice.
    labels = train_set_y.eval()
    n_out = int(labels.max() - labels.min() + 1)
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=n_out)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # run at least this many minibatch iterations
    patience_increase = 2  # on a significant improvement, patience is
    # raised to at least iteration * patience_increase
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # Validate every `validation_frequency` minibatches: once per epoch
    # unless an epoch is longer than patience // 2 minibatches.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # global iteration number across epochs (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score
                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    # Verbose progress lines are intentionally disabled;
                    # only the test error (percent) is emitted on stdout.
                    print(test_score * 100.)

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def train_DBN(
        finetune_lr=0.1,
        pretraining_epochs=100,
        pretrain_lr=0.03,
        k=1,
        training_epochs=600,
        dataset='/Users/apple/Desktop/pattern_recognition_pr/data/mnist.pkl.gz',
        batch_size=10):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST.  The labels come from ``dataset``; the
    feature matrices are loaded from ``train.npy``, ``validation.npy`` and
    ``test.npy`` in the current working directory.  After layer-wise
    pre-training and fine-tuning with early stopping, the trained network is
    pickled to ``./model3``.  Nothing is returned.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset (only its labels are used)
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    # Labels are taken from the pickled dataset, features from .npy files.
    # NOTE(review): the DBN below is built with n_ins=28 * 28, so the .npy
    # arrays presumably have 784 columns -- confirm.
    datasets1 = load_data(dataset)
    datasets = [[theano.shared(numpy.load("train.npy")), datasets1[0][1]],
                [theano.shared(numpy.load("validation.npy")), datasets1[1][1]],
                [theano.shared(numpy.load("test.npy")), datasets1[2][1]]]
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole training minibatches.  Floor division keeps the count
    # integral on Python 3 too (it is used with range()).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng,
              n_ins=28 * 28,
              hidden_layers_sizes=[2000, 1000, 500, 200],
              n_outs=10)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    start_time = timeit.default_timer()
    # Pre-train layer-wise
    for i in range(dbn.n_layers):
        # go through pretraining epochs
        for epoch in range(pretraining_epochs):
            # go through the training set, collecting the per-batch cost
            c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            # Single print call that reproduces the original two-statement
            # output (header, separating space, mean cost) on one line.
            header = 'Pre-training layer %i, epoch %d, cost ' % (i, epoch)
            print('%s %s' % (header, numpy.mean(c)))

    end_time = timeit.default_timer()
    # end-snippet-2
    sys.stderr.write('The pretraining code for file ' +
                     os.path.split(__file__)[1] + ' ran for %.2fm' %
                     ((end_time - start_time) / 60.) + '\n')
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr)

    print('... finetuning the model')
    # early-stopping parameters
    patience = 4 * n_train_batches  # run at least this many iterations
    patience_increase = 2.  # on a significant improvement, patience is
    # raised to at least iteration * patience_increase
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # patience // 2 is 2 * n_train_batches here, so validation runs once
    # per epoch.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Initialised up front so the final summary cannot fail with a
    # NameError when no validation improvement is ever recorded
    # (e.g. training_epochs=0).
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_fn(minibatch_index)
            # global iteration number across epochs (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                # validate_model() takes no arguments; its result is
                # averaged (presumably one loss per validation minibatch)
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%, '
           'obtained at iteration %i, '
           'with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The fine tuning code for file ' +
                     os.path.split(__file__)[1] + ' ran for %.2fm' %
                     ((end_time - start_time) / 60.) + '\n')
    # Pickle streams are binary: open in "wb" (text mode fails on Python 3
    # and can corrupt data on Windows) and let the context manager close
    # the file even if dumping raises.
    with open("./model3", "wb") as file_:
        pickle.dump(dbn, file_)
Exemplo n.º 55
0
def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
             pretrain_lr=0.001, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1):
    """
    Pre-train and fine-tune a stacked denoising autoencoder (``SdA``).

    The network is built with 28 * 28 inputs, three hidden layers of 1000
    units and 10 outputs.  Each layer is pre-trained in turn with its own
    corruption level (0.1, 0.2, 0.3), then the whole network is fine-tuned
    with patience-based early stopping.  Progress is printed to stdout and
    timings to stderr; nothing is returned.

    :type finetune_lr: float
    :param finetune_lr: learning rate passed to ``build_finetune_functions``
    :type pretraining_epochs: int
    :param pretraining_epochs: number of pre-training epochs per layer
    :type pretrain_lr: float
    :param pretrain_lr: learning rate passed to each pre-training function
    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs
    :type dataset: string
    :param dataset: path handed to ``load_data``
    :type batch_size: int
    :param batch_size: number of examples per minibatch; a trailing partial
                       minibatch is ignored
    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Floor division: with true division a dataset size that is not a
    # multiple of batch_size gave a fractional batch count, which made the
    # iteration counter fractional and the validation check below never
    # fire.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')

    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[1000, 1000, 1000],
        n_outs=10
    )

    print('... getting the pretraining functions')
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print('... pre-training the model')
    start_time = timeit.default_timer()

    # one corruption level per hidden layer, indexed by layer number
    corruption_levels = [.1, .2, .3]
    for i in range(sda.n_layers):

        for epoch in range(pretraining_epochs):

            # per-batch pre-training cost for this layer and epoch
            c = [pretraining_fns[i](index=batch_index,
                                    corruption=corruption_levels[i],
                                    lr=pretrain_lr)
                 for batch_index in range(n_train_batches)]
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch), end=' ')
            print(numpy.mean(c))

    end_time = timeit.default_timer()

    print(('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetunning the model')
    # early-stopping parameters
    patience = 10 * n_train_batches  # run at least this many iterations
    patience_increase = 2.  # on a significant improvement, patience is
    # raised to at least iteration * patience_increase

    improvement_threshold = 0.995  # relative improvement deemed significant

    # patience // 2 is 5 * n_train_batches here, so validation runs once
    # per epoch.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # Initialised up front so the final summary cannot fail with a
    # NameError when no validation improvement is ever recorded.
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)
            # global iteration number across epochs (0-based)
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # validate_model() takes no arguments; its result is
                # averaged (presumably one loss per validation minibatch)
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print(('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.)))

                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.)))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print((
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    ))
    print(('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)