Exemplo n.º 1
0
def svm_cva(dir, start=0, end=500, learning_rate=3e-4, n_epochs=10000,
            dataset='./data/mnist.pkl.gz',
            batch_size=500):
    """
    Train a ``Pegasos.Pegasos`` classifier on pre-extracted features.

    Labels are read with ``datapy.load_data_gpu(dataset, have_matrix=True)``
    and the feature matrices with ``datapy.load_feature_gpu``.  Parameters
    are updated one minibatch at a time using the update rules returned by
    ``optimizer.get_adam_optimizer_min``.  Training stops after ``n_epochs``
    epochs or when the patience-based early-stopping budget is used up,
    whichever happens first.

    Progress is printed to stdout and appended to the file
    ``dir + str(learning_rate) + '_c-hook.txt'``.

    :type dir: string
    :param dir: location handed to ``datapy.load_feature_gpu``; it is also
                the prefix of the log file path (plain string concatenation,
                so it presumably has to end with a path separator)

    :type start: int
    :param start: forwarded to ``datapy.load_feature_gpu``

    :type end: int
    :param end: forwarded to ``datapy.load_feature_gpu``; ``end - start`` is
                used as the input dimension of the classifier

    :type learning_rate: float
    :param learning_rate: learning rate handed to the optimizer (stored in a
                          float32 shared variable)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path of the dataset file handed to
                    ``datapy.load_data_gpu``

    :type batch_size: int
    :param batch_size: number of examples per minibatch; examples that do
                       not fill a whole minibatch are never visited

    :return: None, results are only printed and logged
    """
    print('%s %s %s %s' % (start, end, learning_rate, batch_size))

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)

    # Every split unpacks into three items; only the label vectors and the
    # training label matrix are needed here.
    _, train_set_y, train_y_matrix = datasets[0]
    _, valid_set_y, _ = datasets[1]
    _, test_set_y, _ = datasets[2]

    train_set_x, valid_set_x, test_set_x = datapy.load_feature_gpu(
        dir=dir, start=start, end=end)

    train_shape = train_set_x.get_value(borrow=True).shape
    valid_shape = valid_set_x.get_value(borrow=True).shape
    test_shape = test_set_x.get_value(borrow=True).shape
    print(train_shape)
    print(valid_shape)
    print(test_shape)

    # number of whole minibatches per split; floor division keeps these
    # integers even if true division is enabled for the module
    n_train_batches = train_shape[0] // batch_size
    n_valid_batches = valid_shape[0] // batch_size
    n_test_batches = test_shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch

    # symbolic inputs for one minibatch
    x = T.matrix('x')  # feature rows
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels
    y_matrix = T.imatrix('y_matrix')  # labels, presented as 2D int matrix

    # the classifier's input dimension is the width of the feature slice
    rng = np.random.RandomState(0)
    n_in = end - start
    classifier = Pegasos.Pegasos(input=x, rng=rng, n_in=n_in, n_out=10,
                                 weight_decay=1e-4, loss=1)

    # training objective as defined by Pegasos.objective
    # NOTE(review): the meaning of the leading 10 is defined in Pegasos and
    # is not visible from here - confirm before changing it.
    cost = classifier.objective(10, y, y_matrix)

    # error of the model on one test / validation minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    # gradient of the cost with respect to theta = (W, b)
    params = [classifier.W, classifier.b]
    grads = [T.grad(cost=cost, wrt=classifier.W),
             T.grad(cost=cost, wrt=classifier.b)]

    # update rules come from the project's optimizer module
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(
        learning_rate=l_r, decay1=0.1, decay2=0.001)
    updates = get_optimizer(params, grads)

    # `train_model` returns the cost and, at the same time, applies
    # `updates` to the parameters
    train_model = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            y_matrix: train_y_matrix[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_test_score = np.inf
    test_score = 0.
    start_time = time.clock()

    log_path = dir + str(learning_rate) + '_c-' + 'hook.txt'

    def log(message, echo=True):
        # Append one line to the log file and, unless told otherwise, print
        # the same line to stdout.
        with open(log_path, 'a') as f:
            f.write(message + '\n')
        if echo:
            print(message)

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            train_model(minibatch_index)
            # number of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                this_validation_loss = np.mean(
                    [validate_model(i) for i in xrange(n_valid_batches)])
                this_test_score = np.mean(
                    [test_model(i) for i in xrange(n_test_batches)])

                if this_test_score < best_test_score:
                    best_test_score = this_test_score

                log('epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%' %
                    (epoch, minibatch_index + 1, n_train_batches,
                     this_validation_loss * 100, this_test_score * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # extend patience if the improvement is large enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    # The parameters have not changed since the test pass
                    # above, so its result is reused instead of evaluating
                    # the whole test set a second time.
                    test_score = this_test_score

                    log(('     epoch %i, minibatch %i/%i, test error of'
                         ' best model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()

    summary_message = (
        'Optimization complete with best validation score of %f %%,'
        'with test performance %f %%'
    ) % (best_validation_loss * 100., test_score * 100.)
    speed_message = 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    runtime_message = ('The code for file ' +
                       os.path.split(__file__)[1] +
                       ' ran for %.1fs' % ((end_time - start_time)))

    log(summary_message)
    log(speed_message)
    # The run time goes to the log file and to stderr (not stdout).  Only
    # the message itself is logged; the stray `sys.stderr` argument that
    # used to be written into the log file is gone.
    log(runtime_message, echo=False)
    sys.stderr.write(runtime_message + '\n')
    log('%s' % (best_test_score,))
Exemplo n.º 2
0
def svm_cva(dir,
            start=0,
            end=500,
            learning_rate=3e-4,
            n_epochs=10000,
            dataset='./data/mnist.pkl.gz',
            batch_size=500):
    """
    Train a ``Pegasos.Pegasos`` classifier on pre-extracted features.

    Labels are read with ``datapy.load_data_gpu(dataset, have_matrix=True)``
    and the feature matrices with ``datapy.load_feature_gpu``.  Parameters
    are updated one minibatch at a time using the update rules returned by
    ``optimizer.get_adam_optimizer_min``.  Training stops after ``n_epochs``
    epochs or when the patience-based early-stopping budget is used up,
    whichever happens first.

    Progress is printed to stdout and appended to the file
    ``dir + str(learning_rate) + '_c-hook.txt'``.

    :type dir: string
    :param dir: location handed to ``datapy.load_feature_gpu``; it is also
                the prefix of the log file path (plain string concatenation,
                so it presumably has to end with a path separator)

    :type start: int
    :param start: forwarded to ``datapy.load_feature_gpu``

    :type end: int
    :param end: forwarded to ``datapy.load_feature_gpu``; ``end - start`` is
                used as the input dimension of the classifier

    :type learning_rate: float
    :param learning_rate: learning rate handed to the optimizer (stored in a
                          float32 shared variable)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path of the dataset file handed to
                    ``datapy.load_data_gpu``

    :type batch_size: int
    :param batch_size: number of examples per minibatch; examples that do
                       not fill a whole minibatch are never visited

    :return: None, results are only printed and logged
    """
    print('%s %s %s %s' % (start, end, learning_rate, batch_size))

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)

    # Every split unpacks into three items; only the label vectors and the
    # training label matrix are needed here.
    _, train_set_y, train_y_matrix = datasets[0]
    _, valid_set_y, _ = datasets[1]
    _, test_set_y, _ = datasets[2]

    train_set_x, valid_set_x, test_set_x = datapy.load_feature_gpu(dir=dir,
                                                                   start=start,
                                                                   end=end)

    train_shape = train_set_x.get_value(borrow=True).shape
    valid_shape = valid_set_x.get_value(borrow=True).shape
    test_shape = test_set_x.get_value(borrow=True).shape
    print(train_shape)
    print(valid_shape)
    print(test_shape)

    # number of whole minibatches per split; floor division keeps these
    # integers even if true division is enabled for the module
    n_train_batches = train_shape[0] // batch_size
    n_valid_batches = valid_shape[0] // batch_size
    n_test_batches = test_shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch

    # symbolic inputs for one minibatch
    x = T.matrix('x')  # feature rows
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels
    y_matrix = T.imatrix('y_matrix')  # labels, presented as 2D int matrix

    # the classifier's input dimension is the width of the feature slice
    rng = np.random.RandomState(0)
    n_in = end - start
    classifier = Pegasos.Pegasos(input=x,
                                 rng=rng,
                                 n_in=n_in,
                                 n_out=10,
                                 weight_decay=1e-4,
                                 loss=1)

    # training objective as defined by Pegasos.objective
    # NOTE(review): the meaning of the leading 10 is defined in Pegasos and
    # is not visible from here - confirm before changing it.
    cost = classifier.objective(10, y, y_matrix)

    # error of the model on one test / validation minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
        })

    # gradient of the cost with respect to theta = (W, b)
    params = [classifier.W, classifier.b]
    grads = [
        T.grad(cost=cost, wrt=classifier.W),
        T.grad(cost=cost, wrt=classifier.b),
    ]

    # update rules come from the project's optimizer module
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001)
    updates = get_optimizer(params, grads)

    # `train_model` returns the cost and, at the same time, applies
    # `updates` to the parameters
    train_model = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            y_matrix:
            train_y_matrix[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is found
    # a relative improvement of this much is considered significant
    improvement_threshold = 0.995
    # number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    best_test_score = np.inf
    test_score = 0.
    start_time = time.clock()

    log_path = dir + str(learning_rate) + '_c-' + 'hook.txt'

    def log(message, echo=True):
        # Append one line to the log file and, unless told otherwise, print
        # the same line to stdout.
        with open(log_path, 'a') as f:
            f.write(message + '\n')
        if echo:
            print(message)

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            train_model(minibatch_index)
            # number of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                this_validation_loss = np.mean(
                    [validate_model(i) for i in xrange(n_valid_batches)])
                this_test_score = np.mean(
                    [test_model(i) for i in xrange(n_test_batches)])

                if this_test_score < best_test_score:
                    best_test_score = this_test_score

                log('epoch %i, minibatch %i/%i, validation error %f %%, test error %f %%'
                    % (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100, this_test_score * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # extend patience if the improvement is large enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    # The parameters have not changed since the test pass
                    # above, so its result is reused instead of evaluating
                    # the whole test set a second time.
                    test_score = this_test_score

                    log(('     epoch %i, minibatch %i/%i, test error of'
                         ' best model %f %%') %
                        (epoch, minibatch_index + 1, n_train_batches,
                         test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()

    summary_message = (
        ('Optimization complete with best validation score of %f %%,'
         'with test performance %f %%') %
        (best_validation_loss * 100., test_score * 100.))
    speed_message = 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    runtime_message = ('The code for file ' + os.path.split(__file__)[1] +
                       ' ran for %.1fs' % ((end_time - start_time)))

    log(summary_message)
    log(speed_message)
    # The run time goes to the log file and to stderr (not stdout).  Only
    # the message itself is logged; the stray `sys.stderr` argument that
    # used to be written into the log file is gone.
    log(runtime_message, echo=False)
    sys.stderr.write(runtime_message + '\n')
    log('%s' % (best_test_score, ))
Exemplo n.º 3
0
def c_6layer_mnist_imputation(seed=0, ctype='cva',
             pertub_type=3, pertub_prob=6, pertub_prob1=14, visualization_times=20,
             denoise_times=200, predir=None, n_batch=144,
             dataset='mnist.pkl.gz', batch_size=500):

    """
    Missing data imputation with a pre-trained convolutional model.

    The function builds a five-layer convolutional recognition network
    followed by a fully connected layer and a Gaussian hidden layer, and a
    mirrored generative network.  It loads saved parameters from ``predir``
    and then repeatedly feeds the perturbed test images through the model.
    After each pass, entries where the perturbation mask is 1 keep the value
    of the clean test image and all other entries are replaced by the
    model's reconstruction.

    Everything is written below a fresh, timestamped directory
    ``results/imputation/<ctype>/mnist/...``: ``hook.txt`` (log),
    ``data.png`` / ``data_pertub.png``, one ``procedure-<pass>`` image and
    ``.npz`` per pass, ``output.png`` / ``output.npz`` and finally the
    ``train_features`` / ``valid_features`` / ``test_features`` arrays.

    The environment variable ``model_epoch`` (default 600) selects the saved
    file ``model-<model_epoch>.npz``; the value -1 selects
    ``best-model.npz``.

    :param seed: seed of the numpy RandomState handed to the layer
                 constructors (the Theano random stream always uses seed 0)
    :param ctype: 'cva' or 'cmmva'; for 'cmmva' the last two saved arrays
                  are ignored when the parameters are restored.  It is also
                  part of the result directory name.
    :param pertub_type: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob1: forwarded to ``datapy.load_pertub_data``
    :param visualization_times: number of leading passes whose first 100
                                images are kept for ``output.png``
    :param denoise_times: number of imputation passes over the test set
    :param predir: path prefix of the saved model files (required)
    :param n_batch: batch size of the random-generation branch of the graph
    :param dataset: dataset handed to ``datapy.load_data_gpu``; only its
                    validation inputs are used
    :param batch_size: minibatch size baked into the layer shapes
    :raises SystemExit: if ``predir`` is None or ``ctype`` is not supported
    :return: None
    """
    #cp->cd->cpd->cd->c
    nkerns=[32, 32, 64, 64, 64]
    drops=[0, 0, 0, 0, 0, 1]
    n_hidden=[500, 50]
    drop_inverses=[1,]
    # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28

    # The network below hard-codes 28x28 single-channel images, so these are
    # set unconditionally.  They used to be bound only when `dataset` was
    # exactly 'mnist.pkl.gz', and any other path failed with a NameError
    # once the first image was rendered.
    dim_input=(28, 28)
    colorImg=False

    logdir = 'results/imputation/'+ctype+'/mnist/'+ctype+'_6layer_mnist_'+str(pertub_type)+'_'+str(pertub_prob)+'_'+str(pertub_prob1)+'_'+str(denoise_times)+'_'
    logdir += str(int(time.time()))+'/'

    if not os.path.exists(logdir): os.makedirs(logdir)

    print(predir)
    with open(logdir+'hook.txt', 'a') as f:
        f.write('%s\n' % (predir,))

    train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data(dirs='data_imputation/', pertub_type=pertub_type, pertub_prob=pertub_prob,pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)

    # only the validation inputs of the labelled dataset are needed
    valid_set_x, _, _ = datasets[1]

    # number of whole minibatches per split
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    x_pertub = T.matrix('x_pertub')  # the data is presented as rasterized images
    p_label = T.matrix('p_label')

    random_z = T.matrix('random_z')

    drop = T.iscalar('drop')
    drop_inverse = T.iscalar('drop_inverse')

    activation = nonlinearity.relu

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    input_x = x_pertub.reshape((batch_size, 1, 28, 28))

    recg_layer = []

    # ---- recognition network: five convolution(+pooling) layers ----
    # (input channels, input side, filters, filter side, poolsize, border)
    conv_specs = [
        (1,         28, nkerns[0], 5, (2, 2), 'valid'),
        (nkerns[0], 12, nkerns[1], 3, (1, 1), 'same'),
        (nkerns[1], 12, nkerns[2], 3, (2, 2), 'valid'),
        (nkerns[2], 5,  nkerns[3], 3, (1, 1), 'same'),
        (nkerns[3], 5,  nkerns[4], 3, (1, 1), 'same'),
    ]
    conv_out = input_x
    # zip stops after the five convolution flags; drops[-1] belongs to the
    # fully connected layer below
    for use_drop, spec in zip(drops, conv_specs):
        in_channels, in_side, n_filters, filter_side, poolsize, border_mode = spec
        layer = ConvMaxPool.ConvMaxPool(
            rng,
            image_shape=(batch_size, in_channels, in_side, in_side),
            filter_shape=(n_filters, in_channels, filter_side, filter_side),
            poolsize=poolsize,
            border_mode=border_mode,
            activation=activation
        )
        recg_layer.append(layer)
        if use_drop==1:
            conv_out = layer.drop_output(conv_out, drop=drop, rng=rng_share)
        else:
            conv_out = layer.output(conv_out)

    mlp_input_x = conv_out.flatten(2)

    activations = []

    # fully connected layer on top of the flattened convolution output
    recg_layer.append(FullyConnected.FullyConnected(
            rng=rng,
            n_in= 5 * 5 * nkerns[-1],
            n_out=n_hidden[0],
            activation=activation
        ))
    if drops[-1]==1:
        activations.append(recg_layer[-1].drop_output(input=mlp_input_x, drop=drop, rng=rng_share))
    else:
        activations.append(recg_layer[-1].output(input=mlp_input_x))

    # stochastic layer
    recg_layer.append(GaussianHidden.GaussianHidden(
            rng=rng,
            input=activations[-1],
            n_in=n_hidden[0],
            n_out = n_hidden[1],
            activation=None
        ))

    z = recg_layer[-1].sample_z(rng_share)

    # ---- generative network ----
    # Every layer is applied twice: to the encoding `z` of the input and to
    # the free input `random_z` (random-generation branch).
    gene_layer = []

    #1
    gene_layer.append(FullyConnected.FullyConnected(
            rng=rng,
            n_in=n_hidden[1],
            n_out = n_hidden[0],
            activation=activation
        ))
    z_out = gene_layer[-1].output(input=z)
    random_z_out = gene_layer[-1].output(input=random_z)

    #2
    gene_layer.append(FullyConnected.FullyConnected(
            rng=rng,
            n_in=n_hidden[0],
            n_out = 5*5*nkerns[-1],
            activation=activation
        ))
    if drop_inverses[0]==1:
        z_out = gene_layer[-1].drop_output(input=z_out, drop=drop_inverse, rng=rng_share)
        random_z_out = gene_layer[-1].drop_output(input=random_z_out, drop=drop_inverse, rng=rng_share)
    else:
        z_out = gene_layer[-1].output(input=z_out)
        random_z_out = gene_layer[-1].output(input=random_z_out)

    z_out = z_out.reshape((batch_size, nkerns[-1], 5, 5))
    random_z_out = random_z_out.reshape((n_batch, nkerns[-1], 5, 5))

    # (input channels, input side, filters, filter side, poolsize, border,
    #  activation); the last layer uses a sigmoid so that its output can
    # serve as the mean of a Bernoulli
    deconv_specs = [
        (nkerns[-1], 5,  nkerns[-2], 3, (1, 1), 'same', activation),
        (nkerns[-2], 5,  nkerns[-3], 3, (2, 2), 'full', activation),
        (nkerns[-3], 12, nkerns[-4], 3, (1, 1), 'same', activation),
        (nkerns[-4], 12, nkerns[-5], 3, (1, 1), 'same', activation),
        (nkerns[-5], 12, 1,          5, (2, 2), 'full', nonlinearity.sigmoid),
    ]
    for spec in deconv_specs:
        in_channels, in_side, n_filters, filter_side, poolsize, border_mode, layer_activation = spec
        layer = UnpoolConvNon.UnpoolConvNon(
            rng,
            image_shape=(batch_size, in_channels, in_side, in_side),
            filter_shape=(n_filters, in_channels, filter_side, filter_side),
            poolsize=poolsize,
            border_mode=border_mode,
            activation=layer_activation
        )
        gene_layer.append(layer)
        z_out = layer.output(input=z_out)
        random_z_out = layer.output_random_generation(input=random_z_out, n_batch=n_batch)

    gene_layer.append(NoParamsBernoulliVisiable.NoParamsBernoulliVisiable())

    # NOTE(review): `logpx` and `random_x` are not used by any function
    # compiled below; the calls are kept in case building them has side
    # effects (e.g. on the shared random stream) - confirm before removing.
    logpx = gene_layer[-1].logpx(mean=z_out, data=input_x)

    # 4-D tensor of random generation
    random_x_mean = random_z_out
    random_x = gene_layer[-1].sample_x(rng_share, random_x_mean)

    # keep x where the mask is 1, use the reconstruction everywhere else
    x_denoised = z_out.flatten(2)
    x_denoised = p_label*x+(1-p_label)*x_denoised

    mse = ((x - x_denoised)**2).sum() / pertub_number

    # generative parameters first, then recognition parameters; the saved
    # arrays are matched to this order when they are restored
    params=[]
    for g in gene_layer:
        params+=g.params
    for r in recg_layer:
        params+=r.params

    train_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: train_set_x[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    valid_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: valid_set_x[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0)
        }
    )

    test_activations = theano.function(
        inputs=[x_pertub],
        outputs=T.concatenate(activations, axis=1),
        givens={
            drop: np.cast['int32'](0)
        }
    )

    imputation_model = theano.function(
        inputs=[index, x_pertub],
        outputs=[x_denoised, mse],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            p_label:pertub_label[index * batch_size: (index + 1) * batch_size],
            drop: np.cast['int32'](0),
            drop_inverse: np.cast['int32'](0)
        }
    )

    ##################
    # Pretrain MODEL #
    ##################

    model_epoch = int(os.environ.get('model_epoch', 600))

    if predir is None:
        # Still a SystemExit as before, but with a message instead of a
        # silent exit.
        raise SystemExit('c_6layer_mnist_imputation: predir is required '
                         '(prefix of the saved model files)')

    color.printBlue('... setting parameters')
    color.printBlue(predir)
    if model_epoch == -1:
        pre_train = np.load(predir+'best-model.npz')
    else:
        pre_train = np.load(predir+'model-'+str(model_epoch)+'.npz')
    pre_train = pre_train['model']
    if ctype == 'cva':
        saved_values = pre_train
    elif ctype == 'cmmva':
        # the last two saved arrays have no counterpart in `params`
        saved_values = pre_train[:-2]
    else:
        raise SystemExit('c_6layer_mnist_imputation: unsupported ctype %r '
                         "(expected 'cva' or 'cmmva')" % (ctype,))
    for (para, pre) in zip(params, saved_values):
        para.set_value(pre)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    n_visualization = 100
    # per visualised sample: clean image, perturbed image, then one slot per
    # recorded pass
    output = np.ones((n_visualization, visualization_times+2, 784))
    output[:,0,:] = test_set_x.get_value()[:n_visualization,:]
    output[:,1,:] = test_set_x_pertub.get_value()[:n_visualization,:]

    image = paramgraphics.mat_to_img(output[:,0,:].T, dim_input, colorImg=colorImg)
    image.save(logdir+'data.png', 'PNG')
    image = paramgraphics.mat_to_img(output[:,1,:].T, dim_input, colorImg=colorImg)
    image.save(logdir+'data_pertub.png', 'PNG')

    # working copy of the perturbed test set, overwritten after every pass
    tmp = test_set_x_pertub.get_value()

    for epoch in xrange(1, denoise_times + 1):
        this_mse=0
        for i in xrange(n_test_batches):
            d, m = imputation_model(i, tmp[i * batch_size: (i + 1) * batch_size])
            tmp[i * batch_size: (i + 1) * batch_size] = np.asarray(d)
            this_mse+=m
        if epoch<=visualization_times:
            output[:,epoch+1,:] = tmp[:n_visualization,:]

        progress = '%s %s' % (epoch, this_mse)
        print(progress)
        with open(logdir+'hook.txt', 'a') as f:
            f.write(progress + '\n')

        image = paramgraphics.mat_to_img(tmp[:n_visualization,:].T, dim_input, colorImg=colorImg)
        image.save(logdir+'procedure-'+str(epoch)+'.png', 'PNG')
        np.savez(logdir+'procedure-'+str(epoch), tmp=tmp)

    # The tile grid follows the size of `output`; the column count used to
    # be hard-coded to 22, which only matched visualization_times == 20.
    image = paramgraphics.mat_to_img((output.reshape(-1,784)).T, dim_input, colorImg=colorImg, tile_shape=(n_visualization, visualization_times+2))
    image.save(logdir+'output.png', 'PNG')
    np.savez(logdir+'output', output=output)

    # save original train/valid features and the features of the imputed
    # test images; one vstack per split instead of re-copying the growing
    # array for every batch
    train_features = np.vstack(
        [np.asarray(train_activations(i)) for i in xrange(n_train_batches)])
    valid_features = np.vstack(
        [np.asarray(valid_activations(i)) for i in xrange(n_valid_batches)])
    test_features = np.vstack(
        [np.asarray(test_activations(tmp[i * batch_size: (i + 1) * batch_size]))
         for i in xrange(n_test_batches)])

    np.save(logdir+'train_features', train_features)
    np.save(logdir+'valid_features', valid_features)
    np.save(logdir+'test_features', test_features)
Exemplo n.º 4
0
def c_6layer_mnist_imputation(seed=0,
                              pertub_type=3,
                              pertub_prob=6,
                              pertub_prob1=14,
                              predir=None,
                              n_batch=144,
                              dataset='mnist.pkl.gz',
                              batch_size=500):
    """
    Score a pre-trained CNN classifier on clean and perturbed MNIST data.

    Builds five ConvMaxPool stages, one fully connected layer and a 10-way
    Pegasos classifier, overwrites their parameters with the arrays stored
    under the ``'model'`` key of a checkpoint found in ``predir`` and prints
    three numbers on one line: the mean of ``classifier.errors`` over the
    validation batches, over the test batches and over the perturbed test
    batches.  No training step is executed.

    The checkpoint is ``predir + 'model-<epoch>.npz'`` where ``<epoch>`` is
    250 unless the environment variable ``model_epoch`` overrides it; the
    value ``-1`` selects ``predir + 'best-model.npz'``.

    :type seed: int
    :param seed: seed of the NumPy ``RandomState`` handed to every layer

    :param pertub_type: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob1: forwarded to ``datapy.load_pertub_data``

    :type predir: string
    :param predir: prefix of the checkpoint files; file names are appended
                   by plain concatenation, so include the trailing separator

    :param n_batch: not used by this variant; kept so the signature matches
                    the other ``c_6layer_mnist_imputation`` variants

    :type dataset: string
    :param dataset: passed to ``datapy.load_data_gpu``

    :type batch_size: int
    :param batch_size: minibatch size; samples beyond the last full batch
                       are ignored

    :raises SystemExit: when ``predir`` is None
    """
    # Fail fast: the original only noticed a missing checkpoint directory
    # after loading the data and compiling the model, and then left through a
    # bare exit() -- i.e. silently and with a success status.
    if predir is None:
        raise SystemExit('c_6layer_mnist_imputation: predir is required '
                         '(prefix of the pre-trained model-*.npz files)')

    # cp->cd->cpd->cd->c
    # 28->12->12->5->5 / 5*5*64->500->10
    nkerns = [32, 32, 64, 64, 64]
    drops = [0, 0, 0, 0, 0, 1]
    n_hidden = [500, 50]

    train_set_x, test_set_x, test_set_x_pertub, _, _ = datapy.load_pertub_data(
        dirs='data_imputation/',
        pertub_type=pertub_type,
        pertub_prob=pertub_prob,
        pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)
    valid_set_x, valid_set_y, _ = datasets[1]
    _, test_set_y, _ = datasets[2]

    # number of full minibatches; '//' keeps these integers on Python 3 too
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # rasterized images, one per row
    y = T.ivector('y')  # labels as a 1D vector of [int] labels
    y_matrix = T.imatrix('y_matrix')
    drop = T.iscalar('drop')

    activation = nonlinearity.relu

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    input_x = x.reshape((batch_size, 1, 28, 28))

    # One row per convolutional stage:
    # (input channels, input width/height, output channels,
    #  filter size, pool size, border mode)
    conv_specs = [
        (1, 28, nkerns[0], 5, 2, 'valid'),
        (nkerns[0], 12, nkerns[1], 3, 1, 'same'),
        (nkerns[1], 12, nkerns[2], 3, 2, 'valid'),
        (nkerns[2], 5, nkerns[3], 3, 1, 'same'),
        (nkerns[3], 5, nkerns[4], 3, 1, 'same'),
    ]

    recg_layer = []
    cnn_output = []

    # Layers are created strictly in order so that `rng` is consumed in the
    # same sequence as before and `params` lines up with the checkpoint.
    for use_drop, spec in zip(drops, conv_specs):
        n_in, size, n_out, ksize, pool, mode = spec
        layer = ConvMaxPool.ConvMaxPool(
            rng,
            image_shape=(batch_size, n_in, size, size),
            filter_shape=(n_out, n_in, ksize, ksize),
            poolsize=(pool, pool),
            border_mode=mode,
            activation=activation)
        recg_layer.append(layer)

        layer_input = cnn_output[-1] if cnn_output else input_x
        if use_drop == 1:
            cnn_output.append(
                layer.drop_output(layer_input, drop=drop, rng=rng_share))
        else:
            cnn_output.append(layer.output(layer_input))

    mlp_input = cnn_output[-1].flatten(2)

    recg_layer.append(
        FullyConnected.FullyConnected(rng=rng,
                                      n_in=nkerns[4] * 5 * 5,
                                      n_out=n_hidden[0],
                                      activation=activation))

    feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share)

    # classify the activations of the fully connected (ReLU) layer
    classifier = Pegasos.Pegasos(input=feature,
                                 rng=rng,
                                 n_in=n_hidden[0],
                                 n_out=10,
                                 weight_decay=0,
                                 loss=1)

    # list of all model parameters, in checkpoint order
    params = []
    for r in recg_layer:
        params += r.params
    params += classifier.params

    # NOTE(review): the training graph below (hinge-loss cost, gradients,
    # optimizer updates and the `parameters` function) is never used by this
    # evaluation-only routine.  It is kept because the project helpers it
    # calls are not visible from here -- confirm and delete.
    cost = classifier.hinge_loss(10, y, y_matrix) * batch_size
    weight_decay = 1.0 / n_train_batches
    grads = T.grad(cost, params)
    learning_rate = 3e-4
    l_r = theano.shared(np.asarray(learning_rate, dtype=np.float32))
    get_optimizer = optimizer.get_adam_optimizer_min(learning_rate=l_r,
                                                     decay1=0.1,
                                                     decay2=0.001,
                                                     weight_decay=weight_decay)
    updates = get_optimizer(params, grads)
    parameters = theano.function(
        inputs=[],
        outputs=params,
    )

    no_drop = np.cast['int32'](0)

    def _error_function(set_x, set_y):
        """Compile a function mapping a batch index to classifier.errors."""
        batch = slice(index * batch_size, (index + 1) * batch_size)
        return theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: set_x[batch],
                y: set_y[batch],
                drop: no_drop
            })

    test_model = _error_function(test_set_x, test_set_y)
    test_pertub_model = _error_function(test_set_x_pertub, test_set_y)
    validate_model = _error_function(valid_set_x, valid_set_y)

    ##################
    # Pretrain MODEL #
    ##################

    # the environment may override which checkpoint is loaded
    model_epoch = int(os.environ.get('model_epoch', 250))

    color.printBlue('... setting parameters')
    color.printBlue(predir)
    if model_epoch == -1:
        pre_train = np.load(predir + 'best-model.npz')
    else:
        pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz')
    pre_train = pre_train['model']
    for (para, pre) in zip(params, pre_train):
        para.set_value(pre)

    ##################
    # EVALUATE MODEL #
    ##################
    valid_losses = [validate_model(i) for i in range(n_valid_batches)]
    valid_score = np.mean(valid_losses)

    test_losses = [test_model(i) for i in range(n_test_batches)]
    test_score = np.mean(test_losses)

    test_losses_pertub = [test_pertub_model(i) for i in range(n_test_batches)]
    test_score_pertub = np.mean(test_losses_pertub)

    print('%s %s %s' % (valid_score, test_score, test_score_pertub))
Exemplo n.º 5
0
def c_6layer_mnist_imputation(seed=0,
                              ctype='cva',
                              pertub_type=3,
                              pertub_prob=6,
                              pertub_prob1=14,
                              visualization_times=20,
                              denoise_times=200,
                              predir=None,
                              n_batch=144,
                              dataset='mnist.pkl.gz',
                              batch_size=500):
    """
    Missing data imputation with a pre-trained convolutional auto-encoder.

    Builds a recognition network (five ConvMaxPool stages, a fully connected
    layer and a GaussianHidden layer) and a mirrored generative network, loads
    their parameters from a checkpoint in ``predir`` and then repeatedly feeds
    the perturbed test set through the model.  After every pass each pixel is
    rebuilt as ``p_label * x + (1 - p_label) * reconstruction`` using the
    clean test data ``x`` and the mask ``pertub_label`` returned by
    ``datapy.load_pertub_data``.

    Everything is written below a fresh, time-stamped directory
    ``results/imputation/<ctype>/mnist/...``:

    * ``hook.txt`` -- ``predir`` followed by one ``epoch mse`` line per pass
    * ``data.png`` / ``data_pertub.png`` -- first 100 clean / perturbed images
    * ``procedure-<epoch>.png`` / ``.npz`` -- state after every pass
    * ``output.png`` / ``output.npz`` -- per-sample history of the first
      ``visualization_times`` passes
    * ``train_features.npy``, ``valid_features.npy``, ``test_features.npy``
      -- activations of the fully connected recognition layer; the test
      features are computed from the imputed test data

    The checkpoint is ``predir + 'model-<epoch>.npz'`` where ``<epoch>`` is
    600 unless the environment variable ``model_epoch`` overrides it; the
    value ``-1`` selects ``predir + 'best-model.npz'``.

    :type seed: int
    :param seed: seed of the NumPy ``RandomState`` handed to every layer

    :type ctype: string
    :param ctype: ``'cva'`` loads every array of the checkpoint, ``'cmmva'``
                  skips its last two arrays; also used in the log path

    :param pertub_type: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob1: forwarded to ``datapy.load_pertub_data``

    :type visualization_times: int
    :param visualization_times: number of initial passes recorded in
                                ``output``

    :type denoise_times: int
    :param denoise_times: number of imputation passes over the test set

    :type predir: string
    :param predir: prefix of the checkpoint files; file names are appended
                   by plain concatenation, so include the trailing separator

    :type n_batch: int
    :param n_batch: batch size of the random-generation branch of the graph

    :type dataset: string
    :param dataset: passed to ``datapy.load_data_gpu``

    :type batch_size: int
    :param batch_size: minibatch size; samples beyond the last full batch
                       are ignored

    :raises SystemExit: when ``predir`` is None or ``ctype`` is neither
                        ``'cva'`` nor ``'cmmva'``
    """
    # Fail fast: the original detected both problems only after creating a
    # log directory and compiling the whole graph, and then left through a
    # bare exit() -- i.e. silently and with a success status.
    if predir is None:
        raise SystemExit('c_6layer_mnist_imputation: predir is required '
                         '(prefix of the pre-trained model-*.npz files)')
    if ctype not in ('cva', 'cmmva'):
        raise SystemExit('c_6layer_mnist_imputation: unsupported ctype %r '
                         "(expected 'cva' or 'cmmva')" % (ctype,))

    # cp->cd->cpd->cd->c
    # 28->12->12->5->5/5*5*64->500->50->500->5*5*64/5->5->12->12->28
    nkerns = [32, 32, 64, 64, 64]
    drops = [0, 0, 0, 0, 0, 1]
    n_hidden = [500, 50]
    drop_inverses = [
        1,
    ]

    # The network below is hard-wired to 28x28 single-channel input, so the
    # image geometry does not depend on how the dataset file is spelled.
    # (Previously these were only bound for dataset == 'mnist.pkl.gz' and any
    # other path ended in a NameError at the first image dump.)
    dim_input = (28, 28)
    colorImg = False
    n_pixels = dim_input[0] * dim_input[1]

    logdir = 'results/imputation/%s/mnist/%s_6layer_mnist_%s_%s_%s_%s_%s/' % (
        ctype, ctype, pertub_type, pertub_prob, pertub_prob1, denoise_times,
        int(time.time()))

    if not os.path.exists(logdir):
        os.makedirs(logdir)

    hook_path = logdir + 'hook.txt'

    print(predir)
    with open(hook_path, 'a') as f:
        f.write('%s\n' % (predir,))

    train_set_x, test_set_x, test_set_x_pertub, pertub_label, pertub_number = datapy.load_pertub_data(
        dirs='data_imputation/',
        pertub_type=pertub_type,
        pertub_prob=pertub_prob,
        pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)
    valid_set_x = datasets[1][0]

    # number of full minibatches; '//' keeps these integers on Python 3 too
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # clean data
    x_pertub = T.matrix('x_pertub')  # rasterized (perturbed) images
    p_label = T.matrix('p_label')  # multiplies the clean data in the merge
    random_z = T.matrix('random_z')

    drop = T.iscalar('drop')
    drop_inverse = T.iscalar('drop_inverse')

    activation = nonlinearity.relu

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    input_x = x_pertub.reshape((batch_size, 1, 28, 28))

    # ---- recognition network -------------------------------------------
    # One row per convolutional stage:
    # (input channels, input width/height, output channels,
    #  filter size, pool size, border mode)
    conv_specs = [
        (1, 28, nkerns[0], 5, 2, 'valid'),
        (nkerns[0], 12, nkerns[1], 3, 1, 'same'),
        (nkerns[1], 12, nkerns[2], 3, 2, 'valid'),
        (nkerns[2], 5, nkerns[3], 3, 1, 'same'),
        (nkerns[3], 5, nkerns[4], 3, 1, 'same'),
    ]

    recg_layer = []
    cnn_output = []

    # Layers are created strictly in order so that `rng` / `rng_share` are
    # consumed in the same sequence as before and `params` lines up with the
    # checkpoint.
    for use_drop, spec in zip(drops, conv_specs):
        n_in, size, n_out, ksize, pool, mode = spec
        layer = ConvMaxPool.ConvMaxPool(
            rng,
            image_shape=(batch_size, n_in, size, size),
            filter_shape=(n_out, n_in, ksize, ksize),
            poolsize=(pool, pool),
            border_mode=mode,
            activation=activation)
        recg_layer.append(layer)

        layer_input = cnn_output[-1] if cnn_output else input_x
        if use_drop == 1:
            cnn_output.append(
                layer.drop_output(layer_input, drop=drop, rng=rng_share))
        else:
            cnn_output.append(layer.output(layer_input))

    mlp_input_x = cnn_output[-1].flatten(2)

    activations = []

    recg_layer.append(
        FullyConnected.FullyConnected(rng=rng,
                                      n_in=5 * 5 * nkerns[-1],
                                      n_out=n_hidden[0],
                                      activation=activation))
    if drops[-1] == 1:
        activations.append(recg_layer[-1].drop_output(input=mlp_input_x,
                                                      drop=drop,
                                                      rng=rng_share))
    else:
        activations.append(recg_layer[-1].output(input=mlp_input_x))

    # stochastic layer
    recg_layer.append(
        GaussianHidden.GaussianHidden(rng=rng,
                                      input=activations[-1],
                                      n_in=n_hidden[0],
                                      n_out=n_hidden[1],
                                      activation=None))

    z = recg_layer[-1].sample_z(rng_share)

    # ---- generative network --------------------------------------------
    gene_layer = []
    z_output = []
    random_z_output = []

    gene_layer.append(
        FullyConnected.FullyConnected(rng=rng,
                                      n_in=n_hidden[1],
                                      n_out=n_hidden[0],
                                      activation=activation))
    z_output.append(gene_layer[-1].output(input=z))
    random_z_output.append(gene_layer[-1].output(input=random_z))

    gene_layer.append(
        FullyConnected.FullyConnected(rng=rng,
                                      n_in=n_hidden[0],
                                      n_out=5 * 5 * nkerns[-1],
                                      activation=activation))
    if drop_inverses[0] == 1:
        z_output.append(gene_layer[-1].drop_output(input=z_output[-1],
                                                   drop=drop_inverse,
                                                   rng=rng_share))
        random_z_output.append(gene_layer[-1].drop_output(
            input=random_z_output[-1], drop=drop_inverse, rng=rng_share))
    else:
        z_output.append(gene_layer[-1].output(input=z_output[-1]))
        random_z_output.append(
            gene_layer[-1].output(input=random_z_output[-1]))

    z_in = z_output[-1].reshape((batch_size, nkerns[-1], 5, 5))
    random_z_in = random_z_output[-1].reshape((n_batch, nkerns[-1], 5, 5))

    # Same row layout as conv_specs plus the activation; the last stage uses
    # a sigmoid so its output can serve as the mean of a Bernoulli.
    unpool_specs = [
        (nkerns[-1], 5, nkerns[-2], 3, 1, 'same', activation),
        (nkerns[-2], 5, nkerns[-3], 3, 2, 'full', activation),
        (nkerns[-3], 12, nkerns[-4], 3, 1, 'same', activation),
        (nkerns[-4], 12, nkerns[-5], 3, 1, 'same', activation),
        (nkerns[-5], 12, 1, 5, 2, 'full', nonlinearity.sigmoid),
    ]

    for n_in, size, n_out, ksize, pool, mode, act in unpool_specs:
        layer = UnpoolConvNon.UnpoolConvNon(
            rng,
            image_shape=(batch_size, n_in, size, size),
            filter_shape=(n_out, n_in, ksize, ksize),
            poolsize=(pool, pool),
            border_mode=mode,
            activation=act)
        gene_layer.append(layer)

        z_output.append(layer.output(input=z_in))
        random_z_output.append(
            layer.output_random_generation(input=random_z_in,
                                           n_batch=n_batch))
        z_in = z_output[-1]
        random_z_in = random_z_output[-1]

    gene_layer.append(NoParamsBernoulliVisiable.NoParamsBernoulliVisiable())

    # NOTE(review): `logpx` and `random_x` (and with it the whole random_z
    # branch) are not consumed by any compiled function below; they are kept
    # because the project layers they call are not visible from here.
    logpx = gene_layer[-1].logpx(mean=z_output[-1], data=input_x)

    # 4-D tensor of random generation
    random_x_mean = random_z_output[-1]
    random_x = gene_layer[-1].sample_x(rng_share, random_x_mean)

    # take p_label-weighted pixels from the clean data and the rest from the
    # reconstruction
    x_denoised = z_output[-1].flatten(2)
    x_denoised = p_label * x + (1 - p_label) * x_denoised

    mse = ((x - x_denoised)**2).sum() / pertub_number

    # generative parameters first, then recognition ones: checkpoint order
    params = []
    for g in gene_layer:
        params += g.params
    for r in recg_layer:
        params += r.params

    no_drop = np.cast['int32'](0)
    batch = slice(index * batch_size, (index + 1) * batch_size)

    train_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: train_set_x[batch],
            drop: no_drop
        })

    valid_activations = theano.function(
        inputs=[index],
        outputs=T.concatenate(activations, axis=1),
        givens={
            x_pertub: valid_set_x[batch],
            drop: no_drop
        })

    test_activations = theano.function(inputs=[x_pertub],
                                       outputs=T.concatenate(activations,
                                                             axis=1),
                                       givens={drop: no_drop})

    imputation_model = theano.function(
        inputs=[index, x_pertub],
        outputs=[x_denoised, mse],
        givens={
            x: test_set_x[batch],
            p_label: pertub_label[batch],
            drop: no_drop,
            drop_inverse: np.cast['int32'](0)
        })

    ##################
    # Pretrain MODEL #
    ##################

    # the environment may override which checkpoint is loaded
    model_epoch = int(os.environ.get('model_epoch', 600))

    color.printBlue('... setting parameters')
    color.printBlue(predir)
    if model_epoch == -1:
        pre_train = np.load(predir + 'best-model.npz')
    else:
        pre_train = np.load(predir + 'model-' + str(model_epoch) + '.npz')
    pre_train = pre_train['model']
    if ctype == 'cmmva':
        # the last two stored arrays are not loaded into this model
        pre_train = pre_train[:-2]
    for (para, pre) in zip(params, pre_train):
        para.set_value(pre)

    ##############
    # IMPUTATION #
    ##############
    print('... training')

    epoch = 0
    n_visualization = 100
    n_columns = visualization_times + 2  # clean, perturbed, then one per pass
    output = np.ones((n_visualization, n_columns, n_pixels))
    output[:, 0, :] = test_set_x.get_value()[:n_visualization, :]
    output[:, 1, :] = test_set_x_pertub.get_value()[:n_visualization, :]

    image = paramgraphics.mat_to_img(output[:, 0, :].T,
                                     dim_input,
                                     colorImg=colorImg)
    image.save(logdir + 'data.png', 'PNG')
    image = paramgraphics.mat_to_img(output[:, 1, :].T,
                                     dim_input,
                                     colorImg=colorImg)
    image.save(logdir + 'data_pertub.png', 'PNG')

    # working copy of the perturbed test set, refined in place on every pass
    tmp = test_set_x_pertub.get_value()

    while epoch < denoise_times:
        epoch = epoch + 1
        this_mse = 0
        for i in range(n_test_batches):
            rows = slice(i * batch_size, (i + 1) * batch_size)
            d, m = imputation_model(i, tmp[rows])
            tmp[rows] = np.asarray(d)
            this_mse += m
        if epoch <= visualization_times:
            output[:, epoch + 1, :] = tmp[:n_visualization, :]

        progress = '%s %s' % (epoch, this_mse)
        print(progress)
        with open(hook_path, 'a') as f:
            f.write(progress + '\n')

        image = paramgraphics.mat_to_img(tmp[:n_visualization, :].T,
                                         dim_input,
                                         colorImg=colorImg)
        image.save(logdir + 'procedure-' + str(epoch) + '.png', 'PNG')
        np.savez(logdir + 'procedure-' + str(epoch), tmp=tmp)

    # `output` holds n_visualization * n_columns images, so the tile grid
    # must follow visualization_times instead of the former literal 22
    # (which only matched the default of 20).
    image = paramgraphics.mat_to_img((output.reshape(-1, n_pixels)).T,
                                     dim_input,
                                     colorImg=colorImg,
                                     tile_shape=(n_visualization, n_columns))
    image.save(logdir + 'output.png', 'PNG')
    np.savez(logdir + 'output', output=output)

    # save original train/valid features and the features of the imputed
    # test data; batches are collected first and stacked once
    train_features = np.vstack(
        [np.asarray(train_activations(i)) for i in range(n_train_batches)])
    valid_features = np.vstack(
        [np.asarray(valid_activations(i)) for i in range(n_valid_batches)])
    test_features = np.vstack([
        np.asarray(test_activations(tmp[i * batch_size:(i + 1) * batch_size]))
        for i in range(n_test_batches)
    ])

    np.save(logdir + 'train_features', train_features)
    np.save(logdir + 'valid_features', valid_features)
    np.save(logdir + 'test_features', test_features)
Exemplo n.º 6
0
def c_6layer_mnist_imputation(seed=0,
             pertub_type=3, pertub_prob=6, pertub_prob1=14,
             predir=None, n_batch=144,
             dataset='mnist.pkl.gz', batch_size=500):
    """
    Missing data imputation: score a pre-trained CNN classifier.

    Builds a recognition network of five ConvMaxPool layers followed by a
    500-unit FullyConnected layer and a 10-way Pegasos classifier, loads
    previously saved parameter values into it, and prints the mean of
    ``classifier.errors`` over the validation set, the test set and the
    perturbed test set (in that order, on one line).  Nothing is trained
    here; the function only evaluates.

    The snapshot to load is chosen through the ``model_epoch`` environment
    variable: unset means epoch 250, ``-1`` selects ``best-model.npz``, any
    other integer ``k`` selects ``model-k.npz``.

    :type seed: int
    :param seed: seed of the numpy RandomState handed to every layer
                 constructor (loaded parameters are written over the
                 layers' shared variables afterwards)

    :param pertub_type: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob: forwarded to ``datapy.load_pertub_data``
    :param pertub_prob1: forwarded to ``datapy.load_pertub_data``

    :type predir: string
    :param predir: prefix that is concatenated directly with the snapshot
                   file name, so it must end with a path separator.
                   Required: when it is None the function raises
                   SystemExit before doing any work.

    :param n_batch: accepted for interface compatibility; not used

    :type dataset: string
    :param dataset: passed to ``datapy.load_data_gpu`` to obtain the
                    validation inputs and the validation/test labels

    :type batch_size: int
    :param batch_size: minibatch size; also the fixed leading dimension of
                       the reshaped network input.  Samples that do not
                       fill a complete minibatch are not evaluated.

    :raises SystemExit: if ``predir`` is None
    """
    # Fail fast: there is nothing to evaluate without saved parameters, so
    # check before the expensive data loading and graph construction.  The
    # original called exit() here; SystemExit keeps that exception type but
    # carries a message and a non-zero exit status.
    if predir is None:
        raise SystemExit(
            'c_6layer_mnist_imputation: predir is required (prefix of the '
            'saved model-<epoch>.npz / best-model.npz files)')

    # cp->cd->cpd->cd->c
    nkerns = [32, 32, 64, 64, 64]
    # per-layer switch between drop_output (1) and output (0) for the five
    # convolutional layers; the fully-connected layer below always goes
    # through drop_output
    drops = [0, 0, 0, 0, 0]
    # 28->12->12->5->5/5*5*64->500
    # one row per convolutional layer:
    # (input feature maps, input side, filter side, poolsize, border_mode)
    conv_specs = [
        (1, 28, 5, (2, 2), 'valid'),
        (nkerns[0], 12, 3, (1, 1), 'same'),
        (nkerns[1], 12, 3, (2, 2), 'valid'),
        (nkerns[2], 5, 3, (1, 1), 'same'),
        (nkerns[3], 5, 3, (1, 1), 'same'),
    ]

    # only the clean and perturbed test inputs are needed from this loader
    _, test_set_x, test_set_x_pertub, _, _ = datapy.load_pertub_data(
        dirs='data_imputation/', pertub_type=pertub_type,
        pertub_prob=pertub_prob, pertub_prob1=pertub_prob1)

    datasets = datapy.load_data_gpu(dataset, have_matrix=True)
    valid_set_x, valid_set_y, _ = datasets[1]
    _, test_set_y, _ = datasets[2]

    # number of complete minibatches; floor division keeps these integers
    # regardless of the interpreter's division semantics
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels
    drop = T.iscalar('drop')

    activation = nonlinearity.relu

    rng = np.random.RandomState(seed)
    rng_share = theano.tensor.shared_randomstreams.RandomStreams(0)

    recg_layer = []
    layer_input = x.reshape((batch_size, 1, 28, 28))

    # the layers are created in a fixed order so that `rng` is consumed in
    # the same sequence and `params` lines up with the saved snapshot
    for layer_idx, spec in enumerate(conv_specs):
        n_in_maps, side, filter_side, poolsize, border_mode = spec
        layer = ConvMaxPool.ConvMaxPool(
            rng,
            image_shape=(batch_size, n_in_maps, side, side),
            filter_shape=(nkerns[layer_idx], n_in_maps,
                          filter_side, filter_side),
            poolsize=poolsize,
            border_mode=border_mode,
            activation=activation
        )
        recg_layer.append(layer)
        if drops[layer_idx] == 1:
            layer_input = layer.drop_output(input=layer_input, drop=drop,
                                            rng=rng_share)
        else:
            layer_input = layer.output(input=layer_input)

    mlp_input = layer_input.flatten(2)

    recg_layer.append(FullyConnected.FullyConnected(
        rng=rng,
        n_in=nkerns[4] * 5 * 5,
        n_out=500,
        activation=activation
    ))
    feature = recg_layer[-1].drop_output(mlp_input, drop=drop, rng=rng_share)

    # classify the output of the fully-connected layer
    classifier = Pegasos.Pegasos(input=feature, rng=rng, n_in=500, n_out=10,
                                 weight_decay=0, loss=1)

    # every shared parameter, in the order used when restoring a snapshot:
    # recognition layers first, classifier last
    params = []
    for r in recg_layer:
        params += r.params
    params += classifier.params

    # evaluation always feeds drop = 0; a 0-d int32 array matches the
    # iscalar type of `drop`
    no_drop = np.asarray(0, dtype='int32')

    def compile_error_fn(set_x, set_y):
        """Compile a function mapping a minibatch index to classifier.errors."""
        return theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={
                x: set_x[index * batch_size: (index + 1) * batch_size],
                y: set_y[index * batch_size: (index + 1) * batch_size],
                drop: no_drop
            }
        )

    test_model = compile_error_fn(test_set_x, test_set_y)
    test_pertub_model = compile_error_fn(test_set_x_pertub, test_set_y)
    validate_model = compile_error_fn(valid_set_x, valid_set_y)

    #############################
    # LOAD PRE-TRAINED WEIGHTS  #
    #############################
    model_epoch = int(os.environ.get('model_epoch', 250))

    color.printBlue('... setting parameters')
    color.printBlue(predir)
    if model_epoch == -1:
        model_path = predir + 'best-model.npz'
    else:
        model_path = predir + 'model-' + str(model_epoch) + '.npz'

    # close the archive once the array has been read instead of leaking
    # the underlying file handle
    archive = np.load(model_path)
    try:
        pre_train = archive['model']
    finally:
        archive.close()

    # zip stops at the shorter sequence, so a snapshot holding more entries
    # than this network has parameters only has its leading entries used
    for (para, pre) in zip(params, pre_train):
        para.set_value(pre)

    ##################
    # EVALUATE MODEL #
    ##################
    valid_losses = [validate_model(i) for i in range(n_valid_batches)]
    valid_score = np.mean(valid_losses)

    test_losses = [test_model(i) for i in range(n_test_batches)]
    test_score = np.mean(test_losses)

    test_losses_pertub = [test_pertub_model(i) for i in range(n_test_batches)]
    test_score_pertub = np.mean(test_losses_pertub)

    # single pre-formatted argument: prints the same text whether `print`
    # is a statement or a function
    print('%s %s %s' % (valid_score, test_score, test_score_pertub))