Example #1
File: LeNet.py  Project: odysszis/AML
def fine_tuning(learning_rate = 0.1, n_epochs = 1000, nkerns = 100, batch_size = 260,
                logistic_params_path = None, CNN_inputFilters_path = None, CNN_inputBias_path = None):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: int
    :param nkerns: number of convolution layer filters (kernels)

    :type batch_size: int
    :param batch_size: size of batch in which the data are passed to the model
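
    :type logistic_params_path: str
    :param logistic_params_path: path to a pickle file with pre-trained
                                 logistic-layer parameters (W, b), or None

    :type CNN_inputFilters_path: str
    :param CNN_inputFilters_path: path to a .npy file with auto-encoder
                                  pre-trained convolution filters, or None

    :type CNN_inputBias_path: str
    :param CNN_inputBias_path: path to a .npy file with auto-encoder
                               pre-trained convolution biases, or None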
    """

    ######################
    #   INITIALIZATIONS  #
    ######################

    # load Auto-encoder pre-trained bias
    if CNN_inputBias_path is None:
        b_CNN_input = None
    else:
        b_temp = numpy.load(CNN_inputBias_path)
        b_CNN_input = theano.shared(
            value=b_temp.astype(fx),       # b is 100 x 1 and is used as-is
            name='b_CNN_input',
            borrow = True
        )

    # load Auto-encoder pre-trained filter weights
    if CNN_inputFilters_path is None:
        W_CNN_input = None
    else:
        W = numpy.load(CNN_inputFilters_path)
        W_4D_tensor = numpy.reshape(W, (nkerns, 1, 11, 11))
        W_CNN_input = theano.shared(
            value=W_4D_tensor.astype(fx),    # W is 100 x 11 x 11; reshaped to 100 x 1 x 11 x 11
            name='W_CNN_input',
            borrow = True
        )

    # load logistic layer pre-training parameters
    if logistic_params_path is None:
        W_logistic = None
        b_logistic = None
    else:
        with open(logistic_params_path, 'rb') as f:
            params = pickle.load(f)
        W_logistic, b_logistic = params[0]

    rng = numpy.random.RandomState(23455)

    # load data set
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size                                          # integer division: number of minibatches (13 here)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x', dtype=fx)   # the data is presented as rasterized images
    y = T.matrix('y', dtype=fx)   # the targets are presented as flattened
                                  # 2D masks, one 1024-element row per image

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    #print('... building the model')

    # Reshape matrix of images of shape (batch_size, 64 * 64)
    # to a 4D tensor of shape (batch_size, 1, 64, 64)
    layer0_input = x.reshape((batch_size, 1, 64, 64))

    # Construct convolutional & pooling layer:
    # filtering reduces the image size to (64-11+1 , 64-11+1) = (54, 54)
    # maxpooling reduces this further to (54/6, 54/6) = (9, 9)
    # 4D output tensor is thus of shape (batch_size, 100, 9, 9)
    layer0 = LeNetConvPoolLayer(
        rng = rng,
        input = layer0_input,
        filter_shape = (nkerns, 1, 11, 11),
        image_shape = (batch_size, 1, 64, 64),                # batch of single-channel 64 x 64 images
        poolsize = (6, 6),
        W = W_CNN_input,
        b = b_CNN_input
    )

    # flatten out the input of the logistic layer
    layer0_output = layer0.output.flatten(2)                # batch_size x 8,100
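    # Flattened width: nkerns * 9 * 9 = 100 * 81 = 8,100 features per image,
    # which is why the logistic layer below is constructed with n_in = 8100.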

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
        input = layer0_output,
        n_in = 8100,
        n_out = 1024,
        W = W_logistic,
        b = b_logistic
    )

    layer3_output = layer3.output                           # batch_size x 1024 tensor

    # compute cost
    #cost = 0.5 * T.mean((layer3_output - y) ** 2)
    # regularization parameter
    l = 0.0001
    # calculate norms for cost
    l2_squared = (layer0.W ** 2).sum() + (layer3.W ** 2).sum()
    cost = 0.5 * T.mean((layer3_output - y) ** 2) + 0.5 * l * l2_squared
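    # Written out, the line above computes (a restatement, not a change):
    #   cost = 0.5 * mean((layer3_output - y)^2)
    #          + 0.5 * l * (||layer0.W||^2 + ||layer3.W||^2)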

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # updates. loop over all parameters and gradients
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
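    # Plain SGD: every call to the compiled train_model below replaces each
    # shared parameter with (param_i - learning_rate * grad_i) in one step.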

    # theano function to evaluate model
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
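    # The `givens` mapping substitutes minibatch slices of the shared training
    # arrays for the symbolic x and y, so batches are cut directly out of
    # train_set_x / train_set_y (wherever those shared variables live) instead
    # of being passed in as function arguments.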

    ###############
    # TRAIN MODEL #
    ###############
    print('... fine tuning')

    epoch = 0
    #epsilon = 0.0000005
    #last_loss = 0

    logging.debug('%-10s %-10s %-10s' % ('Epoch', 'Batch', 'Cost'))
    while (epoch < n_epochs):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):

            cost_ij = train_model(minibatch_index)
            #print '\nepoch = %s' % epoch
            #print 'batch = %s' % minibatch_index
            print('epoch = %s batch = %s cost = %s' % (epoch, minibatch_index, cost_ij))
            logging.debug('%-10s %-10s %-10s' % (epoch, minibatch_index, cost_ij))
            #if cost_ij - last_loss <= epsilon:
                #print 'converged: %.2f' % (cost_ij - last_loss)
            #    logging.debug('Converged %s'%(cost_ij - last_loss))
            #    return

            #last_loss = cost_ij

    print('Optimization complete.')

    with open('../data/fine_tune_paramsXnew.pickle', 'wb') as f:
        pickle.dump([params], f)


def pre_training(learning_rate = 0.1, n_epochs = 1000, nkerns = 100, batch_size = 260, CNN_inputFilters_path = None, CNN_inputBias_path = None):
    """ Pre-trains the logistic regression layer on top of the (auto-encoder
    initialized) convolutional layer; only the logistic layer's parameters are
    updated. Arguments have the same meaning as in fine_tuning.
    """

    ######################
    #   INITIALIZATIONS  #
    ######################

    # load Auto-encoder pre-trained bias
    if CNN_inputBias_path is None:
        b_CNN_input = None
    else:
        b_temp = numpy.load(CNN_inputBias_path)
        b_CNN_input = theano.shared(
            value=b_temp.astype(fx),       # b is 100 x 1 and is used as-is
            name='b_CNN_input',
            borrow = True
        )

    # load Auto-encoder pre-trained filter weights
    if CNN_inputFilters_path is None:
        W_CNN_input = None
    else:
        W = numpy.load(CNN_inputFilters_path)
        W_4D_tensor = numpy.reshape(W, (nkerns, 1, 11, 11))
        W_CNN_input = theano.shared(
            value=W_4D_tensor.astype(fx),    # W is 100 x 11 x 11; reshaped to 100 x 1 x 11 x 11
            name='W_CNN_input',
            borrow = True
        )

    # initialize random generator
    rng = numpy.random.RandomState(23455)

    # load data set
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    # compute number of mini-batches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size                                          # integer division: number of minibatches

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x', dtype=fx)   # the data is presented as rasterized images
    y = T.matrix('y', dtype=fx)   # the targets are presented as flattened 2D masks, one row per image

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Convolution + Pooling Layer

    layer0_input = x.reshape((batch_size, 1, 64, 64))
    layer0 = LeNetConvPoolLayer(
        rng = rng,
        input=layer0_input,
        filter_shape=(nkerns, 1, 11, 11),
        image_shape=(batch_size, 1, 64, 64),
        poolsize=(6, 6),
        W = W_CNN_input,
        b = b_CNN_input
    )
    layer0_output = layer0.output.flatten(2)                # batch_size x 8,100

    # Logistic Regression Layer
    layer3 = LogisticRegression(input = layer0_output, n_in = 8100, n_out = 1024)
    layer3_output = layer3.output                           # batch_size x 1024 tensor

    # cost for training
    #cost = T.mean((layer3_output - y) ** 2)
    # regularization parameter
    l = 0.0001
    l2_squared = (layer3.W ** 2).sum()
    cost = 0.5 * T.mean((layer3_output - y) ** 2) + 0.5 * l * l2_squared

    # parameters to be updated
    params = layer3.params
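    # Note: only the logistic layer is trained in this phase; layer0.params is
    # left out of this list, so the auto-encoder filters stay fixed.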

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    epoch = 0

    while (epoch < n_epochs):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):

            cost_ij = train_model(minibatch_index)
            print('\nepoch = %s' % epoch)
            print('batch = %s' % minibatch_index)
            print('cost = %s' % cost_ij)


    print('Optimization complete.')

    with open('../data/logistic_paramsXnew.pickle', 'wb') as f:
        pickle.dump([params], f)
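

# ---------------------------------------------------------------------------
# Hypothetical driver (a sketch, not part of the original project): run the
# logistic-layer pre-training first, then fine-tune the full stack with the
# parameters it saved.  The auto-encoder filter/bias paths and the small epoch
# count below are placeholder assumptions; the pickle path matches the file
# written by pre_training above.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    ae_filters = '../data/AE_filters.npy'   # placeholder path (assumption)
    ae_bias = '../data/AE_bias.npy'         # placeholder path (assumption)

    pre_training(learning_rate=0.1, n_epochs=10, nkerns=100, batch_size=260,
                 CNN_inputFilters_path=ae_filters,
                 CNN_inputBias_path=ae_bias)

    fine_tuning(learning_rate=0.1, n_epochs=10, nkerns=100, batch_size=260,
                logistic_params_path='../data/logistic_paramsXnew.pickle',
                CNN_inputFilters_path=ae_filters,
                CNN_inputBias_path=ae_bias)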