def fine_tuning(learning_rate=0.1, n_epochs=1000, nkerns=100, batch_size=260,
                logistic_params_path=None, CNN_inputFilters_path=None,
                CNN_inputBias_path=None):
    """Fine-tunes the full network (conv/pool layer + logistic regression
    output layer) on 64x64 input images, optionally initialising both layers
    from pre-trained parameters.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: int
    :param nkerns: number of convolution layer filters (kernels)

    :type batch_size: int
    :param batch_size: size of batch in which the data are passed to the model

    :type logistic_params_path: str
    :param logistic_params_path: path to a pickle with pre-trained logistic
                                 layer parameters (as written by pre_training)

    :type CNN_inputFilters_path: str
    :param CNN_inputFilters_path: path to a .npy file with auto-encoder
                                  pre-trained filters (100 x 11 x 11)

    :type CNN_inputBias_path: str
    :param CNN_inputBias_path: path to a .npy file with auto-encoder
                               pre-trained biases (100 x 1)
    """

    ######################
    #  INITIALIZATIONS   #
    ######################

    # load Auto-encoder pre-trained bias
    if CNN_inputBias_path is None:
        b_CNN_input = None
    else:
        b_temp = numpy.load(CNN_inputBias_path)
        b_CNN_input = theano.shared(
            value=b_temp.astype(fx),  # b is 100 x 1, is ok
            name='b_CNN_input',
            borrow=True
        )

    # load Auto-encoder pre-trained filter weights
    if CNN_inputFilters_path is None:
        W_CNN_input = None
    else:
        W = numpy.load(CNN_inputFilters_path)
        W_4D_tensor = numpy.reshape(W, (100, 1, 11, 11))
        W_CNN_input = theano.shared(
            value=W_4D_tensor.astype(fx),  # W is 100 x 11 x 11, reshaped to 100 x 1 x 11 x 11
            name='W_CNN_input',
            borrow=True
        )

    # load logistic layer pre-trained parameters
    if logistic_params_path is None:
        W_logistic = None
        b_logistic = None
    else:
        with open(logistic_params_path, 'rb') as f:
            params = pickle.load(f)
        W_logistic, b_logistic = params[0]

    rng = numpy.random.RandomState(23455)

    # load data set
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size  # 13

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x', dtype=fx)  # the data is presented as rasterized images
    y = T.matrix('y', dtype=fx)  # the labels are presented as a 2D mask vector

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    # print('... building the model')

    # Reshape matrix of images of shape (batch_size, 64 * 64)
    # to a 4D tensor of shape (batch_size, 1, 64, 64)
    layer0_input = x.reshape((batch_size, 1, 64, 64))

    # Construct convolutional & pooling layer:
    # filtering reduces the image size to (64-11+1, 64-11+1) = (54, 54)
    # maxpooling reduces this further to (54/6, 54/6) = (9, 9)
    # 4D output tensor is thus of shape (batch_size, 100, 9, 9)
    layer0 = LeNetConvPoolLayer(
        rng=rng,
        input=layer0_input,
        filter_shape=(nkerns, 1, 11, 11),  # 100 x 1 x 11 x 11 filters
        image_shape=(batch_size, 1, 64, 64),
        poolsize=(6, 6),
        W=W_CNN_input,
        b=b_CNN_input
    )

    # flatten out the input of the logistic layer
    layer0_output = layer0.output.flatten(2)  # batch_size x 8,100

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(
        input=layer0_output,
        n_in=8100,
        n_out=1024,
        W=W_logistic,
        b=b_logistic
    )
    layer3_output = layer3.output  # batch_size x 1024 tensor

    # compute cost (unregularised version kept for reference)
    # cost = 0.5 * T.mean((layer3_output - y) ** 2)

    # regularization parameter
    l = 0.0001

    # L2 norms of both layers' weights for the cost
    l2_squared = (layer0.W ** 2).sum() + (layer3.W ** 2).sum()

    # mean squared error plus L2 weight decay
    cost = 0.5 * T.mean((layer3_output - y) ** 2) + 0.5 * l * l2_squared

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # updates: loop over all parameters and gradients
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # theano function to train the model on one minibatch
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... fine tuning')

    epoch = 0
    # epsilon = 0.0000005
    # last_loss = 0
    logging.debug('%-10s %-10s %-10s' % ('Epoch', 'Batch', 'Cost'))
    while epoch < n_epochs:
        epoch += 1
        for minibatch_index in xrange(n_train_batches):
            cost_ij = train_model(minibatch_index)
            print('epoch = %s batch = %s cost = %s'
                  % (epoch, minibatch_index, cost_ij))
            logging.debug('%-10s %-10s %-10s' % (epoch, minibatch_index, cost_ij))
            # if cost_ij - last_loss <= epsilon:
            #     print('converged: %.2f' % (cost_ij - last_loss))
            #     logging.debug('Converged %s' % (cost_ij - last_loss))
            #     return
            # last_loss = cost_ij

    print('Optimization complete.')

    with open('../data/fine_tune_paramsXnew.pickle', 'wb') as f:
        pickle.dump([params], f)
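
# A minimal sketch (not part of the original script) of how the parameters
# pickled by fine_tuning() could be read back. It assumes the same
# '../data/fine_tune_paramsXnew.pickle' path used above, that Theano is
# importable when unpickling (the file stores Theano shared variables), and
# that the saved list order is layer3.params + layer0.params as in
# fine_tuning(). The function name is illustrative; it reuses the module's
# existing pickle import.
def load_fine_tuned_params(path='../data/fine_tune_paramsXnew.pickle'):
    with open(path, 'rb') as f:
        saved = pickle.load(f)
    # saved == [params] with params = [W_logistic, b_logistic, W_conv, b_conv]
    W_logistic, b_logistic, W_conv, b_conv = saved[0]
    # e.g. W_logistic.get_value(borrow=True).shape == (8100, 1024)
    #      W_conv.get_value(borrow=True).shape == (100, 1, 11, 11)
    return W_logistic, b_logistic, W_conv, b_conv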
def pre_training(learning_rate=0.1, n_epochs=1000, nkerns=100, batch_size=260,
                 CNN_inputFilters_path=None, CNN_inputBias_path=None):
    """Pre-trains the logistic regression output layer on top of the
    auto-encoder-initialised convolutional layer; only the logistic layer
    parameters are updated (the convolutional layer keeps its weights).
    """

    ######################
    #  INITIALIZATIONS   #
    ######################

    # load Auto-encoder pre-trained bias
    if CNN_inputBias_path is None:
        b_CNN_input = None
    else:
        b_temp = numpy.load(CNN_inputBias_path)
        b_CNN_input = theano.shared(
            value=b_temp.astype(fx),  # b is 100 x 1, is ok
            name='b_CNN_input',
            borrow=True
        )

    # load Auto-encoder pre-trained filter weights
    if CNN_inputFilters_path is None:
        W_CNN_input = None
    else:
        W = numpy.load(CNN_inputFilters_path)
        W_4D_tensor = numpy.reshape(W, (100, 1, 11, 11))
        W_CNN_input = theano.shared(
            value=W_4D_tensor.astype(fx),  # W is 100 x 11 x 11, reshaped to 100 x 1 x 11 x 11
            name='W_CNN_input',
            borrow=True
        )

    # initialize random generator
    rng = numpy.random.RandomState(23455)

    # load data set
    datasets = load_data()
    train_set_x, train_set_y = datasets[0]

    # compute number of mini-batches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x', dtype=fx)  # the data is presented as rasterized images
    y = T.matrix('y', dtype=fx)  # the labels are presented as a 2D mask vector

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Convolution + Pooling Layer
    layer0_input = x.reshape((batch_size, 1, 64, 64))
    layer0 = LeNetConvPoolLayer(
        rng=rng,
        input=layer0_input,
        filter_shape=(nkerns, 1, 11, 11),
        image_shape=(batch_size, 1, 64, 64),
        poolsize=(6, 6),
        W=W_CNN_input,
        b=b_CNN_input
    )
    layer0_output = layer0.output.flatten(2)  # batch_size x 8,100

    # Logistic Regression Layer
    layer3 = LogisticRegression(input=layer0_output, n_in=8100, n_out=1024)
    layer3_output = layer3.output  # batch_size x 1024 tensor

    # cost for training (unregularised version kept for reference)
    # cost = T.mean((layer3_output - y) ** 2)

    # regularization parameter
    l = 0.0001

    # L2 norm of the logistic layer weights only
    l2_squared = (layer3.W ** 2).sum()

    # mean squared error plus L2 weight decay
    cost = 0.5 * T.mean((layer3_output - y) ** 2) + 0.5 * l * l2_squared

    # only the logistic layer parameters are updated during pre-training
    params = layer3.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        for minibatch_index in xrange(n_train_batches):
            cost_ij = train_model(minibatch_index)
            print('\nepoch = %s' % epoch)
            print('batch = %s' % minibatch_index)
            print('cost = %s' % cost_ij)

    print('Optimization complete.')

    with open('../data/logistic_paramsXnew.pickle', 'wb') as f:
        pickle.dump([params], f)
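
# A minimal usage sketch (illustrative, not from the original script): run the
# two stages in order. The .npy paths for the auto-encoder filters and bias
# are hypothetical placeholders; pre_training() writes
# '../data/logistic_paramsXnew.pickle', which fine_tuning() then consumes.
if __name__ == '__main__':
    pre_training(
        CNN_inputFilters_path='../data/autoencoder_filters.npy',  # hypothetical path
        CNN_inputBias_path='../data/autoencoder_bias.npy'         # hypothetical path
    )
    fine_tuning(
        logistic_params_path='../data/logistic_paramsXnew.pickle',
        CNN_inputFilters_path='../data/autoencoder_filters.npy',  # hypothetical path
        CNN_inputBias_path='../data/autoencoder_bias.npy'         # hypothetical path
    )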