示例#1
0
文件: cnnr.py 项目: Seleucia/CNNRNet
def train_model(params):
    """Build the CNNR network and train it with momentum SGD and early stopping.

    Every hyper-parameter is read from ``params`` (a mapping).  The keys used
    here are ``rn_id``, ``im_type``, ``nc``, ``size``, ``nkerns``,
    ``kern_mat``, ``pool_mat``, ``batch_size``, ``n_epochs``,
    ``initial_learning_rate``, ``learning_rate_decay``,
    ``squared_filter_length_limit``, ``lambda_1``, ``lambda_2``,
    ``mom_start``, ``mom_end``, ``mom_epoch_interval``, ``patience``,
    ``patience_increase`` and ``improvement_threshold``.  ``params`` is also
    handed unchanged to ``dataset_loader.load_tum_dataV2``.

    Side effects: prints progress to stdout, writes the elapsed time to
    stderr, and calls ``cnnr.save`` with a path under ``models/`` each time
    the validation loss improves.

    Returns:
        None.

    Raises:
        KeyError: if one of the keys listed above is missing from ``params``.
    """
    rn_id = params["rn_id"]
    im_type = params["im_type"]
    nc = params["nc"]  # number of channels
    size = params["size"]  # [height, width]; the original images are [480, 640]

    # Convolution and pooling parameters
    nkerns = params["nkerns"]
    kern_mat = params["kern_mat"]
    pool_mat = params["pool_mat"]

    # Learning parameters
    batch_size = params["batch_size"]
    n_epochs = params["n_epochs"]
    initial_learning_rate = params["initial_learning_rate"]
    learning_rate_decay = params["learning_rate_decay"]
    squared_filter_length_limit = params["squared_filter_length_limit"]
    # Shared so that decay_learning_rate() below can update it in place.
    learning_rate = theano.shared(numpy.asarray(initial_learning_rate, dtype=theano.config.floatX))
    lambda_1 = params["lambda_1"]  # weight of the L1 regulariser
    lambda_2 = params["lambda_2"]  # weight of the squared-L2 regulariser

    # Momentum schedule: for epochs in [0, mom_epoch_interval) the momentum
    # moves linearly from mom_start to mom_end; afterwards it stays at mom_end.
    mom_start = params["mom_start"]
    mom_end = params["mom_end"]
    mom_epoch_interval = params["mom_epoch_interval"]

    # Early-stopping parameters
    patience = params["patience"]  # training stops once the iteration count reaches this
    # When a significantly better validation loss is found, patience is raised
    # to at least iteration * patience_increase.
    patience_increase = params["patience_increase"]
    # A new best is "significant" when it is below best * improvement_threshold.
    improvement_threshold = params["improvement_threshold"]

    # Loading dataset: three (X, y, overlaps) triples; overlaps are not used here.
    datasets = dataset_loader.load_tum_dataV2(params)
    X_train, y_train, _ = datasets[0]
    X_val, y_val, _ = datasets[1]
    X_test, y_test, _ = datasets[2]

    # Number of whole minibatches per split (a trailing partial batch is dropped).
    n_train_batches = len(X_train) // batch_size
    n_valid_batches = len(X_val) // batch_size
    n_test_batches = len(X_test) // batch_size

    # Symbolic variables the network is built on ...
    epoch = T.scalar()
    Fx = T.matrix(name='Fx_input')
    Sx = T.matrix(name='Sx_input')
    y = T.matrix('y')
    # ... and the explicit inputs that are substituted for them via `givens`.
    Fx_inp = T.matrix(name='Fx_inp')
    Sx_inp = T.matrix(name='Sx_inp')
    y_inp = T.matrix('y_inp')

    print('... building the model')

    rng = numpy.random.RandomState(23455)
    # NOTE(review): `input` is not defined in this function, so unless the
    # module defines it this passes the Python builtin; confirm what CNNRNet
    # expects as its second argument.
    cnnr = CNNRNet(rng, input, batch_size, nc, size, nkerns,
                   kern_mat[0], kern_mat[1],
                   pool_mat[0], pool_mat[1],
                   Fx, Sx)

    # Functions that evaluate cnnr.errors on one minibatch.
    test_model = theano.function(
        [Fx_inp, Sx_inp, y_inp],
        cnnr.errors(y),
        givens={
            Fx: Fx_inp,
            Sx: Sx_inp,
            y: y_inp,
        }, allow_input_downcast=True
    )

    validate_model = theano.function(
        [Fx_inp, Sx_inp, y_inp],
        cnnr.errors(y),
        givens={
            Fx: Fx_inp,
            Sx: Sx_inp,
            y: y_inp,
        }, allow_input_downcast=True
    )

    cost = cnnr.mse(y) + lambda_1 * cnnr.L1 + lambda_2 * cnnr.L2_sqr

    # Gradients of the cost with respect to every model parameter
    gparams = [T.grad(cost, param) for param in cnnr.params]

    # One zero-initialised shared buffer per parameter for the momentum term
    gparams_mom = [
        theano.shared(numpy.zeros(param.get_value(borrow=True).shape,
                                  dtype=theano.config.floatX))
        for param in cnnr.params
    ]

    # Momentum for the current epoch.  The branch has to be part of the graph
    # (T.switch): a Python `if` on a symbolic comparison is resolved once while
    # the graph is built, so the cap at mom_end would never be compiled in.
    ramp = epoch / mom_epoch_interval
    mom = T.switch(T.lt(epoch, mom_epoch_interval),
                   mom_start * (1.0 - ramp) + mom_end * ramp,
                   mom_end)

    updates = OrderedDict()

    # Update the step direction using momentum (update rule from Hinton's
    # dropout paper: the learning rate is folded into the momentum buffer).
    for gparam_mom, gparam in zip(gparams_mom, gparams):
        updates[gparam_mom] = mom * gparam_mom - (1. - mom) * learning_rate * gparam

    # ... and take a step along that direction.
    for param, gparam_mom in zip(cnnr.params, gparams_mom):
        # The learning rate is already included in the momentum buffer.
        stepped_param = param + updates[gparam_mom]

        if param.get_value(borrow=True).ndim == 2:
            # Two-dimensional parameters are treated as weight matrices and
            # the norm of each COLUMN is clipped to
            # sqrt(squared_filter_length_limit), following
            # https://github.com/BVLC/caffe/issues/109
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, T.sqrt(squared_filter_length_limit))
            scale = desired_norms / (1e-7 + col_norms)
            updates[param] = stepped_param * scale
        else:
            updates[param] = stepped_param

    # Compiled training step: returns the training cost and applies `updates`.
    train_fn = theano.function(
        [Fx_inp, Sx_inp, y_inp, epoch],
        outputs=cost,
        updates=updates,
        givens={
            Fx: Fx_inp,
            Sx: Sx_inp,
            y: y_inp,
        }, allow_input_downcast=True
    )

    # Returns the network prediction for one minibatch.
    predict_model = theano.function(
        [Fx_inp, Sx_inp],
        cnnr.y_pred,
        givens={
            Fx: Fx_inp,
            Sx: Sx_inp,
        }, allow_input_downcast=True
    )

    # Multiplies the shared learning rate by learning_rate_decay on each call.
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                                          updates={learning_rate: learning_rate * learning_rate_decay})

    def load_batch(X_all, y_all, index):
        """Return (ids, "F" images, "S" images, targets) for minibatch `index`."""
        lo = index * batch_size
        hi = (index + 1) * batch_size
        ids = X_all[lo:hi]
        batch_Fx = dataset_loader.load_batch_imagesV2(size, nc, "F", ids, im_type)
        batch_Sx = dataset_loader.load_batch_imagesV2(size, nc, "S", ids, im_type)
        return ids, batch_Fx, batch_Sx, y_all[lo:hi]

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # Number of minibatches between two evaluations on the validation set.
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch_counter = 0

    while (epoch_counter < n_epochs) and (not done_looping):
        epoch_counter += 1
        for minibatch_index in xrange(n_train_batches):
            iteration = (epoch_counter - 1) * n_train_batches + minibatch_index
            if iteration % 100 == 0:
                print('training @ iter =  %i' % iteration)

            _, data_Fx, data_Sx, data_y = load_batch(X_train, y_train, minibatch_index)
            # Pass the running epoch so the momentum schedule actually
            # advances (a constant 0 would pin it at mom_start).
            cost_ij = train_fn(data_Fx, data_Sx, data_y, epoch_counter)
            print('epoch %i, minibatch %i/%i, training cost %f ' %
                  (epoch_counter, minibatch_index + 1, n_train_batches,
                   cost_ij))

            if (iteration + 1) % validation_frequency == 0:

                # Mean of cnnr.errors over the validation minibatches
                validation_losses = 0
                for i in xrange(n_valid_batches):
                    _, data_Fx, data_Sx, data_y = load_batch(X_val, y_val, i)
                    validation_losses = validation_losses + validate_model(data_Fx, data_Sx, data_y)

                this_validation_loss = validation_losses / n_valid_batches
                # The learning rate decays once per validation pass.
                decay_learning_rate()

                print('epoch %i, minibatch %i/%i, learning_rate %f validation error %f %%' %
                      (epoch_counter, minibatch_index + 1, n_train_batches,
                       learning_rate.get_value(borrow=True),
                       this_validation_loss * 100.))

                # If we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # Extend patience if the improvement is large enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # Remember the best validation score and where it occurred
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    test_losses = 0
                    for i in xrange(n_test_batches):
                        ids, data_Fx, data_Sx, data_y = load_batch(X_test, y_test, i)
                        err = test_model(data_Fx, data_Sx, data_y)
                        test_losses = test_losses + err
                        # NOTE(review): `i % 100` is never -1 for a
                        # non-negative i, so this prediction dump never runs;
                        # it looks deliberately switched off.  Confirm before
                        # enabling or deleting it.
                        if i % 100 == -1:
                            store = []
                            ypred = predict_model(data_Fx, data_Sx)
                            print(ypred)
                            print(ids)
                            store.append(ids)
                            store.append(ypred)
                            store.append(data_y)
                            model_saver.save_garb(store)
                            print("Iteration saved %i, err %f" % (i, err))

                    test_score = test_losses / n_test_batches
                    # The file name cycles through three slots (epoch_counter % 3).
                    ext = "models/" + str(rn_id) + "_" + str(epoch_counter % 3) + "_model_numpy"
                    cnnr.save(ext)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch_counter, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
示例#2
0
def train_model():
    """Build a two-stage conv/pool regression network and train it with SGD.

    All hyper-parameters are hard-coded below.  The network is
    ConvPoolLayer -> ConvPoolLayer -> HiddenLayer (500 units) ->
    OutputLayer (3 outputs).  The cost is ``layer3.mse`` plus L1 and
    squared-L2 penalties on the four weight tensors, and training uses
    early stopping on the validation error.

    Side effects: prints progress to stdout, writes the elapsed time to
    stderr, and calls ``model_saver.save_model(epoch % 3, params)`` each time
    the validation loss improves.

    Returns:
        None.
    """
    rng = numpy.random.RandomState(23455)
    # [height, width] fed to the network; the original images are [480, 640]
    size = [120, 160]

    nkerns = [20, 50]  # number of filters in each conv layer
    nkern1_size = [5, 5]
    nkern2_size = [5, 5]

    npool1_size = [2, 2]
    npool2_size = [2, 2]

    batch_size = 30
    multi = 100  # passed through to dataset_loader.load_tum_dataV2
    learning_rate = 0.001
    n_epochs = 400

    datasets = dataset_loader.load_tum_dataV2(size, multi)

    X_train, y_train = datasets[0]
    X_val, y_val = datasets[1]
    X_test, y_test = datasets[2]

    # Number of whole minibatches per split (a trailing partial batch is dropped).
    n_train_batches = len(X_train) // batch_size
    n_valid_batches = len(X_val) // batch_size
    n_test_batches = len(X_test) // batch_size

    # Symbolic variables the network is built on ...
    x = T.tensor3(name='input')
    y = T.matrix('y')
    # ... and the explicit inputs that are substituted for them via `givens`.
    x_inp = T.tensor3(name='x_inp')
    y_inp = T.matrix('y_inp')

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # The input is reshaped to (batch, channels=2, height, width).
    layer0_input = x.reshape((batch_size, 2, size[0], size[1]))

    # First convolution + pooling stage.  The shape arithmetic below assumes
    # the layer shrinks each side by (kernel - 1) and then divides it by the
    # pool size; with the defaults: (120, 160) -> (116, 156) -> (58, 78).
    layer0 = ConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 2, size[0], size[1]),
        filter_shape=(nkerns[0], 2, nkern1_size[0], nkern1_size[1]),
        poolsize=(npool1_size[0], npool1_size[1])
    )
    # Height uses index 0 and width index 1 of the kernel and pool sizes.
    l0out = ((size[0] - nkern1_size[0] + 1) // npool1_size[0],
             (size[1] - nkern1_size[1] + 1) // npool1_size[1])

    # Second convolution + pooling stage; with the defaults:
    # (58, 78) -> (54, 74) -> (27, 37).
    layer1 = ConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0]) + l0out,
        filter_shape=(nkerns[1], nkerns[0], nkern2_size[0], nkern2_size[1]),
        poolsize=(npool2_size[0], npool2_size[1])
    )
    l2out = ((l0out[0] - nkern2_size[0] + 1) // npool2_size[0],
             (l0out[1] - nkern2_size[1] + 1) // npool2_size[1])

    # The fully-connected layer works on 2D input, so the feature maps are
    # flattened to (batch_size, nkerns[1] * l2out[0] * l2out[1]).
    layer2_input = layer1.output.flatten(2)

    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * l2out[0] * l2out[1],
        n_out=500
    )

    # Final layer mapping the 500 hidden units to 3 outputs
    layer3 = OutputLayer(input=layer2.output, n_in=500, n_out=3)

    # Functions that evaluate layer3.errors on one minibatch.
    test_model = theano.function(
        [x_inp, y_inp],
        layer3.errors(y),
        givens={
            x: x_inp,
            y: y_inp,
        }
    )

    validate_model = theano.function(
        [x_inp, y_inp],
        layer3.errors(y),
        givens={
            x: x_inp,
            y: y_inp,
        }
    )

    # Cost: mean squared error plus L1 and squared-L2 penalties over the
    # weights of ALL four layers.
    layers = [layer3, layer2, layer1, layer0]
    L1 = (abs(layer3.W).sum()) + (abs(layer2.W).sum()) + (abs(layer1.W).sum()) + (abs(layer0.W).sum())
    L2_sqr = ((layer3.W ** 2).sum()) + ((layer2.W ** 2).sum()) + ((layer1.W ** 2).sum()) + ((layer0.W ** 2).sum())
    lambda_1 = 0.1
    lambda_2 = 0.1
    cost = layer3.mse(y) + lambda_1 * L1 + lambda_2 * L2_sqr

    # All model parameters to be fit by gradient descent
    params = []
    for layer in layers:
        params = params + layer.params

    grads = T.grad(cost, params)

    # Plain SGD: one (param, param - learning_rate * grad) pair per parameter.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # Compiled training step: returns the cost and applies `updates`.
    train_fn = theano.function(
        [x_inp, y_inp],
        cost,
        updates=updates,
        givens={
            x: x_inp,
            y: y_inp,
        }
    )

    def load_batch(X_all, y_all, index):
        """Return (images, targets) for minibatch `index` of the given split."""
        lo = index * batch_size
        hi = (index + 1) * batch_size
        batch_x = dataset_loader.load_batch_imagesV2(size, X_all[lo:hi])
        return batch_x, y_all[lo:hi]

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # Early-stopping parameters
    patience = 10000  # training stops once the iteration count reaches this
    # When a significantly better validation loss is found, patience is raised
    # to at least iteration * patience_increase.
    patience_increase = 2
    # A new best is "significant" when it is below best * improvement_threshold.
    improvement_threshold = 0.995
    # Number of minibatches between two evaluations on the validation set.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter =  %i' % iteration)

            data_x, data_y = load_batch(X_train, y_train, minibatch_index)
            cost_ij = train_fn(data_x, data_y)
            print('epoch %i, minibatch %i/%i, training cost %f ' %
                  (epoch, minibatch_index + 1, n_train_batches,
                   cost_ij))

            if (iteration + 1) % validation_frequency == 0:

                # Mean of layer3.errors over the validation minibatches
                validation_losses = 0
                for i in xrange(n_valid_batches):
                    data_x, data_y = load_batch(X_val, y_val, i)
                    validation_losses = validation_losses + validate_model(data_x, data_y)

                this_validation_loss = validation_losses / n_valid_batches

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # If we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # Extend patience if the improvement is large enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # Remember the best validation score and where it occurred
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # Mean of layer3.errors over the test minibatches; the
                    # divisor must be the number of TEST batches.
                    test_losses = 0
                    for i in xrange(n_test_batches):
                        data_x, data_y = load_batch(X_test, y_test, i)
                        test_losses = test_losses + test_model(data_x, data_y)

                    test_score = test_losses / n_test_batches
                    # The save slot cycles through three values (epoch % 3).
                    model_saver.save_model(epoch % 3, params)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')