Example #1
def evaluate_lenet5(learning_rate=0.005,
                    n_epochs=5,
                    data=None,
                    nkerns=64,
                    batch_size=30):

    #for i in range(len(x_val)):
        #if len(x_val[i]) == 490 and len(x_val[i][0]) == 640:
            #x1.append(x_val[i])
            #y1.append(y_val[i]-1)
            #if len(x1) == 80:
                #break

    from data_loader import load_data
    train, validate, test = load_data()
    x_train = numpy.array(train[0], 'float32')
    y_train = train[1]
    x_valid = numpy.array(validate[0], 'float32')
    y_valid = validate[1]
    x_test = numpy.array(test[0], 'float32')
    y_test = test[1]
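    # Wrap the datasets in Theano shared variables so they can be pushed to
    # GPU memory in one transfer. Old Theano only keeps floatX arrays on the
    # GPU, so the integer labels are stored as floatX here and cast back to
    # int32 just below -- the standard deeplearning.net tutorial idiom.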
    x_train2 = theano.shared(numpy.asarray(x_train,
                                           dtype=theano.config.floatX))
    y_train_2 = theano.shared(
        numpy.asarray(y_train, dtype=theano.config.floatX))
    x_valid2 = theano.shared(numpy.asarray(x_valid,
                                           dtype=theano.config.floatX))
    y_valid_2 = theano.shared(
        numpy.asarray(y_valid, dtype=theano.config.floatX))
    x_test2 = theano.shared(numpy.asarray(x_test, dtype=theano.config.floatX))
    y_test_2 = theano.shared(numpy.asarray(y_test, dtype=theano.config.floatX))

    y_train2 = T.cast(y_train_2, 'int32')
    y_test2 = T.cast(y_test_2, 'int32')
    y_valid2 = T.cast(y_valid_2, 'int32')

    print len(x_train)
    print len(y_train)

    rng = numpy.random.RandomState(23455)

    n_train_batches = len(y_train) // batch_size
    n_valid_batches = len(y_valid) // batch_size
    n_test_batches = len(y_test) // batch_size
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as a 1D vector of [int] labels

    layer0_input = x.reshape((batch_size, 1, 64, 64))
    '''Build the first layer:
    image_shape: batch_size training samples, each with 1 input feature map of size 64*64
    filter_shape: nkerns=64 convolution kernels, so this layer yields 64 feature maps per sample
    After the convolution, each map shrinks to (64-7+1, 64-7+1) = (58, 58)
    After the 2x2 pooling, each map shrinks to (58/2, 58/2) = (29, 29)
    The output image_shape of this layer is therefore (batch_size, nkerns, 29, 29)'''

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 64, 64),
                                filter_shape=(nkerns, 1, 7, 7),
                                poolsize=(2, 2))
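
    # Sanity check on the shape arithmetic in the docstring above (an
    # illustrative assertion added here, not part of the original project):
    # a 'valid' convolution shrinks each side by kernel_size - 1, and 2x2
    # non-overlapping pooling halves the result.
    assert (64 - 7 + 1) // 2 == 29  # matches the nkerns * 29 * 29 input of layer2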

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns * 29 * 29),
    # i.e. (30, 64 * 29 * 29) with the default values.
    layer2_input = layer0.output.flatten(2)
    '''Fully connected layer: the input layer2_input is a 2-D matrix whose first
    dimension indexes the samples and whose second dimension holds the features
    (neurons) each sample produced by the convolution and pooling above.
    HiddenLayer is a single-layer network; layer2 below maps the
    nkerns * 29 * 29 input units down to 500.'''
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns * 29 * 29,
                         n_out=500,
                         activation=T.tanh)
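
    # dropout_layer is imported from elsewhere in this project. A typical
    # Theano implementation (an assumed sketch for reference only, not the
    # project's actual code) zeroes each activation with probability p_drop:
    #
    #     from theano.tensor.shared_randomstreams import RandomStreams
    #     def dropout_layer(layer, p_drop, seed=1234):
    #         srng = RandomStreams(seed)
    #         mask = srng.binomial(n=1, p=1 - p_drop, size=layer.shape,
    #                              dtype=theano.config.floatX)
    #         return layer * mask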

    layer2.output = dropout_layer(layer2.output, 0.5)

    # Final layer: a logistic-regression classifier that maps the 500 hidden
    # units to the 8 output classes.
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=8)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)
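    # LogisticRegression.negative_log_likelihood is defined elsewhere; in the
    # deeplearning.net tutorial code this snippet follows, it is (assumed):
    #
    #     -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
    #
    # i.e. the mean negative log-probability of the correct class over the batch.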

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            y: y_test2[index * batch_size:(index + 1) * batch_size],
            x: x_test2[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: x_valid2[index * batch_size:(index + 1) * batch_size],
            y: y_valid2[index * batch_size:(index + 1) * batch_size]
        })
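
    # The `givens` mapping substitutes shared-variable slices for the symbolic
    # inputs at call time, so only the integer `index` crosses the Python
    # boundary on each call. A toy illustration (not from the original file):
    #
    #     v = theano.shared(numpy.arange(10, dtype=theano.config.floatX))
    #     a = T.vector('a')
    #     f = theano.function([index], a.sum(),
    #                         givens={a: v[index * 2:(index + 1) * 2]})
    #     f(0)  # sums v[0:2]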

    # Collect all parameters in a single list; the per-layer lists can simply
    # be concatenated with +
    params = layer3.params + layer2.params + layer0.params

    # Gradients of the cost with respect to every parameter
    grads = T.grad(cost, params)

    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: x_train2[index * batch_size:(index + 1) * batch_size],
            y: y_train2[index * batch_size:(index + 1) * batch_size]
        })

    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.2  # a relative improvement of this much is
                                 # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
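    # Worked example with a hypothetical dataset size: 2400 training samples
    # and batch_size=30 give n_train_batches = 80, so
    # validation_frequency = min(80, 10000 // 2) = 80, i.e. one check per epoch.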

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        #while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):  # for each minibatch of training data

            cost_ij = train_model(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in xrange(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    with open('param0.pkl', 'wb') as f0:
        pickle.dump(layer0.params, f0)
    with open('param2.pkl', 'wb') as f2:
        pickle.dump(layer2.params, f2)
    with open('param3.pkl', 'wb') as f3:
        pickle.dump(layer3.params, f3)
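
    # The saved parameter lists can later be restored with pickle.load, e.g.
    # (sketch; assumes the same layer classes are importable at load time):
    #
    #     with open('param0.pkl', 'rb') as f0:
    #         layer0_params = pickle.load(f0)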

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print(
        'Best validation score of %f %% obtained at iteration %i, '
        'with test performance %f %%' %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
Example #2
class ConvolutionalNeuralNetwork(Classifier):
    def __init__(self, rng, batch_size, nkerns=(20, 50)):
        self.batch_size = batch_size
        # 28x28 -> (24x24) // 2 = 12x12
        self.layer0 = LeNetConvPoolLayer(
            rng=rng,
            image_shape=(batch_size, 1, 28, 28),
            filter_shape=(nkerns[0], 1, 5, 5),
        )
        # 12x12 -> (8x8) // 2 = 4x4
        self.layer1 = LeNetConvPoolLayer(rng=rng,
                                         image_shape=(batch_size, nkerns[0],
                                                      12, 12),
                                         filter_shape=(nkerns[1], nkerns[0], 5,
                                                       5))
        # TODO: make this an MLP rather than a hidden layer -> LogReg
        # self.layer2 = MLP()
        self.layer2 = HiddenLayer(
            rng=rng,
            n_in=nkerns[1] * 4 * 4,
            n_out=500,
            activation=T.tanh,
        )
        self.layer3 = LogisticRegression(
            n_in=500,
            n_out=10,
        )

    def pre_logreg_output(self, x):
        layer0_input = x.reshape((self.batch_size, 1, 28, 28))
        l0_output = self.layer0.output(layer0_input)
        l1_output = self.layer1.output(l0_output)

        l2_input = l1_output.flatten(2)
        l2_output = self.layer2.output(l2_input)
        return l2_output

    def negative_log_likelihood(self, x, y):
        output = self.pre_logreg_output(x)
        return self.layer3.negative_log_likelihood(output, y)

    def pred_label(self, x):
        output = self.pre_logreg_output(x)
        output = output.flatten(1)
        return self.layer3.pred_label(output)

    def errors(self, x, y):
        output = self.pre_logreg_output(x)
        return self.layer3.errors(output, y)

    def train(self,
              train_x,
              train_y,
              test_x,
              test_y,
              valid_x,
              valid_y,
              alpha=0.13,
              batch_size=500,
              l1_reg=0.,
              l2_reg=0.0,
              n_epochs=1000):
        x = T.matrix('x')
        y = T.ivector('y')
        batch_size = self.batch_size  # the constructor's batch_size takes precedence over the argument

        # pre_logreg_output performs the (batch_size, 1, 28, 28) reshape
        # itself, so the raw image matrix x is passed straight through.
        cost = self.negative_log_likelihood(x, y)

        params = self.layer0.params + self.layer1.params + self.layer2.params + self.layer3.params
        grads = T.grad(cost, params)
        updates = [(param, param - alpha * grad)
                   for param, grad in zip(params, grads)]

        index = T.lscalar()
        train_func = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_x[index * batch_size:(index + 1) * batch_size],
                y: train_y[index * batch_size:(index + 1) * batch_size],
            })
        best_loss = self.run_batches(train_x,
                                     train_y,
                                     test_x,
                                     test_y,
                                     valid_x,
                                     valid_y,
                                     x,
                                     y,
                                     train_model_func=train_func,
                                     batch_size=batch_size,
                                     n_epochs=n_epochs)
        return best_loss
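
# Hypothetical usage sketch (train_x, train_y, etc. are assumed to be Theano
# shared variables prepared as in Example #1, with the labels cast to int32):
#
#     rng = numpy.random.RandomState(23455)
#     cnn = ConvolutionalNeuralNetwork(rng, batch_size=500)
#     best_loss = cnn.train(train_x, train_y, test_x, test_y,
#                           valid_x, valid_y, alpha=0.13, n_epochs=200)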