Example #1
# NOTE: this snippet assumes the usual Theano-tutorial style imports, roughly:
#   import pickle
#   import numpy
#   import numpy as np
#   import theano
#   import theano.tensor as T
# plus the project's own LeNetConvPoolLayer, HiddenLayer, LogisticRegression
# and load_data definitions.
def classify_lenet5(batch_size=500, output_size=20):
    """Load a trained LeNet-style CNN and extract hidden-layer features.

    The network parameters are read from a pickled model file and the output
    of the fully-connected hidden layer is written to one CSV file per
    train/dev/test split.

    :type batch_size: int
    :param batch_size: number of examples in each minibatch

    :type output_size: int
    :param output_size: width of the hidden (feature) layer; also selects
                        which pickled model file is loaded
    """

    rng = numpy.random.RandomState(23455)


    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 37 * 23)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (37, 23) is the size of the input images here.
    layer0_input = x.reshape((batch_size, 1, 37, 23))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (37-4+1, 23-2+1) = (34, 22)
    # maxpooling reduces this further to (34/2, 22/2) = (17, 11)
    # 4D output tensor is thus of shape (batch_size, 20, 17, 11)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 37, 23),
        filter_shape=(20, 1, 4, 2),
        poolsize=(2, 2),
    )

    # layer1 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer0.output,
    #     image_shape=(batch_size, 20, 17, 11),
    #     filter_shape=(50, 20, 4, 2),
    #     poolsize=(2, 2),
    # )
    #
    # layer4 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer1.output,
    #     image_shape=(batch_size, 50, 7, 5),
    #     filter_shape=(100, 50, 4, 2),
    #     poolsize=(2, 2),
    # )

    layer2_input = layer0.output.flatten(2)

    # construct a fully-connected hidden layer (tanh activation)
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=3740,
        n_out=output_size,
        activation=T.tanh,
        use_bias=True
    )
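    # Why n_in=3740 above: the conv/pool layer turns each (37, 23) input into
    # 20 feature maps of size (37-4+1)//2 x (23-2+1)//2 = 17 x 11, and
    # 20 * 17 * 11 = 3740.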

    # layer5 = HiddenLayer(
    #     rng,
    #     input=layer2.output,
    #     n_in=200,
    #     n_out=output_size,
    #     activation=T.tanh,
    #     use_bias=True
    # )

    # classify the values of the fully-connected hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=output_size, n_out=2)

    # load the pickled parameters of the previously trained model:
    # indices [0]/[1] hold the hidden layer W/b, [2]/[3] the conv layer W/b,
    # and [4]/[5] the logistic-regression W/b (see the assignments below)
    model_params = pickle.load(open('../model/cnn_dist_' + str(output_size) + '.pkl'))
    layer0.W = theano.shared(
        value=numpy.array(
            model_params[2].get_value(True),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )

    layer0.b = theano.shared(
        value=numpy.array(
            model_params[3].get_value(True),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )

    # layer1.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-4].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer1.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-3].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )
    #
    # layer4.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-6].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer4.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-5].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    layer2.W = theano.shared(
        value=numpy.array(
            model_params[0].get_value(True),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )

    layer2.b = theano.shared(
        value=numpy.array(
            model_params[1].get_value(True),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )

    # layer5.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-10].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer5.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-9].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    layer3.W = theano.shared(
        value=numpy.array(
            model_params[4].get_value(True),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )

    layer3.b = theano.shared(
        value=numpy.array(
            model_params[5].get_value(True),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )

    # params = layer3.params + layer5.params + layer2.params +  layer4.params + layer1.params + layer0.params

    datasets = load_data(None)

    sets = ['train', 'dev', 'test']
    dimension = [20000, 20000, 20000]
    for k in range(3):
        if k == 0:
            classify_set_x, classify_set_y, classify_set_z, classify_set_m, classify_set_c, classify_set_b = datasets[k]
        else:
            classify_set_x, classify_set_y, classify_set_z = datasets[k]

        # compute the number of minibatches for the current split
        n_classify_batches = classify_set_x.get_value(borrow=True).shape[0]
        n_classify_batches /= batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
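        # `classify` maps a minibatch index to the hidden-layer activations
        # (layer2.output) for that minibatch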
        classify = theano.function(
                [index],
                layer2.output,
                givens={
                    x: classify_set_x[index * batch_size: (index + 1) * batch_size],
                }
            )

        r = []

        for i in xrange(n_classify_batches):
            m = classify(i)
            r.extend(m)
        r = np.array(r)
        print r.shape
        r = np.append(r, np.reshape(classify_set_y.eval(), (dimension[k], 1)), 1)
        numpy.savetxt('../extractedInformation/cnn_dist_'+str(output_size)+'/'+sets[k]+'.csv', r, delimiter=",")
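
# A minimal usage sketch for the snippet above (assumes the pickled model
# under ../model/ and the data returned by load_data() already exist):
if __name__ == '__main__':
    classify_lenet5(batch_size=500, output_size=20)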
Example #2
def classify_lenet5(batch_size=500, output_size=20):
    """Load a trained LeNet-style CNN and extract hidden-layer features.

    The network parameters are read from a pickled model file and the output
    of the fully-connected hidden layer is written to one CSV file per
    train/dev/test split.

    :type batch_size: int
    :param batch_size: number of examples in each minibatch

    :type output_size: int
    :param output_size: width of the hidden (feature) layer; also selects
                        which pickled model file is loaded
    """

    rng = numpy.random.RandomState(23455)

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 37 * 23)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (37, 23) is the size of the input images here.
    layer0_input = x.reshape((batch_size, 1, 37, 23))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (37-4+1, 23-2+1) = (34, 22)
    # maxpooling reduces this further to (34/2, 22/2) = (17, 11)
    # 4D output tensor is thus of shape (batch_size, 20, 17, 11)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 37, 23),
        filter_shape=(20, 1, 4, 2),
        poolsize=(2, 2),
    )

    # layer1 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer0.output,
    #     image_shape=(batch_size, 20, 17, 11),
    #     filter_shape=(50, 20, 4, 2),
    #     poolsize=(2, 2),
    # )
    #
    # layer4 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer1.output,
    #     image_shape=(batch_size, 50, 7, 5),
    #     filter_shape=(100, 50, 4, 2),
    #     poolsize=(2, 2),
    # )

    layer2_input = layer0.output.flatten(2)

    # construct a fully-connected hidden layer (tanh activation)
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=3740,
                         n_out=output_size,
                         activation=T.tanh,
                         use_bias=True)

    # layer5 = HiddenLayer(
    #     rng,
    #     input=layer2.output,
    #     n_in=200,
    #     n_out=output_size,
    #     activation=T.tanh,
    #     use_bias=True
    # )

    # classify the values of the fully-connected hidden layer
    layer3 = LogisticRegression(input=layer2.output, n_in=output_size, n_out=2)

    # load the pickled parameters of the previously trained model
    model_params = pickle.load(
        open('../model/cnn_dist_' + str(output_size) + '.pkl'))
    layer0.W = theano.shared(value=numpy.array(model_params[2].get_value(True),
                                               dtype=theano.config.floatX),
                             name='W',
                             borrow=True)

    layer0.b = theano.shared(value=numpy.array(model_params[3].get_value(True),
                                               dtype=theano.config.floatX),
                             name='b',
                             borrow=True)

    # layer1.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-4].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer1.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-3].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )
    #
    # layer4.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-6].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer4.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-5].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    layer2.W = theano.shared(value=numpy.array(model_params[0].get_value(True),
                                               dtype=theano.config.floatX),
                             name='W',
                             borrow=True)

    layer2.b = theano.shared(value=numpy.array(model_params[1].get_value(True),
                                               dtype=theano.config.floatX),
                             name='b',
                             borrow=True)

    # layer5.W = theano.shared(
    #     value=numpy.array(
    #         model_params[-10].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='W',
    #     borrow=True
    # )
    #
    # layer5.b = theano.shared(
    #     value=numpy.array(
    #         model_params[-9].get_value(True),
    #         dtype=theano.config.floatX
    #     ),
    #     name='b',
    #     borrow=True
    # )

    layer3.W = theano.shared(value=numpy.array(model_params[4].get_value(True),
                                               dtype=theano.config.floatX),
                             name='W',
                             borrow=True)

    layer3.b = theano.shared(value=numpy.array(model_params[5].get_value(True),
                                               dtype=theano.config.floatX),
                             name='b',
                             borrow=True)

    # params = layer3.params + layer5.params + layer2.params +  layer4.params + layer1.params + layer0.params

    datasets = load_data(None)

    sets = ['train', 'dev', 'test']
    dimension = [20000, 20000, 20000]
    for k in range(3):
        if k == 0:
            classify_set_x, classify_set_y, classify_set_z, classify_set_m, classify_set_c, classify_set_b = datasets[
                k]
        else:
            classify_set_x, classify_set_y, classify_set_z = datasets[k]

        # compute the number of minibatches for the current split
        n_classify_batches = classify_set_x.get_value(borrow=True).shape[0]
        n_classify_batches /= batch_size

        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        classify = theano.function(
            [index],
            layer2.output,
            givens={
                x: classify_set_x[index * batch_size:(index + 1) * batch_size],
            })

        r = []

        for i in xrange(n_classify_batches):
            m = classify(i)
            r.extend(m)
        r = np.array(r)
        print r.shape
        r = np.append(r, np.reshape(classify_set_y.eval(), (dimension[k], 1)),
                      1)
        numpy.savetxt('../extractedInformation/cnn_dist_' + str(output_size) +
                      '/' + sets[k] + '.csv',
                      r,
                      delimiter=",")
Example #3
# Assumed imports for this snippet: copy, os, sys, time, numpy, theano,
# theano.tensor as T, plus the project's own WsdConvPoolLayer, HiddenLayer,
# LogisticRegression and load_data_word definitions.
def trainword(keyword, window_radius=3, learning_rate=0.1, n_epochs=10,
              batch_size=1, filter_height=3, filter_width=50, pool_height=1,
              pool_width=1, loginput_num=50, vector_size=50):
    """Train a convolutional word-sense classifier for `keyword` with
    SGD and early stopping, then print dev/test predictions."""

    print '==training parameters=='
    print 'window_radius: '+str(window_radius)
    print 'vector_size: '+str(vector_size)
    print 'filter_height: '+str(filter_height)
    print 'filter_width: '+str(filter_width)
    print 'pool_height: '+str(pool_height)
    print 'pool_width: '+str(pool_width)
    print 'loginput_num: '+str(loginput_num)
    print 'learning_rate: '+str(learning_rate)
    print 'n_epochs: '+str(n_epochs)
    print 'batch_size: '+str(batch_size)

    rng = numpy.random.RandomState(23455)
    datasets = load_data_word(keyword, window_radius, vector_size)

    train_set_x, train_set_y, trainsentence = datasets[0][0]
    valid_set_x, valid_set_y, validsentence = datasets[0][1]
    test_set_x, test_set_y, testsentence = datasets[0][2]

    senselist = datasets[1]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size
    print n_train_batches, n_valid_batches, n_test_batches

    index = T.lscalar()

    x = T.matrix('x')   
    y = T.ivector('y')

    print '... building the model for '+keyword

    # each training example is a (2*window_radius+1) x vector_size window of
    # word vectors, flattened into one row of x; reshape it back to 4D
    layer0_input = x.reshape((batch_size, 1, 2*window_radius+1, vector_size))

    layer0 = WsdConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 2*window_radius+1, vector_size),
        filter_shape=(1, 1, filter_height, filter_width),
        poolsize=(pool_height, pool_width)
    )
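    # the single filter plus pooling leaves each example as one feature map
    # of size ((2*window_radius+2-filter_height) // pool_height,
    # (vector_size+1-filter_width) // pool_width)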

    layer1_input = layer0.output.flatten(2)
    #layer1_input = layer0_input.flatten(2)
    
    layer1 = HiddenLayer(
        rng,
        input=layer1_input,
        #n_in=(2*window_radius+1)*(vector_size+1-filter_width+1-pool_width),
        n_in=int((2*window_radius+2-filter_height)/float(pool_height)) *
             int((vector_size+1-filter_width)/float(pool_width)),
        n_out=loginput_num,
        activation=T.tanh
    )
    
    # note: this logistic-regression layer is fed layer1_input (the flattened
    # conv output) rather than layer1.output, so the hidden layer above is
    # not on the classification path (and its params are excluded from
    # `params` below)
    layer2 = LogisticRegression(
        input=layer1_input,
        n_in=int((2*window_radius+2-filter_height)/float(pool_height)) *
             int((vector_size+1-filter_width)/float(pool_width)),
        n_out=20
    )

    cost = layer2.negative_log_likelihood(y)

    test_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer2.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    output_size = theano.function(
        [index],
        [layer0.output.shape],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    output_model = theano.function(
        [index],
        [layer2.y_pred],
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    output_test = theano.function(
        [index],
        [layer2.y_pred],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    params = layer2.params + layer0.params

    grads = T.grad(cost, params)

    # plain SGD: move every parameter against its gradient, scaled by the
    # learning rate
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
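    # train_model (above) performs one SGD step on a minibatch and returns
    # the minibatch cost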

    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_params = 0
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                #for index in range(0, n_valid_batches):
                #    print output_model(index)
                #    print valid_set_y[index * batch_size: (index + 1) * batch_size].eval()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter
                    best_params = [copy.deepcopy(layer0.params),
                                   copy.deepcopy(layer1.params),
                                   copy.deepcopy(layer2.params)]

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    #print params[0].eval()
                    #print (params[0].eval() == layer2.params[0].eval())
                    #print validation_losses
                    for index in range(0, n_valid_batches):
                        for i in range(0, batch_size):
                            true_i = batch_size*index+i
                            #print output_model(index)
                            print validsentence[true_i], '\t', senselist[output_model(index)[0][i]], '\t', senselist[valid_set_y[true_i].eval()]
                    #print test_losses
                    test_score = numpy.mean(test_losses)
                    for index in range(0, n_test_batches):
                        for i in range(0, batch_size):
                            true_i = batch_size*index+i
                            #print output_model(index)
                            print testsentence[true_i], '\t', senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()]
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    for index in range(0, n_test_batches):
        for i in range(0, batch_size):
            true_i = batch_size*index+i
            #print output_model(index)
            print testsentence[true_i], '\t', senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()]
    layer0.W = copy.deepcopy(best_params[0][0])
    layer0.b = copy.deepcopy(best_params[0][1])
    #layer0.params = [layer0.W, layer0.b]
    layer1.W = copy.deepcopy(best_params[1][0])
    layer1.b = copy.deepcopy(best_params[1][1])
    #layer1.params = [layer1.W, layer1.b]
    layer2.W = copy.deepcopy(best_params[2][0])
    layer2.b = copy.deepcopy(best_params[2][1])
    #layer2.params = [layer2.W, layer2.b]
    for index in range(0, n_test_batches):
        for i in range(0, batch_size):
            true_i = batch_size*index+i
            #print output_model(index)
            print testsentence[true_i], '\t', senselist[output_test(index)[0][i]], '\t', senselist[test_set_y[true_i].eval()]
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
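
# A minimal usage sketch for the snippet above (the keyword 'bank' is only a
# made-up placeholder; load_data_word() must be able to resolve it):
if __name__ == '__main__':
    trainword('bank', window_radius=3, learning_rate=0.1, n_epochs=10,
              batch_size=1, vector_size=50)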