Example #1
import logging

import numpy

# old_load_images, EigenFace and get_face_space are project-local helpers.
def knn_regression(data_file_name, k_value=3, n_eigs=100, weighted=True,
                   beta=1000000):
    train_data, test_data, file_names = old_load_images(data_file_name)
    eig_face = EigenFace.from_file(train_data[0], data_file_name, n_eigs)
    # Project both splits into the eigenface space
    train_data[0] = get_face_space(data_file_name, 'train_x', train_data[0],
                                   eig_face)
    test_data[0] = get_face_space(data_file_name, 'test_x', test_data[0],
                                  eig_face)
    logging.info('beginning knn regression')
    predictions = []
    errors = []
    n_test_examples = test_data[0].shape[1]
    real_scores = test_data[1].T.tolist()
    # Materialise the (example, score) pairs: the training pairs are scanned
    # once per test example, so they must not be a one-shot iterator
    train_data = list(zip(train_data[0].T, train_data[1]))
    test_data = list(zip(test_data[0].T, test_data[1]))
    for iterations, (test_example, test_score) in enumerate(test_data):
        if iterations % 100 == 0:
            logging.info('on example %d of %d' % (iterations, n_test_examples))
        # Euclidean distance from this test example to every training example
        distances = [(numpy.linalg.norm(test_example - train_example),
                      train_score)
                     for train_example, train_score in train_data]
        distances.sort()
        k_nearest_neighbours = distances[:k_value]
        if weighted:
            # Weight each neighbour's score by exp(-distance / beta), then
            # normalise so the weights sum to one
            weighted_scores = sum(score * numpy.exp(-distance / beta)
                                  for distance, score in k_nearest_neighbours)
            normalisation = sum(numpy.exp(-distance / beta)
                                for distance, score in k_nearest_neighbours)
            prediction = weighted_scores / normalisation
        else:
            # Unweighted: plain mean of the k nearest scores
            prediction = (sum(score for distance, score in k_nearest_neighbours)
                          / k_value)
        predictions.append(prediction)
        errors.append(abs(test_score - prediction))
    logging.info('mean error is %f' % (sum(errors) / len(errors)))
    return real_scores, predictions, file_names
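The weighted branch is a kernel-weighted average: each of the k nearest scores is weighted by exp(-distance / beta) and the weights are normalised to sum to one. Note that with the default beta of 1000000 the weights are all close to 1 for typical distances, so the result is nearly an unweighted mean; a smaller beta makes nearby neighbours dominate. A minimal sketch with made-up distances and scores:

import numpy

# (distance, score) pairs for k = 3 hypothetical neighbours
neighbours = [(12.0, 6.5), (15.0, 7.0), (40.0, 2.0)]
beta = 10.0  # much smaller than the default, so the weighting is visible

weights = numpy.array([numpy.exp(-d / beta) for d, _ in neighbours])
scores = numpy.array([s for _, s in neighbours])
prediction = (weights * scores).sum() / weights.sum()
print(prediction)  # ~6.56: dominated by the two close neighbours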
Example #2
import logging

import numpy
import theano
import theano.tensor as T
from scipy.stats import pearsonr
from theano.sandbox.cuda.basic_ops import gpu_from_host

# MLP, old_load_images, get_face_space, to_theano_shared and plot_correlation
# are project-local.
def train_nn(data_file_name, reg_lambda=0.01, learning_rate=0.01, n_eigs=100,
             n_neurons_per_layer=100, batch_size=100, display=True):
    train_data, test_data, file_names = old_load_images(data_file_name)
    eig_face = EigenFace.from_file(train_data[0], data_file_name, n_eigs)
    train_data[0] = get_face_space(data_file_name, 'train_x', train_data[0],
                                   eig_face)
    test_data[0] = get_face_space(data_file_name, 'test_x', test_data[0],
                                  eig_face)
    n_features, n_training_examples = train_data[0].shape
    real_scores = test_data[1].T.tolist()

    train_data = to_theano_shared(train_data)
    test_data = to_theano_shared(test_data)

    rng = numpy.random.RandomState(1234)
    x = T.matrix('x')  # symbolic features, one column per example
    y = T.vector('y')  # symbolic target scores

    mlp = MLP(rng, x, n_features, n_neurons_per_layer, n_training_examples)
    cost = mlp.cost(y) + reg_lambda * mlp.L2_sqr  # data term plus L2 penalty

    test_model = theano.function(
        [], outputs=[cost, mlp.output],
        givens={x: test_data[0][:], y: test_data[1][:]})

    # One gradient per parameter, paired with its gradient-descent update
    g_params = [T.grad(cost, param) for param in mlp.params]
    updates = [(param, param - learning_rate * g_param)
               for param, g_param in zip(mlp.params, g_params)]

    # theano.Out(gpu_from_host(...), borrow=True) keeps the returned cost on
    # the GPU, avoiding a host transfer on every training call
    train_model = theano.function(
        [], outputs=theano.Out(gpu_from_host(cost), borrow=True),
        updates=updates,
        givens={x: train_data[0][:], y: train_data[1][:]})

    current_cost = numpy.asarray(train_model())
    logging.info('initial cost %f' % current_cost)
    old_cost = 0
    iterations = 0
    logging.info('beginning gradient descent')
    # Full-batch gradient descent: iterate until the cost change between
    # consecutive iterations falls below the convergence threshold
    while abs(current_cost - old_cost) > 0.001:
        old_cost = current_cost
        current_cost = numpy.asarray(train_model())
        if iterations % 10 == 0:
            logging.info('iteration % 9d cost % 9f' % (iterations, current_cost))
        iterations += 1

    error, predictions = test_model()

    # Log the results
    logging.info('training cost minimised: %f' % current_cost)
    logging.info('test error: %f' % error)

    predictions = predictions[0].tolist()
    logging.debug('predictions %s', str(predictions))
    pearsons = pearsonr(real_scores, predictions)
    logging.info('pearsons correlation: %f, %f' % pearsons)
    # Plot predictions against the real scores
    plot_title_data = (n_neurons_per_layer, learning_rate, reg_lambda,
                       pearsons[0])
    plot_correlation(real_scores, predictions, file_names,
                     'neural network with %d neurons, '
                     'learning rate %f and reg-lambda %f, pearsons %f'
                     % plot_title_data,
                     'nn', show=True, pearsons=pearsons)
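The training loop runs full-batch gradient descent until the cost stops changing by more than a fixed threshold, rather than for a set number of epochs. A self-contained numpy sketch of the same convergence pattern on a toy least-squares problem (all data here is made up for illustration):

import numpy

rng = numpy.random.RandomState(1234)
X = rng.randn(100, 5)    # 100 examples, 5 features
y = X.dot(rng.randn(5))  # noiseless linear targets

w = numpy.zeros(5)
learning_rate = 0.01
old_cost, current_cost = 0.0, numpy.inf
while abs(current_cost - old_cost) > 0.001:
    old_cost = current_cost
    residual = X.dot(w) - y
    current_cost = (residual ** 2).mean()                 # mean squared error
    w -= learning_rate * 2 * X.T.dot(residual) / len(y)   # gradient step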
Example #3
import logging

import theano
import theano.tensor as T

# old_load_images, EigenFace, get_face_space, to_theano_shared,
# TheanoLeastSquaresRegression and the pickle helpers are project-local.
def eigface_sgd(data_file_name, n_eigs=100, learning_rate=1e-18,
                reg_lambda=0.1, display=False):
    train_data, test_data, image_names = old_load_images(data_file_name)
    eig_face = EigenFace.from_file(train_data[0], data_file_name, n_eigs)
    # Project both splits into the eigenface space
    train_data[0] = get_face_space(data_file_name, 'train_x', train_data[0],
                                   eig_face)
    test_data[0] = get_face_space(data_file_name, 'test_x', test_data[0],
                                  eig_face)

    n_features, n_training_examples = train_data[0].shape
    #n_features += 1 # we're going to add the ones on
    n_test_examples = test_data[0].shape[1]
    #train_data[0] = prepend_ones(train_data[0])
    #test_data[0] = prepend_ones(test_data[0])

    train_data = to_theano_shared(train_data)
    test_data = to_theano_shared(test_data)

    x_train, y_train = train_data
    x_test, y_test = test_data

    x = T.matrix('x')
    y = T.vector('y')

    tlsr = TheanoLeastSquaresRegression(x, n_features, n_training_examples,
                                        reg_lambda=reg_lambda)
    cost = tlsr.cost(y)
    test_model = theano.function([], outputs=cost,
                                 givens={x: x_test[:], y: y_test[:]})

    # Gradient-descent updates for the weights and the bias
    g_theta = T.grad(cost, tlsr.theta)
    g_bias = T.grad(cost, tlsr.bias)
    updates = [(tlsr.theta, tlsr.theta - learning_rate * g_theta),
               (tlsr.bias, tlsr.bias - learning_rate * g_bias)]
    train_model = theano.function([], outputs=cost, updates=updates,
                                  givens={x: x_train[:], y: y_train[:]})

    current_cost = train_model()
    logging.info('initial cost %f' % current_cost)
    old_cost = 0
    iterations = 0
    logging.info('beginning gradient descent')
    # Full-batch gradient descent until the cost change falls below the
    # convergence threshold
    while abs(current_cost - old_cost) > 0.000001:
        old_cost = current_cost
        current_cost = train_model()
        if iterations % 1000 == 0:
            logging.info('iteration % 9d cost % 9f' % (iterations, current_cost))
        iterations += 1

    error = test_model()
    theta = tlsr.theta.get_value()
    bias = tlsr.bias.get_value()

    # Print the results
    logging.info('training cost minimised: %f' % current_cost)
    logging.info('test error: %f' % error)

    # Save our weights should we ever need them again
    theta_file_name = '%s.pickle' % append_timestamp_to_file_name('weights')
    logging.info('writing weights to %s' % theta_file_name)
    save_pickle((theta, bias), theta_file_name)
    y = y_test.get_value().tolist()
    y = [float(score) for score in y]
    plot_correlation(x_test.get_value(), y, image_names,
                     'linear regression', 'linear-regression')
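The weights are pickled so the fitted linear model can be reused without retraining. A hypothetical sketch of loading them back and scoring new face-space data; the file name, the array shapes and the features-by-examples layout (as in train_data[0] above) are assumptions:

import pickle

import numpy

with open('weights.pickle', 'rb') as f:  # hypothetical file name
    theta, bias = pickle.load(f)

# New examples in face space, one column per example
x_new = numpy.random.randn(theta.shape[0], 10)
scores = theta.T.dot(x_new) + bias  # linear model: theta' x + bias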