Example #1
def run():
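    """Part 2: train a multi-class SVM on the MNIST feature vectors, pick the
    regularization parameter lambda by 5-fold cross-validation, and report the
    test error of the best classifier."""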
    # Part 2
    print('\n=====================================================================================')
    print('Problem 2 Preparing data:')
    # Transform each image in the MNIST training set into a feature vector
    (feature_vector_list_training,
     integer_list_training) = create_feature_vectors.run_mnist('./input_data/mnist_train.txt')

    (feature_vector_list_testing,
     integer_list_testing) = create_feature_vectors.run_mnist('./input_data/mnist_test.txt')
    
    # Part 2 a/b: Train a multi-class SVM model, using cross-validation to select
    # the regularization parameter lambda
    print('\n=====================================================================================')
    print('Problem 2 Use cross-validation to find the best classifier:')
    cross_validation_error_list = []

    lambda_set = [pow(2, exponent) for exponent in range(-5, 2)]
    
    for lambda_ in lambda_set:
        print('Processing lambda: ' + str(lambda_))
        cross_validation_error = multi_class_prediction.multi_class_prediction_cross_validation(
                                    feature_vector_list_training, integer_list_training, lambda_, 5)
        print('Cross validation error: ' + str(cross_validation_error))
        cross_validation_error_list.append(cross_validation_error)

    cross_validation_error_list = [100*cross_validation_error 
                                   for cross_validation_error 
                                   in cross_validation_error_list]

    # Plot data ------------------------------------------------------------------------------------
    pylab.plot(lambda_set, cross_validation_error_list)
    
    pylab.xlabel('lambda')
    pylab.ylabel('Cross validation error (% out of 100)')
    pylab.title('Cross validation error as a function of lambda')
    pylab.legend(['Cross Validation Error'], loc=2)
    pylab.grid(True)
    pylab.savefig("Cross_validation_error_lambda.png")
    #pylab.show()
    pylab.clf()
    pylab.close()
    # End plot data --------------------------------------------------------------------------------

    print('\n=====================================================================================')
    print('Problem 2 Find error on test set using classifier with least error:')
    (minimum_index, minimum_cross_validation_error) = min(enumerate(cross_validation_error_list),
                                                          key=itemgetter(1))
    minimum_lambda = lambda_set[minimum_index]
    print('The best lambda value is: ' + str(minimum_lambda) + ' (with error: ' +
            str(minimum_cross_validation_error) + '%)')

    classifier = multi_class_prediction.multi_class_prediction_train(
                    feature_vector_list_training, integer_list_training, minimum_lambda)

    test_error = multi_class_prediction.multi_class_prediction_test(
                    classifier, feature_vector_list_testing, integer_list_testing)
    
    print('The test error is: ' + str(100*test_error) + '%')
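For reference, the body of multi_class_prediction_cross_validation is not shown on this page; below is a minimal sketch of the k-fold procedure it presumably performs, with train_fn/test_fn standing in for multi_class_prediction_train and multi_class_prediction_test (their exact signatures here are assumptions, not the repository's actual implementation).

def k_fold_cross_validation_error(features, labels, lambda_, train_fn, test_fn, k=5):
    # Split the training data into k folds, hold each fold out once for
    # validation, and average the held-out error rates.
    fold_size = len(features) // k
    errors = []
    for i in range(k):
        start, end = i * fold_size, (i + 1) * fold_size
        train_x = features[:start] + features[end:]
        train_y = labels[:start] + labels[end:]
        classifier = train_fn(train_x, train_y, lambda_)
        errors.append(test_fn(classifier, features[start:end], labels[start:end]))
    return sum(errors) / k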
Example #2
def run_libsvm():
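    """Part 2 (libsvm): train scikit-learn's SVC on the same MNIST data,
    compare the default parameters against a gamma/C grid chosen by 10-fold
    cross-validation, and report the resulting test errors."""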
    # Part 2/libsvm
    print('\n=====================================================================================')
    print('Problem 2/libsvm Preparing data:')
    (feature_vector_list_training,
     integer_list_training) = create_feature_vectors.run_mnist('./input_data/mnist_train.txt')

    (feature_vector_list_testing,
     integer_list_testing) = create_feature_vectors.run_mnist('./input_data/mnist_test.txt')

    # Use libsvm to train on the data and get the testing error using default parameters
    classifier = svm.SVC()
    classifier.fit(feature_vector_list_training, integer_list_training)  
    predicted_integer_list_testing = classifier.predict(feature_vector_list_testing)

    prediction_error_rate = 100*np.mean(np.array(predicted_integer_list_testing) !=
                                    np.array(integer_list_testing))
    print('The default libsvm prediction error is: ' + str(prediction_error_rate) + '%')

    # Find the cross validation error on the training set
    classifier = svm.SVC()
    scores = cross_validation.cross_val_score(classifier, feature_vector_list_training,
                                              integer_list_training, cv=10)
    print('The 10-fold cross validation error on the training set is: ' +
          str(100*(1-scores.mean())) + '%')

    # Cross Validation varying gamma and C
    c_list = [1, 1e2, 1e4]
    gamma_list = [1e-2, 1e-3, 1e-4]

    cross_validation_error_list = []
    gamma_c_list = []

    for c in c_list:
        for gamma in gamma_list:
            classifier = svm.SVC(C=c, gamma=gamma)
            scores = cross_validation.cross_val_score(classifier, feature_vector_list_training,
                                                      integer_list_training, cv=10)

            cross_validation_error = 100*(1-scores.mean())
            print('10-fold cross validation error for gamma=' + str(gamma) +
                  ' c=' + str(c) + ': ' + str(cross_validation_error) + '%')

            cross_validation_error_list.append(cross_validation_error)
            gamma_c_list.append((gamma, c))

    (minimum_index, minimum_cross_validation_error) = min(enumerate(cross_validation_error_list),
                                                          key=itemgetter(1))

    (minimum_gamma, minimum_c) = gamma_c_list[minimum_index]

    print('Minimum 10-fold cross validation error is: ' + str(minimum_cross_validation_error) + '%'
          + ', gamma=' + str(minimum_gamma) + ', c=' + str(minimum_c))

    # Train on the full training set with the best gamma and C, then find the test error.
    classifier = svm.SVC(C=minimum_c, gamma=minimum_gamma)
    classifier.fit(feature_vector_list_training, integer_list_training)  
    predicted_integer_list_testing = classifier.predict(feature_vector_list_testing)

    prediction_error_rate = 100*np.mean(np.array(predicted_integer_list_testing) !=
                                    np.array(integer_list_testing))
    print('With parameters gamma=' + str(minimum_gamma) + ', c=' + str(minimum_c) +
          ' the libsvm prediction error is: ' + str(prediction_error_rate) + '%')
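In recent scikit-learn versions cross_val_score lives in sklearn.model_selection rather than sklearn.cross_validation, and the manual gamma/C loop above is usually expressed with GridSearchCV. A minimal sketch, reusing the feature and label lists from this example:

from sklearn import svm
from sklearn.model_selection import GridSearchCV

# Same gamma/C grid as above, searched with 10-fold cross-validation.
param_grid = {'C': [1, 1e2, 1e4], 'gamma': [1e-2, 1e-3, 1e-4]}
search = GridSearchCV(svm.SVC(), param_grid, cv=10)
search.fit(feature_vector_list_training, integer_list_training)

print('Best parameters: ' + str(search.best_params_))
print('Best cross validation error: ' + str(100*(1 - search.best_score_)) + '%')

# GridSearchCV refits the best estimator on the full training set,
# so it can be scored on the held-out test set directly.
test_accuracy = search.score(feature_vector_list_testing, integer_list_testing)
print('Test error: ' + str(100*(1 - test_accuracy)) + '%')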