示例#1
0
def cross_corpus(train_set_name, test_set_name):
    """Train on one data set and evaluate the model on another.

    A model is fitted on the data loaded from the data set named
    ``train_set_name``. It then predicts labels for the data loaded from the
    data set named ``test_set_name``; the predictions and the true labels
    are handed to ``print_classification_matrix``.

    Returns the trained model.
    """
    # fit on the first corpus
    features, labels = DataSet(train_set_name).load_training_data()
    model = train_model(features, labels)

    # predict on the second corpus
    eval_features, eval_labels = DataSet(test_set_name).load_training_data()
    predicted = model.predict(eval_features)

    print_classification_matrix(predicted, eval_labels)
    return model
示例#2
0
def cross_validation(data_set_name):
    """Score a model on a shuffled 10% hold-out split of one data set.

    The data loaded from the data set named ``data_set_name`` is split with
    ``train_test_split`` (``test_size=0.1``, shuffled, no fixed seed), a
    model is trained on the larger part and its accuracy on the held-out
    part is printed.

    Returns a tuple ``(accuracy, held-out true labels, predicted labels)``.
    """
    features, labels = DataSet(data_set_name).load_training_data()

    # hold out 10% of the samples for evaluation
    split = train_test_split(features, labels, test_size=0.1, shuffle=True)
    train_features, held_out_features, train_labels, held_out_labels = split

    model = train_model(train_features, train_labels)

    # accuracy on the held-out samples
    predicted = model.predict(held_out_features)
    accuracy = accuracy_score(held_out_labels, predicted)
    print('Cross validation accuracy:', accuracy)

    return accuracy, held_out_labels, predicted
示例#3
0
def leave_one_out_score(data_set_name):
    """Print the leave-one-out accuracy of ``train_model`` on one data set.

    For every sample index ``i`` a model is trained on all other samples of
    the data set named ``data_set_name`` and asked to predict sample ``i``.
    A running accuracy is printed after each sample and the overall accuracy
    once all samples have been held out.

    After the loop the model from the last iteration is scored with
    ``cross_val_score`` on data read from ``test_data_set`` and a scatter
    plot of true versus predicted values is shown (see the review note
    below). Nothing is returned.
    """
    # open data set
    data_set = DataSet(data_set_name)
    x, y = data_set.load_training_data()

    correct_count = 0

    for i, expected in enumerate(y):
        # hold sample i out of the training data
        x_train = np.delete(x, i, axis=0)
        y_train = np.delete(y, i)

        # train model
        model = train_model(x_train, y_train)

        # predict() receives a one-sample batch; compare its single
        # prediction with the scalar label rather than with a one-element
        # list, so the result is a plain boolean for any label type.
        if model.predict([x[i]])[0] == expected:
            correct_count += 1
            verdict = 'correct,'
        else:
            verdict = 'wrong,'
        print(i, verdict, correct_count / (i + 1))

    print('accuracy:', correct_count / len(y))
    # evaluate model on test data set (cross corpus)
    # NOTE(review): `test_data_set` is not defined inside this function;
    # unless it exists as a module-level name this line raises NameError.
    # `model` below is the one trained in the last loop iteration. Confirm
    # whether this trailing section really belongs to this function.
    x, y = test_data_set.load_training_data()
    score = cross_val_score(model, x, y, scoring='neg_mean_squared_error')
    print('Mean squared error: {}'.format(-score))

    # plot
    predict = model.predict(x)
    plt.scatter(y, predict)
    plt.show()


if __name__ == '__main__':
    data_set = DataSet('cepp')

    x, y = data_set.load_training_data()

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.4,
                                                        shuffle=True,
                                                        random_state=0)

    regr = linear_model.LinearRegression(normalize=True)
    #regr = linear_model.Ridge(alpha=0.001, normalize=True)

    regr.fit(x_train, y_train)
    predict = regr.predict(x_test)

    scores = cross_val_score(regr,
                             x_test,