def cross_corpus(train_set_name, test_set_name):
    """Train on one data set and evaluate the model on a different one.

    Args:
        train_set_name: Name handed to ``DataSet`` for the corpus the model
            is fitted on.
        test_set_name: Name handed to ``DataSet`` for the corpus the model
            is evaluated on.

    Returns:
        The model returned by ``train_model``.
    """
    # Fit the model on the first corpus.
    train_features, train_labels = DataSet(train_set_name).load_training_data()
    model = train_model(train_features, train_labels)

    # Predict labels for the second corpus. Its samples are also obtained
    # through load_training_data().
    test_features, test_labels = DataSet(test_set_name).load_training_data()
    predicted = model.predict(test_features)

    # Report predictions against the reference labels.
    print_classification_matrix(predicted, test_labels)
    return model
def cross_validation(data_set_name):
    """Train on a shuffled split of one data set and score the held-out part.

    The data is split once with ``test_size=0.1``; no random seed is passed
    to ``train_test_split``.

    Args:
        data_set_name: Name handed to ``DataSet``.

    Returns:
        A tuple ``(accuracy, y_test, predict)``: the value returned by
        ``accuracy_score``, the held-out labels, and the model's predictions
        for the held-out samples.
    """
    # Load every sample of the data set.
    features, labels = DataSet(data_set_name).load_training_data()

    # Split once into a training part and a held-out part.
    split = train_test_split(features, labels, test_size=0.1, shuffle=True)
    train_features, held_out_features, train_labels, held_out_labels = split

    # Fit the model (train_model may also persist it -- not visible here).
    model = train_model(train_features, train_labels)

    # Score the predictions for the held-out samples.
    predicted = model.predict(held_out_features)
    accuracy = accuracy_score(held_out_labels, predicted)
    print('Cross validation accuracy:', accuracy)
    # print_classification_matrix(predict, y_test)
    return accuracy, held_out_labels, predicted
def leave_one_out_score(data_set_name):
    """Estimate accuracy by leave-one-out evaluation over one data set.

    For every sample index ``i`` a fresh model is trained with
    ``train_model`` on all other samples and asked to predict sample ``i``.
    The running accuracy is printed after each sample, and the final
    accuracy is printed at the end.

    Args:
        data_set_name: Name handed to ``DataSet``.

    Returns:
        The fraction of held-out samples that were predicted correctly.

    Raises:
        ZeroDivisionError: If the data set contains no samples.
    """
    # open data set
    data_set = DataSet(data_set_name)
    x, y = data_set.load_training_data()

    correct_count = 0
    for i, label in enumerate(y):
        # Everything except sample i is used for training.
        x_train = np.delete(x, i, axis=0)
        y_train = np.delete(y, i)
        # predict() receives a batch, so wrap the single sample in a list.
        x_test = [x[i]]

        # train model
        model = train_model(x_train, y_train)

        # Compare with the scalar label itself. Comparing against a
        # one-element list only works through NumPy broadcasting and is
        # always False for plain Python scalars.
        if model.predict(x_test)[0] == label:
            correct_count += 1
            outcome = 'correct,'
        else:
            outcome = 'wrong,'
        print(i, outcome, correct_count / (i + 1))

    accuracy = correct_count / len(y)
    print('accuracy:', accuracy)
    return accuracy
# evaluate model on test data set (cross corpus) x, y = test_data_set.load_training_data() score = cross_val_score(model, x, y, scoring='neg_mean_squared_error') print('Mean squared error: {}'.format(-score)) # plot predict = model.predict(x) plt.scatter(y, predict) plt.show() if __name__ == '__main__': data_set = DataSet('cepp') x, y = data_set.load_training_data() x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4, shuffle=True, random_state=0) regr = linear_model.LinearRegression(normalize=True) #regr = linear_model.Ridge(alpha=0.001, normalize=True) regr.fit(x_train, y_train) predict = regr.predict(x_test) scores = cross_val_score(regr, x_test,