def predict(train):
    """Stacked prediction: blend six base recommenders with a linear model.

    An inner train/test split is made from *train*; each base method is run
    user-wise and (via transpose) movie-wise, a linear regression is fitted
    on the observed entries of the inner test split, and the blended
    prediction is returned for every entry of the input matrix.
    """
    tr_train, tr_test = load_ml100k.get_train_test(train, random_state=34)

    # Run every base method on users (rows) and, transposed, on movies.
    # Order matters only in that fit and predict must use the same columns.
    predictions = []
    for method in (regression, corrneighbours, norm):
        predictions.append(method.predict(tr_train))
        predictions.append(method.predict(tr_train.T).T)

    observed = tr_test > 0

    # One column per base method, restricted to observed inner-test entries.
    stack_tr = np.array([p[observed] for p in predictions]).T

    lr = linear_model.LinearRegression()
    lr.fit(stack_tr, tr_test[observed])

    # Blend the base predictions for every matrix entry.
    stack_te = np.array([p.ravel() for p in predictions]).T
    return lr.predict(stack_te).reshape(train.shape)
def predict(train):
    """Predict ratings by stacking six base predictors under a linear blender.

    Makes an inner split of *train*, collects the six base predictions
    (three methods, each applied row-wise and column-wise), fits a linear
    regression on the observed inner-test entries, then applies the fitted
    blender to all entries and reshapes back to the input's shape.
    """
    tr_train, tr_test = load_ml100k.get_train_test(train, random_state=34)

    preds = [
        regression.predict(tr_train),
        regression.predict(tr_train.T).T,
        corrneighbours.predict(tr_train),
        corrneighbours.predict(tr_train.T).T,
        norm.predict(tr_train),
        norm.predict(tr_train.T).T,
    ]

    known = tr_test > 0
    features_train = np.vstack([p[known] for p in preds]).T

    blender = linear_model.LinearRegression()
    blender.fit(features_train, tr_test[known])

    features_all = np.vstack([p.ravel() for p in preds]).T
    return blender.predict(features_all).reshape(train.shape)
def main(transpose_inputs=False):
    """Evaluate the binary-neighbours predictor and print its R2 score.

    With transpose_inputs=True the matrices are transposed so the method
    works movie-wise instead of user-wise.
    """
    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train, test = train.T, test.T
    predicted = predict(train, plot_matrix=True)
    held_out = test > 0
    r2 = metrics.r2_score(test[held_out], predicted[held_out])
    mode = 'movie' if transpose_inputs else 'user'
    print('R2 score (binary {} neighbours): {:.1%}'.format(mode, r2))
def main(transpose_inputs=False):
    """Run the regression predictor on a train/test split and print R2.

    transpose_inputs=True evaluates the movie-wise variant by transposing
    both matrices before prediction.
    """
    from load_ml100k import get_train_test

    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train, test = train.T, test.T

    filled = predict(train)
    known = test > 0
    r2 = metrics.r2_score(test[known], filled[known])
    label = 'movie' if transpose_inputs else 'user'
    print('R2 score ({} regression): {:.1%}'.format(label, r2))
def main(transpose_inputs=False):
    """Score the binary-neighbours predictor on held-out ratings.

    Transposing the inputs switches from user-based to movie-based
    neighbourhoods.
    """
    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train, test = train.T, test.T

    predicted = predict(train)
    mask = test > 0
    r2 = metrics.r2_score(test[mask], predicted[mask])
    kind = 'movie' if transpose_inputs else 'user'
    print('R2 score (binary {} neighbours): {:.1%}'.format(kind, r2))
def main(transpose_inputs=False):
    """Evaluate the normalization predictor and print its R2 score.

    With transpose_inputs=True, matrices are transposed so normalization
    happens movie-wise rather than user-wise.
    """
    from load_ml100k import get_train_test
    from sklearn import metrics

    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train, test = train.T, test.T

    predicted = predict(train)
    rated = test > 0
    r2 = metrics.r2_score(test[rated], predicted[rated])
    axis_name = 'movie' if transpose_inputs else 'user'
    print('R2 score ({} normalization): {:.1%}'.format(axis_name, r2))
def main(transpose_inputs=False):
    """Run the normalization predictor and report R2 on held-out entries.

    transpose_inputs=True transposes both matrices, evaluating the
    movie-wise variant.
    """
    from load_ml100k import get_train_test
    from sklearn import metrics

    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train, test = train.T, test.T

    predicted = predict(train)
    held_out = test > 0
    # Local renamed from `r2_score` to avoid shadowing the metric's name.
    score = metrics.r2_score(test[held_out], predicted[held_out])
    label = 'movie' if transpose_inputs else 'user'
    print('R2 score ({} normalization): {:.1%}'.format(label, score))
def stacked_predict(train_data):
    """Stacked prediction over a ratings matrix.

    When fitting the blending weights we need two layers of train/test
    splits: the caller's higher-level split, and an inner split (made here)
    used to fit the stacked learner.

    Fixes versus the previous revision: the blender variable was misspelled
    (`linear_leaner`), the training feature matrix was misleadingly named
    `stacked_learner`, and the final reshape used `tr_train.shape` instead
    of the input's own shape (the sibling `predict` implementations reshape
    to the argument's shape; the inner split is expected to preserve shape,
    but using `train_data.shape` states the contract directly).
    """
    tr_train, tr_test = load_ml100k.get_train_test(train_data, random_state=34)

    # Apply every base method user-wise and, via transpose, movie-wise.
    tr_prediction_0 = regression.predict(tr_train)
    tr_prediction_1 = regression.predict(tr_train.T).T
    tr_prediction_2 = corr_neighbours.predict(tr_train)
    tr_prediction_3 = corr_neighbours.predict(tr_train.T).T
    tr_prediction_4 = normalization.predict(tr_train)
    tr_prediction_5 = normalization.predict(tr_train.T).T

    # Training design matrix: one column per base method, restricted to
    # the entries observed in the inner test split.
    observed = tr_test > 0
    stack_train = np.array([
        tr_prediction_0[observed],
        tr_prediction_1[observed],
        tr_prediction_2[observed],
        tr_prediction_3[observed],
        tr_prediction_4[observed],
        tr_prediction_5[observed],
    ]).T

    # Fit a simple linear regression as the blending (stacked) learner.
    linear_learner = linear_model.LinearRegression()
    linear_learner.fit(stack_train, tr_test[observed])

    # Blend the base predictions for every matrix entry, then reshape the
    # flat prediction vector back to the input matrix's shape.
    stack_all = np.array([
        tr_prediction_0.ravel(),
        tr_prediction_1.ravel(),
        tr_prediction_2.ravel(),
        tr_prediction_3.ravel(),
        tr_prediction_4.ravel(),
        tr_prediction_5.ravel(),
    ]).T
    return linear_learner.predict(stack_all).reshape(train_data.shape)
def main():
    """Evaluate the stacked predictor on a fresh split and print its R2."""
    train, test = load_ml100k.get_train_test(random_state=12)
    predicted = predict(train)
    known = test > 0
    r2 = metrics.r2_score(test[known], predicted[known])
    print("R2 stacked: {:.2%}".format(r2))
def main():
    """Evaluate the averaged predictor on a fresh split and print its R2."""
    train, test = load_ml100k.get_train_test(random_state=12)
    predicted = predict(train)
    rated = test > 0
    r2 = metrics.r2_score(test[rated], predicted[rated])
    print('R2 averaged: {:.2%}'.format(r2))
def main():
    """Run the stacked ensemble prediction and report its R2 score."""
    train, test = load_ml100k.get_train_test(random_state=12)
    predicted = stacked_predict(train)
    observed = test > 0
    r2 = metrics.r2_score(test[observed], predicted[observed])
    print('Results from ensemble_learner.py stacked prediction')
    print('R2 stacked: {:.2%}'.format(r2))