# Hold-out evaluation on the Weekly data: models are trained on the pre-2009
# split (x_train / train prepared earlier in the file) and tested on 2009-2010.
# FIX: pandas Series.reshape was removed (pandas >= 0.23) — reshape the
# underlying ndarray instead.
x_train = x_train.values.reshape(-1, 1)
y_train = train.loc[:, 'Direction']

test = Weekly[Weekly['Year'] >= 2009]
x_test = test.iloc[:, 3]  # single predictor column — presumably Lag2; verify column order
x_test = x_test.values.reshape(-1, 1)
y_test = test.loc[:, 'Direction']

# (d) Logistic regression on the held-out years.
glm2 = LogisticRegression()
glm2_pred = glm2.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, glm2_pred),
                   index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
# FIX: accuracy_score returns accuracy, not the error rate — label it correctly
# (the 62.5% noted below is indeed the test accuracy).
print('accuracy: ', accuracy_score(y_test, glm2_pred))  # 62.5%

# (e) Linear discriminant analysis.
lda = LinearDiscriminantAnalysis()
lda_pred = lda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, lda_pred),
                   index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
print('accuracy: ', accuracy_score(y_test, lda_pred))  # 62.5%

# (f) Quadratic discriminant analysis.
qda = QuadraticDiscriminantAnalysis()
qda_pred = qda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, qda_pred),
                   index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
print('accuracy: ', accuracy_score(y_test, qda_pred))  # 58.7%

# (g) 1-nearest-neighbour classifier (no confusion matrix printed, as before).
knn = KNeighborsClassifier(n_neighbors=1)
knn_pred = knn.fit(x_train, y_train).predict(x_test)
print('accuracy: ', accuracy_score(y_test, knn_pred))  # 49%

# (h): Logistic and LDA models are the best.