def create_symbol_forecast_model(self):
    """Fit a QDA direction classifier on lagged returns and print its test error.

    The lagged data set is built by ``create_lagged_series`` for the first
    entry of ``self.symbol_list`` (the original author describes it as the
    S&P500 index) between ``self.model_start_date`` and
    ``self.model_end_date`` with ``lags=5``.  Only the ``Lag1`` and ``Lag2``
    columns are used as predictors; ``Direction`` is the response.

    Rows whose index is strictly before ``self.model_start_test_date`` form
    the training set; rows on or after that date form the test set.

    Returns:
        The fitted ``QuadraticDiscriminantAnalysis`` instance.
    """
    lagged = create_lagged_series(
        self.symbol_list[0],
        self.model_start_date,
        self.model_end_date,
        lags=5,
    )

    # Predictors: the two most recent lags.  Response: the direction column.
    features = lagged[["Lag1", "Lag2"]]
    direction = lagged["Direction"]

    # Chronological split around the configured test start date.
    split_date = self.model_start_test_date
    features_train = features[features.index < split_date]
    features_test = features[features.index >= split_date]
    direction_train = direction[direction.index < split_date]
    direction_test = direction[direction.index >= split_date]

    classifier = QuadraticDiscriminantAnalysis()
    classifier.fit(features_train, direction_train)

    # Fraction of test rows whose predicted label differs from the actual one.
    predicted = classifier.predict(features_test)
    mismatches = (direction_test != predicted).sum()
    error_rate = mismatches * 1. / len(direction_test)
    print("Error Rate is {0}".format(error_rate))

    return classifier
def create_symbol_forecast_model(self):
    """Fit and return a QDA classifier on two lagged-return predictors.

    The lagged data set comes from ``create_lagged_series`` for the first
    entry of ``self.symbol_list`` between ``self.model_start_date`` and
    ``self.model_end_date`` with ``lags=5``.  The ``Lag1`` and ``Lag2``
    columns are the predictors and ``Direction`` is the response.

    Rows whose index is strictly before ``self.model_start_test_date`` are
    used for training; rows on or after that date make up the hold-out set.

    Returns:
        The fitted ``QDA`` instance.
    """
    snpret = create_lagged_series(
        self.symbol_list[0],
        self.model_start_date,
        self.model_end_date,
        lags=5,
    )

    # Predictors are the prior two lags; the response is the direction.
    X = snpret[["Lag1", "Lag2"]]
    y = snpret["Direction"]

    # Chronological train/test split.
    start_test = self.model_start_test_date
    X_train = X[X.index < start_test]
    X_test = X[X.index >= start_test]
    y_train = y[y.index < start_test]
    # Use the same inclusive boundary as X_test.  The previous strict ">"
    # dropped the row dated exactly start_test from the response only,
    # leaving the hold-out predictors and responses misaligned.
    y_test = y[y.index >= start_test]

    # Only the training portion is used to fit the model here; the hold-out
    # slices are not consumed inside this method.
    model = QDA()
    model.fit(X_train, y_train)
    return model
def create_symbol_forecast_model(self):
    """Build the lagged data set, fit a QDA classifier on it and return it.

    ``create_lagged_series`` is called for ``self.symbol_list[0]`` (described
    by the original author as the S&P500 index) over
    ``self.model_start_date`` .. ``self.model_end_date`` with ``lags=5``.
    ``Lag1`` and ``Lag2`` serve as predictors, ``Direction`` as the response.

    Training rows are those indexed strictly before
    ``self.model_start_test_date``; the remaining rows are the hold-out set,
    which is sliced here but not used to fit or score the model.

    Returns:
        The fitted ``QDA`` instance.
    """
    lagged = create_lagged_series(
        self.symbol_list[0],
        self.model_start_date,
        self.model_end_date,
        lags=5,
    )

    predictors = lagged[["Lag1", "Lag2"]]
    response = lagged["Direction"]

    # Split chronologically at the configured test start date.
    cutoff = self.model_start_test_date
    predictors_train = predictors[predictors.index < cutoff]
    predictors_holdout = predictors[predictors.index >= cutoff]
    response_train = response[response.index < cutoff]
    response_holdout = response[response.index >= cutoff]

    classifier = QDA()
    classifier.fit(predictors_train, response_train)
    return classifier
def create_symbol_forecast_model(self):
    """Return a QDA model trained on the pre-test portion of the lagged data.

    The data set is produced by ``create_lagged_series`` for
    ``self.symbol_list[0]`` (the original author describes it as the S&P500
    index) from ``self.model_start_date`` to ``self.model_end_date`` with
    ``lags=5``.  The model is fitted on the ``Lag1``/``Lag2`` columns
    against the ``Direction`` column.

    ``self.model_start_test_date`` separates training rows (index strictly
    earlier) from hold-out rows (index equal or later).  The hold-out slices
    are computed but play no part in fitting.

    Returns:
        The fitted ``QDA`` instance.
    """
    series = create_lagged_series(
        self.symbol_list[0],
        self.model_start_date,
        self.model_end_date,
        lags=5,
    )
    inputs = series[["Lag1", "Lag2"]]
    target = series["Direction"]

    # Row-selection masks on either side of the test start date.
    test_start = self.model_start_test_date
    inputs_before = inputs.index < test_start
    inputs_from = inputs.index >= test_start
    target_before = target.index < test_start
    target_from = target.index >= test_start

    inputs_train = inputs[inputs_before]
    inputs_test = inputs[inputs_from]
    target_train = target[target_before]
    target_test = target[target_from]

    fitted = QDA()
    fitted.fit(inputs_train, target_train)
    return fitted
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import (LinearDiscriminantAnalysis as LDA,
                                           QuadraticDiscriminantAnalysis as QDA)
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC, SVC
from create_lagged_series import create_lagged_series

# Script entry point: build the lagged data set, split it into train/test
# portions and print the header for the per-model results.
if __name__ == "__main__":
    # Create a lagged series for the "SPY" ticker (the original comment
    # describes this as the S&P500 US stock market index).
    # NOTE(review): `dt` is not imported in the visible part of this file;
    # presumably `from datetime import datetime as dt` appears earlier - confirm.
    snpret = create_lagged_series("SPY", dt(2016, 1, 10), dt(2017, 12, 31), lags=5)

    # Use the prior two days of returns as predictor
    # values, with direction as the response
    X = snpret[["Lag1", "Lag2"]]
    y = snpret["Direction"]

    # Train/test split with a fixed seed for reproducibility.
    # NOTE(review): test_size=0.8 puts 80% of the rows in the test set and
    # only 20% in the training set - confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

    # Create the (parametrised) models
    print("Hit Rates/Confusion Matrices:\n")
# -*- coding: utf-8 -*- # k_fold_cross_val.py import datetime import pandas as pd import sklearn from sklearn import cross_validation from sklearn.metrics import confusion_matrix from sklearn.svm import SVC from create_lagged_series import create_lagged_series if __name__ == "__main__": # Create a lagged series of the S&P500 US stock market index snpret = create_lagged_series("^GSPC", datetime.datetime(2001, 1, 10), datetime.datetime(2005, 12, 31), lags=5) # Use the prior two days of returns as predictor # values, with direction as the response X = snpret[["Lag1", "Lag2"]] y = snpret["Direction"] # Create a k-fold cross validation object kf = cross_validation.KFold(len(snpret), n_folds=10, shuffle=True, random_state=42) # Use the kf object to create index arrays that # state which elements have been retained for training
from sklearn.cross_validation import train_test_split from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import LogisticRegression from sklearn.lda import LDA from sklearn.metrics import confusion_matrix from sklearn.qda import QDA from sklearn.svm import LinearSVC, SVC from create_lagged_series import create_lagged_series if __name__ == "__main__": # Create a lagged series of the S&P500 US stock market index snpret = create_lagged_series( "^GSPC", datetime.datetime(2001,1,10), datetime.datetime(2005,12,31), lags=5 ) # Use the prior two days of returns as predictor # values, with direction as the response X = snpret[["Lag1","Lag2"]] y = snpret["Direction"] # Train/test split X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.8, random_state=42 ) # Create the (parametrised) models print "Hit Rates/Confusion Matrices:\n" models = [("LR", LogisticRegression()),
# train_test_split.py import datetime from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import LogisticRegression from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA from sklearn.metrics import confusion_matrix from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA from sklearn.svm import LinearSVC, SVC from create_lagged_series import create_lagged_series if __name__ == "__main__": # Create a lagged series of the S&P500 US stock market index snpret = create_lagged_series("^GSPC", datetime.datetime(2000, 1, 1), datetime.date.today(), lags=5) # Use the prior two days of returns as predictor # values, with direction as the response X = snpret[["Lag1", "Lag2"]] y = snpret["Direction"] # Train/test split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) # Create the (parametrised) models print("Hit Rates/Confusion Matrices:\n")