def load_data():
    """Build the residual-augmented training set.

    Loads the 'train' split, fits a first-order VAR on the features, and
    appends the in-sample VAR residuals of the return columns to the
    feature frame.  The targets are replaced by ``Y`` minus the VAR fitted
    values of the following row (``shift(-1)``).

    Returns:
        tuple: ``(x, y, X, Y)`` where ``X``/``Y`` are the aligned feature
        and target DataFrames and ``x``/``y`` are their NumPy arrays.
    """
    # Non-return feature columns; they are removed from the VAR fitted
    # values so that only the return columns remain.
    non_return_columns = [
        'XMRspread', 'XMRvolume', 'XMRbasevolume',
        'XRPspread', 'XRPvolume', 'XRPbasevolume',
        'LTCspread', 'LTCvolume', 'LTCbasevolume',
        'DASHspread', 'DASHvolume', 'DASHbasevolume',
        'ETHspread', 'ETHvolume', 'ETHbasevolume',
    ]

    X, Y = data.import_data(set='train')

    # Feed the network residuals rather than raw returns.
    results = VAR(X).fit(1)
    # Non in-place drop with an explicit axis keyword: leaves the frame held
    # by `results` untouched and works on pandas versions where the axis can
    # no longer be passed positionally.
    ar_returns = results.fittedvalues.drop(non_return_columns, axis=1)

    # Keep only the rows for which the VAR produced fitted values.
    X = X.loc[ar_returns.index]
    residual_df = X[ar_returns.columns] - ar_returns
    X = X.join(residual_df, how='inner', rsuffix='residual')

    # Relabel the fitted returns with the target column names.
    # NOTE(review): assumes the remaining return columns are in the same
    # order as Y's columns -- confirm against data.import_data.
    y_ar_returns = ar_returns.copy()
    y_ar_returns.columns = Y.columns

    # The last row has no following fitted value, so dropna() removes it.
    Y = (Y.loc[X.index] - y_ar_returns.shift(-1)).dropna()
    X = X.loc[Y.index]

    # DataFrame.as_matrix() no longer exists in current pandas; .values is
    # available on every version.
    return X.values, Y.values, X, Y
def fit_VAR(results, set_str):
    """Score one-step-ahead forecasts of a fitted VAR on a data split.

    Args:
        results: Fitted VAR results object; only ``forecast(y, steps=1)``
            is used.
        set_str: Name of the split, passed to ``data.import_data(set=...)``.

    Returns:
        tuple: ``(Test_pred, auc, mse)`` -- the forecast DataFrame restricted
        to the return columns, the ROC AUC of the forecasts against the sign
        of the targets, and the mean squared error against the targets.
    """
    import warnings

    # Non-return feature columns, removed from the forecasts before scoring.
    non_return_columns = [
        'XMRspread', 'XMRvolume', 'XMRbasevolume',
        'XRPspread', 'XRPvolume', 'XRPbasevolume',
        'LTCspread', 'LTCvolume', 'LTCbasevolume',
        'DASHspread', 'DASHvolume', 'DASHbasevolume',
        'ETHspread', 'ETHvolume', 'ETHbasevolume',
    ]

    X_test, Y_test = data.import_data(set=set_str)
    X_test_matrix = X_test.values
    predictions_test = np.zeros(X_test.shape)

    # One-step-ahead forecast from each single observation.  Forecasting is
    # best effort: a failing row keeps its all-zero prediction, but failures
    # are reported instead of being silently swallowed.
    failed_rows = 0
    last_error = None
    for i, row in enumerate(X_test_matrix):
        try:
            # reshape(1, -1) adapts to the number of columns instead of
            # assuming exactly 20.
            predictions_test[i] = results.forecast(row.reshape(1, -1), steps=1)
        except Exception as err:
            failed_rows += 1
            last_error = err
    if failed_rows:
        warnings.warn(
            "fit_VAR: forecast failed for {} of {} rows; their predictions "
            "are left at zero (last error: {!r})".format(
                failed_rows, len(X_test_matrix), last_error),
            RuntimeWarning,
        )

    Test_pred = pd.DataFrame(data=predictions_test, index=X_test.index,
                             columns=X_test.columns)
    Test_pred.drop(non_return_columns, axis=1, inplace=True)

    y_true = Y_test.values
    y_hat = Test_pred.values

    # Forecasts are shifted by 0.5 and clipped to [0, 1] to act as scores;
    # the binary label is whether the target is positive.
    flat_pred = np.clip(y_hat.flatten() + 0.5, 0, 1)
    flat_actual = np.where(y_true.flatten() > 0, 1, 0)
    auc = roc_auc_score(flat_actual, flat_pred)
    mse = mean_squared_error(y_true, y_hat)
    return Test_pred, auc, mse
def run_model(model_name, hidden_size):
    """Run a saved RNN on the VAR residuals of the 'train' split.

    The features are augmented with first-order VAR residuals, the RNN is
    evaluated row by row on them, and the VAR fitted values of the following
    row are added back to the network output.

    Args:
        model_name: Base name of the parameter file; the weights are read
            from ``<parent of this file's directory>/model_params/<model_name>.pth.tar``.
        hidden_size: Hidden size passed to the ``RNN`` constructor.

    Returns:
        tuple: ``(Y_pred, Y)`` -- the predictions as a DataFrame with the
        index and columns of ``Y``, and ``Y`` itself (targets minus the
        shifted VAR fitted values).
    """
    # Non-return feature columns, removed from the VAR fitted values.
    non_return_columns = [
        'XMRspread', 'XMRvolume', 'XMRbasevolume',
        'XRPspread', 'XRPvolume', 'XRPbasevolume',
        'LTCspread', 'LTCvolume', 'LTCbasevolume',
        'DASHspread', 'DASHvolume', 'DASHbasevolume',
        'ETHspread', 'ETHvolume', 'ETHbasevolume',
    ]

    # X, Y = data.import_data(set='cross_val')
    X, Y = data.import_data(set='train')

    # Feed the network residuals rather than raw returns.
    results = VAR(X).fit(1)
    ar_returns = results.fittedvalues.drop(non_return_columns, axis=1)

    # Keep only the rows for which the VAR produced fitted values.
    X = X.loc[ar_returns.index]
    residual_df = X[ar_returns.columns] - ar_returns
    X = X.join(residual_df, how='inner', rsuffix='residual')

    # NOTE(review): assumes the remaining return columns are in the same
    # order as Y's columns -- confirm against data.import_data.
    y_ar_returns = ar_returns.copy()
    y_ar_returns.columns = Y.columns
    next_ar_returns = y_ar_returns.shift(-1)

    Y = (Y.loc[X.index] - next_ar_returns).dropna()
    # Select the AR component by Y's index so both frames are guaranteed to
    # cover the same rows, instead of relying on an independent dropna().
    next_ar_returns = next_ar_returns.loc[Y.index]
    X = X.loc[Y.index]

    x = X.values
    y = Y.values

    # set prediction matrix
    y_pred = np.zeros(shape=y.shape)

    # set model
    model = RNN(hidden_size=hidden_size,
                input_size=X.shape[1],
                output_size=Y.shape[1])
    params_path = os.path.join(
        os.path.abspath(os.path.join(os.path.dirname(__file__), '..')),
        'model_params',
        '{}.pth.tar'.format(model_name),
    )
    model.load_state_dict(torch.load(params_path))

    # Rows are fed in order, one at a time.
    for step, features in enumerate(x):
        net_input = Variable(torch.from_numpy(features).float())
        output = model.forward(net_input)
        y_pred[step] = output.data.numpy()

    # Add the AR component back to the network's residual prediction.
    y_pred = y_pred + next_ar_returns.values
    Y_pred = pd.DataFrame(data=y_pred, index=Y.index, columns=Y.columns)
    return Y_pred, Y
import statsmodels as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from statsmodels.tsa.api import VAR
from Data.scripts.data import data

# Fit a first-order VAR on the training features and forecast the test split
# one step ahead from each observation.
X, Y = data.import_data(set='train')
X_test, Y_test = data.import_data(set='test')

VAR_model = VAR(X)
results = VAR_model.fit(1)
predictions = results.fittedvalues

# save the 1-order VAR model
results.save("One_order_VARmodel.pickle")

# initialize predictions on the test set (one row per test observation)
predictions_test = np.zeros(X_test.shape)
predictions_test_stress = np.zeros(X_test.shape)

# turn into numpy array
X_test_matrix = X_test.values

# predict one-step ahead out-of-sample; reshape(1, -1) follows the actual
# number of columns instead of assuming exactly 20
for i, row in enumerate(X_test_matrix):
    predictions_test[i] = results.forecast(row.reshape(1, -1), steps=1)

# stress test for VAR
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from Data.scripts.data import data
# from Models.RNN.scripts.SimpleRNN import SimpleRNN
from Models.RNN.scripts.LayerRNN import LayerRNN

# Fixed seed so runs are repeatable.
torch.manual_seed(1)

# import data
X, Y = data.import_data(set='train')
# DataFrame.as_matrix() no longer exists in current pandas; .values is
# available on every version.
x = X.values
y = Y.values

# set model, loss, and optimization
hidden_size = 10
# optim_string = 'SGD'
optim_string = 'SGDM'
model_string = 'EXTRA_Mom_Layer1_hiddenfor'
# model_string = 'Simple RNN'
n_epochs = 300
learning = 10
# NOTE(review): 10e-3 equals 0.01, so lr evaluates to 0.1 -- confirm that
# 1e-3 was not intended.
lr = learning * 10e-3

# Layer sizes are taken from the number of feature / target columns.
# model = SimpleRNN(hidden_size=hidden_size, input_size=X.shape[1], output_size=Y.shape[1])
model = LayerRNN(hidden_size=hidden_size,
                 input_size=X.shape[1],
                 output_size=Y.shape[1])
criterion = nn.MSELoss()
# NOTE(review): the indented statements below are the tail of a function whose
# header lies outside this view (the `return` shows they sit inside a function
# body); they are reproduced unchanged.
    Y = Y_test
    Y_pred = Test_pred
    # Predictions shifted by 0.5 and clipped to [0, 1] serve as scores; the
    # binary label is whether the actual value is positive.
    # NOTE(review): DataFrame.as_matrix() is not available in current pandas
    # releases -- confirm the pinned pandas version or switch to .values.
    flat_pred = np.clip(Y_pred.as_matrix().flatten() + 0.5, 0, 1)
    flat_actual = np.where(Y.as_matrix().flatten() > 0, 1, 0)
    auc = roc_auc_score(flat_actual, flat_pred)
    mse = mean_squared_error(Y.as_matrix(), Y_pred.as_matrix())
    return Test_pred, auc, mse


# Load the train and cross-validation splits as DataFrames and NumPy arrays.
X_train_df, Y_train_df = data.import_data(set='train')
X_train_matrix = X_train_df.as_matrix()
Y_train_matrix = Y_train_df.as_matrix()
X_dev_df, Y_dev_df = data.import_data(set='cross_val')
X_dev_matrix = X_dev_df.as_matrix()
Y_dev_matrix = Y_dev_df.as_matrix()

# Fit a first-order VAR on the training features, then score it on both
# splits with fit_VAR (which returns predictions, AUC and MSE).
VAR_model = VAR(X_train_df)
results = VAR_model.fit(1)
Y_train_pred_df, train_auc, train_mse = fit_VAR(results, 'train')
Y_dev_pred_df, dev_auc, dev_mse = fit_VAR(results, 'cross_val')
# NOTE(review): the indented statements below are the tail of a function whose
# header lies outside this view (the `return` shows they sit inside a function
# body); they are reproduced unchanged.
    # Final value of the strategy series.
    metrics['return'] = strat_series[-1]
    risk_free = 0
    # (final value - 1 - risk-free rate) divided by the standard deviation of
    # the series itself.
    metrics['sharpe'] = ( (strat_series[-1] - 1) - risk_free) / (np.std(strat_series))
    # Largest relative drop of the series below its running maximum.
    metrics['max_drawdown'] = (1 - strat_series.div(strat_series.cummax())).max()
    return metrics


# Alternative split, currently disabled:
# X,Y = data.import_data(set= 'train')
X, Y = data.import_data(set='cross_val')

# Alternative prediction file, currently disabled:
# Y_pred = pd.read_csv(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + '/csvs/y_pred.csv', index_col= 0)
# Predictions are read from the csvs directory next to this file's parent
# directory, using the first CSV column as the index.
Y_pred = pd.read_csv(
    os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + '/csvs/y_pred_cross_val.csv',
    index_col=0)

coins = ['ETH', 'XRP', 'LTC', 'DASH', 'XMR']

# Run the strategy, take the cumulative product of its output, and print the
# resulting metrics.
strat_series = (run_strategy(Y_pred=Y_pred, Returns_df=Y))
strat_series = strat_series.cumprod()
print(strat_metrics(strat_series))
import os
import time

import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable
from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve, auc
from statsmodels.tsa.api import VAR

from Data.scripts.data import data
from Models.Evaluation.eval import eval_model
from Models.Extra.scripts.RNN import RNN

# Baseline on the test split: the prediction for each row is the negated
# value of the previous row of the targets.
X_test, Y_test = data.import_data(set='test')

# The first row has no predecessor, so it is dropped after shifting.
Y_pred_df = Y_test.shift(1) * (-1)
Y_pred_df = Y_pred_df.dropna()

# Restrict the actual values to the rows that have a prediction.
Y_test = Y_test.loc[Y_pred_df.index]

# Evaluate silently and keep the resulting metrics and accuracy.
check_model = eval_model(y_actual_df=Y_test, y_pred_df=Y_pred_df)
check_model.backtest(printer=False)
check_model.accuracy(printer=False)
dev_metrics_dict, dev_acc_score = (
    check_model.metrics,
    check_model.accuracy_score,
)
__author__ = 'Ian'

import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from Data.scripts.data import data
from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve, auc
from Models.Evaluation.eval import eval_model

# Baseline on the test split: the prediction for each row is the previous
# row of the targets.
_, Y_actual = data.import_data(set='test')

# The first row has no predecessor, so it is dropped after shifting.
y_pred_df = Y_actual.shift(1)
y_pred_df = y_pred_df.dropna()
Y_actual = Y_actual.loc[y_pred_df.index]

tester = eval_model(y_pred_df=y_pred_df, y_actual_df=Y_actual)
tester.backtest(printer=False)
out_dict = tester.metrics

# DataFrame.as_matrix() no longer exists in current pandas; .values is
# available on every version.
out_dict['mse'] = mean_squared_error(Y_actual.values, y_pred_df.values)

# Predictions shifted by 0.5 and clipped to [0, 1] serve as scores; the
# binary label is whether the actual value is positive.
flat_pred = np.clip(y_pred_df.values.flatten() + 0.5, 0, 1)
flat_actual = np.where(Y_actual.values.flatten() > 0, 1, 0)
__author__ = 'Ian'

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from Data.scripts.data import data

# autocorrelation_plot is exposed from pandas.plotting in current pandas;
# fall back to the legacy pandas.tools.plotting location for old installs.
try:
    from pandas.plotting import autocorrelation_plot
except ImportError:
    from pandas.tools.plotting import autocorrelation_plot

X, Y = data.import_data(set='test')
coins = ['ETH', 'XRP', 'LTC', 'DASH', 'XMR']

# Constant baseline: a frame shaped like Y holding 1 wherever Y is finite
# (entries that are NaN in Y stay NaN).
Y_pred = (Y * 0) + 1

# Written to the csvs directory next to this file's parent directory.
Y_pred.to_csv(
    os.path.join(
        os.path.abspath(os.path.join(os.path.dirname(__file__), '..')),
        'csvs',
        'baseline_test_extra.csv',
    )
)
import os
import time
import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable
from Data.scripts.data import data
from Models.Extra.scripts.RNN import RNN
from Models.Evaluation.eval import eval_model
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve, auc

X_train_df, Y_train_df = data.import_data(set='train')

# Arrays are taken before the column drop below, so X_train_matrix still
# holds every feature column.  DataFrame.as_matrix() no longer exists in
# current pandas; .values is available on every version.
X_train_matrix = X_train_df.values
Y_train_matrix = Y_train_df.values

# use just the returns, no other data
columns = [
    'XMRspread', 'XMRvolume', 'XMRbasevolume',
    'XRPspread', 'XRPvolume', 'XRPbasevolume',
    'LTCspread', 'LTCvolume', 'LTCbasevolume',
    'DASHspread', 'DASHvolume', 'DASHbasevolume',
    'ETHspread', 'ETHvolume', 'ETHbasevolume',
]
# The axis is passed by keyword; recent pandas rejects it positionally.
X_train_df.drop(columns, axis=1, inplace=True)

# First-order VAR on the remaining (return) columns.
VAR_model = VAR(X_train_df)
results = VAR_model.fit(1)