Пример #1
0
def load_data():
    """Build residual-augmented features and residual targets from 'train'.

    A first-order VAR is fitted on the full feature frame. Its in-sample
    fitted values, restricted to the columns left after removing the
    spread/volume columns, are subtracted from the observed values to form
    residual columns that are joined onto the features. The targets have the
    fitted values of the following row subtracted from them.

    Returns:
        tuple: ``(x, y, X, Y)`` where ``X`` and ``Y`` are the index-aligned
        feature and target DataFrames and ``x`` and ``y`` are the same data
        as NumPy arrays.
    """
    X, Y = data.import_data(set='train')

    # Fit a lag-1 VAR; the network is fed residuals, not the raw values.
    results = VAR(X).fit(1)

    # Columns to drop from the fitted-values frame.
    columns = [
        'XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume',
        'XRPbasevolume', 'LTCspread', 'LTCvolume', 'LTCbasevolume',
        'DASHspread', 'DASHvolume', 'DASHbasevolume', 'ETHspread', 'ETHvolume',
        'ETHbasevolume'
    ]
    # Drop into a new frame so the frame held by `results` is not mutated.
    ar_returns = results.fittedvalues.drop(columns, axis=1)

    # Restrict the features to the rows that have fitted values.
    X = X.loc[ar_returns.index]
    residual_df = X[ar_returns.columns] - ar_returns
    X = X.join(residual_df, how='inner', rsuffix='residual')

    # Relabel a copy with the target column names so the subtraction aligns.
    # Assumes Y's columns are in the same order as the remaining fitted
    # columns -- TODO confirm against data.import_data.
    y_ar_returns = ar_returns.copy()
    y_ar_returns.columns = Y.columns

    # shift(-1) pairs each target row with the next row's fitted values;
    # dropna() removes the rows left without a partner.
    Y = (Y.loc[X.index] - y_ar_returns.shift(-1)).dropna()
    X = X.loc[Y.index]

    return X.values, Y.values, X, Y
def fit_VAR(results, set_str):
    """Score one-step-ahead forecasts of a fitted VAR on a named data split.

    Each row of the split's feature frame is passed on its own to
    ``results.forecast(..., steps=1)``. Forecasts for the spread/volume
    columns are discarded and the rest is compared with the split's targets.

    Args:
        results: Fitted VAR results object; only its ``forecast`` method is
            used.
        set_str: Name of the split, forwarded to ``data.import_data``.

    Returns:
        tuple: ``(Test_pred, auc, mse)`` -- the forecast DataFrame, the ROC
        AUC of ``clip(prediction + 0.5, 0, 1)`` against the indicator
        ``actual > 0`` over all flattened entries, and the mean squared error
        between actual and predicted values.
    """
    import warnings

    X_test, Y_test = data.import_data(set=set_str)

    X_test_matrix = X_test.values
    predictions_test = np.zeros(X_test_matrix.shape)

    # Predict one step ahead for each row. A row whose forecast fails keeps
    # its zero prediction (best effort, as before), but is now reported
    # instead of being silently ignored.
    failed_rows = 0
    for i, row in enumerate(X_test_matrix):
        try:
            # -1 infers the number of columns instead of hard-coding 20.
            predictions_test[i] = results.forecast(row.reshape(1, -1), steps=1)
        except Exception:
            failed_rows += 1
    if failed_rows:
        warnings.warn(
            'VAR forecast failed for {} of {} rows; their predictions were '
            'left at zero'.format(failed_rows, len(X_test_matrix)),
            RuntimeWarning)

    # Turn back into a DataFrame and drop the spread/volume forecasts.
    Test_pred = pd.DataFrame(data=predictions_test, index=X_test.index, columns=X_test.columns)
    columns = ['XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume', 'XRPbasevolume', 'LTCspread',
               'LTCvolume', 'LTCbasevolume', 'DASHspread', 'DASHvolume', 'DASHbasevolume', 'ETHspread', 'ETHvolume',
               'ETHbasevolume']
    Test_pred = Test_pred.drop(columns, axis=1)

    y_actual = Y_test.values
    y_pred = Test_pred.values

    # Shift the predictions by 0.5 and clip to [0, 1] to get a score, and
    # binarize the actual values by sign, before computing the AUC.
    flat_pred = np.clip(y_pred.flatten() + 0.5, 0, 1)
    flat_actual = np.where(y_actual.flatten() > 0, 1, 0)

    auc = roc_auc_score(flat_actual, flat_pred)
    mse = mean_squared_error(y_actual, y_pred)

    return Test_pred, auc, mse
Пример #3
0
def run_model(model_name, hidden_size):
    """Run a saved RNN on the 'train' split and add back the VAR component.

    The inputs are the features plus their lag-1 VAR residuals. The network
    output for each row is added to the VAR fitted values of the following
    row to give the final prediction.

    Args:
        model_name: Base name of the saved state dict, loaded from
            ``<parent of this file's directory>/model_params/<name>.pth.tar``.
        hidden_size: Hidden size passed to the ``RNN`` constructor.

    Returns:
        tuple: ``(Y_pred, Y)`` -- the predictions as a DataFrame indexed and
        labelled like ``Y``, and ``Y`` itself, which holds the targets minus
        the shifted VAR fitted values.
    """
    X, Y = data.import_data(set='train')

    # Fit a lag-1 VAR; the network is fed residuals, not the raw values.
    results = VAR(X).fit(1)

    # Columns to drop from the fitted-values frame.
    columns = ['XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume', 'XRPbasevolume', 'LTCspread',
               'LTCvolume', 'LTCbasevolume', 'DASHspread', 'DASHvolume', 'DASHbasevolume', 'ETHspread', 'ETHvolume',
               'ETHbasevolume']
    # Drop into a new frame so the frame held by `results` is not mutated.
    ar_returns = results.fittedvalues.drop(columns, axis=1)

    X = X.loc[ar_returns.index]
    residual_df = X[ar_returns.columns] - ar_returns
    X = X.join(residual_df, how='inner', rsuffix='residual')

    # Relabel a copy with the target column names so the subtraction aligns.
    # Assumes Y's columns are in the same order as the remaining fitted
    # columns -- TODO confirm against data.import_data.
    y_ar_returns = ar_returns.copy()
    y_ar_returns.columns = Y.columns
    shifted_ar = y_ar_returns.shift(-1)

    Y = (Y.loc[X.index] - shifted_ar).dropna()
    # Select the VAR component by Y's index so it stays row-aligned with Y
    # even if dropna() removed rows other than the last one.
    y_ar_returns = shifted_ar.loc[Y.index]
    X = X.loc[Y.index]

    x = X.values
    y = Y.values

    # Prediction matrix, filled row by row.
    y_pred = np.zeros(shape=y.shape)

    model = RNN(hidden_size=hidden_size, input_size=X.shape[1], output_size=Y.shape[1])
    params_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    model.load_state_dict(
        torch.load(os.path.join(params_dir, 'model_params', '{}.pth.tar'.format(model_name))))

    for i, row in enumerate(x):
        features = Variable(torch.from_numpy(row).float())
        output = model.forward(features)
        y_pred[i] = output.data.numpy()

    # Add the VAR component back onto the predicted residual.
    y_pred = y_pred + y_ar_returns.values

    Y_pred = pd.DataFrame(data=y_pred, index=Y.index, columns=Y.columns)

    return Y_pred, Y
Пример #4
0
import statsmodels as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from statsmodels.tsa.api import VAR
from Data.scripts.data import data

# Fit a first-order VAR on the training features, save it, and produce
# one-step-ahead forecasts for every row of the test features.
X, Y = data.import_data(set='train')
X_test, Y_test = data.import_data(set='test')

VAR_model = VAR(X)

results = VAR_model.fit(1)
predictions = results.fittedvalues

# save the 1-order VAR model
results.save("One_order_VARmodel.pickle")

# initialize prediction buffers with the same shape as the test features
predictions_test = np.zeros(X_test.shape)
predictions_test_stress = np.zeros(X_test.shape)

# turn into numpy array
X_test_matrix = X_test.values

# predict one-step ahead out-of-sample; reshape(1, -1) infers the number of
# columns instead of hard-coding 20
for i in range(X_test.shape[0]):
    predictions_test[i] = results.forecast(X_test_matrix[i, :].reshape(1, -1), steps=1)

# stress test for VAR
Пример #5
0
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

from Data.scripts.data import data
# from Models.RNN.scripts.SimpleRNN import SimpleRNN
from Models.RNN.scripts.LayerRNN import LayerRNN

# Fix the torch RNG seed so runs are repeatable.
torch.manual_seed(1)

# import data
X, Y = data.import_data(set='train')
x = X.values
y = Y.values

# set model, loss, and optimization
hidden_size = 10
optim_string = 'SGDM'
model_string = 'EXTRA_Mom_Layer1_hiddenfor'
n_epochs = 300
learning = 10
# NOTE: 10e-3 equals 0.01, so lr evaluates to 0.1.
lr = learning * 10e-3
# Input and output widths are the column counts of the feature and target frames.
model = LayerRNN(hidden_size=hidden_size, input_size=X.shape[1], output_size=Y.shape[1])
criterion = nn.MSELoss()
    Y = Y_test

    Y_pred = Test_pred

    flat_pred = np.clip(Y_pred.as_matrix().flatten() + 0.5, 0, 1)

    flat_actual = np.where(Y.as_matrix().flatten() > 0, 1, 0)

    auc = roc_auc_score(flat_actual, flat_pred)

    mse = mean_squared_error(Y.as_matrix(), Y_pred.as_matrix())

    return Test_pred, auc, mse

# Load the training and cross-validation splits as DataFrames and arrays.
X_train_df, Y_train_df = data.import_data(set='train')
X_train_matrix = X_train_df.values
Y_train_matrix = Y_train_df.values

X_dev_df, Y_dev_df = data.import_data(set='cross_val')
X_dev_matrix = X_dev_df.values
Y_dev_matrix = Y_dev_df.values

# Fit a first-order VAR on the training features.
VAR_model = VAR(X_train_df)

results = VAR_model.fit(1)

# Score the fitted model on both splits.
Y_train_pred_df, train_auc, train_mse = fit_VAR(results, 'train')

Y_dev_pred_df, dev_auc, dev_mse = fit_VAR(results, 'cross_val')
Пример #7
0
    metrics['return'] = strat_series[-1]

    risk_free = 0

    metrics['sharpe'] = (
        (strat_series[-1] - 1) - risk_free) / (np.std(strat_series))

    metrics['max_drawdown'] = (1 -
                               strat_series.div(strat_series.cummax())).max()

    return metrics


# Backtest the saved cross-validation predictions against that split's Y frame.
# (Use set='train' together with '/csvs/y_pred.csv' for the training split.)
X, Y = data.import_data(set='cross_val')

Y_pred = pd.read_csv(
    os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + '/csvs/y_pred_cross_val.csv',
    index_col=0,
)

coins = ['ETH', 'XRP', 'LTC', 'DASH', 'XMR']

# Take the cumulative product of the series returned by the strategy.
strat_series = run_strategy(Y_pred=Y_pred, Returns_df=Y).cumprod()

print(strat_metrics(strat_series))
Пример #8
0
import os
import time

import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable

from Data.scripts.data import data
from Models.Extra.scripts.RNN import RNN
from Models.Evaluation.eval import eval_model
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve, auc

# Baseline: predict the negative of the previous row's target on the test split.
X_test, Y_test = data.import_data(set='test')

Y_pred_df = (-1) * Y_test.shift(1)
# The shift leaves rows without a predecessor as NaN; drop them.
Y_pred_df = Y_pred_df.dropna()

# Keep only the actual rows that have a prediction.
Y_test = Y_test.loc[Y_pred_df.index]

check_model = eval_model(y_pred_df=Y_pred_df, y_actual_df=Y_test)
check_model.backtest(printer=False)
check_model.accuracy(printer=False)

dev_metrics_dict, dev_acc_score = check_model.metrics, check_model.accuracy_score
Пример #9
0
__author__ = 'Ian'

import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from Data.scripts.data import data
from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve, auc
from Models.Evaluation.eval import eval_model

# Baseline: predict each test target as the previous row's target.
_, Y_actual = data.import_data(set='test')

y_pred_df = Y_actual.shift(1)

# The shift leaves rows without a predecessor as NaN; drop them.
y_pred_df = y_pred_df.dropna()

# Keep only the actual rows that have a prediction.
Y_actual = Y_actual.loc[y_pred_df.index]

tester = eval_model(y_pred_df=y_pred_df, y_actual_df=Y_actual)

tester.backtest(printer=False)

out_dict = tester.metrics

out_dict['mse'] = mean_squared_error(Y_actual.values, y_pred_df.values)

# Shift the predictions by 0.5 and clip to [0, 1]; binarize the actual values by sign.
flat_pred = np.clip(y_pred_df.values.flatten() + 0.5, 0, 1)

flat_actual = np.where(Y_actual.values.flatten() > 0, 1, 0)
Пример #10
0
__author__ = 'Ian'

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from Data.scripts.data import data
from pandas.tools.plotting import autocorrelation_plot

# Write a constant baseline for the test split: a frame shaped like Y, filled with 1.
X, Y = data.import_data(set='test')

coins = ['ETH', 'XRP', 'LTC', 'DASH', 'XMR']

# Multiply by zero and add one, keeping Y's index and columns.
Y_pred = Y.mul(0).add(1)

Y_pred.to_csv(
    os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) + '/csvs/baseline_test_extra.csv'
)
Пример #11
0
import os
import time

import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable

from Data.scripts.data import data
from Models.Extra.scripts.RNN import RNN
from Models.Evaluation.eval import eval_model
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error, roc_auc_score, roc_curve, auc

# Load the training split; the arrays are taken before any columns are
# dropped from the feature frame.
X_train_df, Y_train_df = data.import_data(set='train')
X_train_matrix = X_train_df.values
Y_train_matrix = Y_train_df.values

# use just the returns, no other data
columns = [
    'XMRspread', 'XMRvolume', 'XMRbasevolume', 'XRPspread', 'XRPvolume',
    'XRPbasevolume', 'LTCspread', 'LTCvolume', 'LTCbasevolume', 'DASHspread',
    'DASHvolume', 'DASHbasevolume', 'ETHspread', 'ETHvolume', 'ETHbasevolume'
]
# axis is passed by keyword; newer pandas rejects a positional axis here.
X_train_df.drop(columns, axis=1, inplace=True)

# Fit a first-order VAR on the remaining columns.
VAR_model = VAR(X_train_df)

results = VAR_model.fit(1)