Example #1
def VAR_IRF(df, n=10, future=20):
    m = VAR(df)
    # Show the information criteria up to lag n so a lag order can be chosen.
    print(m.select_order(n).summary())
    order = int(input('order:'))
    model = m.fit(maxlags=order)
    print('\n\n', model.summary())
    model.irf(future).plot()
def decide_degree_best(self):
    # Build a VAR model.
    model = VAR(self.X)

    # Determine the optimal VAR model order using AIC.
    print(model.select_order(15).summary())
    results = model.fit(maxlags=15, ic='aic')
    print(results.summary())
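A minimal, self-contained sketch of this select-then-fit pattern on synthetic data (all names below are illustrative, not from the snippet above):

import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.standard_normal((200, 2)), columns=['y1', 'y2'])

model = VAR(df)
selection = model.select_order(15)         # LagOrderResults
print(selection.selected_orders)           # e.g. {'aic': 1, 'bic': 0, 'hqic': 0, 'fpe': 1}
results = model.fit(maxlags=15, ic='aic')  # refit with the AIC-chosen lag
print(results.k_ar)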
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.api import VAR
from statsmodels.tsa.base.datetools import dates_from_str


def vector_autoregression_example():
	mdata = sm.datasets.macrodata.load_pandas().data

	# Prepare the dates index.
	dates = mdata[['year', 'quarter']].astype(int).astype(str)
	quarterly = dates['year'] + 'Q' + dates['quarter']
	quarterly = dates_from_str(quarterly)
	mdata = mdata[['realgdp', 'realcons', 'realinv']]
	mdata.index = pd.DatetimeIndex(quarterly)
	data = np.log(mdata).diff().dropna()

	# Make a VAR model.
	model = VAR(data)

	results = model.fit(2)
	print(results.summary())

	# Plots input time series.
	results.plot()

	# Plots time series autocorrelation function.
	results.plot_acorr()

	# Lag order selection.
	print(model.select_order(15).summary())
	results = model.fit(maxlags=15, ic='aic')

	# Forecast 5 steps ahead from the last k_ar observations.
	lag_order = results.k_ar
	print(results.forecast(data.values[-lag_order:], 5))

	results.plot_forecast(10)

	# Impulse response analysis.
	# Impulse responses are the estimated responses to a unit impulse in one of the variables.
	# In practice they are computed from the MA(infinity) representation of the VAR(p)
	# process (see the note after this example).
	irf = results.irf(10)

	irf.plot(orth=False)
	irf.plot(impulse='realgdp')
	irf.plot_cum_effects(orth=False)

	# Forecast error variance decomposition (FEVD).
	fevd = results.fevd(5)
	print(fevd.summary())

	results.fevd(20).plot()

	# Statistical tests.

	# Granger causality.
	print(results.test_causality('realgdp', ['realinv', 'realcons'], kind='f').summary())
	# Normality of the residuals.
	print(results.test_normality().summary())
	# Whiteness (absence of autocorrelation) of the residuals.
	print(results.test_whiteness().summary())
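For reference (standard VAR theory, as noted in the comments above): a stable VAR(p) process has the MA(infinity) representation

    y_t = \mu + \sum_{i=0}^{\infty} \Phi_i u_{t-i}, \qquad \Phi_0 = I_k,

and the response of variable j, i periods after a unit shock in variable m, is the (j, m) element of \Phi_i. Orthogonalized responses (orth=True) instead use \Phi_i P, where \Sigma_u = P P' is the Cholesky decomposition of the residual covariance matrix.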
Example #4
import operator

import numpy as np
from statsmodels.tsa.api import VAR


def VARprocess(df, log=False):
    # Optional log transformation, first differencing, and dropping of NaNs.
    if log:
        df = np.log(df + 0.1).diff().dropna()
    # Vector autoregression process generation.
    maxAttr = len(df.columns)
    # Find a usable lag order, dropping columns until select_order succeeds.
    orderFound = False
    while not orderFound:
        try:
            model = VAR(df.iloc[:, 0:maxAttr])
            order = model.select_order()
            orderFound = True
        except Exception as exc:
            if str(exc) == "data already contains a constant.":
                maxAttr = maxAttr - 1
            else:
                maxAttr = int(str(exc).split("-th")[0]) - 1
            print("Exception, reducing to n_attributes", maxAttr)

    # select_order() returns a LagOrderResults object; keep the original logic
    # of using the criterion that selects the largest lag.
    method, n_lags = max(order.selected_orders.items(), key=operator.itemgetter(1))
    print("n_lags", n_lags)
    print("method", method)
    results = model.fit(maxlags=n_lags, ic=method)
    return results
def compute_pair_metrics(security, candidates):
    security = security.div(security.iloc[0])
    ticker = security.name
    candidates = candidates.div(candidates.iloc[0])
    spreads = candidates.sub(security, axis=0)
    n, m = spreads.shape
    X = np.ones(shape=(n, 2))
    X[:, 1] = np.arange(1, n + 1)
    drift = ((
        np.linalg.inv(X.T @ X) @ X.T @ spreads).iloc[1].to_frame('drift'))
    vol = spreads.std().to_frame('vol')
    corr_ret = (candidates.pct_change().corrwith(
        security.pct_change()).to_frame('corr_ret'))
    corr = candidates.corrwith(security).to_frame('corr')
    metrics = drift.join(vol).join(corr).join(corr_ret).assign(n=n)
    tests = []
    for candidate, prices in candidates.items():
        df = pd.DataFrame({'s1': security, 's2': prices})
        var = VAR(df.values)
        lags = var.select_order()  # select VAR order
        k_ar_diff = lags.selected_orders['aic']
        # Johansen Test with constant Term and estd. lag order
        cj0 = coint_johansen(df, det_order=0, k_ar_diff=k_ar_diff)
        # Engle-Granger Tests
        t1, p1 = coint(security, prices, trend='c')[:2]
        t2, p2 = coint(prices, security, trend='c')[:2]
        tests.append([ticker, candidate, t1, p1, t2, p2, k_ar_diff, *cj0.lr1])
    columns = [
        's1', 's2', 't1', 'p1', 't2', 'p2', 'k_ar_diff', 'trace0', 'trace1'
    ]
    tests = pd.DataFrame(tests, columns=columns).set_index('s2')
    return metrics.join(tests)
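A minimal sketch of the two cointegration tests used above, run on a synthetic cointegrated pair (series names and parameters are illustrative):

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import coint
from statsmodels.tsa.vector_ar.vecm import coint_johansen

rng = np.random.default_rng(42)
x = np.cumsum(rng.standard_normal(500))   # random walk
y = x + rng.standard_normal(500)          # cointegrated with x by construction
df = pd.DataFrame({'s1': y, 's2': x})

# Engle-Granger: a small p-value suggests cointegration.
t_stat, p_value = coint(df['s1'], df['s2'], trend='c')[:2]
print(t_stat, p_value)

# Johansen: compare the trace statistics (lr1) with the critical values (cvt).
cj = coint_johansen(df, det_order=0, k_ar_diff=1)
print(cj.lr1)   # trace statistics
print(cj.cvt)   # 90/95/99% critical values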
Example #7
    def decompose(self):
        dataset = pd.read_csv('Raotbl6.csv', index_col=0)
        num_vars = dataset.shape[1]
        fig, axes = pyplot.subplots(num_vars, 1, figsize=(16, 12))
        for i in range(num_vars):
            col = dataset.columns[i]
            axes[i].plot(dataset[col])
            axes[i].set_xticks([])
            axes[i].set_title(col)

        # Apply VAR model to data set, summarize
        model = VAR(dataset)
        var_selected = model.select_order(maxlags=10)
        print(var_selected.summary())

        # Fit a first-order model; in practice, pick the lag with the
        # lowest AIC from the summary above.
        model_fitted = model.fit(1)
        print(model_fitted.summary())
        lag_order = model_fitted.k_ar
        forecast_input = dataset.values[-lag_order:]
        quarters_to_predict = 24
        predicted_values = model_fitted.forecast(forecast_input,
                                                 quarters_to_predict)
        dataset_dates = [
            datetime.datetime.strptime(date, '%Y-%m-%d')
            for date in dataset.index
        ]
        last_date = dataset_dates[-1]
        predicted_dates = []
        for _ in range(quarters_to_predict):
            next_date = last_date + dateutil.relativedelta.relativedelta(
                months=3)
            predicted_dates.append(next_date)
            last_date = next_date
        predicted_index = [
            date.strftime('%Y-%m-%d') for date in predicted_dates
        ]

        fig, axes = pyplot.subplots(num_vars, 1, figsize=(16, 12))
        for i in range(num_vars):
            col = dataset.columns[i]
            axes[i].plot(dataset[col], color='blue')
            axes[i].plot(predicted_index,
                         predicted_values[:, i],
                         color='green')
            axes[i].set_xticks([])
            axes[i].set_title(col)
        pyplot.show()

        fit = pm.auto_arima(dataset['gdfce'],
                            seasonal=True,
                            stepwise=True,
                            error_action='ignore',
                            m=12,
                            max_order=6)
        print(fit.summary())
Example #8
def causality_VAR(post_ts, max_order):

    model = VAR(post_ts)
    # selected_orders maps each information criterion to its chosen lag.
    best_lag = model.select_order(max_order).selected_orders

    print('best lag:', best_lag)

    result = model.fit(best_lag['aic'])

    return result, best_lag
Example #9
def forecast_DNS_VARm(ts, pred):
    model = VAR(ts)
    x = model.select_order(maxlags=3)
    lag_order = x.selected_orders["bic"]  # select the best model by the BIC criterion
    if lag_order == 0:  # constrain the order so the model does not collapse to a random walk
        lag_order = 1
    model_fitted = model.fit(lag_order)

    return model_fitted.forecast(ts.values[-lag_order:], pred)
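Hypothetical usage with toy factor data (the column names are illustrative, not from the original project):

import numpy as np
import pandas as pd

rng = np.random.default_rng(7)
factors = pd.DataFrame(rng.standard_normal((120, 3)).cumsum(axis=0),
                       columns=['level', 'slope', 'curvature'])
print(forecast_DNS_VARm(factors, pred=6))   # 6-step-ahead forecasts, shape (6, 3)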
Example #10
File: models.py  Project: ruzzzzz/Citrus
    def var(self, df, host):

        df_diffed, no_diffs = Helper.diff_test(df)

        print(df_diffed)
        df_diffed = df_diffed.replace([np.inf, -np.inf], np.nan)
        cols = df_diffed.columns
        df_diffed = df_diffed.dropna()

        print("Length  : " + str(len(df_diffed)))
        nobs = int(len(df_diffed) / 10) + 2
        train = df_diffed[:-nobs]
        test = df_diffed[-nobs:]
        #print(train)
        model = VAR(train)

        maxlags = int(nobs / 2) + 1

        aic = model.select_order(maxlags).selected_orders['aic']

        results = model.fit(aic)
        print(results.summary())

        lagged_values = train.values[-maxlags:]
        #print(lagged_values)
        forecast = results.forecast(y=lagged_values, steps=nobs)

        idx = pd.date_range(test.first_valid_index(), periods=nobs)
 
        df_forecast = pd.DataFrame(data=forecast, index = idx, columns=cols)
        #print(df_forecast)

        df_fixed = Helper.reverse_diff(df_forecast, df, nobs, no_diffs)

        test_range = df[-nobs:]
        print("-- TEST Result -- \n")
        print(test_range)
        print("-- TEST Result END -- \n")
        print("-- Forecast Result -- \n")
        print(df_fixed)
        print("-- Forecast Result END -- \n")

        for col in df.columns:
            print("-- RMSE --")
            print(rmse(test_range[col], df_fixed[col + '_forecast']))
            print("-- Mean --")
            print(test_range[col].mean())
            df[col].plot(legend=True)
            df_fixed[col + '_forecast'].plot(legend=True)
            plt.show()
def get_optimal_lag_exper(p_src_index, src_neighbor_indices,
                          normalized_cells_response_curve):
    from statsmodels.tsa.api import VAR
    import numpy as np  # used below for argmin / isnan

    #get the src neighbors
    number_of_points = len(src_neighbor_indices)

    optimal_lag_vector = dict()

    for p_dst_index in src_neighbor_indices:
        src_dst_data = None
        try:
            src_dst_data = normalized_cells_response_curve[
                [p_src_index, p_dst_index], :]
            src_dst_data = np.transpose(src_dst_data)
            model = VAR(src_dst_data)
            maxlags = None

            lag_order_results = model.select_order(maxlags=maxlags)

            lags = [
                lag_order_results.aic, lag_order_results.bic,
                lag_order_results.fpe, lag_order_results.hqic
            ]

            min_i = np.argmin(lags)

            model = model.fit(maxlags=lags[min_i], ic=None)

            p_value_whiteness = model.test_whiteness(nlags=lags[min_i]).pvalue

            if np.isnan(p_value_whiteness) or p_value_whiteness < 0.05:
                raise ValueError('found autocorrelation in residuals.')

                #i = models[min_i].k_ar + 1
                #while i < 12 * (models[min_i].nobs/100.)**(1./4):
                #    result_auto_co = model._estimate_var(i,  trend='c')
                #    if result_auto_co.test_whiteness(nlags=i).pvalue > 0.05:
                #        break
                #    i += 1

                #    print 'error order:' + str(models[min_i].k_ar)
                #    print 'found correlation ' + str(i)

            optimal_lag_vector[p_dst_index] = lags[min_i]
        except:
            print('src index: ' + str(p_src_index) + ' dst index: ' +
                  str(p_dst_index))
            if src_dst_data is not None:
                print(src_dst_data)
            raise

    return optimal_lag_vector
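A compact sketch of the residual-whiteness (Portmanteau) check this function relies on, on synthetic data (nlags=10 is an arbitrary illustrative choice; it must exceed the fitted order):

import numpy as np
from statsmodels.tsa.api import VAR

rng = np.random.default_rng(3)
data = rng.standard_normal((300, 2))

res = VAR(data).fit(2)
white = res.test_whiteness(nlags=10)
print(white.pvalue)   # a small p-value signals remaining residual autocorrelation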
def get_optimal_lag(p_src_index, neighbor_indices,
                    normalized_cells_response_curve):
    #get the src neighbors
    number_of_points = len(neighbor_indices)

    src_neighbor_indices = neighbor_indices[p_src_index]

    optimal_lag_vector = np.zeros((number_of_points))

    for p_dst_index in src_neighbor_indices:
        #find the common neighbours
        dst_neighbor_indices = neighbor_indices[p_dst_index]
        disjoint_neighbours = get_disjoint_neighbours(p_src_index, p_dst_index,
                                                      neighbor_indices)

        src_dst_data = normalized_cells_response_curve[
            [p_src_index, p_dst_index], :]
        src_dst_data = np.transpose(src_dst_data)
        model = VAR(src_dst_data)
        maxlags = None

        lag_order_results = model.select_order(maxlags=maxlags)

        lags = [
            lag_order_results.aic, lag_order_results.bic,
            lag_order_results.fpe, lag_order_results.hqic
        ]

        min_i = np.argmin(lags)

        model = model.fit(maxlags=lags[min_i], ic=None)

        if model.test_whiteness(nlags=lags[min_i]).pvalue < 0.05:
            raise ValueError('found autocorrelation in residuals.')

            #i = models[min_i].k_ar + 1
            #while i < 12 * (models[min_i].nobs/100.)**(1./4):
            #    result_auto_co = model._estimate_var(i,  trend='c')
            #    if result_auto_co.test_whiteness(nlags=i).pvalue > 0.05:
            #        break
            #    i += 1

            #    print 'error order:' + str(models[min_i].k_ar)
            #    print 'found correlation ' + str(i)

        optimal_lag_vector[p_dst_index] = lags[min_i]

        break

    return optimal_lag_vector
Example #13
def get_optimal_lag(p_src_index, neighbor_indices,
                    normalized_cells_response_curve):
    #get the src neighbors
    number_of_points = len(neighbor_indices)

    src_neighbor_indices = neighbor_indices[p_src_index]

    optimal_lag_vector = np.zeros((number_of_points))

    for p_dst_index in src_neighbor_indices:
        src_dst_data = normalized_cells_response_curve[
            [p_src_index, p_dst_index], :]
        src_dst_data = np.transpose(src_dst_data)
        model = VAR(src_dst_data)
        maxlags = None

        lag_order_results = model.select_order(maxlags=maxlags)

        lags = [
            lag_order_results.aic, lag_order_results.bic,
            lag_order_results.fpe, lag_order_results.hqic
        ]

        min_i = np.argmin(lags)

        var_result = model.fit(maxlags=lags[min_i], ic=None)

        portmanteau_test = var_result.test_whiteness(lags[min_i]).pvalue
        if portmanteau_test < 0.05:
            raise ValueError('found autocorrelation in residuals.' +
                             str(portmanteau_test))
            '''                        
            i = lags[min_i] + 1
            while i < 12 * (model.nobs/100.)**(1./4):                
                var_result = model.fit(i, ic=None)
                if var_result.test_whiteness(max(10, i + 1)).pvalue >= 0.05:
                    break
                i += 1
                
                #print('error order:' + str(lags[min_i]))
                #print('found correlation ' + str(i))

            optimal_lag_vector[p_dst_index] = i    
        
            else:
            '''
        optimal_lag_vector[p_dst_index] = lags[min_i]

    return optimal_lag_vector
Example #14
    def select_order_of_VAR_model(self):
        model = VAR(self.df)
        print("\n*********checking different orders of lag************\n")
        for i in range(1, 10):
            result = model.fit(i)
            print('Lag Order =', i)
            print('AIC : ', result.aic)
            print('BIC : ', result.bic)
            print('FPE : ', result.fpe)
            print('HQIC: ', result.hqic, '\n')

        #alternative
        print("\n*********select_order method used: ************\n")
        x = model.select_order(maxlags=self.max_lags)
        print(x.summary())
Example #15
def VARprocess(df, log=False):
    """
    Description: This function applies Vector Auto Regression
    Input: dataframe
    Output: VARResults object
    """
    # Optional log transformation, first differencing, and dropping of NaNs.
    if log:
        df = np.log(df + 0.1).diff().dropna()
    # Vector autoregression process generation.
    maxAttr = len(df.columns)
    # Find a usable lag order, dropping columns until select_order succeeds.
    orderFound = False
    print("7.1.0 ----- Finding an order for the VAR")
    maxIter = 0
    while not orderFound and maxIter < 15:
        maxIter = maxIter + 1
        try:
            model = VAR(df.iloc[:, 0:maxAttr])
            order = model.select_order()
            orderFound = True
        except Exception:
            maxAttr = maxAttr - 1
            print("Exception, reducing to n_attributes", maxAttr)
    print("7.1.1 ----- Model fitting")
    if orderFound:
        # Keep the original logic: use the criterion that selects the largest lag.
        method, n_lags = max(order.selected_orders.items(), key=operator.itemgetter(1))
        results = model.fit(maxlags=n_lags, ic=method)
    else:
        results = model.fit()
    return results
Example #16
import numpy as np
from statsmodels.tsa.api import VAR


def temporal_detect_individual(target_idx, dta, maxlag):

    num_ts = len(dta[0])
    len_ts = len(dta)

    tmp_target = [dta[j][target_idx] for j in range(len_ts)]

    res_lag = []

    for i in range(num_ts):
        if i != target_idx:

            tmp_ts = [dta[j][i] for j in range(len_ts)]
            # Pair the target and candidate series as a 2-column array
            # (zip() would give an iterator in Python 3, which VAR cannot use).
            tmp_x = np.column_stack((tmp_target, tmp_ts))

            model = VAR(tmp_x)
            best_lag = model.select_order(maxlag).selected_orders

            res_lag.append(best_lag)

    return res_lag
def multi_forecast(
        sd_log, variables,
        n_period):  # sd_log object, variables list of features column names
    """

    :param sd_log: sd_log object
    :param variables: features you would like to use for the multivariate forecast
    :param n_period: steps you would like to predict
    :return:
    """
    max_lag = 5
    # Check for stationary
    if sd_log.isStationary:
        data = sd_log.data[variables]
    else:
        data = sd_log.data_diff[0][variables]
        ndiff = sd_log.data_diff[1]

    #  Split into train (0.9) and test (0.1)
    #data_train = data[:int(0.9*(len(data)))]
    #data_test = data[int(0.9*(len(data))):]
    model = VAR(data)
    # Look for minimum AIC/BIC and corresponding lag to fit model
    lag = min(model.select_order(maxlags=max_lag).selected_orders.values())
    results = model.fit(lag)
    print(results.summary())
    var_diagnostic(results)
    results.plot_forecast(n_period)
    lag_order = results.k_ar
    fc = results.forecast(data.values[-lag_order:], n_period)
    df_fc = pd.DataFrame(fc, index=data.index[-n_period:])
    # TODO inverting forecast
    #inv_diff(sd_log.data[sd_log.finish_rate], data[sd_log.finish_rate], ndiff)
    plt.show()

    return df_fc
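A generic sketch (not the project's Helper) of the inversion the TODO refers to: recover level forecasts from first-difference forecasts by adding the last observed level to the running sum of the differences.

import numpy as np

def invert_first_diff(last_level, diffed_forecast):
    # level at step h = last observed level + sum of the first h forecasted differences
    return last_level + np.cumsum(diffed_forecast, axis=0)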
Example #18
import pandas as pd
from statsmodels.tsa.api import VAR

series = (
    pd.read_csv("../dados/series_log.csv", parse_dates=["date"], index_col=["date"])
    .loc[:, ["spread", "selic", "inad", "ibc"]]
    .dropna()
)

var = VAR(endog=series)
var_model = var.fit(maxlags=4, verbose=True)
print(var_model.test_whiteness(nlags=12).summary())

print(var.select_order(12).summary())

print(" & ".join(series.columns))
for linha in series.columns:
    resultados = []
    for coluna in series.columns:
        test = var_model.test_causality(caused=linha, causing=coluna, kind="wald")
        if coluna == linha:
            resultados.append(" - & - ")
        else:
            resultados.append(f"{test.test_statistic:.3f} & {test.pvalue:.3f}")
    print(linha + " & " + " & ".join(resultados))

total = []
for linha in series.columns:
    resultados_total = var_model.test_causality(
        causing=[serie for serie in series.columns if serie != linha],
Example #19
print(grangers_causation_matrix(dataFrame, variables=dataFrame.columns))
cointegration_test(dataFrame)

# select the order of VAR model
model = VAR(df_differenced)

for i in range(1, 10):
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')

x = model.select_order(maxlags=10)
print(x.summary())

model_fitted = model.fit(10)
print(model_fitted.summary())

# use the Durbin-Watson statistic to check for serial correlation in the residuals
out = durbin_watson(model_fitted.resid)

for col, val in zip(dataFrame.columns, out):
    print(adjust(col), ':', round(val, 2))

# get the lag order
lag_order = model_fitted.k_ar
print(lag_order)
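As a standard rule of thumb (not from the snippet above): the Durbin-Watson statistic lies in [0, 4]; values near 2 indicate little serial correlation, values toward 0 positive correlation, and values toward 4 negative correlation.

import numpy as np
from statsmodels.stats.stattools import durbin_watson

white_noise = np.random.default_rng(5).standard_normal(500)
print(round(durbin_watson(white_noise), 2))   # close to 2 for uncorrelated residuals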
pred_garch.residual_variance[-1:]

# ### Multivariate Regression Model

# In[59]:

from statsmodels.tsa.api import VAR

# In[60]:

df_ret = df[['ret_spx', 'ret_dax', 'ret_ftse', 'ret_nikkei']][1:]

# In[61]:

model_var_ret = VAR(df_ret)
model_var_ret.select_order(20)
results_var_ret = model_var_ret.fit(ic='aic')

# In[62]:

results_var_ret.summary()

# In[63]:

lag_order_ret = results_var_ret.k_ar
var_pred_ret = results_var_ret.forecast(df_ret.values[-lag_order_ret:],
                                        len(df_test[start_date:end_date]))

df_ret_pred = pd.DataFrame(data=var_pred_ret,
                           index=df_test[start_date:end_date].index,
                           columns=df_test[start_date:end_date].columns[4:8])
Example #21
def optimal_lag(data, maxlag):
    model = VAR(data)
    result = model.select_order(maxlag)
    return result.aic  # the lag order selected by the AIC criterion
Example #22
    return model, loss.data, loss_rmse, training_hist


def predict(model, inputs):
    
    outputs = model.forward(inputs)
    
    return outputs


#QuantAR
import statsmodels.formula.api as smf
mod = smf.quantreg('y ~ x', data)
res = mod.fit(q=.5)
print(res.summary())

#VAR
import statsmodels.api as sm

from statsmodels.tsa.api import VAR  # DynamicVAR has been removed from statsmodels
model = VAR(data)
results = model.fit(2)
model.select_order(15)
results = model.fit(maxlags=15, ic='aic')

lag_order = results.k_ar

results.forecast(data.values[-lag_order:], 5)


Example #23
trainset.tail()
# testset.head()
#using trainset to modeling var
from statsmodels.tsa.api import VAR
help(VAR)
modelvar = VAR(trainset)

bestmodelaic = modelvar.fit(maxlags=15, ic='aic')
bestmodelaic.summary()
bestmodelbic = modelvar.fit(maxlags=15, ic='bic')
bestmodelbic.summary()
bestmodelhqic = modelvar.fit(maxlags=15, ic='hqic')
bestmodelhqic.summary()

modelvar.select_order(15)
#using CV to choose the best model
def rolling_forecast(trainset,testset,lags):

    Pmse = []
    forecastreturn = []
    accuracys = []
    ntest = len(testset)
    for i in range(0,ntest):
        if i == 0:
            X_in = trainset
        else:
            X_in = pd.concat([trainset, testset.iloc[:i, :]])  # DataFrame.append was removed in pandas 2.0

        X_out = testset.iloc[i,0]
Example #24
    columns=a.columns)  # re-applying column names

# plt.figure(figsize=(15, 5))
# plt.ylabel("Returns")
# plt.plot(a_returns)
# plt.show()

# plt.figure(figsize=(15, 5))
# plt.ylabel("Log Value")
# plt.plot(a_ts)
# plt.show()

from statsmodels.tsa.api import VAR

model = VAR(a_diff[:'2016-01-01'])
model.select_order()  # uses information criteria to select the order of the model
reg = model.fit(5)  # number of AR terms to include

sample = a_diff[:'2016-01-04'].values
fcast = reg.forecast(y=sample, steps=10)

# plt.plot(fcast[:,3])
reg.plot_forecast(20)


def dediff(end, forecast):
    future = np.copy(forecast)
    for i in range(np.shape(forecast)[0]):
        if (i == 0):
            future[i] = end + forecast[0]
        else:
Example #25
                regression="c",
                autolag="AIC")
            if p_val < 0.05:
                unit_roots.append(fw_1w_prices)

    i = 1
    for fw_1w_prices_1 in unit_roots:
        ## Set up nested for loop like this to avoid testing twice for cointegration on any two pairs
        for fw_1w_prices_2 in unit_roots[i:]:
            if not fw_1w_prices_1.equals(fw_1w_prices_2):
                grouped_fw_1w_prices = (pd.concat(
                    (fw_1w_prices_1, fw_1w_prices_2), axis=1))
                ## VAR model
                model = VAR(grouped_fw_1w_prices)
                ## Optimal VAR(p) lag structure
                p = find_max_lag_var(model.select_order(5).summary().data[1:])
                ## Include p-1 lags in cointegration test for control for any existing autocorrelation in u_t (disturbance term)
                coint_result = coint_johansen(grouped_fw_1w_prices, 1,
                                              max(p - 1, 0))
                ## Statistically significant proof of cointegration at 5% level
                if find_coint_relationship(trace_stats=coint_result.lr1,
                                           crit_vals=coint_result.cvm) == 1:
                    coint_relationships.append(grouped_fw_1w_prices)
        i += 1

    if coint_relationships:
        for relationship in coint_relationships:
            fw_1w_prices_1 = relationship.iloc[:, 0]
            fw_1w_prices_2 = relationship.iloc[:, 1]
            N = len(fw_1w_prices_1.index)
Example #26
pd.Series(names).to_hdf('data.h5', 'tickers')

corr = pd.DataFrame(index=stocks.columns)
for etf, data in etfs.items():
    corr[etf] = stocks.corrwith(data)

#cmap = sns.diverging_palette( 220, 10, as_cmap = True)
#sns.clustermap( corr, cmap= cmap, center = 0)

#stocks.shape

security = etfs['AAXJ.US'].loc['2010':'2020']
candidates = stocks.loc['2010':'2020']

security = security.div(security.iloc[0])
candidates = candidates.div(candidates.iloc[0])
spreads = candidates.sub(security, axis=0)

n, m = spreads.shape
X = np.ones(shape=(n, 2))
X[:, 1] = np.arange(1, n + 1)

for candidate, prices in candidates.items():
    df = pd.DataFrame({'s1': security, 's2': prices})
    var = VAR(df.values)
    lags = var.select_order()
    k_ar_diff = lags.selected_orders['aic']
    coint_johansen(df, det_order=0, k_ar_diff=k_ar_diff)
    coint(security, prices, trend='c')[:2]
    coint(prices, security, trend='c')[:2]
Example #27
def cross_validation_VAR(train):
    model_VAR = VAR(train)
    x = model_VAR.select_order(maxlags=24)
def create_var_model(training_set):
    var_model = VAR(training_set)
    lag_order = var_model.select_order()
    lag_order_selected = lag_order.selected_orders['aic']
    var_model_results = var_model.fit(lag_order_selected)
    return var_model_results
Example #29
def var_forecast(coin, data_stats, train_data, actual_df, nobs, verbose=False):
    """
    This function performs the following:
        - forecast the time-series using VAR
        - durbin watson testing on the residual from the model
        - obtain normaltest, kurtosis and skewness of the residual from the model
        - compute the forecast accuracy

    The number of days forecast is the minimum value between the lag order and the nobs.

    Args:
        coin: The cryptocurrency the time-series belongs to
        data_stats: The data_stats dataframe for storing the durbin_watson, norm_stat, norm_p, kurtosis and skewness value
        train_data: Train data containing the features for VAR forecast
        actual_df: The actual value to be compared against the forecasted results
        nobs: Number of observations to forecast
        verbose: To print the debugging statements

    Returns:
        fitted_df: Dataframe containing residual of the features
        data_stats: Dataframe containing durbin_watson, norm_stat, norm_p, kurtosis and skewness value
        accuracy_prod: measures of accuracy for the forecast
        pred_df: predicted results
    """
    # standardizing features
    scal = StandardScaler()
    df_scaled = pd.DataFrame(
        scal.fit_transform(train_data.values),
        columns=train_data.columns,
        index=train_data.index,
    )

    mod = VAR(df_scaled, freq="D")

    selected_orders = mod.select_order().selected_orders
    max_lag = selected_orders["aic"]
    res = mod.fit(maxlags=max_lag, ic="aic")

    if verbose:
        print(coin, res.summary())

    fitted_df = res.resid.rename(columns={"Returns": "Returns residual"})[
        "Returns residual"
    ]
    # check for auto-correlation of the residual
    out = durbin_watson(res.resid)

    # collect the auto-correlation results to be loaded into atoti later on
    for col, val in zip(df_scaled.columns, out):

        # get the residual values
        metric = res.resid[col]
        stat, p = stats.normaltest(metric)
        kurtosis = stats.kurtosis(metric)
        skewness = stats.skew(metric)

        data_stats.loc[
            (data_stats["coin_symbol"] == coin) & (data_stats["metric_name"] == col),
            ["durbin_watson", "norm_stat", "norm_p", "kurtosis", "skewness"],
        ] = [val, stat, p, kurtosis, skewness]

        if verbose:
            print(
                "+++++++++++++ data_stats",
                data_stats.loc[
                    (data_stats["coin_symbol"] == coin)
                    & (data_stats["metric_name"] == col)
                ],
            )

            autocorrelation(val)

    # Get the lag order
    lag_order = res.k_ar

    if lag_order > 0:
        # Forecasting
        input_data = df_scaled.values[-lag_order:]
        # take the minimal forecast between the lag order and the number of observations required
        forecast_steps = lag_order if lag_order < nobs else nobs
        pred = res.forecast(y=input_data, steps=forecast_steps)
        pred_transform = scal.inverse_transform(pred)

        # we generate index from the last date for a period equivalent to the size of the forecast
        last_date = df_scaled.tail(1).index.get_level_values("date").to_pydatetime()[0]
        # 'closed' was renamed to 'inclusive' in pandas 1.4 and removed in 2.0.
        date_indices = pd.date_range(
            start=last_date, periods=(forecast_steps + 1), inclusive="right"
        )
        pred_df = pd.DataFrame(
            pred_transform,
            index=date_indices,
            columns=df_scaled.columns,
        ).reset_index()

        accuracy_prod = forecast_accuracy(
            pred_df["Returns"].values, actual_df["Returns"][:forecast_steps]
        )
        accuracy_prod = pd.DataFrame(accuracy_prod, index=[coin])
        accuracy_prod["lag_order"] = lag_order
        accuracy_prod["Observations"] = forecast_steps

        if verbose:
            for k, v in accuracy_prod.items():
                print(adjust(k), ": ", v)

        pred_df["coin_symbol"] = coin
        pred_df["Subset"] = "Test"
        pred_df.rename(columns={"index": "date"}, inplace=True)

        fitted_df = fitted_df.reset_index()
        fitted_df["coin_symbol"] = coin
        fitted_df["Subset"] = "Train"
        fitted_df["date"] = fitted_df["date"].apply(lambda x: x.strftime("%Y-%m-%d"))

        return (
            fitted_df,
            data_stats.loc[~data_stats["norm_stat"].isnull()],
            accuracy_prod,
            pred_df,
        )
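A stripped-down sketch of the standardize, fit, forecast, inverse-transform round trip used above (synthetic daily data; column names are illustrative):

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.api import VAR

rng = np.random.default_rng(11)
idx = pd.date_range('2021-01-01', periods=200, freq='D')
raw = pd.DataFrame(rng.standard_normal((200, 2)), index=idx,
                   columns=['Returns', 'Volume'])

scal = StandardScaler()
scaled = pd.DataFrame(scal.fit_transform(raw.values), index=idx, columns=raw.columns)

res = VAR(scaled, freq='D').fit(2)
pred = res.forecast(scaled.values[-res.k_ar:], steps=7)
print(scal.inverse_transform(pred))   # forecasts back on the original scale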
Example #30
# VAR Vector
variables = [
    model_data['total_sales'],
    model_data['inflation'],
    model_data['interest_rate'],
]
vector = np.column_stack(variables)

# Fit a VAR regression.
model = VAR(vector)
results = model.fit(1)
print(results.summary())

# Fit the best in-sample predicting VAR.
print(model.select_order(6).summary())
results = model.fit(maxlags=6, ic='bic')
print('Best lag order:', results.k_ar)

# Create a forecast. (FIXME:)
# forecast = results.forecast(data.values[-lag_order:], 5)

# Show the data!
forecasts = results.plot_forecast(9)
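One way the FIXME above could be realised, reusing the `vector` and `results` already defined in this example (a sketch; it assumes the BIC-selected lag order is nonzero):

lag_order = results.k_ar
forecast = results.forecast(vector[-lag_order:], steps=5)
print(forecast)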

#-----------------------------------------------------------------------
# Drafts
#-----------------------------------------------------------------------

# Local imports.
# from VAR import VAR, VAR_forecast
Example #31
results.plot()

# In[8]:

# Autocorrelation

results.plot_acorr()

# ## Lag order selection

# In[9]:

# Show the information criteria for lag orders up to a maximum of 5

print(model.select_order(5).summary())

# Estimate model with maximum lag according to the AIC criterion

results = model.fit(maxlags=5, ic='aic')
results.summary()

# ## Forecasting

# In[10]:

lag_order = results.k_ar

# Use the last k_ar observations as the initial values for the forecast

results.forecast(data.values[-lag_order:], 5)