Пример #1
0
def coint_test(df,max_lag=60,signif=0.01,deterministic='ci',output = "all",rule ="bic"):
    """Run a Johansen cointegration test with a data-driven lag order.

    Parameters
    ----------
    df : pd.DataFrame
        Multivariate time series (one column per variable).
    max_lag : int
        Maximum number of lags considered by ``vecm.select_order``.
    signif : float
        Significance level used for the "standard" output; must be one of
        0.1, 0.05, 0.01 (the levels reported by ``coint_johansen``).
    deterministic : str
        Deterministic-term specification passed to ``vecm.select_order``.
    output : str
        "all"      -> raw critical values stacked with test statistics;
        "standard" -> starred DataFrame of trace/max-eigenvalue statistics;
        anything else -> boolean verdict for cointegration rank 0.
    rule : str
        Information criterion attribute to read from the order-selection
        result ("aic", "bic", "hqic", "fpe").

    Returns
    -------
    Depends on ``output`` (see above); always paired with the lag order
    (or, for "standard", the full order-selection result).
    """
    orders = vecm.select_order(np.array(df),max_lag,deterministic=deterministic)

    # getattr is the idiomatic replacement for __getattribute__ here:
    # fetch the lag selected by the requested information criterion.
    k_ar_diff = getattr(orders, rule)

    # Johansen's procedure needs at least one lagged difference.
    k_ar_diff = k_ar_diff if k_ar_diff > 0 else 1

    results = vecm.coint_johansen(df,0,k_ar_diff)

    # Significance levels in the order coint_johansen reports critical values.
    possible_signif_values = [0.1, 0.05, 0.01]

    if output == "all":
        # Critical values (transposed) stacked over the test statistics.
        trace = np.vstack((results.cvt.T,results.lr1))
        MaxEig = np.vstack((results.cvm.T,results.lr2))

        return {"trace":trace,"MaxEig":MaxEig},k_ar_diff

    elif output == "standard":
        signif_index = possible_signif_values.index(signif)

        # One column of critical values per test (trace, max-eigenvalue).
        crit_vals = np.vstack((results.cvt[:,signif_index],results.cvm[:,signif_index])).T

        test_stat = np.vstack((results.lr1,results.lr2)).T

        # True where the statistic exceeds its critical value (rejection).
        masks = test_stat>crit_vals

        test_stat = np.round(pd.DataFrame(test_stat,columns=['trace',' Maxeig'],index=np.arange(1,df.shape[1]+1,1)),4)
        # Mark significant statistics with a trailing "*".
        return test_stat.where(~masks,test_stat.astype(str)+"*"),orders
    else:
        # Boolean verdict: does either test reject "rank 0" (no cointegration)
        # at ANY of the three significance levels?
        if any(results.lr1[0]>results.cvt[0]) or any(results.lr2[0]>results.cvm[0]):
            return True,k_ar_diff
        else:
            return False,k_ar_diff
Пример #2
0
#-----------------------------------------------------------------
#6. test for cointegration (Engle-Granger test; null: no cointegration)
result = sm.tsa.stattools.coint(gold_data, stock_data)
print(result)
# p-value = 0.4474903899530853 is GREATER than the 5% and 10% levels,
# so we FAIL to reject the null hypothesis of no cointegrating relationship
# (the original comment stated the inverted conclusion)

#-----------------------------------------------------------------
#7. VAR model
#build data frame of differenced series
# NOTE(review): gold_diff / stock_diff must be computed earlier -- not
# visible in this excerpt
newdiff = {'goldPrice': gold_diff,'stockIndex': stock_diff}
newdiffSeries = pd.DataFrame(newdiff)
dataframe = newdiffSeries[['goldPrice','stockIndex']]

#choose lag order via information criteria (up to 12 lags)
lag = select_order(dataframe, 12)
print(lag)

#model result: fit a VAR(2) on the differenced data
mod = sm.tsa.VAR(dataframe)
fitMod = mod.fit(2)
print(fitMod.summary())

#-----------------------------------------------------------------
#8. granger causality test (maxlag=2 to match the fitted VAR order)
granger_result = grangercausalitytests(dataframe, maxlag=2)
print(granger_result)

#The Null hypothesis for grangercausalitytests is that the time series
#in the second column, x2, does NOT Granger cause the time series in
#the first column, x1.
Пример #3
0
    def select_k_ar_diff(self,maxLag):
        """Select the number of lagged differences by information criterion.

        Runs ``vecm.select_order`` on ``self.y_t`` (up to ``maxLag`` lags,
        using ``self.deterministic``) and reads the lag chosen by the
        criterion named in ``self.k_ar_diff`` (e.g. "aic"/"bic").

        Returns
        -------
        int
            The selected lag, floored at 1 (Johansen needs >= 1 lag).
        """
        res = vecm.select_order(self.y_t, maxlags=maxLag, deterministic=self.deterministic)
        k_ar_diff = getattr(res, self.k_ar_diff)

        # BUG FIX: the original `k_ar_diff += 1 if k_ar_diff == 0 else k_ar_diff`
        # DOUBLED any non-zero selection (k -> 2k). The intent -- matching the
        # usual idiom -- is simply to floor the selected lag at 1.
        return max(k_ar_diff, 1)
Пример #4
0
from pandas_datareader.data import DataReader
# Pull consumer-related monthly series from FRED.
# NOTE(review): `start_data` and `today` must be defined earlier in the file
# (not visible here) -- `start_data` looks like a typo for `start_date`; verify.
consumer_df = DataReader([
    'PCE', 'UMCSENT', 'UNRATE', 'LCEAMN01USM189S', 'TOTALSL', 'MRTSSM44X72USS',
    'HOUST'
], 'fred', start_data, today)
consumer_df = consumer_df.dropna()
# Rename FRED series codes to readable names (same order as requested above).
consumer_df.columns = [
    'PCE', 'ConConf', 'Unempl', 'HourlyEarning', 'CCredit', 'RetSales',
    'HouseStarts'
]
# Force a regular month-end frequency by averaging within each month.
consumer_df = consumer_df.resample('1M').mean()
type(consumer_df)

# lag order selection for the VECM: 'ci' puts the constant inside the
# cointegration relation; seasons=12 adds monthly seasonal dummies
lag_order = select_order(data=consumer_df,
                         maxlags=10,
                         deterministic="ci",
                         seasons=12)
print(lag_order.summary())
print(lag_order)

# Cointegration rank via the Johansen trace test at the 5% level
rank_test = select_coint_rank(consumer_df, 0, 2, method="trace", signif=0.05)
rank_test.rank
print(rank_test.summary())
print(rank_test)

# Parameter Estimation
model = VECM(consumer_df,
             deterministic="ci",
             seasons=12,
             k_ar_diff=lag_order.aic,
Пример #5
0
def analyze(df, start_time, end_time, symbols):
  """Run a cointegration analysis on the price series in `df`.

  Selects a VAR order by AIC, picks a cointegration rank via the Johansen
  trace test, and -- if a non-zero rank is found -- fits a VECM, derives the
  cointegrated series, runs an ADF test on it, and saves a plot. A summary
  dict is written to results/<file_name>.txt and returned.

  Parameters
  ----------
  df : pd.DataFrame
      Price data, one column per symbol; index is used as the plot x-axis.
  start_time, end_time : int or str
      Window bounds; ints are treated as timestamps, strings are converted
      with `string_to_timestamp` (defined elsewhere in this project).
  symbols : list[str]
      Symbol names, used only to build the output file name.
  """
  file_name = "{}_{}_{}_coint_series".format(
    '_'.join(list(map(lambda s: s.replace('/', '_'), symbols))),
    start_time if isinstance(start_time, int) else start_time.replace(' ', '_'),
    end_time if isinstance(end_time, int) else end_time.replace(' ', '_')
  )

  results = {
    'symbols': str(symbols),
    'ts_start': start_time if isinstance(start_time, int) else string_to_timestamp(start_time),
    'ts_end': end_time if isinstance(end_time, int) else string_to_timestamp(end_time)
  }

  data = df.to_numpy()
  # Heuristic cap on the VAR order: one eighth of the sample length.
  max_order = int(data.shape[0] / 8)
  select_order_res = select_order(data, max_order, deterministic="ci")
  selected_aic_order = select_order_res.selected_orders['aic']
  results['selected_order'] = selected_aic_order
  select_coint_rank_result = select_coint_rank(data, 0, selected_aic_order)
  print(select_coint_rank_result.summary())
  # First rank whose trace statistic fails to reject becomes the selection.
  # NOTE(review): if every rank rejects, this leaves 0 (treated as "no
  # cointegration" below) -- confirm that fallthrough is intended.
  selected_coint_rank = 0
  for i, stat in enumerate(select_coint_rank_result.test_stats):
    if stat < select_coint_rank_result.crit_vals[i]:
      selected_coint_rank = i
      break
  results['selected_rank'] = selected_coint_rank
  if selected_coint_rank != 0:
    model = VECM(data, deterministic="ci", k_ar_diff=selected_aic_order, coint_rank=selected_coint_rank)
    res = model.fit()
    results['cointegrated_alpha'] = np.array2string(res.alpha.flatten())
    results['cointegrated_beta'] = np.array2string(res.beta.flatten())
    results['cointegrated_constant'] = res.det_coef_coint.flatten()[0]
    # Cointegrated (stationary) combination of the original series.
    cointegrated_series = np.dot(data, res.beta).flatten() + results['cointegrated_constant']
    cointegrated_mean = np.mean(cointegrated_series)
    results['cointegrated_mean'] = cointegrated_mean
    cointegrated_std = np.std(cointegrated_series)
    results['cointegrated_std'] = cointegrated_std
    max_deviation = np.amax(np.absolute(cointegrated_series - cointegrated_mean))
    results['cointegrated_max_deviation'] = max_deviation

    adf_res = adfuller(cointegrated_series, maxlag=max_order, store=True, regresults=True)
    cointegrated_adf_p_value = adf_res[1]
    results['cointegrated_adf_p_value'] = cointegrated_adf_p_value
    cointegrated_adf_lag = adf_res[3].usedlag
    results['cointegrated_adf_lag'] = cointegrated_adf_lag
    # Mean-reversion half-life from the AR coefficient of the ADF regression.
    cointegrated_half_life = -math.log(2) / adf_res[3].resols.params[0]
    results['cointegrated_half_life'] = cointegrated_half_life

    fig, ax = plt.subplots()
    ax.plot(df.index.to_numpy(), cointegrated_series)
    path = "figures/{}.png".format(file_name)
    results['cointegrated_series_img_path'] = path
    plt.savefig(path)
    # BUG FIX: release the figure -- without this, repeated calls leak
    # one open matplotlib figure per invocation.
    plt.close(fig)
  with open("results/{}.txt".format(file_name), "w") as result_file:
    result_file.write("{}".format(results))
  return results

# from dataloader import load_data

# dataframe = load_data('2019-07-28', '2019-07-29', ['BTC', 'ETH'], [])
# print(analyze(dataframe, '2019-07-28', '2019-07-29', ['BTC', 'ETH']))
Пример #6
0
def anomaly_vecm(list_var,
                 num_fut=5,
                 desv_mse=2,
                 train=True,
                 name='model-name'):
    """Fit a VECM for anomaly detection and forecasting.

    Builds a DataFrame from `list_var` (one column per series), fits a VECM
    on the first 70% of the sample to evaluate forecasts against the held-out
    30%, then refits on the full sample to produce `num_fut` future steps.
    All evaluation artefacts are accumulated in an `engine_output_creation`
    instance (project-defined) whose `engine_output` is returned.

    Parameters
    ----------
    list_var : list of 1-D sequences
        Input series; the first one is the target that is forecast/evaluated.
    num_fut : int
        Number of out-of-sample steps to forecast at the end.
    desv_mse, train, name :
        Accepted for interface compatibility; not used in this body.
    """
    df_var = pd.DataFrame()
    for i in range(len(list_var)):
        df_var['var_{}'.format(i)] = list_var[i]

    # split: first 70% for training, the rest held out for evaluation
    tam_train = int(len(df_var) * 0.7)
    df_train = df_var[:tam_train]
    print('Tamanio train: {}'.format(df_train.shape))
    # .copy() prevents pandas' SettingWithCopyWarning when columns are added
    # below, and guarantees those columns do not leak back into df_var
    # (df_var is reused untouched for the final full-sample fit).
    df_test = df_var[tam_train:].copy()

    # NOTE(review): lag_order is computed but never passed to VECM
    # (k_ar_diff keeps its default) -- confirm whether that is intended.
    lag_order = vecm.select_order(data=df_train,
                                  maxlags=10,
                                  deterministic="ci",
                                  seasons=0)
    rank_test = vecm.select_coint_rank(df_train,
                                       0,
                                       3,
                                       method="trace",
                                       signif=0.01)
    print("pasa")
    model = vecm.VECM(df_train,
                      deterministic="ci",
                      seasons=4,
                      coint_rank=rank_test.rank)  # =1
    print("define")
    vecm_res = model.fit()
    futures = vecm_res.predict(steps=len(df_test))
    # keep only the forecast of the first variable
    # (comprehension also avoids shadowing the builtin `list`)
    result = [row[0] for row in futures]

    engine = engine_output_creation('vecm')
    print("empieza")
    df_test['puntos'] = df_test.index
    df_test['valores'] = df_test[df_var.columns[0]]

    engine.alerts_creation(result, df_test)
    engine.metrics_generation(df_test[df_test.columns[0]].values, result)
    engine.debug_creation(result, df_test)

    # refit on the full sample to produce the final num_fut-step forecast
    lag_order = vecm.select_order(data=df_var,
                                  maxlags=10,
                                  deterministic="ci",
                                  seasons=4)
    rank_test = vecm.select_coint_rank(df_var,
                                       0,
                                       3,
                                       method="trace",
                                       signif=0.01)
    print("pasa")
    model = vecm.VECM(df_var,
                      deterministic="ci",
                      seasons=4,
                      coint_rank=rank_test.rank)  # =1
    print("define")
    vecm_res = model.fit()
    futures = vecm_res.predict(steps=num_fut)
    result = [row[0] for row in futures]

    engine.forecast_creation(result, df_var.shape[0], num_fut)

    return (engine.engine_output)
Пример #7
0
# In[7]:

# Hold out the last 15 observations for evaluation
nobs = 15
train_ecm, test_ecm = X[0:-nobs], X[-nobs:]

# Check size
print(train_ecm.shape)
print(test_ecm.shape)

# In[8]:

# VECM model fitting
from statsmodels.tsa.vector_ar import vecm
# pass "1min" frequency
train_ecm.index = pd.DatetimeIndex(train_ecm.index).to_period('1min')
model = vecm.select_order(train_ecm, maxlags=8)
print(model.summary())

# In[10]:

# Display floats with 2 decimals (set once; the original set this twice).
pd.options.display.float_format = "{:.2f}".format
"""definition of det_orderint:
-1 - no deterministic terms; 0 - constant term; 1 - linear trend"""
# NOTE(review): coint_johansen and DataFrame are used unqualified -- they
# must be imported in an earlier notebook cell (from
# statsmodels.tsa.vector_ar.vecm and pandas respectively); verify.
model = coint_johansen(endog=train_ecm, det_order=1, k_ar_diff=3)
print('Eigen statistic:')
print(model.eig)
print()
print('Critical values:')
d = DataFrame(model.cvt)
d.rename(columns={0: '90%', 1: '95%', 2: '99%'}, inplace=True)
Пример #8
0
# Assemble the level series for the VECM: log-transform the oil price and
# terms-of-trade, keep the other series as-is, and drop rows with missing data
data = pd.concat([exchange_rate['AEXCAUS'],
           np.log(oil['DCOILWTICO']),
           np.log(tot['tot']),
           cpi_ir['CPI_IR'],
           debt['rel_debt'],
           ri_ir['INT_IR']
          ],
          axis=1).dropna()


# In[10]:


from statsmodels.tsa.vector_ar.vecm import VECM, select_coint_rank, select_order
#print the k_ar_diff order using aic for the no-deterministic-terms ('nc') model
# NOTE(review): newer statsmodels spells this deterministic="n" -- confirm the
# installed version accepts 'nc'
nc_lags = select_order(data,maxlags=2,deterministic='nc')
print(nc_lags.summary())
print("nc lags based on aic: ",nc_lags.aic)


# In[11]:


#trace test for the cointegration rank with no deterministic term (det_order=-1);
#k_ar_diff comes from the AIC selection above (nc_lags.aic), not a fixed 1
#(the original comment claiming "set to 1" did not match the code)
vec_rank = select_coint_rank(data, det_order = -1, k_ar_diff = nc_lags.aic, method = 'trace', signif=0.05)
print(vec_rank.summary())


# In[12]:
Пример #9
0
# Slice each price series to its [start_index, end_index] window
# (start_indices / end_indices / prices are defined earlier -- not in this excerpt)
sliced_prices = list(
    map(
        lambda price_with_idx: price_with_idx[1][start_indices[price_with_idx[
            0]]:end_indices[price_with_idx[0]] + 1], enumerate(prices)))
# matrix of close prices: one row per symbol
data = np.asarray(
    list(
        map(lambda price: list(map(lambda p: p['close'], price)),
            sliced_prices)))
times = list(map(lambda p: p['time'], sliced_prices[0]))
# Per-series ADF stationarity diagnostics
for series in data:
    # NOTE(review): reshape(len(series,)) flattens a row that is already 1-D;
    # it appears to be a no-op here
    results = adfuller(series.reshape(len(series, )),
                       store=True,
                       regresults=True)
    print(results[0:3])
    print(results[3].usedlag)
    print(results[3].resols.summary())
    # mean-reversion half-life from the AR coefficient of the ADF regression
    print(-math.log(2) / results[3].resols.params[0])

# NOTE(review): this exit() makes everything below unreachable -- it looks
# like leftover debugging; remove it to actually run the VECM fit
exit()

data_to_fit = data.transpose()
print(select_order(data_to_fit, 50, deterministic="co").selected_orders)
print(select_coint_rank(data_to_fit, 0, 15).test_stats)
model = VECM(data_to_fit, deterministic="co", k_ar_diff=15)
res = model.fit()

# NOTE(review): `ax` is not defined in this excerpt -- presumably created
# earlier in the file; verify before relying on this plot
ax.plot(times, np.dot(data_to_fit, res.beta))
plt.show()