Exemplo n.º 1
0
 def runCointegrationJohansen(self, johansenSignificant):
     """Run a trace-based cointegration rank test on ``self.data``.

     The test is run with ``det_order=0`` and ``k_ar_diff=1`` at the
     significance level given by ``johansenSignificant``.

     Returns ``True`` when the number of reported test rows
     (``min(rank, neqs - 1) + 1``) equals the ``r_1`` value of the first
     row, and ``False`` otherwise.
     """
     rank_result = vecm.select_coint_rank(
         self.data,
         det_order=0,
         k_ar_diff=1,
         method='trace',
         signif=johansenSignificant,
     )
     last_row = min(rank_result.rank, rank_result.neqs - 1)

     # One row per test: [r_0, r_1, test statistic, critical value].
     rows = []
     for idx in range(last_row + 1):
         rows.append([
             idx,
             rank_result.r_1[idx],
             rank_result.test_stats[idx],
             rank_result.crit_vals[idx],
         ])

     return bool(len(rows) == rows[0][1])
Exemplo n.º 2
0
    'PCE', 'ConConf', 'Unempl', 'HourlyEarning', 'CCredit', 'RetSales',
    'HouseStarts'
]
# Aggregate the data into '1M' resampling bins, averaging the observations
# that fall into each bin.
consumer_df = consumer_df.resample('1M').mean()
type(consumer_df)

# Lag order selection: evaluate up to 10 lagged differences with
# deterministic="ci" and 12 seasons.
lag_order = select_order(data=consumer_df,
                         maxlags=10,
                         deterministic="ci",
                         seasons=12)
print(lag_order.summary())
print(lag_order)

# Cointegration rank: trace test at the 5% level. The positional arguments
# are det_order=0 and k_ar_diff=2.
# NOTE(review): k_ar_diff is hard-coded to 2 here while the model below uses
# lag_order.aic - confirm the two are meant to differ.
rank_test = select_coint_rank(consumer_df, 0, 2, method="trace", signif=0.05)
rank_test.rank
print(rank_test.summary())
print(rank_test)

# Parameter estimation: the VECM uses the AIC-selected lag order and the rank
# chosen by the trace test above.
model = VECM(consumer_df,
             deterministic="ci",
             seasons=12,
             k_ar_diff=lag_order.aic,
             coint_rank=rank_test.rank)
vecm_res = model.fit()
vecm_res.summary()

# Five-step-ahead forecasts; per the statsmodels documentation, passing
# alpha additionally requests confidence-interval bounds.
vecm_res.predict(steps=5)
vecm_res.predict(steps=5, alpha=0.05)
Exemplo n.º 3
0
def _vecm_forecast_first_series(df_fit, steps):
    """Fit a VECM on ``df_fit`` and forecast its first column.

    The cointegration rank comes from a trace test (``det_order=0``,
    ``k_ar_diff=3``, 1% significance). The model is built with
    ``deterministic="ci"`` and ``seasons=4``.

    Returns a list with the first-column value of each of the ``steps``
    forecast rows.
    """
    rank_test = vecm.select_coint_rank(df_fit,
                                       0,
                                       3,
                                       method="trace",
                                       signif=0.01)
    print("pasa")
    model = vecm.VECM(df_fit,
                      deterministic="ci",
                      seasons=4,
                      coint_rank=rank_test.rank)
    print("define")
    vecm_res = model.fit()
    futures = vecm_res.predict(steps=steps)
    return [row[0] for row in futures]


def anomaly_vecm(list_var,
                 num_fut=5,
                 desv_mse=2,
                 train=True,
                 name='model-name'):
    """Build VECM-based test-window and future forecasts for the first series.

    Each element of ``list_var`` becomes a column ``var_<i>`` of one
    DataFrame. The first 70% of the rows are used to fit a VECM whose
    forecast over the remaining rows is handed to the output engine
    (``alerts_creation``, ``metrics_generation``, ``debug_creation``).
    A second VECM is then fitted on all rows and its ``num_fut``-step
    forecast is passed to ``forecast_creation``.

    Args:
        list_var: sequence of series; the first one is the series whose
            forecasts are reported.
        num_fut: number of future steps to forecast from the full data.
        desv_mse: accepted for interface compatibility; not used here.
        train: accepted for interface compatibility; not used here.
        name: accepted for interface compatibility; not used here.

    Returns:
        The ``engine_output`` attribute of the engine created by
        ``engine_output_creation('vecm')``.
    """
    df_var = pd.DataFrame()
    for i, values in enumerate(list_var):
        df_var['var_{}'.format(i)] = values

    # Split: first 70% of the rows for fitting, the rest for evaluation.
    tam_train = int(len(df_var) * 0.7)
    df_train = df_var[:tam_train]
    print('Tamanio train: {}'.format(df_train.shape))
    # Copy the slice so the helper columns added below are written to an
    # independent frame instead of a view of df_var (avoids pandas'
    # SettingWithCopyWarning).
    df_test = df_var[tam_train:].copy()

    # NOTE(review): the previous version also called vecm.select_order() on
    # the training and full data, but its result was never passed to VECM
    # (the model runs with its default k_ar_diff). The unused computation
    # was removed; wire the selected order in here if that was the intent.
    result = _vecm_forecast_first_series(df_train, len(df_test))

    engine = engine_output_creation('vecm')
    print("empieza")
    df_test['puntos'] = df_test.index
    df_test['valores'] = df_test[df_var.columns[0]]

    engine.alerts_creation(result, df_test)
    engine.metrics_generation(df_test[df_test.columns[0]].values, result)
    engine.debug_creation(result, df_test)

    # Refit on the complete data set to forecast beyond the observed rows.
    result = _vecm_forecast_first_series(df_var, num_fut)
    engine.forecast_creation(result, df_var.shape[0], num_fut)

    return (engine.engine_output)
Exemplo n.º 4
0
def analyze(df, start_time, end_time, symbols):
  """Fit a VECM to ``df`` and summarise the resulting cointegrated series.

  A lag order is selected by AIC, then a cointegration rank is chosen as the
  index of the first test statistic that is below its critical value. When
  that rank is non-zero, a VECM is fitted and the series ``data @ beta`` plus
  the first cointegration constant is analysed (mean, standard deviation,
  maximum absolute deviation, ADF p-value and lag, half-life estimate) and
  plotted to ``figures/<file_name>.png``. The results dictionary is always
  written to ``results/<file_name>.txt``.

  Args:
    df: DataFrame of the series to analyse; its index is used as the x axis
      of the plot.
    start_time: start of the window, either an int or a string; strings are
      converted with ``string_to_timestamp`` for the results.
    end_time: end of the window, same conventions as ``start_time``.
    symbols: iterable of symbol strings; ``/`` is replaced by ``_`` when
      building the output file name.

  Returns:
    dict with the collected results.
  """
  file_name = "{}_{}_{}_coint_series".format(
    '_'.join(s.replace('/', '_') for s in symbols),
    start_time if isinstance(start_time, int) else start_time.replace(' ', '_'),
    end_time if isinstance(end_time, int) else end_time.replace(' ', '_')
  )

  results = {
    'symbols': str(symbols),
    'ts_start': start_time if isinstance(start_time, int) else string_to_timestamp(start_time),
    'ts_end': end_time if isinstance(end_time, int) else string_to_timestamp(end_time)
  }

  data = df.to_numpy()
  # Allow at most one lag per eight observations.
  max_order = data.shape[0] // 8
  select_order_res = select_order(data, max_order, deterministic="ci")
  selected_aic_order = select_order_res.selected_orders['aic']
  results['selected_order'] = selected_aic_order
  select_coint_rank_result = select_coint_rank(data, 0, selected_aic_order)
  print(select_coint_rank_result.summary())

  # The rank is the index of the first statistic below its critical value.
  # NOTE(review): if every statistic exceeds its critical value the rank
  # stays 0 and no model is fitted - confirm this is the intended handling.
  selected_coint_rank = 0
  rank_tests = zip(select_coint_rank_result.test_stats,
                   select_coint_rank_result.crit_vals)
  for i, (test_stat, crit_val) in enumerate(rank_tests):
    if test_stat < crit_val:
      selected_coint_rank = i
      break
  results['selected_rank'] = selected_coint_rank

  if selected_coint_rank != 0:
    model = VECM(data, deterministic="ci", k_ar_diff=selected_aic_order, coint_rank=selected_coint_rank)
    res = model.fit()
    results['cointegrated_alpha'] = np.array2string(res.alpha.flatten())
    results['cointegrated_beta'] = np.array2string(res.beta.flatten())
    results['cointegrated_constant'] = res.det_coef_coint.flatten()[0]
    cointegrated_series = np.dot(data, res.beta).flatten() + results['cointegrated_constant']
    cointegrated_mean = np.mean(cointegrated_series)
    results['cointegrated_mean'] = cointegrated_mean
    results['cointegrated_std'] = np.std(cointegrated_series)
    results['cointegrated_max_deviation'] = np.amax(
      np.absolute(cointegrated_series - cointegrated_mean))

    adf_res = adfuller(cointegrated_series, maxlag=max_order, store=True, regresults=True)
    adf_store = adf_res[3]
    results['cointegrated_adf_p_value'] = adf_res[1]
    results['cointegrated_adf_lag'] = adf_store.usedlag
    # Half-life estimate: -ln(2) divided by the first coefficient of the
    # ADF regression.
    results['cointegrated_half_life'] = -math.log(2) / adf_store.resols.params[0]

    path = "figures/{}.png".format(file_name)
    results['cointegrated_series_img_path'] = path
    fig, ax = plt.subplots()
    try:
      ax.plot(df.index.to_numpy(), cointegrated_series)
      # Save this specific figure rather than whatever pyplot considers
      # the current one.
      fig.savefig(path)
    finally:
      # Figures stay registered with pyplot until closed; release it so
      # repeated calls do not accumulate open figures.
      plt.close(fig)

  with open("results/{}.txt".format(file_name), "w") as result_file:
    result_file.write("{}".format(results))
  return results

# from dataloader import load_data

# dataframe = load_data('2019-07-28', '2019-07-29', ['BTC', 'ETH'], [])
# print(analyze(dataframe, '2019-07-28', '2019-07-29', ['BTC', 'ETH']))
Exemplo n.º 5
0
# Plot and evaluate ``var_predict`` against ``df_tran``, then show the mean
# difference per column.
plot_predict(var_predict, df_tran)
forecast_evaluate(var_predict, df_tran)
(var_predict - df_tran).mean()



"""# VECM orginal data"""

from statsmodels.tsa.vector_ar import vecm

# Hold out the last ``steps`` rows of ``data`` as the evaluation window.
steps = 12
train, test = data[:-steps], data[-steps:]

# Trace test for the cointegration rank on the training rows (1% level).
vec_rank = vecm.select_coint_rank(
    train,
    det_order=1,
    k_ar_diff=1,
    method='trace',
    signif=0.01,
)
print(vec_rank.summary())

from statsmodels.tsa.api import VECM
vecm_model = VECM(
    endog=train,
    k_ar_diff=1,
    coint_rank=1,
    deterministic='ci',
)
vecm_fit = vecm_model.fit()
preds = vecm_fit.predict(steps=steps)

# Overwrite the hold-out rows of a copy of ``data`` with the forecasts,
# cell by cell.
vecm_predict = data.copy()
start = len(train)
n_columns = len(test.columns)
for i in range(steps):
    row_pos = start + i
    for j in range(n_columns):
        vecm_predict.iloc[row_pos, j] = preds[i][j]

plot_predict(vecm_predict, data)
forecast_evaluate(vecm_predict, data)
Exemplo n.º 6
0
# - shows the long-run equilibrium relationships of variables.
# - includes a short-run dynamic adjustment mechanism that describes how variables adjust when they are out of equilibrium.
# - uses adjustment coefficients to measure the forces that push the relationship towards long-run equilibrium.
#
# ### Cointegration rank of a VECM

# In[19]:

from statsmodels.tsa.vector_ar.vecm import select_coint_rank

# In[68]:

from statsmodels.tsa.vector_ar.vecm import select_coint_rank
# Trace test for the cointegration rank of ``train_ecm`` at the 1% level,
# with det_order=1 and three lagged differences; the summary table is
# printed below.
rank1 = select_coint_rank(train_ecm,
                          det_order=1,
                          k_ar_diff=3,
                          method='trace',
                          signif=0.01)
print(rank1.summary())

# - first column in the table shows the rank which is the number of cointegrating relationships for the dataset, while the second reports the number of equations in total.
# - λtrace statistics in the third column, together with the corresponding critical values.
# - first row of the table tests the null hypothesis of at most one cointegrating vector, against the alternative hypothesis that the number of cointegrating equations is strictly larger than the number assumed under the null hypothesis, i.e., larger than one.
# - test statistic of 254.9 considerably exceeds the critical value (117.0) and so the null of at most one cointegrating vector is rejected.
# - test statistic (89.92) also exceeds the critical value (87.77), so the null of at most two cointegrating vectors is rejected at the 1% level
# - test statistic (48.59) does not exceed the critical value (62.52), so the null of at most three cointegrating vectors cannot be rejected at the 1% level
#
# Below is the test based on the maximum-eigenvalue statistic:
#
# Maximum-eigenvalue statistic assumes a given number of r cointegrating relations under the null hypothesis and tests this against the alternative that there are r + 1 cointegrating equations.
Exemplo n.º 7
0
# In[10]:


from statsmodels.tsa.vector_ar.vecm import VECM, select_coint_rank, select_order
# Select the number of lagged differences (k_ar_diff) for the
# deterministic='nc' specification, trying at most 2 lags, and print the
# AIC choice.
nc_lags = select_order(data,maxlags=2,deterministic='nc')
print(nc_lags.summary())
print("nc lags based on aic: ",nc_lags.aic)


# In[11]:


# Trace-based cointegration rank test at the 5% level with det_order=-1.
# k_ar_diff is taken from the AIC-selected lag order above.
# NOTE(review): an earlier comment here said k_ar_diff was fixed at 1; the
# code passes nc_lags.aic - confirm which one is intended.
vec_rank = select_coint_rank(data, det_order = -1, k_ar_diff = nc_lags.aic, method = 'trace', signif=0.05)
print(vec_rank.summary())


# In[12]:


# Print the rank selected by the trace test.
print("trace cointegration rank: ", vec_rank.rank)


# In[13]:


#eigen rank
#k_ar_diff set to 1 since all variables are unit root at diff = 1
Exemplo n.º 8
0
# Cut every price list down to its own [start, end] index window (inclusive).
sliced_prices = [
    price[start_indices[idx]:end_indices[idx] + 1]
    for idx, price in enumerate(prices)
]
# One row of closing prices per sliced price list.
data = np.asarray(
    [[p['close'] for p in price] for price in sliced_prices])
# Time stamps are taken from the first sliced price list.
times = [p['time'] for p in sliced_prices[0]]

# Run an ADF test on every row and print the first three result fields, the
# lag used, the regression summary and -ln(2) divided by the first
# regression coefficient.
for series in data:
    results = adfuller(series.reshape(len(series)),
                       store=True,
                       regresults=True)
    print(results[0:3])
    print(results[3].usedlag)
    print(results[3].resols.summary())
    print(-math.log(2) / results[3].resols.params[0])

# The script stops here; nothing below this call is executed.
exit()

data_to_fit = data.transpose()
print(select_order(data_to_fit, 50, deterministic="co").selected_orders)
print(select_coint_rank(data_to_fit, 0, 15).test_stats)
# exit()
model = VECM(data_to_fit, deterministic="co", k_ar_diff=15)
res = model.fit()

ax.plot(times, np.dot(data_to_fit, res.beta))
plt.show()
Exemplo n.º 9
0
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VECM
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
from matplotlib.ticker import FuncFormatter

# Load the series, parsing the first CSV column as dates, index the frame by
# the "date" column and drop rows that contain missing values.
series = (pd.read_csv("../dados/series_log.csv",
                      parse_dates=[0]).set_index("date").dropna())

# Endogenous variables, in the column order passed to the model.
# endogenas = series.loc[:, ["spread", "selic", "ibc", "inad"]]
endogenas = series.loc[:, ["selic", "inad", "ibc", "spread"]]
# Exogenous regressor: the "igp" column with missing values replaced by 0.
# NOTE(review): rows with missing values were already dropped above, so
# fillna(0) looks redundant - confirm.
exogenas = series.loc[:, ["igp"]].fillna(0)

# Trace test for the cointegration rank, printed as a LaTeX tabular.
# NOTE(review): the rank test uses det_order=1 while the model below uses
# deterministic="co" - confirm the deterministic terms are meant to differ.
print(
    select_coint_rank(endog=endogenas,
                      det_order=1,
                      k_ar_diff=2,
                      method="trace").summary().as_latex_tabular())

# VECM with two lagged differences, one cointegration relation, the exogenous
# regressor, and 12 seasons with first_season=3 on an "MS" frequency.
model = VECM(endog=endogenas,
             exog=exogenas,
             deterministic="co",
             k_ar_diff=2,
             coint_rank=1,
             dates=series.index,
             freq="MS",
             seasons=12,
             first_season=3)

# Fit the model and print the estimation summary.
vecm = model.fit()

print(vecm.summary())