示例#1
0
def irgendwas():
    """Run the Johansen trace test on ``exp_data`` and pick critical values.

    This is the distilled form of an interactive session: mistyped retries
    (including one line that was a syntax error), calls that lacked the
    required ``det_order`` / ``k_ar_diff`` arguments, bare inspection
    expressions and a reference to the undefined name ``traces`` were removed.

    Relies on the module-level names ``pd``, ``np`` and ``exp_data``.

    Returns:
        tuple: ``(trace_stats, cvt)`` where ``trace_stats`` is ``out.lr1`` and
        ``cvt`` is the column of ``out.cvt`` selected for ``alpha = 0.05``.
    """
    # Import first: a function-local import makes the name local to the whole
    # function, so using it before this line raises UnboundLocalError.
    from statsmodels.tsa.vector_ar.vecm import coint_johansen

    pdf = pd.DataFrame(exp_data.T, columns=[0, 1, 2, 3])
    out = coint_johansen(pdf, -1, 5)

    # Maps alpha 0.10 / 0.05 / 0.01 onto column 0 / 1 / 2 of ``out.cvt``.
    alpha = 0.05
    cvt = out.cvt[:, int(np.round((0.1 - alpha) / 0.05))]
    return out.lr1, cvt
def runCointTestBasketsJoh(etf, tickers, start, end):
    """Johansen-test an ETF against every basket of two or more tickers.

    Close prices are downloaded with ``yf.download``, log-transformed, and
    each ticker subset (sizes 2 .. len(tickers)) is tested together with the
    ETF log price using ``coint_johansen(det_order=0, k_ar_diff=1)``.
    Baskets whose frame contains any null value are reported with ``'err'``
    and skipped.

    Returns:
        DataFrame with columns ``ticker`` (the basket list),
        ``critical-values`` and ``trace-stat`` (the last entries of the test's
        ``trace_stat_crit_vals`` / ``trace_stat``), sorted ascending by
        ``trace-stat``.
    """
    results = pd.DataFrame(
        columns=['ticker', 'critical-values', 'trace-stat'])

    etf_close = yf.download(etf, start=start, end=end)[['Close']]
    etf_log_close = np.log(etf_close['Close'].values)

    basket_prices = yf.download(tickers, start=start, end=end)

    # Materialised up front because the progress message needs the total.
    baskets = [
        list(combo)
        for size in range(2, len(tickers) + 1)
        for combo in itertools.combinations(tickers, size)
    ]

    for idx, basket in enumerate(baskets):
        if idx % 500 == 0:
            print(idx, "done, out of a total of ", len(baskets))
        log_prices = basket_prices['Close'][basket].apply(np.log)
        log_prices['etf'] = etf_log_close
        if log_prices.isnull().values.any():
            print('err')
            continue
        johansen = coint_johansen(log_prices, det_order=0, k_ar_diff=1)
        results.loc[idx] = [
            basket,
            johansen.trace_stat_crit_vals[-1],
            johansen.trace_stat[-1],
        ]
    return results.sort_values(by='trace-stat', ascending=True)
示例#3
0
def cointegration_test_result(data, num_lag_diff=2):
    """Run Johansen's cointegration test and print a summary per level.

    For each of the 90%, 95% and 99% confidence levels, prints one line per
    column of ``data`` comparing the trace statistic with the critical value
    at that level.

    Args:
        data: DataFrame-like with a ``columns`` attribute; passed to
            ``coint_johansen(data, -1, num_lag_diff)``.
        num_lag_diff: number of lagged differences for the test.

    Returns:
        None. Results are only printed.
    """
    output_coint = coint_johansen(data, -1, num_lag_diff)
    # Trace statistic for each hypothesis, paired positionally with columns.
    traces_value = output_coint.lr1

    def adjust(str_char, lengtht=6):
        return str(str_char).ljust(lengtht)

    # ``str`` so that non-string column labels (e.g. ints) do not crash len().
    max_char_len = max((len(str(var)) for var in data.columns), default=0) + 1

    # (confidence in percent, column of ``cvt``). Indexing directly avoids
    # looking the column up through ``str(1 - alpha)``, which depends on the
    # exact float repr.
    levels = ((90.0, 0), (95.0, 1), (99.0, 2))
    print(
        "\n Significance of cointegration (Johansen trace test) at different critical values level.\n "
    )
    for confidence, cvt_column in levels:
        coint_crit_val = output_coint.cvt[:, cvt_column]
        print(
            adjust("Name", max_char_len), " :: ",
            "Test Stat > C(%.1f%s)  =>    Signif \n" % (confidence, "%"),
            "---" * max_char_len)
        for col_name, trace, cvt in zip(data.columns, traces_value,
                                        coint_crit_val):
            print(adjust(col_name, max_char_len), " :: ",
                  adjust(round(trace, 2), 9), ">", adjust(cvt, 8), " => ",
                  trace > cvt)
        print("\n")
示例#4
0
def find_pairs(prices, coint_set_amount, johansen_lag):
    """Find cointegrated asset combinations and their first hedge vector.

    Every combination of ``coint_set_amount`` columns of ``prices`` is tested
    with ``coint_johansen(det_order=0, k_ar_diff=johansen_lag)``. A
    combination is kept when at least one trace statistic reaches any of its
    critical values.

    Args:
        prices: DataFrame of prices, one column per asset.
        coint_set_amount: number of assets per tested combination.
        johansen_lag: ``k_ar_diff`` passed to the Johansen test.

    Returns:
        dict mapping each accepted combination (tuple of column labels) to
        the first eigenvector whose trace statistic was significant.
    """
    result = {}
    traded_assets = prices.columns

    # n-choose-k in integer arithmetic so the progress bar gets an int total.
    # Dividing by 1, 2, ..., k in turn is exact at every step.
    total_pairs = 1
    for i in range(coint_set_amount):
        total_pairs *= (len(traded_assets) - i)
    for i in range(1, coint_set_amount + 1):
        total_pairs //= i

    with tqdm_notebook(total=total_pairs) as pbar:
        for combination in itertools.combinations(traded_assets,
                                                  coint_set_amount):
            combination_prices = prices[list(combination)]
            johansen_result = coint_johansen(combination_prices.values,
                                             det_order=0,
                                             k_ar_diff=johansen_lag)
            # One flag per hypothesis: trace stat >= any critical value.
            significant = (johansen_result.lr1.reshape(-1, 1) >=
                           johansen_result.cvt).any(axis=1)
            if significant.any():
                # Eigenvectors are the COLUMNS of ``evec``; selecting rows
                # mixes components of different vectors.
                result[combination] = johansen_result.evec[:, significant][:, 0]
            pbar.update(1)

    return result
示例#5
0
    def fit(self, ts: pd.DataFrame):
        """
        Derive portfolio weights for the instruments from the Johansen test.

        The test is run on ``ts.values`` with a constant term and one lagged
        difference. The first column of the resulting eigenvector matrix is
        stored as ``self.ws``; following E.P. Chan, the eigenvectors are
        expected to be ordered by decreasing eigenvalue, so this is taken to
        be the strongest cointegrating relation. The weighted series
        ``ts.values @ self.ws`` is passed to ``halflife`` and the result
        stored as ``self.hl``.

        :param ts: dataframe with each column being an instrument ts
        :return: ``self``, with ``ws``, ``hl`` and ``_fitted`` set
        """
        johansen = vm.coint_johansen(ts.values, det_order=0, k_ar_diff=1)

        # Column 0 of the critical-value tables is the 90% level, column 2 the
        # 99% level. The helper results are not used at the moment: the
        # assertions that consumed them are disabled.
        trace_sign = sign(johansen.cvt[0, 2], johansen.cvt[0, 0])
        max_eig_sign = sign(johansen.cvm[0, 2], johansen.cvm[0, 0])

        self.ws = johansen.evec[:, 0]

        # Weighted sum of the inputs, i.e. the (net) market value of the
        # portfolio.
        portfolio = np.dot(ts.values, self.ws)
        self.hl = halflife(portfolio)

        self._fitted = True
        return self
示例#6
0
def johansen_coint_(merged_s, pvalues=0.05):
    """Print Johansen max-eigenvalue and trace verdicts; return eigenvalues.

    Rows with missing values are dropped before running
    ``coint_johansen(..., 0, 1)``. For every statistic the function prints
    which of the 90% / 95% / 99% critical values it exceeds.

    Args:
        merged_s: data to test; must support ``dropna()``.
        pvalues: accepted but not used by this function.

    Returns:
        The ``eig`` attribute of the Johansen result.
    """
    cleaned = merged_s.dropna()
    result = coint_johansen(cleaned, 0, 1)
    trace_stat, max_stat = result.lr1, result.lr2
    cvm, cvt = result.cvm, result.cvt

    def crit_range(st, crits):
        # Critical-value columns are ordered 90%, 95%, 99%.
        for idx, stat in enumerate(st):
            print("The t-stat of it is {}".format(stat))
            bounds = crits[idx]
            if stat <= bounds[0]:
                verdict = 'r<{} failed being rejected.'
            elif stat <= bounds[1]:
                verdict = 'r<{} rejected at 90%.'
            elif stat <= bounds[2]:
                verdict = 'r<{} rejected at 95%.'
            else:
                verdict = 'r<{} rejected at 99%.'
            print(verdict.format(idx + 1))

    print("Maximum statistic testing...")
    crit_range(max_stat, cvm)
    print("Tracing statistic testing...")
    crit_range(trace_stat, cvt)
    return result.eig
def compute_pair_metrics(security, candidates):
    """Compute spread metrics and cointegration tests for one security.

    Both inputs are normalised by their first row. For every candidate
    column the function reports the linear drift and volatility of the
    spread, price and return correlations, Engle-Granger statistics in both
    directions, and the Johansen trace statistics using the AIC-selected VAR
    lag order.

    Args:
        security: price Series; its ``name`` is used as the ``s1`` label.
        candidates: DataFrame of candidate price series, one per column.

    Returns:
        DataFrame of metrics joined with the test results.
    """
    security = security.div(security.iloc[0])
    ticker = security.name
    candidates = candidates.div(candidates.iloc[0])
    spreads = candidates.sub(security, axis=0)

    n, _ = spreads.shape
    # Regress each spread on [1, t]; row 1 of the coefficients is the slope.
    design = np.column_stack((np.ones(n), np.arange(1, n + 1)))
    ols_projection = np.linalg.inv(design.T @ design) @ design.T
    drift = (ols_projection @ spreads).iloc[1].to_frame('drift')

    vol = spreads.std().to_frame('vol')
    security_returns = security.pct_change()
    corr_ret = candidates.pct_change().corrwith(security_returns).to_frame(
        'corr_ret')
    corr = candidates.corrwith(security).to_frame('corr')
    metrics = drift.join(vol).join(corr).join(corr_ret).assign(n=n)

    rows = []
    for candidate, prices in candidates.items():
        pair = pd.DataFrame({'s1': security, 's2': prices})
        # VAR order chosen by AIC feeds the Johansen lag length.
        k_ar_diff = VAR(pair.values).select_order().selected_orders['aic']
        # Johansen test with constant term and the estimated lag order.
        cj0 = coint_johansen(pair, det_order=0, k_ar_diff=k_ar_diff)
        # Engle-Granger tests in both directions.
        t1, p1 = coint(security, prices, trend='c')[:2]
        t2, p2 = coint(prices, security, trend='c')[:2]
        rows.append([ticker, candidate, t1, p1, t2, p2, k_ar_diff, *cj0.lr1])

    tests = pd.DataFrame(
        rows,
        columns=[
            's1', 's2', 't1', 'p1', 't2', 'p2', 'k_ar_diff', 'trace0',
            'trace1'
        ],
    ).set_index('s2')
    return metrics.join(tests)
def johansen_coint(df, report=True):
    """Estimate the cointegration rank with the Johansen trace test.

    ``df`` is converted with ``data_frame_to_samples``; the transposed
    samples are tested with ``coint_johansen(..., 0, 1)``. The rank is the
    first index whose trace statistic falls below its 99% critical value.

    Args:
        df: input accepted by ``data_frame_to_samples``.
        report: when true, print the per-rank comparison, the rank, the
            eigenvalues and the eigenvectors.

    Returns:
        ``(eigenvalues[:rank], eigenvectors[:, :rank])`` with the
        eigenvectors as a ``numpy.matrix``, or ``None`` when no rank is
        found (a message is printed in that case regardless of ``report``).
    """
    samples = data_frame_to_samples(df)
    n_series, _ = samples.shape

    result = vecm.coint_johansen(pandas.DataFrame(samples.T), 0, 1)
    trace_stats = result.lr1
    crit_vals = result.cvt

    # Critical-value columns: 0 -> 90%, 1 -> 95%, 2 -> 99%.
    rank = None
    for candidate in range(n_series):
        crit_99 = crit_vals[candidate, 2]
        if report:
            print(f"Critical Value: {crit_99}, Trace Statistic: {trace_stats[candidate]}")
        if trace_stats[candidate] < crit_99:
            rank = candidate
            break

    eigenvalues = result.eig
    eigenvectors = numpy.matrix(result.evec)

    if report:
        print(f"Rank={rank}")
        print("Eigen Values\n", eigenvalues)
        print("Eigen Vectors\n", eigenvectors)

    if rank is None:
        print("Reduced Rank Solution Does Not Exist")
        return None

    return eigenvalues[:rank], eigenvectors[:, :rank]
示例#9
0
    def cointegration_test(self, df, signif=0.05):
        """Perform Johansen's cointegration test and show a summary table.

        Runs ``coint_johansen(df, -1, 5)`` and renders, through ``st``, one
        row per column of ``df`` with the trace statistic, the critical value
        at the ``1 - signif`` confidence level and whether the statistic
        exceeds it.

        Args:
            df: DataFrame of series to test, one per column.
            signif: significance level; 0.10, 0.05 and 0.01 are supported.

        Raises:
            KeyError: if ``signif`` is not one of the supported levels.
        """
        st.subheader('cointegration test')
        out = coint_johansen(df, -1, 5)
        d = {'0.90': 0, '0.95': 1, '0.99': 2}
        confidence = 1 - signif
        traces = out.lr1
        # Format with two decimals: str(1 - 0.10) is '0.9', which used to
        # miss the '0.90' key and raise KeyError for signif=0.10.
        cvts = out.cvt[:, d[f'{confidence:.2f}']]

        def adjust(val, length=6):
            return str(val).ljust(length)

        vet_name = []
        vet_test = []
        vet_c = []
        vet_sign = []
        for col, trace, cvt in zip(df.columns, traces, cvts):
            vet_name.append(adjust(col))
            vet_test.append(adjust(round(trace, 2), 9))
            vet_c.append(adjust(cvt, 8))
            vet_sign.append(trace > cvt)

        df_cointegration = pd.DataFrame()
        df_cointegration['name'] = vet_name
        df_cointegration['test'] = vet_test
        # Label follows the requested level; still 'c(95%)' for the default.
        df_cointegration[f'c({confidence:.0%})'] = vet_c
        df_cointegration['signif'] = vet_sign
        st.dataframe(df_cointegration)
示例#10
0
    def setup_class(cls):
        """Build the Johansen result for ``dta`` (det_order=1, k_ar_diff=2)
        and store the hard-coded reference values it is compared against."""
        cls.res = coint_johansen(dta, 1, 2)
        # NOTE(review): presumably 173 observations minus 1 minus k_ar_diff —
        # confirm against the test methods that read nobs_r.
        cls.nobs_r = 173 - 1 - 2

        # Reference arrays of 32 values each. They look like 8 statistics
        # followed by three groups of 8 critical values — TODO confirm.
        cls.res1_m = np.array([241.985452556075,  166.4781461662553,  110.3298006342814,  70.79801574443575,  44.90887371527634,  27.22385073668511,  11.74205493173769,  3.295435325623445,           169.0618,           133.7852,           102.4674,            75.1027,            51.6492,            32.0645,            16.1619,             2.7055,           175.1584,            139.278,           107.3429,  79.34220000000001,            55.2459,            35.0116,            18.3985,             3.8415,           187.1891,           150.0778,           116.9829,            87.7748,            62.5202,            41.0815,            23.1485,             6.6349])
        cls.res2_m = np.array([75.50730638981975,  56.14834553197396,   39.5317848898456,   25.8891420291594,  17.68502297859124,  15.48179580494741,  8.446619606114249,  3.295435325623445,            52.5858,            46.5583,            40.5244,            34.4202,            28.2398,            21.8731,            15.0006,             2.7055,            55.7302,            49.5875,            43.4183,            37.1646,            30.8151,            24.2522,            17.1481,             3.8415,            62.1741,            55.8171,            49.4095,            42.8612,             36.193,            29.2631,            21.7465,             6.6349,])

        # Flat list of reference eigenvector entries; reshaped below in
        # column-major (Fortran) order to the shape of cls.res.evec.
        evec = np.array([
            0.01102517075074406, -0.2185481584930077, 0.04565819524210763, -0.06556394587400775,
            0.04711496306104131, -0.1500111976629196, 0.03775327003706507, 0.03479475877437702,

            0.007517888890275335, -0.2014629352546497, 0.01526001455616041, 0.0707900418057458,
            -0.002388919695513273, 0.04486516694838273, -0.02936314422571188, 0.009900554050392113,

            0.02846074144367176, 0.02021385478834498, -0.04276914888645468, 0.1738024290422287,
            0.07821155002012749, -0.1066523077111768, -0.3011042488399306, 0.04965189679477353,

            0.07141291326159237, -0.01406702689857725, -0.07842109866080313, -0.04773566072362181,
            -0.04768640728128824, -0.04428737926285261, 0.4143225656833862, 0.04512787132114879,

            -0.06817130121837202, 0.2246249779872569, -0.009356548567565763, 0.006685350535849125,
            -0.02040894506833539, 0.008131690308487425, -0.2503209797396666, 0.01560186979508953,

            0.03327070126502506, -0.263036624535624, -0.04669882107497259, 0.0146457545413255,
            0.01408691619062709, 0.1004753600191269, -0.02239205763487946, -0.02169291468272568,

            0.08782313160608619, -0.07696508791577318, 0.008925177304198475, -0.06230900392092828,
            -0.01548907461158638, 0.04574831652028973, -0.2972228156126774, 0.003469819004961912,

            -0.001868995544352928, 0.05993345996347871, 0.01213394328069316, 0.02096614212178651,
            -0.08624395993789938, 0.02108183181049973, -0.08470307289295617, -5.135072530480897e-005])
        cls.evec_m = evec.reshape(cls.res.evec.shape, order='F')

        # Reference eigenvalues, listed in decreasing order.
        cls.eig_m = np.array([0.3586376068088151, 0.2812806889719111, 0.2074818815675726,  0.141259991767926, 0.09880133062878599, 0.08704563854307619,  0.048471840356709, 0.01919823444066367])
示例#11
0
 def setup_class(cls):
     """Build the Johansen result for ``dta`` (det_order=0, k_ar_diff=9)
     and store the hard-coded reference values it is compared against."""
     cls.res = coint_johansen(dta, 0, 9)
     # NOTE(review): presumably 173 observations minus 1 minus k_ar_diff —
     # confirm against the test methods that read nobs_r.
     cls.nobs_r = 173 - 1 - 9
     # The next line is the print format used to dump the reference values.
     #fprintf(1, '%18.16g, ', r1)
     # NOTE(review): by analogy with the r2 note below, res1_m is presumably
     # [res.lr1 res.cvt] flattened — confirm.
     cls.res1_m = np.array([307.6888935095814,  205.3839229398245,  129.1330243009336,   83.3101865760208,  52.51955460357912,  30.20027050520502,  13.84158157562689, 0.4117390188204866,           153.6341,           120.3673,             91.109,            65.8202,            44.4929,            27.0669,            13.4294,             2.7055,            159.529,           125.6185,            95.7542,            69.8189,            47.8545,            29.7961,            15.4943,             3.8415,           171.0905,           135.9825,           104.9637,            77.8202,            54.6815,            35.4628,            19.9349,             6.6349])
     # Max-eigenvalue statistics followed by their critical values.
     #r2 = [res.lr2 res.cvm]
     cls.res2_m = np.array([102.3049705697569,  76.25089863889085,  45.82283772491284,   30.7906319724417,  22.31928409837409,  16.35868892957814,   13.4298425568064, 0.4117390188204866,            49.2855,            43.2947,            37.2786,            31.2379,            25.1236,            18.8928,            12.2971,             2.7055,            52.3622,            46.2299,            40.0763,            33.8777,            27.5858,            21.1314,            14.2639,             3.8415,            58.6634,            52.3069,            45.8662,            39.3693,            32.7172,             25.865,              18.52,             6.6349])
    def Johansen(self, p, verbose):
        """
        Run the Johansen test on the selected series and build the portfolio
        from the eigenvector with the largest eigenvalue.

        The series are ``self.data[self.name_lyst]``; the test is
        ``coint_johansen(y, 0, p)``. The result object is extended with:

        * ``trace`` / ``eigen``: boolean Series telling, per hypothesis,
          whether the trace / max-eigenvalue statistic exceeds its 95%
          critical value;
        * ``max_eigen_ix``: position of the largest eigenvalue;
        * ``max_evec``: the matching eigenvector (a column of ``evec``).

        :param p: number of lagged differences (``k_ar_diff``)
        :param verbose: when truthy, print the statistic tables, the
            eigenvalues and the eigenvectors
        :return: ``(jres, portfolio)`` where ``portfolio`` is
            ``np.dot(y, jres.max_evec)``
        """
        y = self.data[self.name_lyst]
        jres = coint_johansen(y, 0, p)

        crit_labels = ["Criti_90%", "Criti_95%", "Criti_99%"]

        # ``columns`` must be an ordered list-like; a set literal is rejected
        # by current pandas.
        tr_stats = pd.DataFrame(jres.lr1, columns=["Trace Statistic"])
        tr_stats.index.names = ["NULL: r <= "]
        for position, label in enumerate(crit_labels):
            tr_stats[label] = jres.cvt[:, position]

        eign_stats = pd.DataFrame(jres.lr2, columns=["Eigen Statistic"])
        eign_stats.index.names = ["NULL: r <= "]
        for position, label in enumerate(crit_labels):
            eign_stats[label] = jres.cvm[:, position]

        eigen = pd.DataFrame(jres.eig, columns=["Eigen Value"])
        EVEC = pd.DataFrame(jres.evec)

        if verbose:
            print(tr_stats, "\n")
            print(eign_stats, "\n")
            print(eigen, "\n")
            print(EVEC)

        jres.trace = (tr_stats["Trace Statistic"] > tr_stats["Criti_95%"])
        jres.eigen = (eign_stats["Eigen Statistic"] > eign_stats["Criti_95%"])

        jres.max_eigen_ix = np.argmax(jres.eig)
        # DataFrame ``[]`` selects a column, i.e. one eigenvector.
        jres.max_evec = EVEC[jres.max_eigen_ix]

        return jres, np.dot(y, jres.max_evec)
示例#13
0
def get_johansen(y, p):
    """
        Find the first rank whose max-eigenvalue statistic is not above its
        95% critical value.

        The statistics (``lr2``) are rounded to 4 decimals before being
        compared with column 1 of ``cvm``.

        Returns a one-element list holding that index, an empty list when
        every statistic exceeds its critical value, or ``None`` when the
        test raises ``numpy.linalg.LinAlgError``.
        """
    try:
        result = coint_johansen(y, det_order=0, k_ar_diff=p)
    except np.linalg.LinAlgError:
        return None

    max_eig_stats = np.round(result.lr2, 4)
    crit_95 = result.cvm[:, 1]
    for idx, (stat, crit) in enumerate(zip(max_eig_stats, crit_95)):
        # Written as ``not >`` so a NaN comparison still stops the scan.
        if not stat > crit:
            return [idx]
    return []
示例#14
0
def cointegration_test(data, alpha=0.05):
    '''
    Johansen cointegration test with a printed summary.

    Two or more time series are considered cointegrated if they have a
    statistically significant relationship, i.e. there exists a linear
    combination of them with an order of integration lower than that of the
    individual series.

    - https://en.wikipedia.org/wiki/Cointegration
    - http://www-stat.wharton.upenn.edu/~steele/Courses/434/434Context/Co-integration/Murray93DrunkAndDog.pdf
    - https://en.m.wikipedia.org/wiki/Johansen_test
    - https://en.wikipedia.org/wiki/Error_correction_model

    null hypothesis: no cointegrating equations, alternate hypothesis: at least 1 cointegrating relationship

    Runs ``coint_johansen(data, -1, 5)`` and prints, per column of ``data``,
    the trace statistic, the critical value at the ``1 - alpha`` confidence
    level and whether the statistic exceeds it.

    Supported ``alpha`` values are 0.10, 0.05 and 0.01; anything else raises
    ``KeyError``. Returns ``None``.
    '''
    out = coint_johansen(data, -1, 5)
    d = {'0.90': 0, '0.95': 1, '0.99': 2}
    confidence = 1 - alpha
    traces = out.lr1
    # Two-decimal formatting: str(1 - 0.10) is '0.9', which used to miss the
    # '0.90' key and raise KeyError for alpha=0.10.
    cvts = out.cvt[:, d[f'{confidence:.2f}']]

    def adjust(val, length=6):
        return str(val).ljust(length)

    # Summary; the header shows the level in use (95% for the default alpha).
    print(f'Name   ::  Test Stat > C({confidence:.0%})    =>   Signif  \n',
          '--' * 20)
    for col, trace, cvt in zip(data.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">",
              adjust(cvt, 8), ' =>  ', trace > cvt)
示例#15
0
    def check(self):
        """Estimate the hedge ratio of the pair and test its spread.

        The log prices of ``self._prices_b`` and ``self._prices_a`` are
        stacked (b first) and passed to
        ``coint_johansen(det_order=0, k_ar_diff=1)``. The first eigenvector
        supplies the two betas, the spread is ``log(b) + log(a) * beta_a /
        beta_b``, and an ADF test with one lag and a constant is run on it.

        Sets ``_beta_b``, ``_beta_a``, ``_beta``, ``_spread``, ``_p_value``
        and ``_t_stats`` on the instance. Returns ``None``.
        """
        _log_price_a = np.log(self._prices_a)
        _log_price_b = np.log(self._prices_b)

        _values = np.stack((_log_price_b, _log_price_a), axis=-1)

        rst = coint_johansen(_values, det_order=0, k_ar_diff=1)

        # Eigenvectors are stored as the COLUMNS of ``evec``. ``evec[0]``
        # would pair the first component of two different eigenvectors.
        beta_b, beta_a = rst.evec[:, 0]

        self._beta_b = beta_b
        self._beta_a = beta_a

        self._beta = beta_a / beta_b
        self._spread = _log_price_b + _log_price_a * beta_a / beta_b
        res_adf = adfuller(self._spread,
                           maxlag=1,
                           regression='c',
                           autolag=None)

        # p-value recomputed from the ADF statistic with N=2 series.
        self._p_value = mackinnonp(res_adf[0], regression='c', N=2)
        self._t_stats = res_adf[0]
        def cointegration_test(data):
            """Return the eigenvalues of ``coint_johansen(data, -1, 1)``.

            The original author's note says the data are stationary when all
            absolute eigenvalues are below 1; that check is left to the
            caller, this function only returns the values.
            """
            from statsmodels.tsa.vector_ar.vecm import coint_johansen

            johansen = coint_johansen(data, -1, 1)
            return johansen.eig
示例#17
0
    def johansen_test_result(self):
        """Load the data, run the Johansen test and add a portfolio column.

        The data from ``self.import_data()`` is stored on ``self.data``; the
        first eigenvector of ``coint_johansen(data, 0, 1)`` is stored as
        ``self.share_allocation``, and the row-wise sum of the weighted
        columns is added as column ``'port'``.

        Returns the updated ``self.data``.
        """
        self.data = self.import_data()
        johansen = coint_johansen(self.data, 0, 1)
        self.share_allocation = johansen.evec[:, 0]

        weighted = self.share_allocation * self.data
        self.data['port'] = weighted.sum(axis=1)
        return self.data
示例#18
0
def test_coint_johansen_0lag(reset_randomstate):
    """The Johansen test accepts ``k_ar_diff=0`` and yields two eigenvalues."""
    # GH 5731
    n_obs = 1000
    # Draw order matters for reproducibility: walk increments first, then
    # the noise term.
    random_walk = pd.Series(np.cumsum(np.random.normal(0, 1, n_obs)))
    noise = np.random.normal(0, 1, n_obs)
    shifted = random_walk + 5 + noise

    frame = pd.concat([random_walk, shifted], axis=1)
    result = coint_johansen(frame, det_order=-1, k_ar_diff=0)
    assert result.eig.shape == (2, )
示例#19
0
    def setup_class(cls):
        """Build the Johansen result for ``dta`` (det_order=2, k_ar_diff=5)
        and store the hard-coded reference values it is compared against."""
        cls.res = coint_johansen(dta, 2, 5)
        # NOTE(review): presumably 173 observations minus 1 minus k_ar_diff —
        # confirm against the test methods that read nobs_r.
        cls.nobs_r = 173 - 1 - 5

        #Note: critical values not available if trend>1
        # The trailing zeros are placeholders for those missing critical
        # values; they are replaced by NaN right after each array is built.
        cls.res1_m = np.array([270.1887263915158,  171.6870096307863,  107.8613367358704,  70.82424032233558,  44.62551818267534,  25.74352073857572,  14.17882426926978,  4.288656185006764,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0])
        cls.res1_m[cls.res1_m == 0] = np.nan
        cls.res2_m = np.array([98.50171676072955,  63.82567289491584,  37.03709641353485,  26.19872213966024,  18.88199744409963,  11.56469646930594,  9.890168084263012,  4.288656185006764,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0])
        cls.res2_m[cls.res2_m == 0] = np.nan
def johansen_trace(y, p):
    """Run the Johansen test and attach the trace-test rank as ``.r``.

    ``r`` is ``i + 1`` for the last index ``i`` whose trace statistic
    exceeds its 95% critical value (column 1 of ``cvt``), or 0 when none
    does.

    Args:
        y: two-dimensional data, one series per column.
        p: number of lagged differences (``k_ar_diff``).

    Returns:
        The ``coint_johansen(y, 0, p)`` result with attribute ``r`` added.
    """
    _, n_series = y.shape
    joh_trace = coint_johansen(y, 0, p)

    trace_stats = joh_trace.lr1
    crit_95 = joh_trace.cvt[:, 1]
    exceeded = [
        idx + 1 for idx in range(n_series)
        if trace_stats[idx] > crit_95[idx]
    ]
    joh_trace.r = exceeded[-1] if exceeded else 0

    return joh_trace
示例#21
0
def johansen_Test(data,det_order,lagged_diff):
    """Tabulate the Johansen max-eigenvalue test.

    Args:
        data: series to test, passed to ``vecm.coint_johansen``.
        det_order: deterministic-term order for the test.
        lagged_diff: number of lagged differences (``k_ar_diff``).

    Returns:
        DataFrame with one row per hypothesis ``H(0)``, ``H(1)``, ... and
        columns ``eig`` (eigenvalue), ``max eig`` (max-eigenvalue statistic)
        and the ``90%`` / ``95%`` / ``99%`` critical values.
    """
    results = vecm.coint_johansen(data, det_order, lagged_diff)
    stats = np.column_stack((results.eig, results.lr2))
    # The third critical-value column is the 99% level; it used to be
    # labelled "90%", which duplicated the first label.
    cols = ["eig", "max eig", "90%", "95%", "99%"]
    df = pd.DataFrame(np.hstack((stats, results.cvm)), columns=cols)
    # One label per row, so inputs with more than two series work too.
    df.index = [f"H({rank})" for rank in range(len(df))]
    return df
示例#22
0
    def setup_class(cls):
        """Build the Johansen result for ``dta`` (det_order=2, k_ar_diff=5)
        and store the hard-coded reference values it is compared against."""
        # HypothesisTestWarning is silenced only while the result is built.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=HypothesisTestWarning)
            cls.res = coint_johansen(dta, 2, 5)
        # NOTE(review): presumably 173 observations minus 1 minus k_ar_diff —
        # confirm against the test methods that read nobs_r.
        cls.nobs_r = 173 - 1 - 5

        #Note: critical values not available if trend>1
        # The trailing zeros are placeholders for those missing critical
        # values; they are replaced by NaN right after each array is built.
        cls.res1_m = np.array([270.1887263915158,  171.6870096307863,  107.8613367358704,  70.82424032233558,  44.62551818267534,  25.74352073857572,  14.17882426926978,  4.288656185006764,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0])
        cls.res1_m[cls.res1_m == 0] = np.nan
        cls.res2_m = np.array([98.50171676072955,  63.82567289491584,  37.03709641353485,  26.19872213966024,  18.88199744409963,  11.56469646930594,  9.890168084263012,  4.288656185006764,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  0])
        cls.res2_m[cls.res2_m == 0] = np.nan
示例#23
0
def cointegration_test(df, alpha=0.05):
    """Run Johansen's cointegration test on ``df`` and print a summary.

    Uses ``coint_johansen(df, -1, 5)`` and prints, per column of ``df``, the
    trace statistic, the critical value at the ``1 - alpha`` confidence
    level and whether the statistic exceeds it.

    Args:
        df: DataFrame of series to test, one per column.
        alpha: significance level; 0.10, 0.05 and 0.01 are supported.

    Raises:
        KeyError: if ``alpha`` is not one of the supported levels.
    """
    out = coint_johansen(df, -1, 5)
    d = {'0.90': 0, '0.95': 1, '0.99': 2}
    confidence = 1 - alpha
    traces = out.lr1
    # Two-decimal formatting: str(1 - 0.10) is '0.9', which used to miss the
    # '0.90' key and raise KeyError for alpha=0.10.
    cvts = out.cvt[:, d[f'{confidence:.2f}']]

    def adjust(val, length=6):
        return str(val).ljust(length)

    # Summary; the header shows the level in use (95% for the default alpha).
    print(f'Name   ::  Test Stat > C({confidence:.0%})    =>   Signif  \n',
          '--' * 20)
    for col, trace, cvt in zip(df.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">",
              adjust(cvt, 8), ' =>  ', trace > cvt)
示例#24
0
def coint_Johansen(data, det_order, k_ar_diff, return_pvalue=True):
    """Return the first Johansen trace statistic, optionally with a p-value bound.

    The "p-value" is not computed exactly. It is a value just below 0.10,
    0.05 or 0.01, chosen by the highest-indexed critical value (90%, 95%,
    99%) the statistic exceeds, or 1.0 when it exceeds none.

    Args:
        data, det_order, k_ar_diff: forwarded to ``coint_johansen``.
        return_pvalue: when false, only the statistic is returned.

    Returns:
        ``stat`` or ``(stat, pvalue)``.
    """
    res = coint_johansen(data, det_order, k_ar_diff)
    stat = res.lr1[0]
    if not return_pvalue:
        return stat

    levels = (0.1 - 1e-6, 0.05 - 1e-6, 0.01 - 1e-6)
    exceeded = (stat > res.cvt[0]).nonzero()[0]
    pvalue = levels[exceeded[-1]] if len(exceeded) else 1.
    return stat, pvalue
示例#25
0
def calculate_cointegration_johansen2(dataframe, k=1):
    """Run the Johansen test on two or more series and print the results.

    ``dataframe`` is passed unchanged to
    ``coint_johansen(det_order=0, k_ar_diff=k)``. The trace statistics,
    max-eigenvalue statistics, their critical values and the eigenvalues are
    printed; nothing is returned.
    """
    johansen_test = coint_johansen(dataframe, det_order=0, k_ar_diff=k)
    print('Johansen Test')

    # (label, result attribute), read one at a time in print order.
    report = (
        ('Trace Stat: ', 'trace_stat'),
        ('Trace critical values: \n ', 'trace_stat_crit_vals'),
        ('Max EigenVectors : ', 'max_eig_stat'),
        ('Max EigenVectors critical values: \n', 'max_eig_stat_crit_vals'),
        ('EigenValues: \n', 'eig'),
    )
    for label, attribute in report:
        print(label, getattr(johansen_test, attribute))
示例#26
0
def calculate_cointegration_johansen(depseries, indepseries, k=1):
    """Run the Johansen test on the two inputs and print the results.

    The inputs are concatenated column-wise with an outer join, rows with
    missing values are dropped, and the cleaned frame is written to
    ``cointdata.csv`` under ``store_dir`` before being tested with
    ``coint_johansen(det_order=0, k_ar_diff=k)``. The trace and
    max-eigenvalue statistics and their critical values are printed;
    nothing is returned.
    """
    merged = pd.concat([depseries, indepseries],
                       axis=1,
                       keys=['<DEPSERIES>', '<INDEPSERIES>'],
                       join='outer')
    coint_data = merged.dropna()
    coint_data.to_csv(store_dir + '/' + 'cointdata.csv')

    johansen_test = coint_johansen(coint_data, det_order=0, k_ar_diff=k)
    print('Johansen Test')

    # (label, result attribute), read one at a time in print order.
    report = (
        ('Trace Stat: ', 'trace_stat'),
        ('Trace critical values: \n ', 'trace_stat_crit_vals'),
        ('Max EigenVectors : ', 'max_eig_stat'),
        ('Max EigenVectors critical values: \n', 'max_eig_stat_crit_vals'),
    )
    for label, attribute in report:
        print(label, getattr(johansen_test, attribute))
示例#27
0
def run_johansen_test(data):
    """Evaluate the Johansen trace test for the hypothesis r = 0.

    r = 0 means no cointegration, r <= 1 means up to one cointegration
    relationship, etc. The test yields one statistic per hypothesised
    number of cointegrating equations.

    cvt - critical values (90%, 95%, 99%) of the trace statistic
    lr1 - trace statistic

    Trace test:
    H0: 0 cointegration equations
    H1: coint. eq. exist > 0
    explanation https://www.youtube.com/watch?v=TB4m9M1sIJ0

    Returns whatever ``trace_results`` returns for the r = 0 statistic and
    its critical values.
    """
    johansen = coint_johansen(data, det_order=0, k_ar_diff=1)
    # Index 0 is the r = 0 hypothesis; the check passes if it is rejected.
    return trace_results(johansen.lr1[0], johansen.cvt[0])
示例#28
0
    def _search_best_coint_vec(self, comb):
        """Pick, per rolling window, the Johansen eigenvector whose portfolio
        has the lowest ADF p-value.

        For every row ``i >= self._term`` of ``self._fx_rate_df`` the window
        ``(index[i - term], index[i]]`` of the sorted ``comb`` columns is
        tested with ``coint_johansen``. Each eigenvector is applied as
        weights and the resulting series is ADF-tested; the vector with the
        smallest p-value is kept.

        :param comb: iterable of column names forming the portfolio
        :return: ``(pvalue_df, weight_df)``: the minimum p-value per value
            date, and one row per (value date, currency) with columns
            ``Portfolio``, ``Ccy`` and ``Weight``
        """
        self._logger.info("Processing {0}...".format(",".join(comb)))
        comb = np.sort(comb).tolist()
        weight_frames = []
        pvalue_list = []
        for i in tqdm(range(self._term, self._fx_rate_df.shape[0])):
            # NOTE: these two locals are referenced by name (``@...``) in the
            # query string below — do not rename them.
            value_date = self._fx_rate_df.index[i]
            start_date = self._fx_rate_df.index[i-self._term]
            target_fx = self._fx_rate_df[list(comb)].query("index>@start_date & index<=@value_date")
            min_pvalue = 1.0
            target_vec = []
            eigen_vec = coint_johansen(endog=target_fx,
                                       det_order=self._order,
                                       k_ar_diff=self._ar_diff).evec

            # Eigenvectors are the COLUMNS of ``evec``; iterating rows would
            # mix components of different vectors.
            for j in range(eigen_vec.shape[1]):
                candidate = eigen_vec[:, j]
                try:
                    pvalue = sm.tsa.stattools.adfuller((target_fx * candidate).sum(axis=1),
                                                       regression=self._reg)[1]
                except Exception:
                    # Best effort: a failed ADF run counts as "not
                    # stationary". Narrowed from a bare ``except`` so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    pvalue = 1.0
                if min_pvalue >= pvalue:
                    min_pvalue = pvalue
                    target_vec = candidate

            pvalue_list.append(min_pvalue)
            weight_frames.append(pd.DataFrame(np.array([np.repeat(','.join(comb), len(target_vec)),
                                                        comb,
                                                        target_vec]).T,
                                              index=np.repeat(value_date, len(target_vec)),
                                              columns=['Portfolio', 'Ccy', 'Weight']))

        # ``DataFrame.append`` was removed in pandas 2.0; concatenating once
        # is also linear instead of quadratic.
        weight_df = pd.concat(weight_frames) if weight_frames else pd.DataFrame()
        weight_df.index.name='ValueDate'
        pvalue_df = pd.DataFrame(pvalue_list, columns=[",".join(comb)],
                                 index=self._fx_rate_df.index[self._term:])
        return pvalue_df, weight_df
    def get_johansen(self, y, p):
        """
        Get the cointegration vectors at 95% level of significance
        given by the trace statistic test.

        Runs ``coint_johansen(y, 0, p)``, prints the trace statistics and
        their critical values, and extends the result with:

        * ``r``: ``i + 1`` for the last index ``i`` whose trace statistic
          exceeds its 95% critical value, or 0 when none does;
        * ``evecr``: the first ``r`` eigenvector columns.

        :param y: two-dimensional data, one series per column
        :param p: number of lagged differences (``k_ar_diff``)
        :return: the extended Johansen result object
        """
        _, l = y.shape
        jres = coint_johansen(y, 0, p)
        trstat = jres.lr1  # trace statistic
        tsignf = jres.cvt  # critical values
        print(trstat)
        print(tsignf)
        # Start at 0 so "nothing significant" gives rank 0 instead of
        # leaving ``r`` unbound.
        r = 0
        for i in range(l):
            if trstat[i] > tsignf[i, 1]:  # 0: 90%  1:95% 2: 99%
                r = i + 1
        jres.r = r
        jres.evecr = jres.evec[:, :r]

        return jres
示例#30
0
    def get_hedge_ratio(self, pair_prices):
        """
        Helper function that uses the Johansen test to calculate hedge ratio.
        Per the original author's note, this is applied to the pair prices
        on a rolling basis in prices_to_signals.

        :param pair_prices: DataFrame of prices, one column per instrument
        :return: Series of weights indexed by the columns of ``pair_prices``;
            all zeros when fewer than 75% of ``self.LOOKBACK_WINDOW``
            observations remain after dropping missing rows
        """
        pair_prices = pair_prices.dropna()

        # Skip if we don't have at least 75% of the expected observations
        if len(pair_prices) < self.LOOKBACK_WINDOW * 0.75:
            return pd.Series(0, index=pair_prices.columns)

        # The second and third parameters indicate constant term, with a lag of 1.
        # See Chan, Algorithmic Trading, chapter 2.
        result = coint_johansen(pair_prices, 0, 1)

        # The first COLUMN of eigenvectors contains the best weights.
        # ``evec[0]`` is the first row, i.e. the first component of every
        # eigenvector, which is not a hedge vector.
        weights = list(result.evec[:, 0])

        return pd.Series(weights, index=pair_prices.columns)
示例#31
0
def generate_hedge_ratio_from_df(df):
    """
    Copy ``df`` into a float matrix and calculate the hedge ratio with
    the coint_johansen statistical test (det_order=0, k_ar_diff=1).

    Parameters:
    :param df: pd.DataFrame to generate hedge_ratio for
    :type df: pd.DataFrame

    :return: hedge ratio: the first eigenvector column of the test result
    :rtype: numpy.ndarray
    """
    price_matrix = np.zeros(df.shape)
    # Fill column by column, in the frame's own column order.
    for col_idx, col_name in enumerate(df.columns):
        price_matrix[:, col_idx] = df[col_name]
    johansen = jh.coint_johansen(price_matrix, 0, 1)
    return johansen.evec[:, 0]
示例#32
0
    def setup_class(cls):
        cls.res = coint_johansen(dta, -1, 8)
        cls.nobs_r = 173 - 1 - 8

        cls.res1_m = np.array([260.6786029744658,  162.7966072512681,  105.8253545950566,  71.16133060790817,  47.68490211260372,  28.11843682526138,  13.03968537077271,   2.25398078597622,           137.9954,           106.7351,            79.5329,            56.2839,            37.0339,            21.7781,            10.4741,             2.9762,           143.6691,           111.7797,            83.9383,            60.0627,            40.1749,            24.2761,            12.3212,             4.1296,           154.7977,           121.7375,            92.7136,  67.63670000000001,            46.5716,            29.5147,             16.364,             6.9406])
        cls.res2_m = np.array([97.88199572319769,  56.97125265621156,  34.66402398714837,  23.47642849530445,  19.56646528734234,  15.07875145448866,   10.7857045847965,   2.25398078597622,             45.893,            39.9085,            33.9271,             27.916,             21.837,            15.7175,             9.4748,             2.9762,            48.8795,            42.7679,            36.6301,            30.4428,            24.1592,            17.7961,            11.2246,             4.1296,            55.0335,            48.6606,            42.2333,            35.7359,            29.0609,            22.2519,            15.0923,             6.9406])