Пример #1
0
def kci_invariance_test(
    suffstat: Dict,
    context,
    i: int,
    cond_set: Optional[Union[List[int], int]] = None,
    width: float = 0,
    alpha: float = 0.05,
    unbiased: bool = False,
    regress: bool = True,
    gamma_approx: bool = True,
    n_draws: int = 500,
    lam: float = 1e-3,
    thresh: float = 1e-5,
    num_eig: int = 0,
):
    """
    Run ``kci_test`` on the matrix that ``combined_mat`` builds from observational and context samples.

    Parameters
    ----------
    suffstat:
        Mapping holding the observational samples under ``'obs_samples'`` and the samples of each context under
        that context's own key.
    context:
        Key of ``suffstat`` selecting the samples that are combined with the observational ones.
    i:
        Variable index, forwarded to ``combined_mat``.
    cond_set:
        Conditioning set; normalized with ``to_list`` before use.
    width, alpha, unbiased, regress, gamma_approx, n_draws, lam, thresh, num_eig:
        Options forwarded unchanged to ``kci_test``.

    Returns
    -------
    The value returned by ``kci_test``.
    """
    conditioning = to_list(cond_set)
    stacked = combined_mat(suffstat['obs_samples'], suffstat[context], i, conditioning)

    # The test is always run between columns 0 and 1 of the combined matrix, with one
    # conditioning column per entry of the conditioning set starting at column 2.
    cond_columns = list(range(2, len(conditioning) + 2))

    kci_options = dict(
        width=width,
        alpha=alpha,
        unbiased=unbiased,
        gamma_approx=gamma_approx,
        regress=regress,
        n_draws=n_draws,
        lam=lam,
        thresh=thresh,
        num_eig=num_eig,
    )
    return kci_test(stacked, 0, 1, cond_columns, **kci_options)
Пример #2
0
def hsic_test(suffstat: np.ndarray,
              i: int,
              j: int,
              cond_set: Union[List[int], int] = None,
              alpha: float = 0.05) -> Dict:
    """
    Test for (conditional) independence using the Hilbert-Schmidt Independence Criterion.

    Without a conditioning set, columns ``i`` and ``j`` are tested directly. With a conditioning set, the two
    variables are first passed through ``residuals`` (non-parametric regression on the conditioning set) and the
    resulting residuals are tested instead.

    Parameters
    ----------
    suffstat:
        Matrix of samples.
    i:
        Column position of the first variable.
    j:
        Column position of the second variable.
    cond_set:
        Column positions of the conditioning set; normalized with ``to_list``.
    alpha:
        Significance level of the test.

    Returns
    -------
    The result of ``hsic_test_vector`` for the selected pair of vectors.
    """
    conditioning = to_list(cond_set)

    # Choose the pair of vectors to test, then run a single test call on them.
    if len(conditioning) > 0:
        first, second = residuals(suffstat, i, j, conditioning)
    else:
        first, second = suffstat[:, i], suffstat[:, j]
    return hsic_test_vector(first, second, alpha=alpha)
Пример #3
0
def hsic_test(suffstat: Any,
              i: int,
              j: int,
              cond_set: Union[List[int], int] = None,
              alpha: float = 0.05):
    """
    Run ``hsic_test_vector`` on variables ``i`` and ``j``, optionally after conditioning.

    Parameters
    ----------
    suffstat:
        Sample container indexed as ``suffstat[:, column]``.
    i:
        Column position of the first variable.
    j:
        Column position of the second variable.
    cond_set:
        Column positions of the conditioning set; normalized with ``to_list``.
    alpha:
        Significance level, forwarded to ``hsic_test_vector``.

    Returns
    -------
    The value returned by ``hsic_test_vector``.
    """
    conditioning = to_list(cond_set)

    # Conditional case: test what ``residuals`` returns for the two variables.
    if len(conditioning) != 0:
        res_first, res_second = residuals(suffstat, i, j, conditioning)
        return hsic_test_vector(res_first, res_second, alpha=alpha)

    # Marginal case: test the raw columns.
    column_i = suffstat[:, i]
    column_j = suffstat[:, j]
    return hsic_test_vector(column_i, column_j, alpha=alpha)
Пример #4
0
def hsic_invariance_test(suffstat,
                         context,
                         i: int,
                         cond_set: Optional[Union[List[int], int]] = None,
                         alpha: float = 0.05):
    """
    Run ``hsic_test`` on the matrix that ``combined_mat`` builds from observational and context samples.

    Parameters
    ----------
    suffstat:
        Mapping holding the observational samples under ``'obs_samples'`` and the samples of each context under
        that context's own key.
    context:
        Key of ``suffstat`` selecting the samples that are combined with the observational ones.
    i:
        Variable index, forwarded to ``combined_mat``.
    cond_set:
        Conditioning set; normalized with ``to_list`` before use.
    alpha:
        Significance level, forwarded to ``hsic_test``.

    Returns
    -------
    The value returned by ``hsic_test``.
    """
    conditioning = to_list(cond_set)
    stacked = combined_mat(suffstat['obs_samples'], suffstat[context], i, conditioning)

    # Columns 0 and 1 of the combined matrix are tested against each other; the
    # conditioning columns start at position 2, one per conditioning variable.
    cond_columns = list(range(2, len(conditioning) + 2))
    return hsic_test(stacked, 0, 1, cond_columns, alpha=alpha)
Пример #5
0
def gauss_invariance_test(suffstat,
                          context,
                          i: int,
                          cond_set: Optional[Union[List[int], int]] = None,
                          alpha: float = 0.05,
                          new=True,
                          zero_mean=False,
                          zero_coeffs=False):
    """
    Test the null hypothesis that two Gaussian distributions are equal.

    Variable ``i`` is regressed linearly on the conditioning set, separately in the observational samples and in
    the samples of ``context``. Two statistics are then computed: an F statistic comparing the residual variances
    and, when at least one regression coefficient is fitted, a statistic comparing the two coefficient vectors.

    Parameters
    ----------
    suffstat:
        dictionary containing:
        'obs' -- dictionary with
            'samples' -- matrix of observational samples
            'G' -- Gram matrix of the observational samples
        'contexts' -- dictionary mapping each context to a dictionary with the same two entries.
        NOTE(review): 'G' is used as ``samples.T @ samples`` in the least-squares formula -- confirm against the
        code that builds ``suffstat``.
    context:
        which context to test.
    i:
        column position of the variable whose distribution is tested.
    cond_set:
        column positions of the conditioning set; normalized with ``to_list``.
    alpha:
        Significance level.
    new:
        Not used by this function.
    zero_mean:
        If True, no intercept is fitted. If False, column ``-1`` of the sample matrices is added as a regressor
        (presumably a constant column of ones acting as the intercept -- confirm), or, when the conditioning set
        is empty, the sample mean is subtracted.
    zero_coeffs:
        Not used by this function.

    Return
    ------
    dictionary containing ftest_stat, f_pvalue and reject, plus rc_stat and rc_pvalue whenever at least one
    regression coefficient (including the intercept) was fitted.
    """
    cond_set = to_list(cond_set)
    obs_samples = suffstat['obs']['samples']
    iv_samples = suffstat['contexts'][context]['samples']
    n1 = obs_samples.shape[0]
    n2 = iv_samples.shape[0]

    # === FIND REGRESSION COEFFICIENTS AND RESIDUALS
    if len(cond_set) != 0:
        cond_ix = cond_set if zero_mean else [*cond_set, -1]
        # Invert each Gram sub-matrix once; it is reused for the coefficient statistic below.
        gram1_inv = np.linalg.inv(suffstat['obs']['G'][np.ix_(cond_ix, cond_ix)])
        gram2_inv = np.linalg.inv(suffstat['contexts'][context]['G'][np.ix_(cond_ix, cond_ix)])
        coefs1 = gram1_inv @ obs_samples[:, cond_ix].T @ obs_samples[:, i]
        coefs2 = gram2_inv @ iv_samples[:, cond_ix].T @ iv_samples[:, i]

        residuals1 = obs_samples[:, i] - obs_samples[:, cond_ix] @ coefs1
        residuals2 = iv_samples[:, i] - iv_samples[:, cond_ix] @ coefs2
    elif not zero_mean:
        # Intercept-only model: the single coefficient is the sample mean and the Gram matrix is [[n]].
        cond_ix = [-1]
        gram1_inv = np.linalg.inv(n1 * np.ones([1, 1]))
        gram2_inv = np.linalg.inv(n2 * np.ones([1, 1]))
        coefs1 = np.array([np.mean(obs_samples[:, i])])
        coefs2 = np.array([np.mean(iv_samples[:, i])])
        residuals1 = obs_samples[:, i] - coefs1
        residuals2 = iv_samples[:, i] - coefs2
    else:
        # Nothing is fitted. ``cond_ix`` must still be defined because every later step
        # branches on its length; leaving it unset raised a NameError in this case.
        cond_ix = []
        residuals1 = obs_samples[:, i]
        residuals2 = iv_samples[:, i]

    n_coefs = len(cond_ix)

    # Residual variances, with one degree of freedom removed per fitted coefficient.
    var1 = np.var(residuals1, ddof=n_coefs)
    var2 = np.var(residuals2, ddof=n_coefs)

    # === REGRESSION COEFFICIENT INVARIANCE STATISTIC (only when something was fitted)
    if n_coefs != 0:
        coef_diff = coefs1 - coefs2
        pooled_cov = var1 * gram1_inv + var2 * gram2_inv
        rc_stat = coef_diff @ np.linalg.inv(pooled_cov) @ coef_diff.T / n_coefs
        # ncfdtr with zero non-centrality is the CDF of the central F distribution.
        rc_pvalue = ncfdtr(n_coefs, n1 + n2 - n_coefs, 0, rc_stat)
        rc_pvalue = 2 * min(rc_pvalue, 1 - rc_pvalue)

    # === F-TEST ON THE RATIO OF RESIDUAL VARIANCES
    # NOTE(review): the degrees of freedom are n - 1 although the variances use ddof=len(cond_ix);
    # confirm whether n - len(cond_ix) was intended.
    ftest_stat = var1 / var2
    f_pvalue = ncfdtr(n1 - 1, n2 - 1, 0, ftest_stat)
    f_pvalue = 2 * min(f_pvalue, 1 - f_pvalue)

    # === ACCEPT/REJECT INVARIANCE HYPOTHESIS BASED ON P-VALUES WITH BONFERRONI CORRECTION
    if n_coefs != 0:
        reject = f_pvalue < alpha / 2 or rc_pvalue < alpha / 2
    else:
        reject = f_pvalue < alpha

    # === FORM RESULT DICT AND RETURN
    result_dict = dict(ftest_stat=ftest_stat, f_pvalue=f_pvalue, reject=reject)
    if n_coefs > 0:
        result_dict['rc_stat'] = rc_stat
        result_dict['rc_pvalue'] = rc_pvalue

    return result_dict