示例#1
0
def _run_simulation(X: np.ndarray, Y: np.ndarray, corr_func, method: str,
                    alpha: float, random_seed: int) -> str:
    """Classify a held-out correlation against a confidence interval.

    NumPy's global random state is seeded with ``random_seed`` and ``X``/``Y``
    are split with ``sample_submatrices``. A confidence interval is computed
    with ``corr_ci`` on the first returned pair of submatrices, and
    ``corr_func`` is evaluated on the last returned pair. The two middle
    results of ``sample_submatrices`` are ignored.

    Args:
        X: First score matrix, passed to ``sample_submatrices``.
        Y: Second score matrix, passed to ``sample_submatrices``.
        corr_func: Callable invoked as ``corr_func(X, Y)`` that returns the
            correlation value to compare against the interval.
        method: Confidence-interval method name forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        random_seed: Seed for ``np.random.seed``.

    Returns:
        ``'too_high'`` when the held-out correlation is below the lower bound,
        ``'contains'`` when it lies within ``[lower, upper]``, and
        ``'too_low'`` in every other case.
    """
    # Seeding the global RNG here affects every later np.random call.
    np.random.seed(random_seed)

    ci_pair, _, _, held_out_pair = sample_submatrices(X, Y)
    ci_X, ci_Y = ci_pair
    held_out_X, held_out_Y = held_out_pair

    ci_lower, ci_upper = corr_ci(corr_func, ci_X, ci_Y, method, alpha=alpha)
    held_out_corr = corr_func(held_out_X, held_out_Y)

    if held_out_corr < ci_lower:
        # The whole interval sits above the held-out value: the CI is too high
        return 'too_high'
    if ci_lower <= held_out_corr <= ci_upper:
        return 'contains'
    # Neither below nor inside the interval: the CI is too low
    return 'too_low'
示例#2
0
    def test_corr_ci(self):
        """Check that ``corr_ci`` agrees with direct ``bootstrap_ci`` calls.

        Covers the three bootstrap method names, the one-tailed variant, the
        ``None``/``'none'`` methods (which must yield ``(None, None)``), and
        an unknown method name (which must raise).
        """
        # Regression test
        np.random.seed(3)
        X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        Y = np.array([[5, 2, 7], [1, 7, 3], [4, 2, 2]])
        corr_func = functools.partial(global_corr, pearsonr)

        # Make sure we get the same result going through bootstrap_ci and
        # corr_ci. Each method name is paired with the sampling function that
        # corr_ci is expected to use for it; the calls are issued in the same
        # order as before so random-state consumption is unchanged.
        method_to_sampler = (
            ('bootstrap-system', bootstrap_system_sample),
            ('bootstrap-input', bootstrap_input_sample),
            ('bootstrap-both', bootstrap_both_sample),
        )
        for method, sample_func in method_to_sampler:
            expected_lower, expected_upper = bootstrap_ci(
                corr_func, X, Y, sample_func)
            lower, upper = corr_ci(corr_func, X, Y, method)
            # msg identifies which method failed now that the cases share code
            self.assertAlmostEqual(lower, expected_lower, places=4, msg=method)
            self.assertAlmostEqual(upper, expected_upper, places=4, msg=method)

        # If we do a single tail, the result should be the same with alpha / 2
        expected_lower, expected_upper = bootstrap_ci(corr_func, X, Y,
                                                      bootstrap_system_sample)
        lower, upper = corr_ci(corr_func,
                               X,
                               Y,
                               'bootstrap-system',
                               alpha=0.025,
                               two_tailed=False)
        self.assertAlmostEqual(lower, expected_lower, places=4)
        self.assertAlmostEqual(upper, expected_upper, places=4)

        # None cases. Use unittest assertions rather than bare ``assert`` so
        # the checks still run under ``python -O`` and report the actual value
        # on failure.
        self.assertEqual(corr_ci(corr_func, X, Y, None), (None, None))
        self.assertEqual(corr_ci(corr_func, X, Y, 'none'), (None, None))

        with self.assertRaises(Exception):
            corr_ci(corr_func, X, Y, 'does-not-exist')
示例#3
0
def compute_system_level_correlations(X: np.ndarray,
                                      Y: np.ndarray,
                                      ci_method: str,
                                      alpha: float,
                                      two_tailed: bool,
                                      ci_kwargs: Dict = None) -> Dict:
    """Compute system-level Pearson, Spearman and Kendall results.

    For each coefficient, ``system_level_corr`` is bound to the matching
    statistic function, evaluated on ``X`` and ``Y`` with
    ``return_pvalue=True``, and then passed to ``corr_ci`` to obtain the
    lower and upper confidence bounds.

    Args:
        X: Score matrix; its ``shape`` is unpacked as
            ``(num_summarizers, num_instances)``.
        Y: Second score matrix, compared against ``X``.
        ci_method: Confidence-interval method name forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        two_tailed: Forwarded to ``corr_ci``.
        ci_kwargs: Optional extra CI arguments; ``None`` (or any falsy value)
            is treated as an empty dict before being split per coefficient by
            ``_split_correlation_kwargs``.

    Returns:
        A dict with the matrix dimensions, the CI settings, and one
        ``'pearson'``/``'spearman'``/``'kendall'`` entry each holding the
        coefficient, its p-value and the CI bounds.
    """
    pearson_kwargs, spearman_kwargs, kendall_kwargs = _split_correlation_kwargs(
        ci_kwargs or {})

    # (output section, coefficient key, statistic function, CI kwargs).
    # The order fixes both the evaluation order and the output key order.
    specs = (
        ('pearson', 'r', pearsonr, pearson_kwargs),
        ('spearman', 'rho', spearmanr, spearman_kwargs),
        ('kendall', 'tau', kendalltau, kendall_kwargs),
    )

    sections = {}
    for section, coef_key, stat_func, extra_kwargs in specs:
        corr_func = functools.partial(system_level_corr, stat_func)
        coefficient, p_value = corr_func(X, Y, return_pvalue=True)
        lower, upper = corr_ci(corr_func,
                               X,
                               Y,
                               ci_method,
                               alpha,
                               two_tailed,
                               kwargs=extra_kwargs)
        sections[section] = {
            coef_key: coefficient,
            'p_value': p_value,
            'lower': lower,
            'upper': upper
        }

    num_summarizers, num_instances = X.shape

    results = {
        'num_summarizers': num_summarizers,
        'num_instances': num_instances,
        'ci_method': ci_method,
        'alpha': alpha,
        'two_tailed': two_tailed,
    }
    results.update(sections)
    return results
示例#4
0
def compute_global_correlations(X: np.ndarray,
                                Y: np.ndarray,
                                ci_method: str,
                                alpha: float,
                                two_tailed: bool,
                                ci_kwargs: Dict = None) -> Dict:
    """Compute global Pearson, Spearman and Kendall results.

    For each coefficient, ``global_corr`` is bound to the matching statistic
    function, evaluated on ``X`` and ``Y`` with ``return_pvalue=True``, and
    then passed to ``corr_ci`` to obtain the lower and upper confidence
    bounds.

    Args:
        X: Score matrix; its non-NaN entries are counted for
            ``'num_summaries'``.
        Y: Second score matrix, compared against ``X``.
        ci_method: Confidence-interval method name forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        two_tailed: Forwarded to ``corr_ci``.
        ci_kwargs: Optional extra CI arguments; ``None`` (or any falsy value)
            is treated as an empty dict before being split per coefficient by
            ``_split_correlation_kwargs``.

    Returns:
        A dict with the number of non-NaN scores in ``X``, the CI settings,
        and one ``'pearson'``/``'spearman'``/``'kendall'`` entry each holding
        the coefficient, its p-value and the CI bounds.
    """
    pearson_kwargs, spearman_kwargs, kendall_kwargs = _split_correlation_kwargs(
        ci_kwargs or {})

    # (output section, coefficient key, statistic function, CI kwargs).
    # The order fixes both the evaluation order and the output key order.
    specs = (
        ('pearson', 'r', pearsonr, pearson_kwargs),
        ('spearman', 'rho', spearmanr, spearman_kwargs),
        ('kendall', 'tau', kendalltau, kendall_kwargs),
    )

    sections = {}
    for section, coef_key, stat_func, extra_kwargs in specs:
        corr_func = functools.partial(global_corr, stat_func)
        coefficient, p_value = corr_func(X, Y, return_pvalue=True)
        lower, upper = corr_ci(corr_func,
                               X,
                               Y,
                               ci_method,
                               alpha,
                               two_tailed,
                               kwargs=extra_kwargs)
        sections[section] = {
            coef_key: coefficient,
            'p_value': p_value,
            'lower': lower,
            'upper': upper
        }

    # Number of non-NaN scores in X
    is_present = ~np.isnan(X)
    num_summaries = int(is_present.sum())

    results = {
        'num_summaries': num_summaries,
        'ci_method': ci_method,
        'alpha': alpha,
        'two_tailed': two_tailed,
    }
    results.update(sections)
    return results
示例#5
0
def compute_summary_level_correlations(X: np.ndarray,
                                       Y: np.ndarray,
                                       ci_method: str,
                                       alpha: float,
                                       two_tailed: bool,
                                       ci_kwargs: Dict = None) -> Dict:
    """Compute summary-level Pearson, Spearman and Kendall results.

    For each coefficient, ``summary_level_corr`` is bound to the matching
    statistic function, evaluated on ``X`` and ``Y`` with
    ``return_num_instances=True``, and then passed to ``corr_ci`` to obtain
    the lower and upper confidence bounds. No p-values are reported at this
    level.

    Args:
        X: First score matrix.
        Y: Second score matrix, compared against ``X``.
        ci_method: Confidence-interval method name forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        two_tailed: Forwarded to ``corr_ci``.
        ci_kwargs: Optional extra CI arguments; ``None`` (or any falsy value)
            is treated as an empty dict before being split per coefficient by
            ``_split_correlation_kwargs``.

    Returns:
        A dict with the group count reported by ``summary_level_corr``, the
        CI settings, and one ``'pearson'``/``'spearman'``/``'kendall'`` entry
        each holding the coefficient and the CI bounds.
    """
    pearson_kwargs, spearman_kwargs, kendall_kwargs = _split_correlation_kwargs(
        ci_kwargs or {})

    # (output section, coefficient key, statistic function, CI kwargs).
    # The order fixes both the evaluation order and the output key order.
    specs = (
        ('pearson', 'r', pearsonr, pearson_kwargs),
        ('spearman', 'rho', spearmanr, spearman_kwargs),
        ('kendall', 'tau', kendalltau, kendall_kwargs),
    )

    sections = {}
    group_counts = []
    for section, coef_key, stat_func, extra_kwargs in specs:
        corr_func = functools.partial(summary_level_corr, stat_func)
        coefficient, num_groups = corr_func(X, Y, return_num_instances=True)
        lower, upper = corr_ci(corr_func,
                               X,
                               Y,
                               ci_method,
                               alpha,
                               two_tailed,
                               kwargs=extra_kwargs)
        sections[section] = {
            coef_key: coefficient,
            'lower': lower,
            'upper': upper,
        }
        group_counts.append(num_groups)

    # All three coefficients must have been computed over the same number of
    # groups; the Pearson count is the one reported.
    r_groups, rho_groups, tau_groups = group_counts
    assert r_groups == rho_groups == tau_groups

    results = {
        'num_summary_groups': r_groups,
        'ci_method': ci_method,
        'alpha': alpha,
        'two_tailed': two_tailed,
    }
    results.update(sections)
    return results