def _run_simulation(X: np.ndarray,
                    Y: np.ndarray,
                    corr_func,
                    method: str,
                    alpha: float,
                    random_seed: int) -> str:
    """Run one simulation and report where the held-out correlation falls.

    The global NumPy RNG is seeded with ``random_seed``, ``sample_submatrices``
    splits ``X``/``Y`` into four pairs, a confidence interval is built with
    ``corr_ci`` on the first pair, and the correlation of the last pair is
    compared against that interval.

    Args:
        X: First score matrix, forwarded to ``sample_submatrices``.
        Y: Second score matrix, forwarded to ``sample_submatrices``.
        corr_func: Callable applied as ``corr_func(X, Y)`` to get a correlation.
        method: Confidence-interval method name forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        random_seed: Seed passed to ``np.random.seed``.

    Returns:
        ``'too_high'`` if the held-out correlation is below the interval,
        ``'contains'`` if it lies within the interval (bounds inclusive),
        and ``'too_low'`` otherwise.
    """
    np.random.seed(random_seed)

    # Only the first and the last of the four sampled pairs are used.
    first_pair, _, _, last_pair = sample_submatrices(X, Y)
    X_A, Y_A = first_pair
    X_D, Y_D = last_pair

    ci_lower, ci_upper = corr_ci(corr_func, X_A, Y_A, method, alpha=alpha)
    held_out_corr = corr_func(X_D, Y_D)

    if held_out_corr < ci_lower:
        # The CI is too high
        return 'too_high'
    if ci_lower <= held_out_corr <= ci_upper:
        return 'contains'
    # The CI is too low (also reached by any value failing both tests above)
    return 'too_low'
def test_corr_ci(self):
    """Check that ``corr_ci`` agrees with direct calls to ``bootstrap_ci``.

    For every bootstrap method name, the interval returned by ``corr_ci``
    must match (to 4 decimal places) the one returned by ``bootstrap_ci``
    with the corresponding sampling function. The test also covers the
    one-tailed variant, the "no confidence interval" methods, and an
    unknown method name.
    """
    # Regression test
    np.random.seed(3)
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    Y = np.array([[5, 2, 7], [1, 7, 3], [4, 2, 2]])
    corr_func = functools.partial(global_corr, pearsonr)

    # Make sure we get the same result going through bootstrap_ci and corr_ci.
    # NOTE(review): the RNG is seeded only once above, so the call order
    # below (bootstrap_ci first, then corr_ci, per method) is kept exactly
    # as it was in case the callees draw from the global RNG -- confirm.
    method_to_sampler = (
        ('bootstrap-system', bootstrap_system_sample),
        ('bootstrap-input', bootstrap_input_sample),
        ('bootstrap-both', bootstrap_both_sample),
    )
    for method, sample_func in method_to_sampler:
        expected_lower, expected_upper = bootstrap_ci(corr_func, X, Y, sample_func)
        lower, upper = corr_ci(corr_func, X, Y, method)
        self.assertAlmostEqual(lower, expected_lower, places=4)
        self.assertAlmostEqual(upper, expected_upper, places=4)

    # If we do a single tail, the result should be the same with alpha / 2
    # (presumably 0.025 is half of the default alpha -- confirm in corr_ci).
    expected_lower, expected_upper = bootstrap_ci(corr_func, X, Y, bootstrap_system_sample)
    lower, upper = corr_ci(corr_func, X, Y, 'bootstrap-system', alpha=0.025, two_tailed=False)
    self.assertAlmostEqual(lower, expected_lower, places=4)
    self.assertAlmostEqual(upper, expected_upper, places=4)

    # None cases. unittest assertions are used instead of bare ``assert`` so
    # the checks survive ``python -O`` and report both values on failure.
    self.assertEqual(corr_ci(corr_func, X, Y, None), (None, None))
    self.assertEqual(corr_ci(corr_func, X, Y, 'none'), (None, None))

    # An unrecognized method name must raise.
    with self.assertRaises(Exception):
        corr_ci(corr_func, X, Y, 'does-not-exist')
def compute_system_level_correlations(X: np.ndarray,
                                      Y: np.ndarray,
                                      ci_method: str,
                                      alpha: float,
                                      two_tailed: bool,
                                      ci_kwargs: Dict = None) -> Dict:
    """Compute system-level Pearson, Spearman and Kendall correlations.

    For each coefficient, ``system_level_corr`` is called to obtain the
    correlation and its p-value, and ``corr_ci`` is called to obtain the
    confidence interval bounds.

    Args:
        X: Score matrix; its shape is reported as
            ``(num_summarizers, num_instances)``.
        Y: Score matrix forwarded together with ``X``.
        ci_method: Confidence-interval method forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        two_tailed: Tail flag forwarded to ``corr_ci``.
        ci_kwargs: Optional extra arguments; split per coefficient by
            ``_split_correlation_kwargs``. ``None`` is treated as ``{}``.

    Returns:
        A dict with the matrix dimensions, the CI settings, and one nested
        dict per coefficient (``'pearson'``, ``'spearman'``, ``'kendall'``)
        holding the value, ``'p_value'``, ``'lower'`` and ``'upper'``.
    """
    if not ci_kwargs:
        ci_kwargs = {}
    pearson_kwargs, spearman_kwargs, kendall_kwargs = _split_correlation_kwargs(ci_kwargs)

    # (result key, name of the coefficient value, base function, CI kwargs)
    coefficients = (
        ('pearson', 'r', pearsonr, pearson_kwargs),
        ('spearman', 'rho', spearmanr, spearman_kwargs),
        ('kendall', 'tau', kendalltau, kendall_kwargs),
    )

    per_coefficient = {}
    for result_key, value_key, base_func, kwargs in coefficients:
        corr_func = functools.partial(system_level_corr, base_func)
        value, p_value = corr_func(X, Y, return_pvalue=True)
        lower, upper = corr_ci(corr_func, X, Y, ci_method, alpha, two_tailed,
                               kwargs=kwargs)
        per_coefficient[result_key] = {
            value_key: value,
            'p_value': p_value,
            'lower': lower,
            'upper': upper,
        }

    num_summarizers, num_instances = X.shape
    summary = {
        'num_summarizers': num_summarizers,
        'num_instances': num_instances,
        'ci_method': ci_method,
        'alpha': alpha,
        'two_tailed': two_tailed,
    }
    summary.update(per_coefficient)
    return summary
def compute_global_correlations(X: np.ndarray,
                                Y: np.ndarray,
                                ci_method: str,
                                alpha: float,
                                two_tailed: bool,
                                ci_kwargs: Dict = None) -> Dict:
    """Compute global Pearson, Spearman and Kendall correlations.

    For each coefficient, ``global_corr`` is called to obtain the correlation
    and its p-value, and ``corr_ci`` is called to obtain the confidence
    interval bounds.

    Args:
        X: Score matrix; its non-NaN entries are counted as summaries.
        Y: Score matrix forwarded together with ``X``.
        ci_method: Confidence-interval method forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        two_tailed: Tail flag forwarded to ``corr_ci``.
        ci_kwargs: Optional extra arguments; split per coefficient by
            ``_split_correlation_kwargs``. ``None`` is treated as ``{}``.

    Returns:
        A dict with ``'num_summaries'``, the CI settings, and one nested dict
        per coefficient (``'pearson'``, ``'spearman'``, ``'kendall'``) holding
        the value, ``'p_value'``, ``'lower'`` and ``'upper'``.
    """
    if not ci_kwargs:
        ci_kwargs = {}
    pearson_kwargs, spearman_kwargs, kendall_kwargs = _split_correlation_kwargs(ci_kwargs)

    def _describe(base_func, value_key, kwargs):
        # Correlation + p-value first, then its confidence interval.
        corr_func = functools.partial(global_corr, base_func)
        value, p_value = corr_func(X, Y, return_pvalue=True)
        lower, upper = corr_ci(corr_func, X, Y, ci_method, alpha, two_tailed,
                               kwargs=kwargs)
        return {
            value_key: value,
            'p_value': p_value,
            'lower': lower,
            'upper': upper,
        }

    pearson_stats = _describe(pearsonr, 'r', pearson_kwargs)
    spearman_stats = _describe(spearmanr, 'rho', spearman_kwargs)
    kendall_stats = _describe(kendalltau, 'tau', kendall_kwargs)

    # number of non-NaN scores
    present = ~np.isnan(X)
    num_summaries = int(present.sum())

    return {
        'num_summaries': num_summaries,
        'ci_method': ci_method,
        'alpha': alpha,
        'two_tailed': two_tailed,
        'pearson': pearson_stats,
        'spearman': spearman_stats,
        'kendall': kendall_stats,
    }
def compute_summary_level_correlations(X: np.ndarray,
                                       Y: np.ndarray,
                                       ci_method: str,
                                       alpha: float,
                                       two_tailed: bool,
                                       ci_kwargs: Dict = None) -> Dict:
    """Compute summary-level Pearson, Spearman and Kendall correlations.

    For each coefficient, ``summary_level_corr`` is called (with
    ``return_num_instances=True``) to obtain the correlation and a group
    count, and ``corr_ci`` is called to obtain the confidence interval bounds.

    Args:
        X: Score matrix forwarded to the correlation functions.
        Y: Score matrix forwarded together with ``X``.
        ci_method: Confidence-interval method forwarded to ``corr_ci``.
        alpha: Significance level forwarded to ``corr_ci``.
        two_tailed: Tail flag forwarded to ``corr_ci``.
        ci_kwargs: Optional extra arguments; split per coefficient by
            ``_split_correlation_kwargs``. ``None`` is treated as ``{}``.

    Returns:
        A dict with ``'num_summary_groups'``, the CI settings, and one nested
        dict per coefficient (``'pearson'``, ``'spearman'``, ``'kendall'``)
        holding the value, ``'lower'`` and ``'upper'``.
    """
    if not ci_kwargs:
        ci_kwargs = {}
    pearson_kwargs, spearman_kwargs, kendall_kwargs = _split_correlation_kwargs(ci_kwargs)

    # (result key, name of the coefficient value, base function, CI kwargs)
    coefficients = (
        ('pearson', 'r', pearsonr, pearson_kwargs),
        ('spearman', 'rho', spearmanr, spearman_kwargs),
        ('kendall', 'tau', kendalltau, kendall_kwargs),
    )

    per_coefficient = {}
    group_counts = []
    for result_key, value_key, base_func, kwargs in coefficients:
        corr_func = functools.partial(summary_level_corr, base_func)
        value, num_groups = corr_func(X, Y, return_num_instances=True)
        lower, upper = corr_ci(corr_func, X, Y, ci_method, alpha, two_tailed,
                               kwargs=kwargs)
        group_counts.append(num_groups)
        per_coefficient[result_key] = {
            value_key: value,
            'lower': lower,
            'upper': upper,
        }

    # All three coefficients are expected to report the same group count.
    r_groups, rho_groups, tau_groups = group_counts
    assert r_groups == rho_groups == tau_groups

    summary = {
        'num_summary_groups': r_groups,
        'ci_method': ci_method,
        'alpha': alpha,
        'two_tailed': two_tailed,
    }
    summary.update(per_coefficient)
    return summary