def run_correlation_test(data_generator, test, test_choices,
                         pval_assignment_method, permutations=None):
    """Correlate every (otu, metadata) pair and assign each result a p-value.

    Inputs:
     data_generator - generator from correlation_row_generator, basically a
      list of tuples where each tuple contains two arrays.
     test - str, one of CORRELATION_TEST_CHOICES keys.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for
      bootstrapped methods.

    Returns a 2-tuple (corr_coefs, pvals) of parallel lists holding one
    entry per tuple yielded by data_generator, in iteration order.
    """
    correlate = test_choices[test]
    bootstrapped = pval_assignment_method == 'bootstrapped'

    corr_coefs = []
    pvals = []
    for otu_vals, md_vals in data_generator:
        coef = correlate(otu_vals, md_vals)
        # Only the bootstrapped method is handed the permutation count, the
        # test function and the raw vectors; every other method gets just
        # the coefficient, the sample size and the method name.
        if bootstrapped:
            extra_args = (permutations, correlate, otu_vals, md_vals)
        else:
            extra_args = ()
        pval = assign_correlation_pval(coef, len(otu_vals),
                                       pval_assignment_method, *extra_args)
        corr_coefs.append(coef)
        pvals.append(pval)
    return corr_coefs, pvals
def test_run_grouped_correlation(self):
    """Grouped correlation output matches values computed step by step."""
    # Reference hand calculation of spearman and pearson for o1:
    # md_g1 = array([6.1, 0.0, 14.2, 6.5, 21])
    # md_g2 = array([.3, 9.1, .8, 5.0, 11])
    # o1_g1 = array([22, 48, 34, 0, 0])
    # o1_g2 = array([0, 15, 0, 76, 74])
    # c1_g1 = -0.6155870112510925 #spearman(md_g1, o1_g1)
    # c2_g2 = 0.66688592885535025 #spearman(md_g2, o1_g2)
    # #fisher_population_correlation([-0.6155870112510925,
    # #    0.66688592885535025], [5,5])
    # fpc, h = (0.043595171909468329, 0.12776325359984511)
    n_otus = 10
    group_size = 5
    pval_method = 'parametric_t_distribution'

    # Per-group expectations: one rho and one p-value per otu row.
    exp_rhos = []
    exp_pvals = []
    for group in (0, 1):
        group_rhos = [
            corrcoef(self.otus1[group][row], self.mds1[group])[0][1]
            for row in range(n_otus)
        ]
        group_pvals = [
            assign_correlation_pval(rho, group_size, pval_method)
            for rho in group_rhos
        ]
        exp_rhos.append(group_rhos)
        exp_pvals.append(group_pvals)

    # Combined expectations: pair up the two groups' values otu by otu.
    exp_f_pvals = []
    exp_f_rhos = []
    exp_f_hs = []
    for row in range(n_otus):
        exp_f_pvals.append(fisher([exp_pvals[0][row], exp_pvals[1][row]]))
        combined = fisher_population_correlation(
            [exp_rhos[0][row], exp_rhos[1][row]], [group_size, group_size])
        exp_f_rhos.append(combined[0])
        exp_f_hs.append(combined[1])

    observed = run_grouped_correlation(self.mds1, self.otus1, 'pearson',
                                       CORRELATION_TEST_CHOICES, pval_method)
    obs_rhos, obs_pvals, obs_f_pvals, obs_f_rhos, obs_f_hs = observed

    self.assertFloatEqual(obs_rhos, exp_rhos)
    self.assertFloatEqual(obs_pvals, exp_pvals)
    self.assertFloatEqual(obs_f_pvals, exp_f_pvals)
    self.assertFloatEqual(obs_f_rhos, exp_f_rhos)
    self.assertFloatEqual(obs_f_hs, exp_f_hs)
def run_grouped_correlation(md_vals, otu_arrays, test, test_choices,
                            pval_assignment_method, permutations=None):
    """Run grouped correlation test.

    This function runs the grouped correlation test. Briefly, it ingests the
    metadata values, the arrays of otu values that are to be correlated with
    them, and the test and pvalue assignment method to use. It calculates the
    individual correlation coefficients for each group (specified implicitly
    by the grouping and ordering of md_vals and otu_arrays) and then it
    combines the corrcoeffs and the pvalues with methods by Fisher.

    Inputs:
     md_vals - list of 1d arrays, continuous metadata to be correlated. One
      entry per group.
     otu_arrays - list of 2d arrays, otu abundances to be correlated. One
      entry per group; each row is correlated against that group's md_vals
      entry.
     test - str, one of CORRELATION_TEST_CHOICES keys.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for
      bootstrapped methods.

    Returns a 5-tuple:
     rhos - list of 1d arrays, per-group correlation coefficient of each row.
     pvals - list of 1d arrays, per-group p-value of each row.
     fisher_pvals - array, `fisher` applied across groups for each row.
     the first and the second value that `fisher_population_correlation`
      returns for each row (applied across groups with the group sample
      sizes), as two separate arrays.
    """
    test_fn = test_choices[test]
    # Must be a real list: it is indexed per group and then passed on again
    # below, which a Python 3 map() iterator cannot support.
    sample_sizes = [len(group_md) for group_md in md_vals]

    # find the correlations. rhos is list of 1D arrays.
    rhos = []
    for i, group_md in enumerate(md_vals):
        rhos.append(apply_along_axis(test_fn, 1, otu_arrays[i], group_md))

    pvals = []
    for i, group_rhos in enumerate(rhos):
        pvals_i = zeros(len(group_rhos))
        for j, rho in enumerate(group_rhos):
            pvals_i[j] = assign_correlation_pval(
                rho, sample_sizes[i], pval_assignment_method, permutations,
                test_fn, otu_arrays[i][j], md_vals[i])
        pvals.append(pvals_i)

    # calculate combined stats; axis 0 runs across groups for a given row.
    fisher_pvals = apply_along_axis(fisher, 0, array(pvals))
    fisher_rho_and_h = apply_along_axis(fisher_population_correlation, 0,
                                        array(rhos), sample_sizes)
    return (rhos, pvals, fisher_pvals, fisher_rho_and_h[0],
            fisher_rho_and_h[1])
def run_grouped_correlation(md_vals, otu_arrays, test, test_choices,
                            pval_assignment_method, permutations=None):
    """Run grouped correlation test.

    This function runs the grouped correlation test. Briefly, it ingests the
    metadata values, the arrays of otu values that are to be correlated with
    them, and the test and pvalue assignment method to use. It calculates the
    individual correlation coefficients for each group (specified implicitly
    by the grouping and ordering of md_vals and otu_arrays) and then it
    combines the corrcoeffs and the pvalues with methods by Fisher.

    Inputs:
     md_vals - list of 1d arrays, continuous metadata to be correlated. One
      entry per group.
     otu_arrays - list of 2d arrays, otu abundances to be correlated. One
      entry per group; each row is correlated against that group's md_vals
      entry.
     test - str, one of CORRELATION_TEST_CHOICES keys.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for
      bootstrapped methods.

    Returns a 5-tuple:
     rhos - list of 1d arrays, per-group correlation coefficient of each row.
     pvals - list of 1d arrays, per-group p-value of each row.
     fisher_pvals - array, `fisher` applied across groups for each row.
     the first and the second value that `fisher_population_correlation`
      returns for each row (applied across groups with the group sample
      sizes), as two separate arrays.
    """
    test_fn = test_choices[test]
    # Must be a real list: it is indexed per group and then passed on again
    # below, which a Python 3 map() iterator cannot support.
    sample_sizes = [len(group_md) for group_md in md_vals]

    # find the correlations. rhos is list of 1D arrays.
    rhos = []
    for i, group_md in enumerate(md_vals):
        rhos.append(apply_along_axis(test_fn, 1, otu_arrays[i], group_md))

    pvals = []
    for i, group_rhos in enumerate(rhos):
        pvals_i = zeros(len(group_rhos))
        for j, rho in enumerate(group_rhos):
            pvals_i[j] = assign_correlation_pval(
                rho, sample_sizes[i], pval_assignment_method, permutations,
                test_fn, otu_arrays[i][j], md_vals[i])
        pvals.append(pvals_i)

    # calculate combined stats; axis 0 runs across groups for a given row.
    fisher_pvals = apply_along_axis(fisher, 0, array(pvals))
    fisher_rho_and_h = apply_along_axis(fisher_population_correlation, 0,
                                        array(rhos), sample_sizes)
    return (rhos, pvals, fisher_pvals, fisher_rho_and_h[0],
            fisher_rho_and_h[1])
def naive_cc_tool(bt, corr_method, pval_assignment_method, cval_fp, pval_fp):
    '''Calculate co-occurrence using naive approach.

    Every pair of observations (o1, o2) with o1 < o2 is correlated once, so
    only the upper triangle of each output matrix is filled in; the diagonal
    and the lower triangle are left at 0.

    Inputs:
     bt - biom table with OTUs to be correlated.
     corr_method - str, correlation statistics to use, one of pearson,
      spearmans_rho, or kendalls_tau.
     pval_assignment_method - str, one of parametric_t_distribution,
      fisher_z_transform, bootstrapped, kendall. The string 'None' skips
      p-value assignment and records 1.0 for every tested pair.
     cval_fp - path the tab-separated correlation matrix is written to.
     pval_fp - path the tab-separated p-value matrix is written to.

    Nothing is returned; the results are only written to the two files.
    '''
    obs_ids = bt.ObservationIds
    data = array([bt.observationData(i) for i in obs_ids])
    # Unpacking keeps the requirement that data is 2d (observations x
    # samples); only the observation count is needed afterwards.
    n_obs, _ = data.shape
    ccs = zeros((n_obs, n_obs))
    ps = zeros((n_obs, n_obs))
    test_fn = CORRELATION_TEST_CHOICES[corr_method]
    for o1 in range(n_obs):
        for o2 in range(o1 + 1, n_obs):
            cc = test_fn(data[o1], data[o2])
            ccs[o1, o2] = cc
            # assign correlation pvalues
            if pval_assignment_method == 'None':
                ps[o1, o2] = 1.0
            else:
                ps[o1, o2] = assign_correlation_pval(
                    cc, len(data[o1]), pval_assignment_method,
                    permutations=1000, perm_test_fn=test_fn, v1=data[o1],
                    v2=data[o2])

    # write values
    header = '#OTU ID\t' + '\t'.join(obs_ids)
    for out_fp, matrix in ((cval_fp, ccs), (pval_fp, ps)):
        lines = [header]
        for i in range(n_obs):
            lines.append(obs_ids[i] + '\t' + '\t'.join(map(str, matrix[i])))
        # The context manager closes the handle even if the write fails.
        with open(out_fp, 'w') as out_f:
            out_f.write('\n'.join(lines))