示例#1
0
def run_correlation_test(data_generator,
                         test,
                         test_choices,
                         pval_assignment_method,
                         permutations=None):
    """Correlate each (otu, metadata) pair and assign a pvalue to the result.

    Inputs:
     data_generator - iterable of 2-tuples (otu_vals, md_vals); each tuple
      holds the two vectors to be correlated.
     test - str, key into test_choices selecting the correlation function.
     test_choices - dict mapping test names to functions called as
      fn(otu_vals, md_vals).
     pval_assignment_method - str, forwarded to assign_correlation_pval.
     permutations - int or None, only forwarded when pval_assignment_method
      is 'bootstrapped'.

    Returns:
     (corr_coefs, pvals) - two lists, one entry per tuple in data_generator,
      in iteration order.
    """
    correlate = test_choices[test]
    resample = pval_assignment_method == 'bootstrapped'

    coefficients = []
    pvalues = []
    for otu_row, md_row in data_generator:
        coef = correlate(otu_row, md_row)
        # Only the bootstrapped method is handed the raw vectors, the
        # correlation function and the permutation count.
        if resample:
            extra_args = (permutations, correlate, otu_row, md_row)
        else:
            extra_args = ()
        pvalue = assign_correlation_pval(coef, len(otu_row),
                                         pval_assignment_method, *extra_args)
        coefficients.append(coef)
        pvalues.append(pvalue)
    return coefficients, pvalues
示例#2
0
def run_correlation_test(data_generator, test, test_choices,
                         pval_assignment_method, permutations=None):
    """Compute a correlation coefficient and pvalue for every input pair.

    Inputs:
     data_generator - iterable yielding (otu_vals, md_vals) tuples of two
      vectors each.
     test - str, name of the correlation test; looked up in test_choices.
     test_choices - dict of test name -> function(otu_vals, md_vals).
     pval_assignment_method - str, passed through to assign_correlation_pval.
     permutations - int or None, used only by the 'bootstrapped' method.

    Returns:
     Tuple of two lists (coefficients, pvalues) ordered like data_generator.
    """
    stat_fn = test_choices[test]
    scored = []
    for x_vals, y_vals in data_generator:
        stat = stat_fn(x_vals, y_vals)
        n_obs = len(x_vals)
        if pval_assignment_method != 'bootstrapped':
            p = assign_correlation_pval(stat, n_obs, pval_assignment_method)
        else:
            # Bootstrapping re-runs the statistic, so it needs the function
            # and both vectors in addition to the permutation count.
            p = assign_correlation_pval(stat, n_obs, pval_assignment_method,
                                        permutations, stat_fn, x_vals, y_vals)
        scored.append((stat, p))
    # Split the (coefficient, pvalue) pairs back into two parallel lists.
    return [pair[0] for pair in scored], [pair[1] for pair in scored]
示例#3
0
    def test_run_grouped_correlation(self):
        """run_grouped_correlation matches values rebuilt from its helpers."""
        # Hand-calculated reference for OTU o1, kept from the original notes
        # (spearman; not asserted on below, which exercises pearson):
        #   md_g1 = [6.1, 0.0, 14.2, 6.5, 21]   o1_g1 = [22, 48, 34, 0, 0]
        #   md_g2 = [.3, 9.1, .8, 5.0, 11]      o1_g2 = [0, 15, 0, 76, 74]
        #   spearman(md_g1, o1_g1) = -0.6155870112510925
        #   spearman(md_g2, o1_g2) =  0.66688592885535025
        #   fisher_population_correlation of those two rhos with sizes [5, 5]
        #     -> (0.043595171909468329, 0.12776325359984511)
        otu_indices = range(10)
        pval_method = 'parametric_t_distribution'

        # Per-group expected correlations, then their pvalues.
        exp_rhos = [
            [corrcoef(self.otus1[grp][i], self.mds1[grp])[0][1]
             for i in otu_indices]
            for grp in (0, 1)]
        exp_pvals = [
            [assign_correlation_pval(rho, 5, pval_method)
             for rho in group_rhos]
            for group_rhos in exp_rhos]

        # Combine the two groups OTU by OTU with Fisher's methods.
        exp_f_pvals = [fisher([p1, p2])
                       for p1, p2 in zip(exp_pvals[0], exp_pvals[1])]
        combined = [fisher_population_correlation([r1, r2], [5, 5])
                    for r1, r2 in zip(exp_rhos[0], exp_rhos[1])]
        exp_f_rhos = [pair[0] for pair in combined]
        exp_f_hs = [pair[1] for pair in combined]

        observed = run_grouped_correlation(self.mds1, self.otus1, 'pearson',
                                           CORRELATION_TEST_CHOICES,
                                           pval_method)
        obs_rhos, obs_pvals, obs_f_pvals, obs_f_rhos, obs_f_hs = observed

        checks = ((obs_rhos, exp_rhos),
                  (obs_pvals, exp_pvals),
                  (obs_f_pvals, exp_f_pvals),
                  (obs_f_rhos, exp_f_rhos),
                  (obs_f_hs, exp_f_hs))
        for obs, exp in checks:
            self.assertFloatEqual(obs, exp)
示例#4
0
def run_grouped_correlation(md_vals,
                            otu_arrays,
                            test,
                            test_choices,
                            pval_assignment_method,
                            permutations=None):
    """Run grouped correlation test

    This function runs the grouped correlation test. Briefly, it ingests the
    metadata values, the arrays of otu values that are to be correlated with
    them, and the test and pvalue assignment method to use. It calculates the
    individual correlation coefficients for each group (specified implicitly
    by the grouping and ordering of md_vals and otu_arrays) and then it
    combines the corrcoeffs and the pvalues with methods by Fisher.

    Inputs:
     md_vals - list of 1d arrays, continuous metadata to be correlated; one
      array per group.
     otu_arrays - list of 2d arrays (one per group, indexed as
      otu_arrays[group][otu]); each row is correlated against that group's
      metadata vector.
     test - str, one of CORRELATION_TEST_CHOICES keys.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for
      bootstrapped methods.

    Returns:
     A 5-tuple (rhos, pvals, fisher_pvals, fisher_rhos, fisher_hs):
      rhos - list with one array of correlation coefficients per group.
      pvals - list with one array of pvalues per group, parallel to rhos.
      fisher_pvals - result of applying fisher down the group axis of pvals.
      fisher_rhos, fisher_hs - first and second outputs of
       fisher_population_correlation applied down the group axis of rhos.
    """
    test_fn = test_choices[test]
    # Must be a real list: it is indexed below and handed to every
    # fisher_population_correlation call. Under Python 3 ``map`` returns a
    # one-shot iterator that supports neither.
    sample_sizes = [len(group_md) for group_md in md_vals]

    # find the correlations. rhos is list of 1D arrays.
    rhos = []
    for i, group_md in enumerate(md_vals):
        # apply_along_axis calls test_fn(otu_row, group_md) for every row.
        rhos.append(apply_along_axis(test_fn, 1, otu_arrays[i], group_md))

    pvals = []
    for i, group_rhos in enumerate(rhos):
        group_pvals = zeros(len(group_rhos))
        for j, rho in enumerate(group_rhos):
            group_pvals[j] = assign_correlation_pval(rho, sample_sizes[i],
                                                     pval_assignment_method,
                                                     permutations, test_fn,
                                                     otu_arrays[i][j],
                                                     md_vals[i])
        pvals.append(group_pvals)

    # calculate combined stats
    fisher_pvals = apply_along_axis(fisher, 0, array(pvals))
    fisher_rho_and_h = apply_along_axis(fisher_population_correlation, 0,
                                        array(rhos), sample_sizes)
    return (rhos, pvals, fisher_pvals, fisher_rho_and_h[0],
            fisher_rho_and_h[1])
示例#5
0
def run_grouped_correlation(md_vals, otu_arrays, test, test_choices,
                            pval_assignment_method, permutations=None):
    """Run grouped correlation test

    This function runs the grouped correlation test. Briefly, it ingests the
    metadata values, the arrays of otu values that are to be correlated with
    them, and the test and pvalue assignment method to use. It calculates the
    individual correlation coefficients for each group (specified implicitly
    by the grouping and ordering of md_vals and otu_arrays) and then it
    combines the corrcoeffs and the pvalues with methods by Fisher.

    Inputs:
     md_vals - list of 1d arrays, continuous metadata to be correlated; one
      array per group.
     otu_arrays - list of 2d arrays (one per group, indexed as
      otu_arrays[group][otu]); each row is correlated against that group's
      metadata vector.
     test - str, one of CORRELATION_TEST_CHOICES keys.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for
      bootstrapped methods.

    Returns:
     A 5-tuple (rhos, pvals, fisher_pvals, fisher_rhos, fisher_hs):
      rhos - list with one array of correlation coefficients per group.
      pvals - list with one array of pvalues per group, parallel to rhos.
      fisher_pvals - result of applying fisher down the group axis of pvals.
      fisher_rhos, fisher_hs - first and second outputs of
       fisher_population_correlation applied down the group axis of rhos.
    """
    test_fn = test_choices[test]
    # Materialise the sizes: they are indexed per group and reused for every
    # column by apply_along_axis, which a Python 3 ``map`` iterator cannot do.
    sample_sizes = [len(group_md) for group_md in md_vals]

    # find the correlations. rhos is list of 1D arrays.
    rhos = []
    for i, group_md in enumerate(md_vals):
        # Each row of otu_arrays[i] is passed to test_fn with group_md.
        rhos.append(apply_along_axis(test_fn, 1, otu_arrays[i], group_md))

    pvals = []
    for i, group_rhos in enumerate(rhos):
        group_otus = otu_arrays[i]
        group_md = md_vals[i]
        n_samples = sample_sizes[i]
        group_pvals = zeros(len(group_rhos))
        for j, rho in enumerate(group_rhos):
            group_pvals[j] = assign_correlation_pval(
                rho, n_samples, pval_assignment_method, permutations,
                test_fn, group_otus[j], group_md)
        pvals.append(group_pvals)

    # calculate combined stats
    fisher_pvals = apply_along_axis(fisher, 0, array(pvals))
    fisher_rho_and_h = apply_along_axis(fisher_population_correlation, 0,
                                        array(rhos), sample_sizes)
    return (rhos, pvals, fisher_pvals, fisher_rho_and_h[0],
            fisher_rho_and_h[1])
示例#6
0
def naive_cc_tool(bt, corr_method, pval_assignment_method, cval_fp, pval_fp):
    '''Calculate co-occurence using naive approach.

    Every pair of OTUs (o1 < o2) is correlated and the results are written as
    two tab-separated square tables. Only the upper triangle is filled in;
    the diagonal and lower triangle keep their initial value of 0.0.

    Inputs:
     bt - biom table with OTUs to be correlated.
     corr_method - str, correlation statistics to use, one of pearson,
      spearmans_rho, or kendalls_tau (a CORRELATION_TEST_CHOICES key).
     pval_assignment_method - str, one of parametric_t_distribution,
      fisher_z_transform, bootstrapped, kendall. The literal string 'None'
      skips pvalue assignment and records 1.0 for every pair.
     cval_fp - str, path the correlation coefficient table is written to.
     pval_fp - str, path the pvalue table is written to.

    Returns:
     None. The two files are (over)written as a side effect.
    '''
    otu_ids = bt.ObservationIds
    data = array([bt.observationData(i) for i in otu_ids])
    # Unpacking (rather than shape[0]) keeps the failure on non-2D input.
    num_otus, _ = data.shape
    ccs = zeros((num_otus, num_otus))
    ps = zeros((num_otus, num_otus))
    test_fn = CORRELATION_TEST_CHOICES[corr_method]
    for o1 in range(num_otus):
        for o2 in range(o1 + 1, num_otus):
            cc = test_fn(data[o1], data[o2])
            ccs[o1][o2] = cc
            # assign correlation pvalues
            if pval_assignment_method == 'None':
                ps[o1][o2] = 1.0
            else:
                ps[o1][o2] = assign_correlation_pval(
                    cc, len(data[o1]), pval_assignment_method,
                    permutations=1000, perm_test_fn=test_fn,
                    v1=data[o1], v2=data[o2])

    header = '#OTU ID\t' + '\t'.join(otu_ids)

    def _table_text(matrix):
        # One header line, then one "<otu id>\t<row values>" line per OTU.
        rows = [otu_ids[i] + '\t' + '\t'.join(map(str, matrix[i]))
                for i in range(num_otus)]
        return '\n'.join([header] + rows)

    # write values; ``with`` closes each handle even if the write fails.
    with open(cval_fp, 'w') as out:
        out.write(_table_text(ccs))
    with open(pval_fp, 'w') as out:
        out.write(_table_text(ps))