Exemplo n.º 1
0
def run_correlation_test(data_generator, test, test_choices,
                         pval_assignment_method, permutations=None):
    """Correlate each pair of vectors and assign a p-value to every result.

    Inputs:
     data_generator - iterable of 2-tuples (otu_vals, md_vals); per the
      original notes this is the generator from correlation_row_generator.
     test - str, key used to look up the correlation function in
      test_choices (one of CORRELATION_TEST_CHOICES keys).
     test_choices - dict mapping test names to two-argument correlation
      functions (CORRELATION_TEST_CHOICES).
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations; only forwarded
      when pval_assignment_method is 'bootstrapped'.

    Returns:
     (corr_coefs, pvals) - two lists, one entry per tuple yielded by
      data_generator, in iteration order.
    """
    correlate_fn = test_choices[test]
    use_bootstrap = pval_assignment_method == 'bootstrapped'
    coefficients = []
    p_values = []
    for otu_vals, md_vals in data_generator:
        coef = correlate_fn(otu_vals, md_vals)
        # Only the bootstrapped method is handed the permutation count, the
        # test function and the raw vectors; every other method is called
        # with the coefficient, the sample size and the method name alone.
        if use_bootstrap:
            extra_args = (permutations, correlate_fn, otu_vals, md_vals)
        else:
            extra_args = ()
        p_value = assign_correlation_pval(coef, len(otu_vals),
                                          pval_assignment_method,
                                          *extra_args)
        coefficients.append(coef)
        p_values.append(p_value)
    return coefficients, p_values
Exemplo n.º 2
0
def run_correlation_test(data_generator,
                         test,
                         test_choices,
                         pval_assignment_method,
                         permutations=None):
    """Compute a correlation coefficient and p-value for each input pair.

    Inputs:
     data_generator - iterable yielding (otu_vals, md_vals) tuples; per the
      original notes it comes from correlation_row_generator.
     test - str, one of CORRELATION_TEST_CHOICES keys; selects the
      correlation function from test_choices.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for
      bootstrapped methods (ignored for any other method).

    Returns:
     (corr_coefs, pvals) - two parallel lists in iteration order.
    """
    test_fn = test_choices[test]
    results = []
    for otu_vals, md_vals in data_generator:
        r = test_fn(otu_vals, md_vals)
        pval_args = [r, len(otu_vals), pval_assignment_method]
        if pval_assignment_method == 'bootstrapped':
            # The bootstrapped method additionally receives the permutation
            # count, the test function and both vectors.
            pval_args += [permutations, test_fn, otu_vals, md_vals]
        results.append((r, assign_correlation_pval(*pval_args)))
    corr_coefs = [coef for coef, _ in results]
    pvals = [pval for _, pval in results]
    return corr_coefs, pvals
Exemplo n.º 3
0
def run_grouped_correlation(md_vals,
                            otu_arrays,
                            test,
                            test_choices,
                            pval_assignment_method,
                            permutations=None):
    """Run grouped correlation test

    This function runs the grouped correlation test. Briefly, it ingests the
    metadata values, the arrays of otu values that are to be correlated with
    them, and the test and pvalue assignment method to use. It calculates the
    individual correlation coefficients for each group (specified implicitly
    by the grouping and ordering of md_vals and otu_arrays) and then it combines
    the corrcoeffs and the pvalues with methods by Fisher.

    Inputs:
     md_vals - list of 1d arrays, continuous metadata to be correlated; one
      array per group.
     otu_arrays - list of 2d arrays, one per group, aligned with md_vals.
      Every row of otu_arrays[i] is correlated against md_vals[i].
     test - str, one of CORRELATION_TEST_CHOICES keys.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for bootstrapped
      methods.

    Returns:
     A 5-tuple (rhos, pvals, fisher_pvals, combined_0, combined_1) where
      rhos and pvals are lists holding one 1d array per group, fisher_pvals
      is the result of applying `fisher` column-wise over the groups' pvals,
      and combined_0 / combined_1 are the first and second rows of the
      column-wise `fisher_population_correlation` output (presumably the
      combined rho and its homogeneity statistic - confirm against that
      helper).
    """
    test_fn = test_choices[test]
    # Build a real list: on Python 3 ``map`` returns a one-shot iterator that
    # cannot be indexed in the loop below.
    sample_sizes = [len(group_md) for group_md in md_vals]

    # find the correlations. rhos is list of 1D arrays.
    # apply_along_axis forwards the extra positional argument, so each row of
    # the group's otu array is evaluated as test_fn(row, md_vals[i]).
    rhos = []
    for i, group_md in enumerate(md_vals):
        rhos.append(apply_along_axis(test_fn, 1, otu_arrays[i], group_md))

    pvals = []
    for i, group_rhos in enumerate(rhos):
        pvals_i = zeros(len(group_rhos))
        for j, rho in enumerate(group_rhos):
            pvals_i[j] = assign_correlation_pval(rho, sample_sizes[i],
                                                 pval_assignment_method,
                                                 permutations, test_fn,
                                                 otu_arrays[i][j], md_vals[i])
        pvals.append(array(pvals_i))

    # calculate combined stats (column-wise, i.e. per OTU across groups)
    fisher_pvals = apply_along_axis(fisher, 0, array(pvals))
    fisher_rho_and_h = apply_along_axis(fisher_population_correlation, 0,
                                        array(rhos), sample_sizes)
    return (rhos, pvals, fisher_pvals, fisher_rho_and_h[0],
            fisher_rho_and_h[1])
Exemplo n.º 4
0
def run_grouped_correlation(md_vals, otu_arrays, test, test_choices,
                            pval_assignment_method, permutations=None):
    """Run grouped correlation test

    This function runs the grouped correlation test. Briefly, it ingests the
    metadata values, the arrays of otu values that are to be correlated with
    them, and the test and pvalue assignment method to use. It calculates the
    individual correlation coefficients for each group (specified implicitly
    by the grouping and ordering of md_vals and otu_arrays) and then it combines
    the corrcoeffs and the pvalues with methods by Fisher.

    Inputs:
     md_vals - list of 1d arrays, continuous metadata to be correlated; one
      array per group.
     otu_arrays - list of 2d arrays, one per group, aligned with md_vals.
      Every row of otu_arrays[i] is correlated against md_vals[i].
     test - str, one of CORRELATION_TEST_CHOICES keys.
     test_choices - dict, CORRELATION_TEST_CHOICES.
     pval_assignment_method - str, one of CORRELATION_PVALUE_CHOICES.
     permutations - int or None, number of permutations to use for bootstrapped
      methods.

    Returns:
     A 5-tuple (rhos, pvals, fisher_pvals, combined_0, combined_1) where
      rhos and pvals are lists holding one 1d array per group, fisher_pvals
      is the result of applying `fisher` column-wise over the groups' pvals,
      and combined_0 / combined_1 are the first and second rows of the
      column-wise `fisher_population_correlation` output (presumably the
      combined rho and its homogeneity statistic - confirm against that
      helper).
    """
    test_fn = test_choices[test]
    # A list, not ``map(...)``: the Python 3 map object is a one-shot iterator
    # and does not support the ``sample_sizes[i]`` lookup used below.
    sample_sizes = [len(group_md) for group_md in md_vals]

    # find the correlations. rhos is list of 1D arrays.
    # Extra positional arguments to apply_along_axis are passed through, so
    # each otu row is evaluated as test_fn(row, md_vals[i]).
    rhos = [apply_along_axis(test_fn, 1, otu_arrays[i], group_md)
            for i, group_md in enumerate(md_vals)]

    pvals = []
    for i, group_rhos in enumerate(rhos):
        group_otus = otu_arrays[i]
        group_md = md_vals[i]
        pvals_i = zeros(len(group_rhos))
        for j, rho in enumerate(group_rhos):
            pvals_i[j] = assign_correlation_pval(rho, sample_sizes[i],
                                                 pval_assignment_method,
                                                 permutations, test_fn,
                                                 group_otus[j], group_md)
        pvals.append(array(pvals_i))

    # calculate combined stats (column-wise, i.e. per OTU across groups)
    fisher_pvals = apply_along_axis(fisher, 0, array(pvals))
    fisher_rho_and_h = apply_along_axis(fisher_population_correlation, 0,
                                        array(rhos), sample_sizes)
    return (rhos, pvals, fisher_pvals, fisher_rho_and_h[0],
            fisher_rho_and_h[1])
Exemplo n.º 5
0
def naive_cc_tool(bt, corr_method, pval_assignment_method, cval_fp, pval_fp):
    """Calculate co-occurrence using naive approach.

    Every pair of observations (o1, o2) with o1 < o2 is correlated, so only
    the strict upper triangle of each output matrix is filled; the diagonal
    and lower triangle stay 0.

    Inputs:
     bt - biom table with OTUs to be correlated.
     corr_method - str, correlation statistics to use, one of pearson,
      spearmans_rho, or kendalls_tau (a key of CORRELATION_TEST_CHOICES).
     pval_assignment_method - str, one of parametric_t_distribution,
      fisher_z_transform, bootstrapped, kendall. The literal string 'None'
      skips p-value assignment and records 1.0 for every pair.
     cval_fp - path the tab-separated correlation matrix is written to.
     pval_fp - path the tab-separated p-value matrix is written to.

    Neither output file ends with a trailing newline.
    """
    # Fetch the ids once; they label both the header and every row.
    otu_ids = bt.ids(axis='observation')
    data = array([bt.data(i, axis='observation') for i in otu_ids])
    # Unpacking (rather than shape[0]) keeps the requirement that data is 2D.
    r, _ = data.shape
    ccs = zeros((r, r))
    ps = zeros((r, r))
    test_fn = CORRELATION_TEST_CHOICES[corr_method]
    for o1 in range(r):
        for o2 in range(o1 + 1, r):
            cc = test_fn(data[o1], data[o2])
            ccs[o1][o2] = cc
            # assign correlation pvalues
            if pval_assignment_method == 'None':
                ps[o1][o2] = 1.0
            else:
                # permutations is fixed at 1000 for every method here.
                ps[o1][o2] = assign_correlation_pval(
                    cc,
                    len(data[o1]),
                    pval_assignment_method,
                    permutations=1000,
                    perm_test_fn=test_fn,
                    v1=data[o1],
                    v2=data[o2])
    # write values
    header = '#OTU ID\t' + '\t'.join(otu_ids)
    clines = [header] + [otu_ids[i] + '\t' + '\t'.join(map(str, ccs[i]))
                         for i in range(r)]
    plines = [header] + [otu_ids[i] + '\t' + '\t'.join(map(str, ps[i]))
                         for i in range(r)]
    # Context managers close each file even if the write fails.
    with open(cval_fp, 'w') as out_f:
        out_f.write('\n'.join(clines))
    with open(pval_fp, 'w') as out_f:
        out_f.write('\n'.join(plines))
Exemplo n.º 6
0
def naive_cc_tool(bt, corr_method, pval_assignment_method, cval_fp, pval_fp):
    """Calculate co-occurrence using naive approach.

    Every pair of observations (o1, o2) with o1 < o2 is correlated, so only
    the strict upper triangle of each output matrix is filled; the diagonal
    and lower triangle stay 0.

    Inputs:
     bt - biom table with OTUs to be correlated.
     corr_method - str, correlation statistics to use, one of pearson,
      spearmans_rho, or kendalls_tau (a key of CORRELATION_TEST_CHOICES).
     pval_assignment_method - str, one of parametric_t_distribution,
      fisher_z_transform, bootstrapped, kendall. The literal string "None"
      skips p-value assignment and records 1.0 for every pair.
     cval_fp - path the tab-separated correlation matrix is written to.
     pval_fp - path the tab-separated p-value matrix is written to.

    Neither output file ends with a trailing newline.
    """
    # Fetch the ids once; they label both the header and every row.
    otu_ids = bt.ids(axis="observation")
    data = array([bt.data(i, axis="observation") for i in otu_ids])
    # Unpacking (rather than shape[0]) keeps the requirement that data is 2D.
    r, _ = data.shape
    ccs = zeros((r, r))
    ps = zeros((r, r))
    test_fn = CORRELATION_TEST_CHOICES[corr_method]
    for o1 in range(r):
        for o2 in range(o1 + 1, r):
            cc = test_fn(data[o1], data[o2])
            ccs[o1][o2] = cc
            # assign correlation pvalues
            if pval_assignment_method == "None":
                ps[o1][o2] = 1.0
            else:
                # permutations is fixed at 1000 for every method here.
                ps[o1][o2] = assign_correlation_pval(
                    cc,
                    len(data[o1]),
                    pval_assignment_method,
                    permutations=1000,
                    perm_test_fn=test_fn,
                    v1=data[o1],
                    v2=data[o2],
                )
    # write values
    header = "#OTU ID\t" + "\t".join(otu_ids)
    clines = [header] + [
        otu_ids[i] + "\t" + "\t".join(map(str, ccs[i])) for i in range(r)
    ]
    plines = [header] + [
        otu_ids[i] + "\t" + "\t".join(map(str, ps[i])) for i in range(r)
    ]
    # Context managers close each file even if the write fails.
    with open(cval_fp, "w") as out_f:
        out_f.write("\n".join(clines))
    with open(pval_fp, "w") as out_f:
        out_f.write("\n".join(plines))
def main():
    """Correlate every observation of a BIOM table with a mapping category.

    Steps, in order:
     1. Parse the command line; the 'cscore' test is rejected unless the
        p-value assignment method is 'bootstrapped'.
     2. Load the table and the mapping file, keeping only samples that are
        present in both and whose value for opts.category passes
        is_computable_float.
     3. Filter the table to those samples and sort it into the same sample
        order as the collected metadata values.
     4. For each observation, compute the correlation and its p-value, then
        the Benjamini-Hochberg and Bonferroni corrected p-values (capped
        at 1.0).
     5. Format the results, sort them with sort_by_pval and write them to
        opts.output_fp, one line per entry with a trailing newline.

    Exits through option_parser.error when the category is missing from the
    mapping file or when three or fewer samples remain after filtering.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.test == 'cscore' and opts.pval_assignment_method != 'bootstrapped':
        option_parser.error(cscore_error_text)

    bt = load_table(opts.otu_table_fp)
    pmf, _ = parse_mapping_file_to_dict(opts.mapping_fp)

    samples_to_correlate = []
    md_values_to_correlate = []
    # A set gives constant-time membership tests in the loop below.
    bt_sample_ids = set(bt.ids(axis='sample'))

    for sample_id, sample_md in pmf.items():
        if sample_id not in bt_sample_ids:
            continue  # sample in mf, but not bt
        try:
            v = is_computable_float(sample_md[opts.category])
        except KeyError:
            option_parser.error('The category (%s)' % opts.category +
                                ' was not found in the mapping file.')
        except ValueError:
            pass  # value couldn't be converted to float, ignore this sample
        else:
            samples_to_correlate.append(sample_id)
            md_values_to_correlate.append(v)

    # remove samples which are not found in the mapping file or do not have
    # metadata that converts to float
    bt.filter(ids_to_keep=samples_to_correlate, axis='sample')

    # sort the biom table so that feature values are retrieved in the same
    # order as the metadata in the samples they correspond to
    bt = bt.sort(sort_f=lambda _: samples_to_correlate, axis='sample')

    if bt.shape[1] <= 3:
        option_parser.error(filtration_error_text)

    rhos = []
    pvals = []
    for feature_vector in bt.iter_data(axis='observation'):
        rho = correlate(feature_vector,
                        md_values_to_correlate,
                        method=opts.test)
        pval = assign_correlation_pval(
            rho, len(feature_vector),
            method=opts.pval_assignment_method,
            permutations=opts.permutations,
            perm_test_fn=bootstrap_functions[opts.test],
            v1=feature_vector,
            v2=md_values_to_correlate)
        rhos.append(rho)
        pvals.append(pval)

    fdr_pvals = benjamini_hochberg_step_down(pvals)
    bon_pvals = bonferroni_correction(pvals)
    # correct for cases where values above 1.0 due to correction
    fdr_pvals = where(array(fdr_pvals) > 1.0, 1.0, fdr_pvals)
    bon_pvals = where(array(bon_pvals) > 1.0, 1.0, bon_pvals)

    lines = correlate_output_formatter(bt, rhos, pvals, fdr_pvals, bon_pvals,
                                       opts.metadata_key)
    lines = sort_by_pval(lines, ind=2)

    # The context manager closes the file even if a write fails.
    with open(opts.output_fp, 'w') as out_f:
        out_f.write('\n'.join(lines))
        out_f.write('\n')
def main():
    """Correlate every observation of a BIOM table with a mapping category.

    Steps, in order:
     1. Parse the command line; the 'cscore' test is rejected unless the
        p-value assignment method is 'bootstrapped'.
     2. Load the table and the mapping file, keeping only samples that are
        present in both and whose value for opts.category passes
        is_computable_float.
     3. Filter the table to those samples and sort it into the same sample
        order as the collected metadata values.
     4. For each observation, compute the correlation and its p-value, then
        the Benjamini-Hochberg and Bonferroni corrected p-values (capped
        at 1.0).
     5. Format the results, sort them with sort_by_pval and write them to
        opts.output_fp, one line per entry with a trailing newline.

    Exits through option_parser.error when the category is missing from the
    mapping file or when three or fewer samples remain after filtering.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.test == 'cscore' and opts.pval_assignment_method != 'bootstrapped':
        option_parser.error(cscore_error_text)

    bt = load_table(opts.otu_table_fp)
    pmf, _ = parse_mapping_file_to_dict(opts.mapping_fp)

    samples_to_correlate = []
    md_values_to_correlate = []
    # A set gives constant-time membership tests in the loop below.
    bt_sample_ids = set(bt.ids(axis='sample'))

    for sample_id, sample_md in pmf.items():
        if sample_id not in bt_sample_ids:
            continue  # sample in mf, but not bt
        try:
            v = is_computable_float(sample_md[opts.category])
        except KeyError:
            option_parser.error('The category (%s)' % opts.category +
                                ' was not found in the mapping file.')
        except ValueError:
            pass  # value couldn't be converted to float, ignore this sample
        else:
            samples_to_correlate.append(sample_id)
            md_values_to_correlate.append(v)

    # remove samples which are not found in the mapping file or do not have
    # metadata that converts to float
    bt.filter(ids_to_keep=samples_to_correlate, axis='sample')

    # sort the biom table so that feature values are retrieved in the same
    # order as the metadata in the samples they correspond to.
    # Table.sort returns a new table instead of sorting in place, so the
    # result must be rebound; otherwise the feature vectors keep the table's
    # original sample order and are misaligned with md_values_to_correlate.
    bt = bt.sort(sort_f=lambda _: samples_to_correlate, axis='sample')

    if bt.shape[1] <= 3:
        option_parser.error(filtration_error_text)

    rhos = []
    pvals = []
    for feature_vector in bt.iter_data(axis='observation'):
        rho = correlate(feature_vector, md_values_to_correlate,
                        method=opts.test)
        pval = assign_correlation_pval(
            rho, len(feature_vector),
            method=opts.pval_assignment_method,
            permutations=opts.permutations,
            perm_test_fn=bootstrap_functions[opts.test],
            v1=feature_vector,
            v2=md_values_to_correlate)
        rhos.append(rho)
        pvals.append(pval)

    fdr_pvals = benjamini_hochberg_step_down(pvals)
    bon_pvals = bonferroni_correction(pvals)
    # correct for cases where values above 1.0 due to correction
    fdr_pvals = where(array(fdr_pvals) > 1.0, 1.0, fdr_pvals)
    bon_pvals = where(array(bon_pvals) > 1.0, 1.0, bon_pvals)

    lines = correlate_output_formatter(bt, rhos, pvals, fdr_pvals,
                                       bon_pvals, opts.metadata_key)
    lines = sort_by_pval(lines, ind=2)

    # The context manager closes the file even if a write fails.
    with open(opts.output_fp, 'w') as out_f:
        out_f.write('\n'.join(lines))
        out_f.write('\n')