示例#1
0
 def test_get_taxa_prevalence(self):
     """Check the scores returned by bp.get_taxa_prevalence on two tables

     The expected values are written with float literals so they do not
     depend on whether true division is enabled in this module.
     """
     # first table: the third taxon has a zero count in every sample
     otu_table = np.array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 0, 0, 0]], float)
     res = bp.get_taxa_prevalence(otu_table)

     # per-taxon sum of count/sample-total (sample totals are 3, 1, 1, 2),
     # averaged over the 4 samples ...
     unscaled = np.array([2.0/3 + 1.0/2, 1.0/3 + 1 + 1 + 1.0/2, 0]) / 4
     # ... then divided by the largest average so the top taxon scores 1
     assert_almost_equal(res, unscaled * 4/(2.5 + 1.0/3))

     # second table: every sample totals 3; the original note states the
     # scores would be [3, 4, 5]/12 without normalization
     otu_table = np.array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 2, 2, 1]], float)
     res = bp.get_taxa_prevalence(otu_table)
     assert_almost_equal(res, [0, .5, 1])
示例#2
0
def preprocess_otu_table(otu_sample_ids,
                         otu_table,
                         lineages,
                         coords_data,
                         coords_headers,
                         N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped
    coords_headers: sample identifiers for coords_data, used to re-order and
    filter the samples of the otu_table; when both coords_data and
    coords_headers are lists only their first elements are used
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    If otu_sample_ids, otu_table or lineages is empty the returned value is
    ([], [], [], [], '').

    Raises:
    EmperorUnsupportedComputation: if otu_table or lineages have one or fewer
    rows
    """

    # return empty values if any of the taxa data is empty; the table is
    # checked through its length because an elementwise `== array([])`
    # comparison cannot report emptiness (two empty arrays compare to an
    # empty, falsy result)
    if (otu_sample_ids == []) or (len(otu_table) == 0) or (lineages == []):
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(
        coords_headers, otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior to the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    # keep only the data that corresponds to the N most prevalent taxa
    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    # one line of text per element produced by make_biplot_scores_output
    lines = '\n'.join(
        make_biplot_scores_output({
            'coord': o_otu_coords,
            'lineages': o_otu_lineages
        }))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines
 def test_get_taxa_prevalence(self):
     """Check the scores returned by bp.get_taxa_prevalence on two tables

     Float literals are used in the expected values so the result of the
     divisions does not depend on the division mode of this module.
     """
     # the last taxon has a zero count in every sample
     otu_table = np.array([[2, 0, 0, 1],
                           [1, 1, 1, 1],
                           [0, 0, 0, 0]], float)
     res = bp.get_taxa_prevalence(otu_table)

     # sample totals are 3, 1, 1, 2: add up count/total for each taxon and
     # average over the 4 samples
     mean_fraction = np.array([2.0/3 + 1.0/2,
                               1.0/3 + 1 + 1 + 1.0/2,
                               0]) / 4
     # dividing by the largest average makes the top taxon score 1
     assert_almost_equal(res, mean_fraction * 4/(2.5 + 1.0/3))

     # every sample totals 3 here; per the original note the scores would be
     # [3, 4, 5]/12 if they were not normalized
     otu_table = np.array([[2, 0, 0, 1],
                           [1, 1, 1, 1],
                           [0, 2, 2, 1]], float)
     res = bp.get_taxa_prevalence(otu_table)
     assert_almost_equal(res, [0, .5, 1])
示例#4
0
def preprocess_otu_table(otu_sample_ids, otu_table, lineages,
                        coords_data, coords_headers, N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped
    coords_headers: sample identifiers for coords_data, used to re-order and
    filter the samples of the otu_table; when both coords_data and
    coords_headers are lists only their first elements are used
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    If otu_sample_ids, otu_table or lineages is empty the returned value is
    ([], [], [], [], '').

    Raises:
    EmperorUnsupportedComputation: if otu_table or lineages have one or fewer
    rows
    """

    # return empty values if any of the taxa data is empty; len() is used for
    # the table since `otu_table == array([])` compares elementwise and does
    # not tell whether the table is empty (empty vs. empty gives an empty,
    # falsy array)
    no_samples = otu_sample_ids == []
    if no_samples or len(otu_table) == 0 or lineages == []:
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(
        coords_headers, otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior to the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    # keep only the data that corresponds to the N most prevalent taxa
    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    # one line of text per element produced by make_biplot_scores_output
    biplot_data = {'coord': o_otu_coords, 'lineages': o_otu_lineages}
    lines = '\n'.join(make_biplot_scores_output(biplot_data))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines