def test_get_taxa_prevalence(self):
    """Check the scores returned by bp.get_taxa_prevalence on two tables."""
    # first table: the last OTU (row) has zero counts in every sample
    otu_table = np.array([[2, 0, 0, 1],
                          [1, 1, 1, 1],
                          [0, 0, 0, 0]], float)
    res = bp.get_taxa_prevalence(otu_table)

    # The unscaled expectation is [2/3 + 1/2, 1/3 + 1 + 1 + 1/2, 0] / 4; it
    # is then multiplied by 4 / (2.5 + 1/3), i.e. divided by its largest
    # entry, so the top score is 1. Float literals are used so the fractions
    # do not truncate to 0 when true division is not enabled.
    expected = np.array([2.0 / 3 + 1.0 / 2,
                         1.0 / 3 + 1 + 1 + 1.0 / 2,
                         0]) / 4 * 4 / (2.5 + 1.0 / 3)
    assert_almost_equal(res, expected)

    # second table: every OTU is present in at least two samples
    otu_table = np.array([[2, 0, 0, 1],
                          [1, 1, 1, 1],
                          [0, 2, 2, 1]], float)
    res = bp.get_taxa_prevalence(otu_table)

    # per the original author's note, without normalization these values
    # would be [3, 4, 5] / 12
    assert_almost_equal(res, [0, .5, 1])
def preprocess_otu_table(otu_sample_ids, otu_table, lineages, coords_data,
                         coords_headers, N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped; when
    both coords_data and coords_headers are lists, only their first elements
    are used (the master coordinates, i. e. where data is centered)
    coords_headers: headers of coords_data, used to reorder the samples of the
    otu_table and to drop the samples that are not in the coordinates
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    If otu_sample_ids, otu_table or lineages is empty, the returned value is
    [], [], [], [], ''.

    Raises:
    EmperorUnsupportedComputation: if otu_table or lineages has one or fewer
    rows (and none of the taxa data is empty)
    """
    # return empty values if any of the taxa data is empty; len() is used
    # rather than ``==`` because comparing a numpy array against an empty
    # array is element-wise and never evaluates to a plain True
    if (len(otu_sample_ids) == 0 or len(otu_table) == 0 or
            len(lineages) == 0):
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is
    # centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates
    # header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(
        coords_headers, otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    # keep only the data of the N most prevalent taxa
    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    lines = '\n'.join(make_biplot_scores_output({
        'coord': o_otu_coords,
        'lineages': o_otu_lineages,
    }))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines
def test_get_taxa_prevalence(self):
    """Check the scores returned by bp.get_taxa_prevalence on two tables."""
    # first table: the last OTU (row) has zero counts in every sample
    otu_table = np.array([[2, 0, 0, 1],
                          [1, 1, 1, 1],
                          [0, 0, 0, 0]], float)
    res = bp.get_taxa_prevalence(otu_table)

    # The unscaled expectation is [2/3 + 1/2, 1/3 + 1 + 1 + 1/2, 0] / 4; it
    # is then multiplied by 4 / (2.5 + 1/3), i.e. divided by its largest
    # entry, so the top score is 1. Float literals are used so the fractions
    # do not truncate to 0 when true division is not enabled.
    expected = np.array([2.0 / 3 + 1.0 / 2,
                         1.0 / 3 + 1 + 1 + 1.0 / 2,
                         0]) / 4 * 4 / (2.5 + 1.0 / 3)
    assert_almost_equal(res, expected)

    # second table: every OTU is present in at least two samples
    otu_table = np.array([[2, 0, 0, 1],
                          [1, 1, 1, 1],
                          [0, 2, 2, 1]], float)
    res = bp.get_taxa_prevalence(otu_table)

    # per the original author's note, without normalization these values
    # would be [3, 4, 5] / 12
    assert_almost_equal(res, [0, .5, 1])
def preprocess_otu_table(otu_sample_ids, otu_table, lineages, coords_data,
                         coords_headers, N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped; when
    both coords_data and coords_headers are lists, only their first elements
    are used (the master coordinates, i. e. where data is centered)
    coords_headers: headers of coords_data, used to reorder the samples of the
    otu_table and to drop the samples that are not in the coordinates
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    If otu_sample_ids, otu_table or lineages is empty, the returned value is
    [], [], [], [], ''.

    Raises:
    EmperorUnsupportedComputation: if otu_table or lineages has one or fewer
    rows (and none of the taxa data is empty)
    """
    # return empty values if any of the taxa data is empty; len() is used
    # rather than ``==`` because comparing a numpy array against an empty
    # array is element-wise and never evaluates to a plain True
    taxa_inputs = (otu_sample_ids, otu_table, lineages)
    if any(len(item) == 0 for item in taxa_inputs):
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is
    # centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates
    # header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(
        coords_headers, otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    # keep only the data of the N most prevalent taxa
    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    lines = '\n'.join(make_biplot_scores_output({
        'coord': o_otu_coords,
        'lineages': o_otu_lineages,
    }))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines