Пример #1
0
    def test_sort_taxa_table_by_pcoa_coords(self):
        """Make sure OTU table and coordinates are sorted equally

        Two scenarios are exercised: requesting every sample listed in the
        coordinates header, and requesting only a subset of three samples.
        """

        # case with shuffled inputs
        o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords(
            self.coords_header, self.otu_table, self.otu_headers)

        # assertEqual is used instead of the deprecated assertEquals alias,
        # which is no longer available in recent Python releases
        self.assertEqual(o_headers, [
            'PC.354', 'PC.356', 'PC.481', 'PC.593', 'PC.355', 'PC.607',
            'PC.634', 'PC.636', 'PC.635'
        ])
        assert_almost_equal(o_otu_table, OTU_TABLE_A)

        # case with shuffled inputs and fewer samples: only the three
        # requested sample identifiers are expected back, in the requested
        # order, with one column per sample in the resulting table
        o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords(
            ['PC.354', 'PC.356', 'PC.635'], self.otu_table, self.otu_headers)
        self.assertEqual(o_headers, ['PC.354', 'PC.356', 'PC.635'])
        assert_almost_equal(
            o_otu_table,
            array([[0.01, 0.02, 0.04697987], [0., 0.02, 0.02013423],
                   [0.38926174, 0.65333333, 0.27516779],
                   [0., 0.03333333, 0.02013423],
                   [0.41610738, 0.22, 0.45637584],
                   [0.03355705, 0.01333333, 0.02013423], [0., 0.01333333, 0.],
                   [0.14765101, 0.02666667, 0.16107383]]))
Пример #2
0
    def test_sort_taxa_table_by_pcoa_coords(self):
        """Make sure OTU table and coordinates are sorted equally

        Covers the case where all the samples in the coordinates header are
        requested and the case where only three of them are requested.
        """

        # case with shuffled inputs
        o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords(
            self.coords_header, self.otu_table, self.otu_headers)

        # the assertEquals alias is deprecated (and removed from recent Python
        # versions), assertEqual is the supported spelling
        self.assertEqual(o_headers, ['PC.354', 'PC.356', 'PC.481', 'PC.593',
                                     'PC.355', 'PC.607', 'PC.634', 'PC.636',
                                     'PC.635'])
        assert_almost_equal(o_otu_table, OTU_TABLE_A)

        # case with shuffled inputs and fewer samples: the returned headers
        # must match the three requested identifiers and the table must have
        # one column for each of them
        o_headers, o_otu_table = sort_taxa_table_by_pcoa_coords(
            ['PC.354', 'PC.356', 'PC.635'], self.otu_table, self.otu_headers)
        self.assertEqual(o_headers, ['PC.354', 'PC.356', 'PC.635'])
        assert_almost_equal(o_otu_table, array(
            [[0.01, 0.02, 0.04697987],
             [0., 0.02, 0.02013423],
             [0.38926174, 0.65333333, 0.27516779],
             [0., 0.03333333, 0.02013423],
             [0.41610738, 0.22, 0.45637584],
             [0.03355705, 0.01333333, 0.02013423],
             [0., 0.01333333, 0.],
             [0.14765101, 0.02666667, 0.16107383]]))
Пример #3
0
def preprocess_otu_table(otu_sample_ids,
                         otu_table,
                         lineages,
                         coords_data,
                         coords_headers,
                         N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped
    coords_headers: sample identifiers of the coordinates, the otu_table is
    re-ordered to match them. If both coords_data and coords_headers are
    lists, only the first element of each one is used
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    If otu_sample_ids, otu_table or lineages are empty, four empty lists and
    an empty string are returned.

    Raises:
    EmperorUnsupportedComputation: if otu_table or lineages have one or fewer
    elements
    """

    # return empty values if any of the taxa data is empty; the length is
    # checked because comparing a table against array([]) is an element-wise
    # operation that does not reliably evaluate to a single boolean
    if (len(otu_sample_ids) == 0 or len(otu_table) == 0 or
            len(lineages) == 0):
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        # call-style raise is valid in both Python 2 and Python 3
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(
        coords_headers, otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    # keep only the data for the N most prevalent taxa
    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    # one line of text per element returned by make_biplot_scores_output
    lines = '\n'.join(
        make_biplot_scores_output({
            'coord': o_otu_coords,
            'lineages': o_otu_lineages
        }))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines
Пример #4
0
def preprocess_otu_table(otu_sample_ids, otu_table, lineages,
                         coords_data, coords_headers, N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped
    coords_headers: sample identifiers of the coordinates, used to re-order
    the otu_table. When coords_data and coords_headers are both lists, only
    their first elements are used
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    Four empty lists and an empty string are returned when otu_sample_ids,
    otu_table or lineages are empty.

    Raises:
    EmperorUnsupportedComputation: when otu_table or lineages have one or
    fewer elements
    """

    # return empty values if any of the taxa data is empty; lengths are used
    # here because `otu_table == array([])` compares element by element and
    # does not produce a dependable single truth value
    no_samples = len(otu_sample_ids) == 0
    no_table = len(otu_table) == 0
    no_lineages = len(lineages) == 0
    if no_samples or no_table or no_lineages:
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        # the exception is instantiated explicitly so the statement is valid
        # syntax under both Python 2 and Python 3
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(
        coords_headers, otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    # filter everything down to the N most prevalent taxa
    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    # join the elements returned by make_biplot_scores_output with newlines
    lines = '\n'.join(make_biplot_scores_output(
        {'coord': o_otu_coords, 'lineages': o_otu_lineages}))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines