Exemplo n.º 1
0
    def outliers(self, options):
        """Create information for identifying taxonomic outliers.

        Validates the supplied options, ensures the output directory exists
        and then forwards every setting to ``Outliers.run``.

        Parameters
        ----------
        options : object
          Parsed command-line options. The attributes read here are
          ``input_tree``, ``taxonomy_file``, ``plot_taxa_file``,
          ``highlight_taxa_file``, ``trusted_taxa_file``, ``output_dir``,
          ``highlight_polyphyly``, ``fmeasure_table`` and the remaining
          values passed through to ``Outliers`` / ``Outliers.run`` below.

        Returns
        -------
        None
          Also returns early (after logging an error) when
          ``highlight_polyphyly`` is set without ``fmeasure_table``.
        """

        # Reject an inconsistent flag combination before touching the
        # filesystem, so a usage error never leaves an output directory behind.
        if options.highlight_polyphyly and not options.fmeasure_table:
            self.logger.error(
                "The '--highlight_polyphyly' flag must be used with the '--fmeasure_table' flag."
            )
            return

        # required input files
        check_file_exists(options.input_tree)
        check_file_exists(options.taxonomy_file)

        # optional input files are only checked when they were supplied
        for optional_file in (options.plot_taxa_file,
                              options.highlight_taxa_file,
                              options.trusted_taxa_file):
            if optional_file:
                check_file_exists(optional_file)

        if not os.path.exists(options.output_dir):
            os.makedirs(options.output_dir)

        o = Outliers(options.skip_mpld3, options.dpi, options.output_dir)
        o.run(options.input_tree, options.taxonomy_file, options.viral,
              options.plot_taxa_file, options.plot_dist_taxa_only,
              options.plot_domain, options.highlight_polyphyly,
              options.highlight_taxa_file, options.trusted_taxa_file,
              options.fixed_root, options.min_children, options.min_support,
              options.mblet, options.fmeasure_table, options.min_fmeasure,
              options.fmeasure_mono, options.verbose_table)

        self.logger.info('Done.')
Exemplo n.º 2
0
    def outliers(self, options):
        """Create information for identifying taxonomic outliers.

        Checks the input files, ensures the output directory exists and
        delegates the actual work to ``Outliers.run``.

        Parameters
        ----------
        options : object
          Parsed command-line options; the attributes read are exactly the
          ones referenced in the body below.
        """

        check_file_exists(options.input_tree)

        # optional inputs are validated only when supplied, plot taxa first
        for option_name in ('plot_taxa_file', 'trusted_taxa_file'):
            optional_path = getattr(options, option_name)
            if optional_path:
                check_file_exists(optional_path)

        output_dir = options.output_dir
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        outlier_analysis = Outliers(options.dpi)
        outlier_analysis.run(
            options.input_tree, options.taxonomy_file, output_dir,
            options.plot_taxa_file, options.plot_dist_taxa_only,
            options.plot_domain, options.trusted_taxa_file,
            options.fixed_root, options.min_children, options.min_support,
            options.verbose_table)

        self.logger.info('Done.')
Exemplo n.º 3
0
    def _median_rank_rd(self, tree, placed_taxon, taxonomy,
                        trusted_taxa_file, min_children, min_support):
        """Infer the median relative divergence of each taxonomic rank.

        Besides returning the per-rank medians, every node of `tree` is
        annotated with a `rel_dist` attribute: 0.0 for the seed node and, for
        every other node, `np_median` of that node's entry in the per-node
        relative distances returned by `Outliers.median_rd_over_phyla`.

        Parameters
        ----------
        tree : Tree
          Dendropy Tree.
        placed_taxon : set
          Taxon currently placed in tree which can be used for relative divergence inference.
        taxonomy: d[taxon_id] -> taxonomy info
          Taxonomic information for extant taxa.
        trusted_taxa_file : str
          File specifying trusted taxa to consider when inferring distribution. Set to None to consider all taxa.
        min_children : int
          Only consider taxa with at least the specified number of children taxa when inferring distribution.
        min_support : float
          Only consider taxa with at least this level of support when inferring distribution.

        Returns
        -------
        d[rank_index] -> float
          Median relative divergence for each taxonomic rank.
        """

        # trusted taxa are only read when a file was supplied
        trusted_taxa = read_taxa_file(trusted_taxa_file) if trusted_taxa_file else None

        # taxa used for inference must pass the filter and be placed in the tree
        inference_taxa = filter_taxa_for_dist_inference(
            tree, taxonomy, trusted_taxa, min_children, min_support)
        inference_taxa.intersection_update(placed_taxon)

        # infer distribution
        rd_calculator = Outliers()
        phylum_rel_dists, rel_node_dists = rd_calculator.median_rd_over_phyla(
            tree, inference_taxa, taxonomy)
        median_for_rank = rd_calculator.rank_median_rd(phylum_rel_dists,
                                                       inference_taxa)

        def is_not_seed(node):
            # predicate handed to the tree iterator so the seed node is skipped
            return node != tree.seed_node

        # annotate nodes with their relative divergence
        tree.seed_node.rel_dist = 0.0
        for node in tree.preorder_node_iter(is_not_seed):
            node.rel_dist = np_median(rel_node_dists[node.id])

        return median_for_rank
Exemplo n.º 4
0
    def _median_rank_rd(self, tree, placed_taxon, taxonomy,
                        trusted_taxa_file, min_children, min_support):
        """Compute per-rank median relative divergence and annotate the tree.

        As a side effect each node of `tree` receives a `rel_dist` attribute:
        the seed node is set to 0.0 and every other node to `np_median` of its
        entry in the per-node relative distances produced by
        `Outliers.median_rd_over_phyla`.

        Parameters
        ----------
        tree : Tree
          Dendropy Tree.
        placed_taxon : set
          Taxon currently placed in tree which can be used for relative divergence inference.
        taxonomy: d[taxon_id] -> taxonomy info
          Taxonomic information for extant taxa.
        trusted_taxa_file : str
          File specifying trusted taxa to consider when inferring distribution. Set to None to consider all taxa.
        min_children : int
          Only consider taxa with at least the specified number of children taxa when inferring distribution.
        min_support : float
          Only consider taxa with at least this level of support when inferring distribution.

        Returns
        -------
        d[rank_index] -> float
          Median relative divergence for each taxonomic rank.
        """

        # no trusted-taxa file means no restriction is passed to the filter
        trusted = read_taxa_file(trusted_taxa_file) if trusted_taxa_file else None

        # keep only filtered taxa that are also currently placed in the tree
        dist_taxa = filter_taxa_for_dist_inference(
            tree, taxonomy, trusted, min_children, min_support)
        dist_taxa.intersection_update(placed_taxon)

        # infer distribution
        rd_model = Outliers()
        phylum_rel_dists, rel_node_dists = rd_model.median_rd_over_phyla(
            tree, dist_taxa, taxonomy)
        median_for_rank = rd_model.rank_median_rd(phylum_rel_dists, dist_taxa)

        def skip_seed(candidate):
            # filter for the preorder iterator: everything except the seed node
            return candidate != tree.seed_node

        # store the relative divergence on every node
        tree.seed_node.rel_dist = 0.0
        for current in tree.preorder_node_iter(skip_seed):
            current.rel_dist = np_median(rel_node_dists[current.id])

        return median_for_rank
Exemplo n.º 5
0
    def outliers(self, options):
        """Create information for identifying taxonomic outliers.

        Verifies the input files, makes sure the output directory is present
        and then runs ``Outliers`` with the supplied options.

        Parameters
        ----------
        options : object
          Parsed command-line options; only the attributes referenced in the
          body below are read.
        """

        check_file_exists(options.input_tree)

        # optional files: checked only when given
        plot_taxa_file = options.plot_taxa_file
        if plot_taxa_file:
            check_file_exists(plot_taxa_file)

        trusted_taxa_file = options.trusted_taxa_file
        if trusted_taxa_file:
            check_file_exists(trusted_taxa_file)

        out_dir = options.output_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        runner = Outliers(options.dpi)
        runner.run(options.input_tree,
                   options.taxonomy_file,
                   out_dir,
                   plot_taxa_file,
                   options.plot_dist_taxa_only,
                   options.plot_domain,
                   trusted_taxa_file,
                   options.fixed_root,
                   options.min_children,
                   options.min_support,
                   options.verbose_table)

        self.logger.info('Done.')