Пример #1
0
    def validate(self, options):
        """Validate a taxonomy file and log how many errors of each kind it has.

        The prefix, rank, hierarchy and species checks are each switched off by
        the matching ``options.no_*`` flag; the group name and duplicate name
        checks are always requested.
        """

        taxonomy_path = options.taxonomy_file
        check_file_exists(taxonomy_path)

        taxonomy = Taxonomy()
        lineages = taxonomy.read(taxonomy_path)

        # The optional checks are driven by negated command-line flags
        # ('no_hierarhcy' is the attribute name read from options).
        requested_checks = {
            'check_prefixes': not options.no_prefix,
            'check_ranks': not options.no_all_ranks,
            'check_hierarchy': not options.no_hierarhcy,
            'check_species': not options.no_species,
            'check_group_names': True,
            'check_duplicate_names': True,
            'report_errors': True,
        }
        errors = taxonomy.validate(lineages, **requested_checks)

        # validate() is expected to hand back exactly five error collections
        (invalid_ranks,
         invalid_prefixes,
         invalid_species_name,
         invalid_hierarchies,
         invalid_group_name) = errors

        total_errors = sum(len(e) for e in errors)
        if total_errors == 0:
            self.logger.info('No errors identified in taxonomy file.')
            return

        # one log line per error category, in a fixed order
        report = (
            ('Identified %d incomplete taxonomy strings.', invalid_ranks),
            ('Identified %d rank prefix errors.', invalid_prefixes),
            ('Identified %d invalid species names.', invalid_species_name),
            ('Identified %d taxa with multiple parents.', invalid_hierarchies),
            ('Identified %d invalid group names.', invalid_group_name),
        )
        for template, offenders in report:
            self.logger.info(template % len(offenders))
Пример #2
0
    def validate(self, options):
        """Run every taxonomy validation check on the input taxonomy file.

        All checks are requested unconditionally, and the value returned by
        ``Taxonomy.validate`` is not inspected here; only a completion message
        is logged.
        """

        taxonomy_path = options.input_taxonomy
        check_file_exists(taxonomy_path)

        taxonomy = Taxonomy()
        lineages = taxonomy.read(taxonomy_path)

        # every available check is turned on
        all_checks = {
            'check_prefixes': True,
            'check_ranks': True,
            'check_hierarchy': True,
            'check_species': True,
            'check_group_names': True,
            'check_duplicate_names': True,
            'report_errors': True,
        }
        taxonomy.validate(lineages, **all_checks)

        self.logger.info('Finished performing validation tests.')
Пример #3
0
    def validate(self, options):
        """Validate a taxonomy file and log how many errors of each kind it has.

        Four checks are toggled by the ``options.no_*`` flags and handed to
        ``Taxonomy.validate`` positionally, followed by a constant ``True``.
        """

        taxonomy_path = options.taxonomy_file
        check_file_exists(taxonomy_path)

        taxonomy = Taxonomy()
        lineages = taxonomy.read(taxonomy_path)

        # Positional arguments for Taxonomy.validate, kept in their original
        # order; the meaning of each slot (including the trailing True) is
        # fixed by that method's signature, which is not visible here.
        validate_args = (
            not options.no_prefix,
            not options.no_all_ranks,
            not options.no_hierarhcy,
            not options.no_species,
            True,
        )
        errors = taxonomy.validate(lineages, *validate_args)

        # validate() is expected to hand back exactly four error collections
        (invalid_ranks,
         invalid_prefixes,
         invalid_species_name,
         invalid_hierarchies) = errors

        total_errors = sum(len(e) for e in errors)
        if total_errors == 0:
            self.logger.info('No errors identified in taxonomy file.')
            return

        # one log line per error category, in a fixed order
        report = (
            ('Identified %d incomplete taxonomy strings.', invalid_ranks),
            ('Identified %d rank prefix errors.', invalid_prefixes),
            ('Identified %d invalid species names.', invalid_species_name),
            ('Identified %d taxa with multiple parents.', invalid_hierarchies),
        )
        for template, offenders in report:
            self.logger.info(template % len(offenders))
Пример #4
0
    def check_tree(self, options):
        """Validate taxonomy of decorated tree and check for polyphyletic groups.

        The taxonomy is read from ``options.taxonomy_file`` when one is given,
        otherwise from ``options.decorated_tree`` itself. After running all
        validation checks, polyphyletic groups are identified in one of two
        ways:

        * with a taxonomy file: a taxon is reported when the MRCA of its
          member leaves spans a different number of leaves than the taxon has
          members in the tree;
        * without a taxonomy file: a taxon is reported when its name labels
          more than one internal node.

        Any groups found are printed to stdout in sorted order.
        """

        check_file_exists(options.decorated_tree)

        # validate taxonomy
        taxonomy = Taxonomy()
        if options.taxonomy_file:
            lineages = taxonomy.read(options.taxonomy_file)
        else:
            lineages = taxonomy.read_from_tree(options.decorated_tree)

        taxonomy.validate(lineages,
                          check_prefixes=True,
                          check_ranks=True,
                          check_hierarchy=True,
                          check_species=True,
                          check_group_names=True,
                          check_duplicate_names=True,
                          report_errors=True)

        # check for polyphyletic groups
        polyphyletic_groups = set()
        tree = dendropy.Tree.get_from_path(options.decorated_tree,
                                           schema='newick',
                                           rooting="force-rooted",
                                           preserve_underscores=True)

        if options.taxonomy_file:
            # reduce taxonomy to taxa in tree and map taxon labels to Taxon objects
            reduced_taxonomy = {}
            taxon_map = {}
            for leaf in tree.leaf_node_iter():
                leaf_label = leaf.taxon.label
                # a leaf label absent from the taxonomy file fails on this lookup
                reduced_taxonomy[leaf_label] = lineages[leaf_label]
                taxon_map[leaf_label] = leaf.taxon

            # find taxa with an MRCA spanning additional taxa
            # (the first entry of Taxonomy.rank_labels is skipped)
            for rank_label in Taxonomy.rank_labels[1:]:
                extant_taxa = taxonomy.extant_taxa_for_rank(
                    rank_label, reduced_taxonomy)
                for taxon, taxa_ids in extant_taxa.items():
                    member_taxa = [taxon_map[taxon_id] for taxon_id in taxa_ids]
                    mrca = tree.mrca(taxa=member_taxa)
                    mrca_leaf_count = sum(1 for _ in mrca.leaf_iter())
                    if mrca_leaf_count != len(taxa_ids):
                        polyphyletic_groups.add(taxon)
        else:
            # find duplicate taxon labels in tree
            seen_taxa = set()

            for node in tree.preorder_node_iter(lambda n: not n.is_leaf()):
                _support, taxon_label, _aux_info = parse_label(node.label)
                if not taxon_label:
                    continue

                # an internal node may carry several ';'-separated taxon names
                for taxon in (name.strip() for name in taxon_label.split(';')):
                    if taxon in seen_taxa:
                        polyphyletic_groups.add(taxon)

                    seen_taxa.add(taxon)

        if polyphyletic_groups:
            print('')
            print('Tree contains polyphyletic groups:')
            # sorted so the report is reproducible; set order can vary between runs
            for taxon in sorted(polyphyletic_groups):
                print(taxon)

        self.logger.info('Finished performing validation tests.')