def test_taxtree_builder(self):
    """Check TaxTreeBuilder output against the stored reference tree.

    Reads ``test.tax`` and ``taxtree.nw`` from the ``testfiles`` directory
    that sits next to this module, runs ``TaxTreeBuilder.build()`` with its
    default arguments, and asserts that:

    * the returned sequence ids equal the keys of the taxonomy map, and
    * the built tree serializes (``format=8``) to the same string as the
      expected tree.
    """
    config = EpacConfig()

    # Fixture files live in "testfiles" beside this test module.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    fixture_dir = os.path.join(module_dir, "testfiles")

    taxonomy = Taxonomy(
        EpacConfig.REF_SEQ_PREFIX,
        os.path.join(fixture_dir, "test.tax"),
    )
    reference_tree = Tree(os.path.join(fixture_dir, "taxtree.nw"), format=8)

    builder = TaxTreeBuilder(config, taxonomy)
    built_tree, built_ids = builder.build()

    self.assertEqual(built_ids, taxonomy.get_map().keys())
    # Compare serialized forms, both written with the same format code.
    self.assertEqual(
        built_tree.write(format=8),
        reference_tree.write(format=8),
    )
def build_multif_tree(self):
    """Build the multifurcating reference tree from ``self.taxonomy``.

    Runs ``TaxTreeBuilder.build`` with the ``reftree_*`` options taken from
    ``self.cfg`` and stores the outcome on the instance:

    * ``reftree_multif`` -- the tree returned by the builder
    * ``reftree_ids``    -- ``frozenset`` of the returned sequence ids
    * ``reftree_size``   -- ``len()`` of the ids as returned by the builder

    Afterwards the config is asked to resolve its "auto" settings for that
    tree size. When ``cfg.debug`` is set, the sequence ids and the tree are
    additionally written to files named via ``cfg.tmp_fname``.
    """
    config = self.cfg
    builder = TaxTreeBuilder(config, self.taxonomy)
    tree, seq_ids = builder.build(
        config.reftree_min_rank,
        config.reftree_max_seqs_per_leaf,
        config.reftree_clades_to_include,
        config.reftree_clades_to_ignore,
    )

    self.reftree_ids = frozenset(seq_ids)
    # Size is taken from the ids as returned, not from the frozenset above.
    self.reftree_size = len(seq_ids)
    self.reftree_multif = tree

    # IMPORTANT: select GAMMA or CAT model based on tree size!
    config.resolve_auto_settings(self.reftree_size)

    if not config.debug:
        return

    # Debug dump 1: list of sequence ids which comprise the reference tree,
    # one id per line.
    ids_path = config.tmp_fname("%NAME%_seq_ids.txt")
    with open(ids_path, "w") as ids_file:
        ids_file.writelines("%s\n" % seq_id for seq_id in seq_ids)

    # Debug dump 2: original tree with taxonomic ranks as internal node labels.
    tax_tree_path = config.tmp_fname("%NAME%_mfu_tax.tre")
    tree.write(outfile=tax_tree_path, format=8)