def test_initialize_other_taxon_namespace(self):
    """Build one namespace from another and check both hold the same taxa.

    The second namespace must be a distinct object, yet iterating both
    namespaces in parallel must yield the very same taxon objects.
    """
    source_ns = TaxonNamespace(self.taxa)
    copied_ns = TaxonNamespace(source_ns)
    self.assertIsNot(source_ns, copied_ns)
    for namespace in (source_ns, copied_ns):
        self.validate_taxon_concepts(namespace, self.taxa_labels)
    for taxon in self.taxa:
        self.assertIn(taxon, source_ns._taxa)
        self.assertIn(taxon, copied_ns._taxa)
    for original, duplicate in zip(source_ns, copied_ns):
        self.assertIs(original, duplicate)
def test_construct_from_another(self):
    """Check that each way of copying ``self.tns1`` shares its taxa.

    Copy-construction, ``clone(0)`` and ``copy.copy`` must each give a
    namespace that is a distinct object with an equal label, an equal
    ``_taxa`` collection and identical taxon objects.
    """
    duplicates = (
        TaxonNamespace(self.tns1),
        self.tns1.clone(0),
        copy.copy(self.tns1),
    )
    for tns2 in duplicates:
        self.assertIsNot(tns2, self.tns1)
        self.assertEqual(tns2.label, self.tns1.label)
        self.assertEqual(tns2._taxa, self.tns1._taxa)
        for t1, t2 in zip(self.tns1, tns2):
            self.assertIs(t1, t2)
        self.compare_distinct_annotables(tns2, self.tns1)
def test_basic_adding(self):
    """Add newly created taxa one at a time and check the size after each."""
    namespace = TaxonNamespace()
    self.assertEqual(len(namespace), 0)
    for expected_size, label in enumerate(self.str_labels, start=1):
        namespace.add_taxon(Taxon(label=label))
        self.assertEqual(len(namespace), expected_size)
    self.validate_taxon_concepts(namespace, self.str_labels)
def test_require_taxon_by_label_noadd(self):
    """Fetch every known label and confirm the namespace size is unchanged."""
    namespace = TaxonNamespace(self.str_labels)
    for wanted in self.str_labels:
        found = namespace.get_taxon(wanted)
        self.assertEqual(found.label, wanted)
    self.assertEqual(len(namespace), len(self.str_labels))
    self.validate_taxon_concepts(namespace, self.str_labels)
def read_forest(filenames, true_tree_file):
    """
    Read files with Newick-formatted trees into dendropy tree objects.

    Every tree, including the "true" tree, is read into one shared
    ``TaxonNamespace``.

    :param filenames: iterable of paths to the tree files to compare
    :param true_tree_file: path to the file of the "true" tree
    :return: tuple ``(trees, true_tree)``; ``trees`` is a dict keyed by each
        file's basename (with ".msl" removed) holding the tree read from that
        file, and ``true_tree`` is the tree read from ``true_tree_file``
    """

    def _read_tree_from_path(path, taxon_namespace):
        """Read the Newick file at ``path`` into a dendropy tree."""
        # get_from_path is a factory, so it is called on the class directly;
        # no throwaway Tree instance is needed.
        return Tree.get_from_path(
            path, "newick", taxon_namespace=taxon_namespace
        )

    # One namespace for all trees (marked as "needed" by the original author;
    # presumably so the trees can be compared with each other).
    taxon_ns = TaxonNamespace()
    true_tree = _read_tree_from_path(true_tree_file, taxon_ns)
    trees = {
        basename(tree_path).replace(".msl", ""): _read_tree_from_path(
            tree_path, taxon_ns
        )
        for tree_path in filenames
    }
    return trees, true_tree
def simulate_pangenome(ngenes, nisolates, effective_pop_size, gain_rate,
                       loss_rate, mutation_rate, max_core):
    """Simulate a tree plus per-isolate gene content and mutations.

    A pure Kingman tree is simulated over taxa labelled "1" .. str(nisolates),
    deep-copied, and then passed to ``simulate_img_with_mutation``.

    Returns a tuple ``(gene_mutations, basic_tree)``: ``gene_mutations`` has
    one entry per leaf of the simulated tree, each a list of
    ``[gene, leaf.gene_mutations[gene]]`` pairs for the genes in
    ``leaf.acc_genes``; ``basic_tree`` is the deep copy taken before the
    gene/mutation simulation.
    """
    isolate_labels = [str(i) for i in range(1, 1 + nisolates)]
    isolate_namespace = TaxonNamespace(isolate_labels)

    # Simulate a phylogeny using the coalescent.
    sim_tree = treesim.pure_kingman_tree(
        taxon_namespace=isolate_namespace,
        pop_size=effective_pop_size,
    )
    basic_tree = copy.deepcopy(sim_tree)

    # Simulate gene presence/absence and mutation.
    sim_tree = simulate_img_with_mutation(
        sim_tree,
        gain_rate=gain_rate,
        loss_rate=loss_rate,
        mutation_rate=mutation_rate,
        ngenes=ngenes,
        max_ncore=max_core,
    )

    # Collect genes and their mutations for each isolate (leaf).
    gene_mutations = [
        [[gene, leaf.gene_mutations[gene]] for gene in leaf.acc_genes]
        for leaf in sim_tree.leaf_node_iter()
    ]

    return (gene_mutations, basic_tree)
def test_reverse_iter(self):
    """``reversed()`` over a namespace yields labels in reverse input order."""
    namespace = TaxonNamespace(self.str_labels)
    expected = self.str_labels[::-1]
    # Sanity check on the fixture: reversing must actually change the order.
    assert expected != self.str_labels
    for position, taxon in enumerate(reversed(namespace)):
        self.assertEqual(taxon.label, expected[position])
def test_get_nonexistant_taxon_by_label(self):
    """``get_taxon`` returns ``None`` for each label absent from the namespace."""
    tns = TaxonNamespace(self.str_labels)
    check = ["u", "x", "y"]
    for label in check:
        # Fixture sanity check: the probe label must really be absent.
        assert label not in self.str_labels
        # Query the individual label; passing the whole ``check`` list
        # would never exercise a lookup of an actual label string.
        t = tns.get_taxon(label)
        self.assertIsNone(t)
def test_no_has_labels(self):
    """``has_taxa_labels`` is false when any requested label is missing."""
    namespace = TaxonNamespace(self.str_labels)
    missing = ["u", "x", "y"]
    # Fixture sanity check: none of the probe labels may be present.
    assert all(label not in self.str_labels for label in missing)
    self.assertFalse(namespace.has_taxa_labels(missing))
    self.assertFalse(namespace.has_taxa_labels(missing + self.str_labels))
def test_new_taxon_to_immutable(self):
    """``new_taxon`` on an immutable namespace raises and adds nothing."""
    namespace = TaxonNamespace()
    namespace.is_mutable = False
    for label in self.str_labels:
        with self.assertRaises(TypeError):
            namespace.new_taxon(label)
        self.assertEqual(len(namespace), 0)
def test_add_taxa_duplicate(self):
    """Re-adding taxa already in the namespace must not change its contents."""
    namespace = TaxonNamespace(self.taxa)
    self.validate_taxon_concepts(namespace, self.taxa_labels)
    namespace.add_taxa(self.taxa)
    self.assertEqual(len(namespace), len(self.taxa))
    for held, original in zip(namespace, self.taxa):
        self.assertIs(held, original)
def test_add_taxon(self):
    """Add existing taxon objects one by one and check each is stored."""
    namespace = TaxonNamespace()
    for taxon in self.taxa:
        namespace.add_taxon(taxon)
    self.validate_taxon_concepts(namespace, self.taxa_labels)
    for taxon in self.taxa:
        self.assertIn(taxon, namespace._taxa)
def run_tree_regression(arg, taxa):
    """Load the tree at ``arg.tree`` and run ``regression`` on it.

    The file is read as NEXUS when its first line starts with ``#NEXUS``
    (case-insensitive) and as Newick otherwise.

    :param arg: object whose ``tree`` attribute is the path to the tree file
    :param taxa: mapping with a ``'taxa'`` list; each entry supplies ``'id'``
        and ``['attributes']['date']``
    :return: the result of ``regression(tree)``
    """
    taxon_namespace = TaxonNamespace([taxon['id'] for taxon in taxa['taxa']])

    with open(arg.tree) as fp:
        # readline() returns '' for an empty file, whereas next(fp) would
        # leak a bare StopIteration out of this function.
        first_line = fp.readline()
    if first_line.upper().startswith('#NEXUS'):
        tree_format = 'nexus'
    else:
        tree_format = 'newick'

    # Both formats are read with identical options; only the schema differs.
    tree = Tree.get(
        path=arg.tree,
        schema=tree_format,
        tree_offset=0,
        preserve_underscores=True,
        taxon_namespace=taxon_namespace,
    )
    tree.resolve_polytomies(update_bipartitions=True)
    setup_indexes(tree, False)

    taxa2 = [{'date': taxon['attributes']['date']} for taxon in taxa['taxa']]
    initialize_dates_from_taxa(tree, taxa2)
    return regression(tree)
def test_has_labels_case_sensitivity(self):
    """Upper-cased labels match only when case sensitivity is turned off."""
    namespace = TaxonNamespace(self.str_labels)
    uppercased = []
    for label in self.str_labels:
        shouted = label.upper()
        # Keep only labels that upper-casing actually changes.
        if shouted != label:
            uppercased.append(shouted)
    assert uppercased
    namespace.is_case_sensitive = True
    self.assertFalse(namespace.has_taxa_labels(uppercased))
    namespace.is_case_sensitive = False
    self.assertTrue(namespace.has_taxa_labels(uppercased))
def test_new_taxon(self):
    """``new_taxon`` returns a ``Taxon`` with the label and grows the namespace."""
    namespace = TaxonNamespace()
    for expected_size, label in enumerate(self.str_labels, start=1):
        created = namespace.new_taxon(label)
        self.assertTrue(isinstance(created, Taxon))
        self.assertEqual(created.label, label)
        self.assertEqual(len(namespace), expected_size)
    self.validate_taxon_concepts(namespace, self.str_labels)
def test_basic_adding_to_immutable(self):
    """``add_taxon`` on an immutable namespace raises and adds nothing."""
    namespace = TaxonNamespace()
    self.assertEqual(len(namespace), 0)
    namespace.is_mutable = False
    for label in self.str_labels:
        with self.assertRaises(TypeError):
            namespace.add_taxon(Taxon(label=label))
        self.assertEqual(len(namespace), 0)
def test_get_taxa_by_label(self):
    """``get_taxa`` returns only taxa for known labels, in label order."""
    namespace = TaxonNamespace(self.str_labels)
    # Ask for every known label plus three that are expected to be absent.
    requested = self.str_labels + ["u", "x", "y"]
    matched = namespace.get_taxa(requested)
    self.assertEqual(len(matched), len(self.str_labels))
    matched_labels = [taxon.label for taxon in matched]
    self.assertEqual(matched_labels, self.str_labels)
def test_require_taxon_by_label_add_to_immutable(self):
    """``require_taxon`` for a missing label raises on an immutable namespace."""
    namespace = TaxonNamespace(self.str_labels)
    namespace.is_mutable = False
    for label in ["u", "x", "y"]:
        # Fixture sanity check: the probe label must really be absent.
        assert label not in self.str_labels
        with self.assertRaises(TypeError):
            namespace.require_taxon(label)
def test_clear(self):
    """After ``clear()`` the namespace has zero length and yields no taxa."""
    namespace = TaxonNamespace(self.str_labels)
    self.assertEqual(len(namespace), len(self.str_labels))
    namespace.clear()
    self.assertEqual(len(namespace), 0)
    # Iteration must agree with len(): nothing is left to yield.
    remaining = list(namespace)
    self.assertEqual(len(remaining), 0)
def test_delete_by_index(self):
    """Deleting by index removes exactly the taxon at that position."""
    for removed_idx in range(len(self.taxa)):
        # Start from a fresh namespace for each position being deleted.
        namespace = TaxonNamespace(self.taxa)
        del namespace[removed_idx]
        for position, taxon in enumerate(self.taxa):
            if position != removed_idx:
                self.assertIn(taxon, namespace)
            else:
                self.assertNotIn(taxon, namespace)
def test_reversed(self):
    """``reversed()`` over a namespace built from sorted labels is descending."""
    ascending = sorted(self.str_labels)
    namespace = TaxonNamespace(ascending)
    descending = ascending[::-1]
    # Fixture sanity check: the two orders must differ.
    assert ascending != descending
    for position, taxon in enumerate(reversed(namespace)):
        self.assertEqual(taxon.label, descending[position])
def test_discard_taxon_label_error(self):
    """Discarding an unknown label must not raise nor alter the namespace."""
    namespace = TaxonNamespace(self.str_labels)
    missing_label = "zzz"
    assert missing_label not in self.str_labels
    try:
        namespace.discard_taxon_label(missing_label)
    except LookupError:
        self.fail()
    # Only reached when no LookupError occurred, since fail() raises.
    self.validate_taxon_concepts(namespace, self.str_labels)
def test_sort(self):
    """``sort()`` reorders a descending namespace into ascending label order."""
    descending = sorted(self.str_labels)[::-1]
    namespace = TaxonNamespace(descending)
    namespace.sort()
    ascending = sorted(descending)
    # Fixture sanity check: sorting must actually change the order.
    assert descending != ascending
    for position, taxon in enumerate(namespace):
        self.assertEqual(taxon.label, ascending[position])
def test_findall_multiple(self):
    """``findall`` returns every taxon whose label matches, case-sensitively.

    For each probe label the result must be iterable, contain as many taxa
    as the label occurs in ``self.str_labels``, and hold only taxa with
    exactly that label.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back for interpreters that lack that module.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    tns = TaxonNamespace(self.str_labels)
    # Loop-invariant setting, so it is applied once up front.
    tns.is_case_sensitive = True
    multilabels = ["_", "z"]
    for label in multilabels:
        taxa = tns.findall(label=label)
        self.assertTrue(isinstance(taxa, Iterable))
        expected_count = len([s for s in self.str_labels if s == label])
        self.assertEqual(len(taxa), expected_count)
        for t in taxa:
            self.assertEqual(t.label, label)
def test_case_insensitive_require_taxon_by_label2(self):
    """With case sensitivity on, requiring an upper-cased label keeps its case.

    Each returned taxon must carry exactly the requested upper-cased label,
    and the namespace is then validated against the original labels plus
    the upper-cased ones.
    """
    namespace = TaxonNamespace(self.str_labels)
    uppercased = [
        label.upper()
        for label in self.str_labels
        if label.upper() != label
    ]
    # De-duplicate the upper-cased labels.
    uppercased = list(set(uppercased))
    assert uppercased
    for label in uppercased:
        namespace.is_case_sensitive = True
        required = namespace.require_taxon(label)
        self.assertEqual(required.label, label)
    self.validate_taxon_concepts(namespace, self.str_labels + uppercased)
def test_create_clade(data):
    """Check that all but one edge of a created clade is labelled "locked".

    ``data`` maps species names to ages; inputs with duplicate ages are
    rejected through ``assume``.
    """
    spp = data.keys()
    ages = list(data.values())
    # Only continue when every age is distinct.
    assume(len(set(ages)) == len(ages))
    tn = TaxonNamespace(spp, label="taxa")
    clade = create_clade(tn, spp, ages)
    locked_count = sum(
        [edge.label == "locked" for edge in edge_iter(clade.seed_node)]
    )
    total_edges = len(list(edge_iter(clade.seed_node)))
    assert total_edges == locked_count + 1
def test_construct_from_another_different_label(self):
    """Copy-construct with a new label: taxa are shared, the label is not."""
    relabelled = TaxonNamespace(self.tns1, label="T2")
    self.assertIsNot(relabelled, self.tns1)
    self.assertNotEqual(relabelled.label, self.tns1.label)
    self.assertEqual(self.tns1.label, "T1")
    self.assertEqual(relabelled.label, "T2")
    self.assertEqual(relabelled._taxa, self.tns1._taxa)
    for original, duplicate in zip(self.tns1, relabelled):
        self.assertIs(original, duplicate)
    self.compare_distinct_annotables(relabelled, self.tns1)
def test_case_insensitive_require_taxon_by_label1(self):
    """With case sensitivity off, upper-cased labels resolve to existing taxa.

    Requiring the upper-cased labels must not grow the namespace.
    """
    namespace = TaxonNamespace(self.str_labels)
    uppercased = [
        label.upper()
        for label in self.str_labels
        if label.upper() != label
    ]
    assert uppercased
    for label in uppercased:
        namespace.is_case_sensitive = False
        required = namespace.require_taxon(label)
        self.assertEqual(required.label.lower(), label.lower())
    self.assertEqual(len(namespace), len(self.str_labels))
    self.validate_taxon_concepts(namespace, self.str_labels)
def write_and_read_nexus(filename, header, tree_id, tree_str):
    """Write ``header`` plus one tree statement to a file and read it back.

    The file at ``filename`` receives each line of ``header`` followed by
    ``"tree <tree_id> <tree_str>"``, one per line. It is then parsed as
    NEXUS with case-sensitive taxon labels and the resulting tree returned.
    """
    tree_statement = "tree " + tree_id + " " + tree_str

    # Write a temp file containing the tree.
    with open(filename, "w") as out:
        out.writelines(line + "\n" for line in header + [tree_statement])

    # Read the tree back as a dendropy tree.
    namespace = TaxonNamespace(is_case_sensitive=True)
    return Tree.get(
        path=filename,
        schema="nexus",
        taxon_namespace=namespace,
        case_sensitive_taxon_labels=True,
        suppress_internal_node_taxa=False,
    )
def test_require_taxon_by_label_add(self):
    """``require_taxon`` creates a new ``Taxon`` for each missing label."""
    namespace = TaxonNamespace(self.str_labels)
    new_labels = ["u", "x", "y"]
    for label in new_labels:
        # Fixture sanity check: the label must not exist beforehand.
        assert label not in self.str_labels
        created = namespace.require_taxon(label)
        self.assertTrue(isinstance(created, Taxon))
        self.assertEqual(created.label, label)
    all_labels = self.str_labels + new_labels
    self.assertEqual(len(namespace), len(all_labels))
    self.validate_taxon_concepts(namespace, all_labels)