def get_char_matrix(self, labels=None):
    """Build a character matrix fixture holding one random row per label.

    When ``labels`` is omitted, the strings "0" through "999" are used. The
    label list is shuffled in place with ``self.rng`` before any row is
    created. Book-keeping attributes (``expected_labels``, ``original_taxa``,
    ``original_seqs`` and ``nseqs``) are attached to the returned matrix, and
    every stored row records its ``original_taxon``, ``original_seq`` and
    ``label``.
    """
    matrix = charmatrixmodel.CharacterMatrix()
    if labels is None:
        labels = [str(number) for number in range(1000)]
    matrix.expected_labels = []
    matrix.original_taxa = []
    matrix.original_seqs = []
    self.rng.shuffle(labels)
    for label in labels:
        taxon = dendropy.Taxon(label=label)
        matrix.taxon_namespace.add_taxon(taxon)
        matrix.original_taxa.append(taxon)
        # The row is touched once before it is assigned below; kept as-is.
        matrix[taxon].original_taxon = taxon
        matrix.expected_labels.append(label)
        matrix[taxon] = [self.rng.randint(0, 100) for _ in range(4)]
        matrix[taxon].original_seq = matrix[taxon]
        matrix.original_seqs.append(matrix[taxon])
        matrix[taxon].original_taxon = taxon
        matrix[taxon].label = label
    # Sanity checks: namespace, matrix and label list must all agree in size.
    assert len(matrix.taxon_namespace) == len(matrix.original_taxa)
    assert len(matrix) == len(matrix.original_taxa)
    assert len(matrix) == len(labels)
    matrix.nseqs = len(matrix)
    return matrix
def test_mixed_token_lookup(self):
    """Look up taxa by number when only some of them have translate tokens.

    Every taxon is registered with the mapper; every other one (positions
    1, 3, 5, ...) additionally gets a translate token equal to its 1-based
    position as a string. Looking up each position must return the taxon
    created for it without adding anything to the namespace.
    """
    labels = ["t{}".format(i) for i in range(1, 101)]
    tns = dendropy.TaxonNamespace()
    tsm = nexusprocessing.NexusTaxonSymbolMapper(taxon_namespace=tns)
    translate = {}
    labels_t = {}
    for label_idx, label in enumerate(labels):
        t = dendropy.Taxon(label)
        labels_t[t.label] = t
        tsm.add_taxon(t)
        if label_idx % 2 == 0:
            token = str(label_idx + 1)
            translate[token] = t
            tsm.add_translate_token(token, t)
    self.assertEqual(len(tns), len(labels))
    for label_idx, label in enumerate(labels):
        token = label_idx + 1
        t1 = tsm.require_taxon_for_symbol(token)
        self.assertEqual(len(tns), len(labels))
        self.assertEqual(t1.label, label)
        self.assertIs(t1, labels_t[label])
        # ``translate`` is keyed by the string form of the token; testing the
        # integer against it could never match, which left the assertion
        # below unreachable.
        token_key = str(token)
        if token_key in translate:
            self.assertIs(t1, translate[token_key])
        self.assertEqual(len(tns), len(labels))
def get_char_matrix(self):
    """Build a 15-row character matrix whose taxa all lie outside its namespace.

    One taxon and one ``[1, 1, 1]`` row is created per entry in ``labels``;
    afterwards the matrix is given a brand-new, empty ``TaxonNamespace``. The
    created taxa and their labels are recorded on the matrix as
    ``expected_taxa`` and ``expected_labels``.
    """
    labels = [
        "z01", "<NONE>", "z03", "z04", "z05",
        "z06", None, None, "z09", "z10",
        "z11", "<NONE>", None, "z14", "z15",
    ]
    matrix = charmatrixmodel.CharacterMatrix()
    matrix.expected_labels = []
    matrix.expected_taxa = set()
    random.shuffle(labels)
    # NOTE(review): every taxon is created with label=None, so the entries of
    # ``labels`` only determine how many rows are built -- confirm intended.
    for _ in labels:
        taxon = dendropy.Taxon(label=None)
        matrix.taxon_namespace.add_taxon(taxon)
        matrix[taxon] = [1, 1, 1]
        matrix.expected_taxa.add(taxon)
        matrix.expected_labels.append(taxon.label)
    matrix.taxon_namespace = dendropy.TaxonNamespace()
    row_count = len(matrix)
    assert row_count == len(labels)
    assert row_count == len(matrix._taxon_sequence_map)
    matrix.nseqs = len(matrix)
    return matrix
def setUp(self):
    """Create a taxon-free tree and give every node a taxon from a label map.

    The map deliberately contains repeated labels, labels differing only in
    case, and ``None`` labels. Each created taxon is added to the tree's
    namespace and remembered both on its node (``original_taxon``) and in
    ``self.original_taxa``.
    """
    fixture = self.get_tree(
        suppress_internal_node_taxa=True,
        suppress_leaf_node_taxa=True)
    self.tree, self.anodes, self.lnodes, self.inodes = fixture
    self.node_label_to_taxon_label_map = {
        "a": "a",
        "b": "a",
        "c": "2",
        "e": "2",
        "f": "b",
        "g": "B",
        "h": "B",
        "i": "h",
        "j": "H",
        "k": "h",
        "l": None,
        "m": None,
        "n": "H",
        "o": "J",
        "p": "j",
    }
    self.original_taxa = []
    namespace = self.tree.taxon_namespace
    for nd in self.tree:
        taxon = dendropy.Taxon(
            label=self.node_label_to_taxon_label_map[nd.label])
        namespace.add_taxon(taxon)
        nd.taxon = taxon
        nd.original_taxon = taxon
        self.original_taxa.append(taxon)
    # One taxon per mapped node must have ended up in the namespace.
    assert len(self.tree.taxon_namespace) == len(self.node_label_to_taxon_label_map)
    assert len(self.tree.taxon_namespace) == len(self.original_taxa)
def getDiploid(self):
    """
    Let the user pick the diploid species.

    Shows a warning and returns if no input files have been loaded.
    Otherwise opens the diploid-list dialog, seeded with the current taxa
    names and the current selection, and stores the dialog's result in
    self.diploidList when the dialog is accepted.
    """
    if len(self.inputFiles) == 0:
        QMessageBox.warning(self, "Warning", "Please select a file type and upload data!", QMessageBox.Ok)
        return

    # The dialog expects a taxon namespace, so wrap the current taxa names.
    namespace = dendropy.TaxonNamespace()
    for name in list(self.taxa_names):
        namespace.add_taxon(dendropy.Taxon(name))

    dialog = diploidList.DiploidListDlg(namespace, self.diploidList, self)
    accepted = dialog.exec_()
    if accepted:
        # Only overwrite the stored list when the user confirmed the dialog.
        self.diploidList = dialog.getDiploidSpeciesList()
def MaybeMergeChildren(parent_node):
    """Collapse two leaf children into their parent, if both are leaves.

    The parent takes over the union of the children's pathways, the sum of
    their counts, and the taxon label of the child with the longer edge; that
    edge's length is added to the parent's own edge length.

    Args:
        parent_node: node with exactly two children (asserted).

    Returns:
        True if the children were merged, False if at least one child is not
        a leaf (in which case nothing is modified).
    """
    children = parent_node.child_nodes()
    assert len(children) == 2
    if not AreLeaves(children):
        logging.debug('Not both children are leaves. Bailing.')
        return False

    # Make the new dictionaries and edge lengths
    child_pathways = [c.pathways for c in children]
    child_lengths = [c.edge.length for c in children]
    virtual_count = sum(c.count for c in children)
    max_length_idx = pylab.argmax(child_lengths)
    label = children[max_length_idx].taxon.label
    merged_pathways = set.union(*child_pathways)

    logging.debug('Merging 2 children with edge lengths %s', child_lengths)

    # Remove children and update the parent.
    # An explicit loop is required: on Python 3 ``map`` is lazy, so
    # ``map(parent_node.remove_child, children)`` would never remove anything.
    # Iterate over a copy in case ``children`` aliases the node's own list.
    for child in list(children):
        parent_node.remove_child(child)
    parent_node.edge.length += child_lengths[max_length_idx]
    parent_node.pathways = merged_pathways
    parent_node.count = virtual_count
    parent_node.annotate('count')
    for pname in parent_node.pathways:
        setattr(parent_node, pname, True)
        parent_node.annotate(pname)

    # Set up a taxon for the parent according to the
    # most distinct child.
    # TODO(flamholz): indicate somehow that this was merged.
    taxon = dendropy.Taxon()
    taxon.label = label
    parent_node.taxon = taxon
    return True
def json_to_dendropy_sub(json, node, taxon_set):
    '''
    Recursively build the subtree below ``node`` from a nested dictionary.

    json      -- dict describing this node; the optional key 'children' holds
                 a list of dicts of the same form, every other key is stored
                 as an attribute on ``node``
    node      -- dendropy node to populate
    taxon_set -- collection that receives the taxon of every leaf

    Attribute values are converted to float where possible; the string
    'undefined' becomes None; anything else is stored unchanged. A child's
    edge length is its 'xvalue' minus the parent's, else its 'branch_length',
    else 1.0. Nodes that end up without children get a taxon labelled with the
    upper-cased 'strain' entry.
    '''
    # xvalue must be set before the children are visited, because their edge
    # lengths are computed relative to it.
    if 'xvalue' in json:
        node.xvalue = float(json['xvalue'])

    # .items() works on both Python 2 and Python 3 (iteritems() is 2-only).
    for attr, val in json.items():
        if attr == 'children':
            for sub_json in val:
                child_node = dendropy.Node()
                json_to_dendropy_sub(sub_json, child_node, taxon_set)
                if hasattr(child_node, 'xvalue'):
                    edge_length = child_node.xvalue - node.xvalue
                elif hasattr(child_node, 'branch_length'):
                    edge_length = child_node.branch_length
                else:
                    edge_length = 1.0
                node.add_child(child_node, edge_length=edge_length)
        else:
            # Only a failed numeric conversion selects the fallback; other
            # errors are no longer swallowed by a bare except.
            try:
                parsed = float(val)
            except (TypeError, ValueError, OverflowError):
                parsed = None if val == 'undefined' else val
            setattr(node, attr, parsed)

    if len(node.child_nodes()) == 0:
        node.taxon = dendropy.Taxon(label=json['strain'].upper())
        node.strain = json['strain']
        taxon_set.add_taxon(node.taxon)
def setUp(self):
    """Assemble a five-node fixture with labelled edges and annotations.

    Layout: ``p1`` is the parent of ``n0``; ``n0`` has children ``c1`` and
    ``c2``; ``c2`` has the single child ``c3``. Nodes at even positions in
    ``self.nodes`` get an edge label and length; every node and every edge
    gets one plain annotation plus bound-attribute annotations.
    """
    self.taxa = [dendropy.Taxon(label=name) for name in ["a", "b", "c", "d"]]
    taxon_a, taxon_b, taxon_c, taxon_d = self.taxa

    self.n0 = dendropy.Node(label="0", taxon=taxon_a)
    self.c1 = dendropy.Node(label="1", taxon=None)
    self.c2 = dendropy.Node(label=None, taxon=taxon_b)
    # NOTE(review): this first c3 is replaced on the very next line.
    self.c3 = dendropy.Node(label=None, taxon=None)
    self.c3 = dendropy.Node(label=None, taxon=taxon_c)
    self.p1 = dendropy.Node(label="-1", taxon=taxon_d)

    self.n0.parent_node = self.p1
    self.n0.set_child_nodes([self.c1, self.c2])
    self.c2.set_child_nodes([self.c3])
    self.nodes = [self.n0, self.c1, self.c2, self.c3, self.p1]

    for position, node in enumerate(self.nodes):
        edge = node.edge
        if position % 2 == 0:
            edge.label = "E{}".format(position)
            edge.length = position
        annotation_name = "a{}".format(position)

        # Node annotations: one plain value, one bound to the node label,
        # and one nested annotation bound to the first annotation's name.
        node_note = node.annotations.add_new(
            annotation_name,
            "{}{}{}".format(node.label, node.taxon, position))
        node.annotations.add_bound_attribute("label")
        node_note.annotations.add_bound_attribute("name")

        # Edge annotations follow the same pattern.
        edge_note = node.edge.annotations.add_new(
            annotation_name,
            "{}{}".format(node.edge.label, position))
        node.edge.annotations.add_bound_attribute("label")
        edge_note.annotations.add_bound_attribute("name")

    self.e0 = self.n0._edge
def testTaxaQuerying(self):
    """Check ``has_taxa`` / ``has_taxon`` before and after adding new taxa."""
    taxon_set = dendropy.TaxonSet(self.labels)

    # Initial contents: all fixture labels, and nothing else.
    self.assertTrue(taxon_set.has_taxa(labels=self.labels))
    self.assertTrue(taxon_set.has_taxa(taxa=taxon_set))
    self.assertFalse(taxon_set.has_taxa(labels=self.labels + ["k"]))

    # A taxon created through the set is immediately visible.
    taxon_k = taxon_set.new_taxon(label="k")
    self.assertTrue(taxon_set.has_taxa(taxa=[taxon_k]))
    self.assertTrue(taxon_set.has_taxon(label="k"))
    self.assertTrue(taxon_set.has_taxa(labels=self.labels + ["k"]))

    # So is a taxon created separately and added afterwards.
    taxon_j = dendropy.Taxon(label="j")
    taxon_set.add_taxon(taxon_j)
    self.assertTrue(taxon_set.has_taxa(taxa=[taxon_j]))
    self.assertTrue(taxon_set.has_taxon(label="j"))
    self.assertTrue(taxon_set.has_taxa(labels=self.labels + ["j"]))

    # A taxon that was never added must not be reported.
    self.assertFalse(taxon_set.has_taxon(taxon=dendropy.Taxon()))
    for known_label in self.labels:
        self.assertTrue(taxon_set.has_taxon(label=known_label))
def makeTaxTree(splits, contigTax, outname):
    """Write a newick tree linking rank names, contigs and splits.

    Each contig's taxonomy list gets rank prefixes (k_, p_, c_, o_, f_, g_,
    s_) and has parentheses replaced by square brackets. The tree chains
    root -> ranks -> contig -> split for every split and is written to
    ``outname`` with single quotes stripped.

    NOTE(review): the lists held in ``contigTax`` are rewritten in place.

    Returns the set of splits that were skipped because of their taxonomy.
    """
    RANK_PREFIXES = ['k', 'p', 'c', 'o', 'f', 'g', 's']

    # Create namespace and node collection
    contigTax2 = {}
    for contig, ranks in contigTax.items():
        for idx, prefix in enumerate(RANK_PREFIXES):
            prefixed = '{}_{}'.format(prefix, ranks[idx])  # Add rank prefixes
            # Parentheses will break newick
            ranks[idx] = prefixed.replace('(', '[').replace(')', ']')
        contigTax2[contig] = ranks

    names = set()
    for contig, ranks in contigTax2.items():
        names.update([contig] + ranks)
    # We want to have the contigs AND the splits in our tree
    names.update(splits)

    nodes = {name: dendropy.Node() for name in names}
    taxa = []
    for name, node in nodes.items():
        taxon = dendropy.Taxon(name)
        node.taxon = taxon
        taxa.append(taxon)
    namespace = dendropy.TaxonNamespace()
    namespace.add_taxa(taxa)

    # Create and populate tree
    tree = dendropy.Tree(taxon_namespace=namespace)
    parents = {}
    # This shouldn't be needed but since we have taxonomy problems do it for now.
    removedSplits = set()
    for split in splits:
        contig = split.rsplit('_split', 1)[0]
        tax = contigTax2[contig]
        weird_species = tax[-1] == 's_Firmicutes bacterium'
        if weird_species or tax[4] == 'f_Clostridia bacterium [no family in NCBI]':
            # Weird taxonomy, find solution, avoid for now!
            print(contig)
            removedSplits.add(split)
            continue
        tree.seed_node.add_child(nodes[tax[0]])
        for upper, lower in zip(tax, tax[1:]):
            nodes[upper].add_child(nodes[lower])
            parents.setdefault(lower, set()).add(upper)
        nodes[tax[-1]].add_child(nodes[contig])
        nodes[contig].add_child(nodes[split])

    # All nodes should have only one parent!
    for name, seen_parents in parents.items():
        if len(seen_parents) > 1:
            print(name, seen_parents)

    newick = tree.as_string('newick').replace('\'', '')
    with open(outname, 'w') as outfile:
        outfile.write(newick)
    return removedSplits
def reportNewickTree(self, outDir, outFile, leafLabels=None):
    """Write the placement tree in newick format, with duplicates re-inserted.

    The tree is read from ``<outDir>/storage/tree/<PPLACER_TREE_OUT>``.
    Internal node labels are reduced to their first three non-empty
    '|'-separated fields. Every leaf that has duplicate sequences is replaced
    by a new internal node whose zero-length children are the leaf itself and
    one new leaf per duplicate.

    Args:
        outDir: output directory containing the tree.
        outFile: destination handed to ``reassignStdOut``/``restoreStdOut``.
        leafLabels: if equal to 'taxonomy', the taxonomy string of each leaf
            found in the genome tree taxonomy file is appended to its label.
    """
    # read duplicate nodes
    duplicateSeqs = self.__readDuplicateSeqs()

    # read tree
    treeFile = os.path.join(outDir, 'storage', 'tree',
                            DefaultValues.PPLACER_TREE_OUT)
    tree = dendropy.Tree.get_from_path(treeFile,
                                       schema='newick',
                                       rooting="force-rooted",
                                       preserve_underscores=True)

    # clean up internal node labels: keep the first field, plus the second
    # and third fields when they are non-empty. Slicing (rather than indexing)
    # means a label with fewer than three fields no longer raises IndexError.
    for node in tree.internal_nodes():
        if node.label:
            labelSplit = node.label.split('|')
            kept = [labelSplit[0]]
            kept.extend(part for part in labelSplit[1:3] if part != '')
            node.label = '|'.join(kept)

    # insert duplicate nodes into tree
    for leaf in tree.leaf_nodes():
        duplicates = duplicateSeqs.get(leaf.taxon.label, None)
        if duplicates is not None:
            newParent = leaf.parent_node.new_child(
                edge_length=leaf.edge_length)
            curLeaf = leaf.parent_node.remove_child(leaf)
            newParent.new_child(taxon=curLeaf.taxon, edge_length=0)
            for d in duplicates:
                newParent.new_child(taxon=dendropy.Taxon(label=d),
                                    edge_length=0)

    # append taxonomy to leaf nodes
    if leafLabels == 'taxonomy':
        # read taxonomy string for each IMG genome
        taxonomy = {}
        taxonomyFile = os.path.join(DefaultValues.GENOME_TREE_DIR,
                                    DefaultValues.GENOME_TREE_TAXONOMY)
        # 'with' guarantees the file handle is closed again.
        with open(taxonomyFile) as taxonomyHandle:
            for line in taxonomyHandle:
                lineSplit = line.split('\t')
                taxonomy[lineSplit[0]] = lineSplit[1].rstrip()

        # append taxonomy to leaf labels
        for leaf in tree.leaf_nodes():
            taxaStr = taxonomy.get(leaf.taxon.label, None)
            if taxaStr:
                leaf.taxon.label += '|' + taxaStr

    # write out tree; always hand stdout back, even if serialisation fails
    oldStdOut = reassignStdOut(outFile)
    try:
        print((tree.as_string(schema='newick', suppress_rooting=True)))
    finally:
        restoreStdOut(outFile, oldStdOut)
def test_basic_construction(self):
    """A freshly built node exposes its taxon, label and a detached edge."""
    expected_taxon = dendropy.Taxon("z")
    node = dendropy.Node(taxon=expected_taxon, label="x", edge_length=1)

    # Constructor arguments are stored on the node itself.
    self.assertIs(node.taxon, expected_taxon)
    self.assertEqual(node.label, "x")

    # The subtending edge carries the length and points only at this node.
    subtending = node.edge
    self.assertEqual(subtending.length, 1)
    self.assertIs(subtending.head_node, node)
    self.assertIs(subtending.tail_node, None)
def getTaxamap(self):
    """
    Handler for "Set taxa map": let the user edit the taxon-to-species map.

    Shows a warning and returns if no data has been loaded. For unphased
    data every taxon is offered twice (suffixes "_0" and "_1"); otherwise
    the data's own taxon namespace is offered. An empty self.taxamap is
    first filled so that each taxon maps to itself. The dialog's result
    replaces self.taxamap when the dialog is accepted.
    """
    if self.data is None:
        QMessageBox.warning(self, "Warning", "Please upload data first!", QMessageBox.Ok)
        return

    if str(self.dataTypeEdit.currentText()) == "unphased data":
        # Phasing doubles the number of taxa: turn each taxon into two.
        offered = dendropy.TaxonNamespace()
        for taxon in self.data.taxon_namespace:
            for suffix in ("_0", "_1"):
                offered.add_taxon(dendropy.Taxon(taxon.label + suffix))
    else:
        offered = self.data.taxon_namespace

    # Default is only one individual for each species.
    if len(self.taxamap) == 0:
        for taxon in offered:
            self.taxamap[taxon.label] = taxon.label

    dialog = TaxamapDlg.TaxamapDlg(offered, self.taxamap, self)
    if dialog.exec_():
        self.taxamap = dialog.getTaxamap()
def getTaxaList(self):
    """
    Handler for "Select": let the user choose the taxa used for inference.

    Shows a warning and returns if no data has been loaded. For unphased
    data every taxon is offered twice (suffixes "_0" and "_1"); otherwise
    the data's own taxon namespace is offered. An empty self.taxaList is
    first filled with every offered label. The dialog's result replaces
    self.taxaList when the dialog is accepted.
    """
    if self.data is None:
        QMessageBox.warning(self, "Warning", "Please upload data first!", QMessageBox.Ok)
        return

    if str(self.dataTypeEdit.currentText()) == "unphased data":
        # Phasing doubles the number of taxa: turn each taxon into two.
        offered = dendropy.TaxonNamespace()
        for taxon in self.data.taxon_namespace:
            for suffix in ("_0", "_1"):
                offered.add_taxon(dendropy.Taxon(taxon.label + suffix))
    else:
        offered = self.data.taxon_namespace

    # Default is all taxa are used for inference.
    if len(self.taxaList) == 0:
        for taxon in offered:
            self.taxaList.append(taxon.label)

    dialog = taxaList.TaxaListDlg(offered, self.taxaList, self)
    if dialog.exec_():
        self.taxaList = dialog.getTaxaList()
def setUp(self):
    """Create a taxon-free tree and give each labelled node its own taxon.

    ``self.expected_labels`` holds the distinct non-None node labels and
    ``self.expected_taxa`` the taxa created from them.
    """
    fixture = self.get_tree(
        suppress_internal_node_taxa=True,
        suppress_leaf_node_taxa=True)
    self.tree1, self.anodes1, self.lnodes1, self.inodes1 = fixture

    self.expected_labels = {
        nd.label for nd in self.anodes1 if nd.label is not None
    }

    self.expected_taxa = set()
    for nd in self.tree1:
        if nd.label is None:
            continue
        nd.taxon = dendropy.Taxon(label=nd.label)
        self.expected_taxa.add(nd.taxon)

    # Every node must carry a distinct label, and one taxon per label.
    assert len(self.expected_labels) == len(self.anodes1)
    assert len(self.expected_taxa) == len(self.expected_labels)
def setUp(self):
    """Create a six-taxon character matrix whose rows are all ``[0, 1, 2, 3]``.

    The taxa (labels "a" to "f") are added to the matrix's namespace and
    collected in ``self.expected_taxa``.
    """
    self.char_matrix = charmatrixmodel.CharacterMatrix()
    self.expected_taxa = set()
    namespace = self.char_matrix.taxon_namespace
    for name in ["a", "b", "c", "d", "e", "f"]:
        taxon = dendropy.Taxon(label=name)
        namespace.add_taxon(taxon)
        self.expected_taxa.add(taxon)
        self.char_matrix[taxon] = list(range(4))
def decorate_tree(tree, replace_dict):
    """Return a deep copy of ``tree`` with selected taxa relabelled.

    A node is relabelled when it is not the root (it has a parent), has no
    node label of its own, has a taxon, and that taxon's label is a key of
    ``replace_dict``. Such a node receives a new taxon labelled with the
    mapped value, and the new taxon is added to the copy's namespace. The
    input tree is left untouched.

    Args:
        tree: tree to copy.
        replace_dict: mapping from current taxon label to replacement label.

    Returns:
        The modified copy.
    """
    t = deepcopy(tree)
    for n in tree_traverse(t.seed_node):
        # Guard clause instead of nested ifs; None is tested by identity.
        if n.parent_node is None or n.label is not None or n.taxon is None:
            continue
        old_label = n.taxon._label
        if old_label in replace_dict:
            tax = dendropy.Taxon(label=replace_dict[old_label])
            n.taxon = tax
            t.taxon_namespace.add_taxon(tax)
    return t
def test_truncate_ultrametric(self):
    """Truncating at several depths must always leave an ultrametric tree.

    Each fixture tree is copied and truncated at 25%, 50%, 75% and 90% of
    its maximum root-to-leaf distance. Leaves left without label and taxon
    are given a fresh taxon before the result is checked.
    """
    fractions = (0.25, 0.5, 0.75, 0.90)
    for tree in self.trees:
        tree.calc_node_root_distances()
        _, deepest = tree.minmax_leaf_distance_from_root()
        cut_depths = [(deepest * fraction) for fraction in fractions]
        for depth in cut_depths:
            working = dendropy.Tree(tree)
            working.truncate_from_root(depth)
            for position, leaf in enumerate(working.leaf_iter(), start=1):
                unnamed = leaf.label is None and leaf.taxon is None
                if unnamed:
                    leaf.taxon = dendropy.Taxon(label="t{}".format(position))
            self.check_ultrametric_tree(working, depth)
def add_trifurication(tree):
    """Attach three new leaves (X1, X2, X3) below the first leaf's parent.

    The new taxa are added to the tree's namespace. X1 and X2 get edge
    length 1.234, X3 gets 4.123. The tree is modified in place.
    """
    anchor = list(tree.leaf_node_iter())[0].parent_node
    new_leaves = (
        ('X1', 1.234),
        ('X2', 1.234),
        ('X3', 4.123),
    )
    for taxon_label, length in new_leaves:
        taxon = dendropy.Taxon(taxon_label)
        tree.taxon_namespace.add_taxon(taxon)
        leaf = dendropy.Node(edge_length=length)
        leaf.taxon = taxon
        anchor.add_child(leaf)
def matrix_to_dendropy_pdm(dmat, taxa):
    """Build a dendropy PDM object from an in-memory distance matrix

    Parameters
    ----------
    dmat : 2-D array-like
        pairwise distances, read as ``dmat[i, j]``; every entry must be
        convertible to float
    taxa : sequence of str
        taxon labels; ``taxa[i]`` names row and column ``i`` of ``dmat``

    Returns
    -------
    pdm : dendropy phylogenetic distance matrix object
        uses a fresh taxon namespace holding one taxon per label
    """
    pdm = dendropy.PhylogeneticDistanceMatrix()
    pdm.taxon_namespace = dendropy.TaxonNamespace()
    pdm._mapped_taxa = set()

    # Resolve each label to its taxon once, instead of repeating the
    # namespace lookup for every cell of the matrix.
    resolved = []
    for label in taxa:
        taxon = pdm.taxon_namespace.get_taxon(label)
        if not taxon:
            taxon = dendropy.Taxon(label)
            pdm.taxon_namespace.add_taxon(taxon)
            pdm._mapped_taxa.add(taxon)
            pdm._taxon_phylogenetic_distances[taxon] = {}
        resolved.append(taxon)

    for i, xi in enumerate(resolved):
        row = pdm._taxon_phylogenetic_distances[xi]
        for j, xj in enumerate(resolved):
            row[xj] = float(dmat[i, j])
    return pdm
def get_char_matrix(self, taxon_namespace=None):
    """Build a matrix of the class's ``matrix_type`` filled from its sequence source.

    One row is created per label "0" .. "nseqs - 1", in an order shuffled
    with the class-level ``rng``. Row values are drawn from an endless cycle
    over the class's ``sequence_source``, twice its length per row. Each
    stored row is checked to be exactly of the class's ``sequence_type``.
    """
    cls = self.__class__
    matrix = cls.matrix_type(taxon_namespace=taxon_namespace)
    labels = [str(number) for number in range(cls.nseqs)]
    cls.rng.shuffle(labels)
    source_cycle = itertools.cycle(cls.sequence_source)
    row_length = len(cls.sequence_source) * 2
    for label in labels:
        taxon = dendropy.Taxon(label=label)
        matrix.taxon_namespace.add_taxon(taxon)
        matrix[taxon] = list(itertools.islice(source_cycle, row_length))
        self.assertTrue(isinstance(matrix[taxon], self.__class__.sequence_type))
        self.assertIs(type(matrix[taxon]), self.__class__.sequence_type)
    return matrix
def test_add_taxon(self):
    """Each added taxon is retrievable by label and by 1-based number.

    Neither kind of lookup may add anything to the namespace.
    """
    labels = ["t{}".format(number) for number in range(1, 101)]
    tns = dendropy.TaxonNamespace()
    mapper = nexusprocessing.NexusTaxonSymbolMapper(taxon_namespace=tns)
    for added_so_far, label in enumerate(labels, start=1):
        taxon = dendropy.Taxon(label)
        mapper.add_taxon(taxon)
        self.assertEqual(len(tns), added_so_far)
        self.assertEqual(taxon.label, label)
        # Lookup by label ...
        self.assertIs(mapper.require_taxon_for_symbol(label), taxon)
        self.assertEqual(len(tns), added_so_far)
        # ... and by taxon number given as a string.
        self.assertIs(mapper.require_taxon_for_symbol(str(added_so_far)), taxon)
        self.assertEqual(len(tns), added_so_far)
    self.assertEqual(len(tns), len(labels))
def test_no_number_lookup_and_create2(self):
    """With number lookup disabled, numeric symbols create new taxa."""
    # looking up a number symbol should result in new taxon creation
    labels = ["t{}".format(number) for number in range(1, 101)]
    tns = dendropy.TaxonNamespace()
    mapper = nexusprocessing.NexusTaxonSymbolMapper(
        taxon_namespace=tns,
        enable_lookup_by_taxon_number=False)

    registered = []
    for label in labels:
        taxon = dendropy.Taxon(label)
        mapper.add_taxon(taxon)
        registered.append(taxon)
    self.assertEqual(len(tns), len(labels))

    for number in range(1, len(labels) + 1):
        created = mapper.require_taxon_for_symbol(number)
        self.assertNotIn(created, registered)
        self.assertEqual(created.label, str(number))
        # Exactly one extra taxon per numeric lookup so far.
        self.assertEqual(len(tns), len(labels) + number)
def insert(placed_edge, query_name, x_1, x_2):
    """Graft a new leaf named ``query_name`` onto ``placed_edge``.

    The head node of ``placed_edge`` is detached from its tail node and a new
    internal node takes its place under the tail node. The old head node and a
    new leaf carrying a taxon labelled ``query_name`` become the two children
    of that internal node.

    Parameters:
        placed_edge: edge to break; its ``tail_node``, ``head_node`` and
            ``length`` are read.
        query_name: label for the new leaf's taxon.
        x_1: edge length assigned to the new leaf.
        x_2: length used to split ``placed_edge.length`` between the new
            internal node and the old head node; negative values are
            clamped to 0.

    NOTE(review): ``master_edge`` is not defined in this function; presumably
    it is module-level state set by the caller -- confirm.
    """
    tailn = placed_edge.tail_node
    headn = placed_edge.head_node
    tailn.remove_child(headn)
    # New internal node that will sit between tailn and headn.
    nn = dy.Node()
    nn.add_child(headn)
    qry = dy.Node(taxon=dy.Taxon(query_name))
    nn.add_child(qry)
    qry.edge_length = x_1
    tailn.add_child(nn)
    # Which side of the split receives x_2 depends on whether the placed
    # edge's head node is an ancestor of master_edge's head node (or the two
    # edges are equal). The ancestor test runs after the re-wiring above.
    if placed_edge.head_node in list(master_edge.head_node.ancestor_iter()
                                     ) or master_edge == placed_edge:
        # Statement order matters: placed_edge.length is read before the
        # head node's edge length is assigned.
        nn.edge_length = placed_edge.length - max(x_2, 0)
        headn.edge_length = max(x_2, 0)
    else:
        nn.edge_length = max(x_2, 0)
        headn.edge_length = placed_edge.length - max(x_2, 0)
def generateSpeciesTree(nspecies, mean_edge_length):
    """Grow a random species tree by repeatedly splitting a random leaf.

    Starting from a single node, a randomly chosen leaf is split into two
    children ``nspecies - 1`` times; each child's edge length comes from
    ``sample_edge_length(mean_edge_length)``. The final leaves are labelled
    with the names returned by ``generateNames(nspecies)``.

    Returns the resulting ``dendropy.Tree``.
    """
    root = dendropy.Node()
    leaves = [root]
    for _ in range(nspecies - 1):
        chosen = random.choice(range(len(leaves)))
        left, right = splitLeaf(leaves[chosen])
        left.edge_length = sample_edge_length(mean_edge_length)
        right.edge_length = sample_edge_length(mean_edge_length)
        # The split node is no longer a leaf; its children take its place.
        leaves.pop(chosen)
        leaves.extend((left, right))

    names = generateNames(nspecies)
    for position, leaf in enumerate(leaves):
        leaf.taxon = dendropy.Taxon(label=names[position])

    return dendropy.Tree(seed_node=root)
def generateBalancedSpeciesTree(nspecies, elength):
    """Grow a species tree by always splitting the oldest remaining leaf.

    ``nspecies`` must be a positive power of two (asserted). Every edge
    created by a split gets length ``elength``. The final leaves are
    labelled with the names returned by ``generateNames(nspecies)``.

    Returns the resulting ``dendropy.Tree``.
    """
    assert nspecies > 0 and (nspecies & (nspecies - 1)) == 0  # power of 2
    root = dendropy.Node()
    leaves = [root]
    for _ in range(nspecies - 1):
        # First-in, first-out: split the leaf at the front of the list and
        # append its children to the back.
        left, right = splitLeaf(leaves[0])
        left.edge_length = elength
        right.edge_length = elength
        leaves.pop(0)
        leaves.extend((left, right))

    names = generateNames(nspecies)
    for position, leaf in enumerate(leaves):
        leaf.taxon = dendropy.Taxon(label=names[position])

    return dendropy.Tree(seed_node=root)
def transfer_internal_node_labels_to_tree(source_tree_filename, destination_tree_filename, output_tree_filename, sequence_reconstructor):
    """Copy internal node labels from one newick tree onto another.

    Internal nodes are matched purely by their position in
    ``internal_nodes()``. Each source label (empty string when missing) is
    passed through ``sequence_reconstructor.replace_internal_node_label`` and
    stored as the taxon of the corresponding destination node, whose own label
    is cleared. The result is written to ``output_tree_filename`` with single
    quotes removed.
    """
    source_tree = dendropy.Tree.get_from_path(
        source_tree_filename, 'newick', preserve_underscores=True)
    source_labels = [
        internal.label if internal.label else ''
        for internal in source_tree.internal_nodes()
    ]

    destination_tree = dendropy.Tree.get_from_path(
        destination_tree_filename, 'newick', preserve_underscores=True)
    for position, internal in enumerate(destination_tree.internal_nodes()):
        replacement = sequence_reconstructor.replace_internal_node_label(
            str(source_labels[position]))
        internal.label = None
        internal.taxon = dendropy.Taxon(replacement)

    rendered = tree_as_string(
        destination_tree, suppress_internal=False, suppress_rooting=False)
    with open(output_tree_filename, 'w+') as output_file:
        output_file.write(rendered.replace('\'', ''))
def simulate_fossils_on_tree(tree, q):
    """
    Simulate fossils along every edge of a tree and prune the extinct tips.

    For each edge, a Poisson number of fossils with mean q * edge length is
    drawn; each fossil becomes a new tip (labelled f1, f2, ...) attached via
    add_fossil_tip at a uniformly drawn age on that edge. Finally only the
    extant leaves and the fossil tips are retained. The tree is modified in
    place and also returned.
    """
    # Bipartitions are the index used to reach each edge below.
    tree.update_bipartitions()
    root_age = tree.max_distance_from_root()
    # Labels of the tips that survive the final pruning (extant + fossils).
    taxon_label_to_keep = []
    fossil_id = 1

    for position in range(len(tree.bipartition_encoding)):
        # Reach the edge through the bipartition -> edge map.
        edge = tree.bipartition_edge_map[tree.bipartition_encoding[position]]

        # Explicit comparison kept: the helper's return type is not visible here.
        if is_extant_leaf(tree, edge.head_node) == True:
            taxon_label_to_keep.append(edge.head_node.taxon.label)

        fossil_count = poisson(q * edge.length)
        # Ages are sorted so that 'add_fossil_tip' always works on the same
        # split node.
        divergence_ages = sorted(
            uniform(0, edge.length)
            + (root_age - edge.head_node.distance_from_root())
            for _ in range(fossil_count)
        )

        for fossil_index in range(fossil_count):
            fossil_label = 'f%s' % (fossil_id)
            # New fossil tip with a very small branch length.
            fossil_tip = dp.Node(edge_length=0.00001,
                                 taxon=dp.Taxon(label=fossil_label))
            add_fossil_tip(root_age, edge.head_node, fossil_tip,
                           divergence_ages[fossil_index])
            taxon_label_to_keep.append(fossil_label)
            fossil_id += 1

    # Register the new fossil taxa, then prune everything that is extinct.
    tree.update_taxon_namespace()
    tree.retain_taxa_with_labels(taxon_label_to_keep)
    return tree
def generateMonoConcordantTree(sptree, genesPerSp):
    """Expand each species leaf of a copied tree into ``genesPerSp`` gene leaves.

    All edge lengths of the copy are cleared. Every species leaf is turned
    into a random subtree by splitting a randomly chosen leaf
    ``genesPerSp - 1`` times; the resulting leaves are labelled with the
    species label followed by 0, 1, 2, ... Taxa on non-leaf nodes are then
    unassigned.

    Returns the new gene tree; ``sptree`` itself is not modified here.
    """
    gtree = dendropy.Tree(sptree)
    for node in gtree.postorder_node_iter():
        node.edge_length = None

    for species_leaf in gtree.leaf_nodes():
        species_name = species_leaf.taxon.label
        gene_leaves = [species_leaf]
        for _ in range(genesPerSp - 1):
            chosen = random.choice(range(len(gene_leaves)))
            left, right = splitLeaf(gene_leaves[chosen])
            # The split node stops being a leaf; its children replace it.
            gene_leaves.pop(chosen)
            gene_leaves.extend((left, right))
        for copy_number, gene_leaf in enumerate(gene_leaves):
            gene_leaf.taxon = dendropy.Taxon(
                label=species_name + str(copy_number))

    gtree.unassign_taxa(exclude_leaves=True)
    return gtree
def transfer_internal_names_to_tree(self, source_tree, destination_tree, output_tree):
    """Copy prefixed internal node names from one newick tree onto another.

    Internal nodes are matched purely by their position in
    ``internal_nodes()``. Each destination node has its label cleared and
    receives a taxon named ``self.internal_node_prefix`` followed by the
    source node's label (empty string when missing). The result is written
    with ``self.write_tree``.
    """
    source_tree_obj = dendropy.Tree.get_from_path(
        source_tree, 'newick', preserve_underscores=True)
    source_labels = [
        internal.label if internal.label else ''
        for internal in source_tree_obj.internal_nodes()
    ]

    destination_tree_obj = dendropy.Tree.get_from_path(
        destination_tree, 'newick', preserve_underscores=True)
    for position, internal in enumerate(destination_tree_obj.internal_nodes()):
        prefixed_name = self.internal_node_prefix + str(source_labels[position])
        internal.label = None
        internal.taxon = dendropy.Taxon(prefixed_name)

    self.write_tree(destination_tree_obj, output_tree)