def __init__( self, nodes_dmp, tax_ids_to_names = None ):
    """Build ``self.tree`` from the rows of a taxonomy nodes table.

    nodes_dmp: iterable of rows. ``row[::2]`` must yield exactly thirteen
        fields (tax id, parent tax id, rank, then ten fields that are
        unpacked but not used); any other count raises ``ValueError``.
        Presumably these are NCBI ``nodes.dmp`` lines already split so that
        the separators sit at the odd positions -- confirm with the caller.
    tax_ids_to_names: optional mapping from integer tax id to a name.
        When it is falsy every clade is created with ``name=None``.
    """
    # First pass: one clade per row, indexed by its integer tax id.
    clades_by_id = {}
    for record in nodes_dmp:
        (
            tax_id,
            parent_tax_id,
            rank,
            _embl_code,
            _division_id,
            _inherited_div_flag,
            _genetic_code_id,
            _inherited_gc_flag,
            _mitochondrial_genetic_code,
            _inherited_mgc_flag,
            _genbank_hidden_flag,
            _hidden_subtree_root_flag,
            _comments,
        ) = record[::2]

        if tax_ids_to_names:
            label = tax_ids_to_names[int(tax_id)]
        else:
            label = None

        entry = BClade( clades = [], name = label )
        entry.parent_tax_id = int(parent_tax_id)
        entry.rank = rank.strip("_")
        entry.tax_id = int(tax_id)
        clades_by_id[entry.tax_id] = entry

    # Second pass: hang every clade under its parent.
    self.tree = BTree()
    for entry in clades_by_id.values():
        if entry.tax_id != entry.parent_tax_id:
            clades_by_id[entry.parent_tax_id].clades.append( entry )
        else:
            # A node that is its own parent is how the root is marked.
            self.tree.root = entry
def plotLineage(lineage, axes, censore=True):
    """
    Draw the tree of a lineage on ``axes``.

    The root branch length is taken from the first cell of
    ``lineage.output_lineage``: ``obs[2] + obs[3]`` when ``obs[4]`` is
    finite (the lineage starts from G1), otherwise ``obs[3]`` alone (the
    lineage starts from G2). The length must be finite.
    """
    obs = lineage.output_lineage[0].obs
    span = (obs[2] + obs[3]) if np.isfinite(obs[4]) else obs[3]
    assert np.isfinite(span)

    # The root clade is handed to the recursive builder as a one-item list.
    seed = [Clade(span)]
    drawable = CladeRecursive(lineage.output_lineage[0], seed, censore)
    return Phylo.draw(drawable, axes=axes)
def rebuild_on_unpickle(cls, clade_repr, top_level_call=True):
    """Rebuild an object from ``clade_repr``.

    * A representation already present in ``cls._to_wrapped_map`` resolves
      to the stored value, wrapped in ``cls(...)`` for a top-level call.
    * An unseen string becomes a new leaf ``Clade`` that is stored in the
      map and returned as-is.
    * Anything else is iterated: each item is rebuilt recursively (as a
      non-top-level call) and the results are passed to ``cls.requisition``.
      A top-level call returns that result, a nested call returns its
      ``.wrapped`` attribute.
    """
    registry = cls._to_wrapped_map

    if clade_repr in registry:
        known = registry[clade_repr]
        return cls(known) if top_level_call else known

    if isinstance(clade_repr, str):
        # NOTE(review): the new leaf is returned unwrapped even when
        # top_level_call is True, unlike the cached branch above -- confirm
        # that this is intended.
        leaf = Clade(name=clade_repr)
        registry[clade_repr] = leaf
        return leaf

    parts = [cls.rebuild_on_unpickle(item, False) for item in clade_repr]
    rebuilt = cls.requisition(*parts)
    if top_level_call:
        return rebuilt
    return rebuilt.wrapped
def plotLineage(lineage, axes, censor=True, color=True):
    """
    Plot a lineage of cells on ``axes`` using `CladeRecursive`.

    The root branch length comes from the first cell of
    ``lineage.output_lineage``:

    * ``obs[4]`` not finite (lineage starts from S/G2 phase): ``obs[3]``;
    * ``obs[4]`` finite (lineage starts from G1 phase): ``obs[2] + obs[3]``
      when ``obs[3]`` is finite, otherwise ``obs[2]`` alone.

    The resulting length must be finite.
    """
    obs = lineage.output_lineage[0].obs
    if not np.isfinite(obs[4]):
        span = obs[3]
    elif np.isfinite(obs[3]):
        span = obs[2] + obs[3]
    else:
        span = obs[2]
    assert np.isfinite(span)

    # The root clade is handed to the recursive builder as a one-item list.
    seed = [Clade(span)]
    drawable = CladeRecursive(lineage.output_lineage[0], seed, censor, color)
    return draw(drawable, axes=axes)
def getTreeFromLinkage(names, linkage):
    """ Build the tree described by a linkage matrix.

    Columns 0 and 1 of each row are the indices of the two nodes being
    merged (leaves first, then merged nodes in row order) and column 2 is
    the height of the merge; each child's branch length is the merge height
    minus the child's own height.

    :arg names: a list of names, the order should correspond to the values in
                linkage
    :type names: list, :class:`~numpy.ndarray`

    :arg linkage: linkage matrix with ``len(names) - 1`` rows and 4 columns
    :type linkage: :class:`~numpy.ndarray`

    :returns: a Biopython ``Tree`` rooted at the last merged node
    """
    try:
        import Bio
    except ImportError:
        raise ImportError('Phylo module could not be imported. '
                          'Reinstall ProDy or install Biopython '
                          'to solve the problem.')

    from Bio.Phylo.BaseTree import Tree, Clade

    if not isinstance(linkage, np.ndarray):
        raise TypeError('linkage must be a numpy.ndarray instance')

    if linkage.ndim != 2:
        raise LinkageError('linkage must be a 2-dimensional matrix')

    if linkage.shape[1] != 4:
        raise LinkageError('linkage must have exactly 4 columns')

    leaf_count = len(names)
    if linkage.shape[0] != leaf_count - 1:
        raise LinkageError('linkage must have exactly len(names)-1 rows')

    # Leaves sit at height zero; merged nodes are appended as they are built.
    nodes = [Clade(None, label) for label in names]
    node_heights = [0.] * len(nodes)

    for row in linkage:
        i_left = int(row[0])
        i_right = int(row[1])
        merge_height = row[2]

        child_a = nodes[i_left]
        child_b = nodes[i_right]
        child_a.branch_length = merge_height - node_heights[i_left]
        child_b.branch_length = merge_height - node_heights[i_right]

        merged = Clade(None, None)
        merged.clades.extend((child_a, child_b))

        nodes.append(merged)
        node_heights.append(merge_height)

    # The most recently created node is the root (the single leaf when
    # there are no merges).
    return Tree(nodes[-1])
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    Trees are consumed one at a time, so a generator never has to be
    materialised; lists and tuples are accepted as well.

    :param trees:  iterable (or iterator) containing Phylo.BaseTree objects
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  function, optional; called as
                      ``callback(message, level='DEBUG')``
    :return:  Phylo.BaseTree
    :raises StopIteration:  if `trees` yields no tree at all
    """
    # next() only works on iterators; iter() is a no-op for generators and
    # makes plain sequences usable too.
    trees = iter(trees)
    ntrees = 1
    tree = next(trees)

    # map every terminal label of the first tree to an integer index
    tip_index = {tip.name: i for i, tip in enumerate(tree.get_terminals())}

    if callback:
        callback("Recording splits and branch lengths", level='DEBUG')
    splits = {}
    terminals = {tn: 0 for tn in tip_index}

    while True:
        # record terminal branch lengths
        for tip in tree.get_terminals():
            terminals[tip.name] += tip.branch_length

        # record splits in tree
        tree = label_nodes(tree, tip_index)  # aggregates tip indices down tree
        for node in tree.get_nonterminals():
            key = ','.join(map(str, node.tip_index))
            entry = splits.setdefault(key, {'sum': 0., 'count': 0})
            if node.branch_length is not None:
                # None interpreted as zero length (e.g., root branch)
                entry['sum'] += node.branch_length
            entry['count'] += 1

        try:
            tree = next(trees)
        except StopIteration:
            if callback:
                callback("... done", level='DEBUG')
            break
        if callback:
            callback(".. {} completed ".format(ntrees), level="DEBUG")
        ntrees += 1

    # filter splits by frequency (support) threshold
    intermed = [(k.count(',') + 1, k, v) for k, v in splits.items()
                if v['count'] / ntrees >= cutoff]
    intermed.sort()  # sort by level (tips to root)
    del splits  # free some memory

    # construct consensus tree
    if callback:
        callback("Building consensus tree", level='DEBUG')
    orphans = {
        tip_index[tname]: Clade(name=tname, branch_length=totlen / ntrees)
        for tname, totlen in terminals.items()
    }

    for _, key, val in intermed:
        # average branch lengths across relevant trees
        bl = val['sum'] / val['count']
        support = val['count'] / ntrees
        node = Clade(branch_length=bl, confidence=support)

        for child in map(int, key.split(',')):
            # a tip already absorbed by a smaller split is no longer an orphan
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]
def add_children(clade, lang, label_func):
    """Recursively mirror ``lang.children`` as sub-clades of ``clade``.

    Children are visited in order of their ``name``. Each one becomes a
    ``Clade`` with branch length 1, labelled with ``label_func(child)`` and
    appended to ``clade.clades``; its own children are then added beneath it.
    """
    ordered = sorted(lang.children, key=lambda node: node.name)
    for descendant in ordered:
        branch = Clade(branch_length=1, name=label_func(descendant))
        clade.clades.append(branch)
        if not descendant.children:
            continue
        add_children(branch, descendant, label_func)
def CladeRecursive_MCF10A(cell, a: list, censor: bool, color: bool):
    """
    Recursively convert ``cell`` and its observed descendants into a clade.

    The branch length of every clade is ``cell.obs[1]``. When ``color`` is
    true and ``cell.state`` is finite the clade takes its color from
    ``stateColors``; otherwise it is drawn in black. ``a`` and ``censor`` are
    only passed through to the recursive calls.

    "a" should be: a = [Clade(lineage1.full_lineage[0].obs[1])] which is the root cell
    """
    if color and np.isfinite(cell.state):
        colorr = stateColors[cell.state]
    else:
        # color disabled, or the cell has not been assigned a state
        colorr = "black"

    if cell.isLeaf():
        my_clade = Clade(branch_length=cell.obs[1], width=1, color=colorr)
    else:
        offspring = []
        for daughter in (cell.left, cell.right):
            if daughter is not None and daughter.observed:
                offspring.append(CladeRecursive_MCF10A(daughter, a, censor, color))
        my_clade = Clade(branch_length=cell.obs[1], width=1, clades=offspring, color=colorr)

    # Line widths and phase lengths attached to the clade.
    my_clade.G1lw = 1.0
    my_clade.G2lw = 1.0
    my_clade.G1 = cell.obs[1]
    my_clade.G2 = 1e-4
    return my_clade
def _clade_branch_length(obs):
    """Return the branch length given by ``obs[2]`` (G1) and ``obs[3]`` (S/G2).

    A NaN phase is treated as unobserved and the other phase is used alone;
    otherwise the two are summed.
    """
    g1, g2 = obs[2], obs[3]
    if np.isnan(g2):  # the cell got stuck in G1
        return g1
    if np.isnan(g1):  # G1 is not observed (e.g. a root parent)
        return g2
    return g1 + g2  # both are observed


def CladeRecursive(cell, a: list, censor: bool, color: bool):
    """
    A recursive function that takes in the root cell and traverses through cells to plot the lineage.
    The width of the lines show the phase of the cells.
    The color of the lines show the state of the cells.

    A leaf is turned into a childless clade only when ``censor`` is true;
    otherwise (and for every internal cell) the observed ``left`` / ``right``
    daughters are converted recursively and attached as children.

    Both cases use the same branch-length rule. Previously the censored-leaf
    case left the length unassigned when an observation was infinite.

    "a" should be: a = [Clade(lineage1.full_lineage[0].obs[2]+lineage1.full_lineage[0].obs[3])] which is the root cell
    The following is the source code used to create Clades manually:
    https://github.com/biopython/biopython/blob/fce4b11b4b8e414f1bf093a76e04a3260d782905/Bio/Phylo/BaseTree.py#L801
    """
    if color and np.isfinite(cell.state):
        colorr = stateColors[cell.state]
    else:
        # color disabled, or the cell has not been assigned a state
        colorr = "black"

    if cell.isLeaf() and censor:
        # Creating the clade and assigning the color
        my_clade = Clade(branch_length=_clade_branch_length(cell.obs), width=1, color=colorr)
    else:
        clades = []
        if cell.left is not None and cell.left.observed:
            clades.append(CladeRecursive(cell.left, a, censor, color))
        if cell.right is not None and cell.right.observed:
            clades.append(CladeRecursive(cell.right, a, censor, color))
        my_clade = Clade(
            branch_length=_clade_branch_length(cell.obs),
            width=1,
            clades=clades,
            color=colorr,
        )

    # Assigning the line width according to the phase
    my_clade.G1lw = 2.0
    my_clade.G2lw = 1.0
    # an unobserved phase is given a tiny placeholder length
    my_clade.G1 = cell.obs[2] if np.isfinite(cell.obs[2]) else 1e-4
    my_clade.G2 = cell.obs[3] if np.isfinite(cell.obs[3]) else 1e-4
    return my_clade
def consensus(trees, cutoff=0.5, callback=None):
    """
    Generate a consensus tree by counting splits and using the splits with
    frequencies above the cutoff to resolve a star tree.

    The branch length of an internal node is the mean of the lengths recorded
    for its split; trees in which that length is None are left out of the
    mean, and the length is None when no tree provides one.

    :param trees:  iterable containing Phylo.BaseTree objects; anything that
                   is not already a list is materialised into one
    :param cutoff:  float, bootstrap threshold (default 0.5)
    :param callback:  function, optional; called with a progress message
    :return:  Phylo.BaseTree
    :raises IndexError:  if `trees` is empty
    """
    if not isinstance(trees, list):
        # resolve generator object
        trees = list(trees)

    count = len(trees)

    # store terminal labels and branch lengths
    tip_index = {
        tip.name: i for i, tip in enumerate(trees[0].get_terminals())
    }

    if callback:
        callback("Recording splits and branch lengths")
    splits = {}
    terminals = {tn: [] for tn in tip_index}
    for phy in trees:
        # record terminal branch lengths
        for tip in phy.get_terminals():
            terminals[tip.name].append(tip.branch_length)

        # record splits in tree
        phy = label_nodes(phy, tip_index)
        for node in phy.get_nonterminals():
            splits.setdefault(tuple(node.tip_index), []).append(
                node.branch_length)

    # filter splits by frequency threshold, smallest splits first
    intermed = sorted(
        (len(k), k, v) for k, v in splits.items() if len(v) / count >= cutoff
    )

    # construct consensus tree
    if callback:
        callback("Building consensus tree")
    orphans = {
        tip_index[tname]: Clade(name=tname,
                                branch_length=sum(tdata) / len(tdata))
        for tname, tdata in terminals.items()
    }

    for _, key, val in intermed:
        # average branch lengths across relevant trees; a None length
        # (e.g. a root branch) carries no information and is skipped so a
        # mix of None and numbers no longer breaks sum()
        known = [length for length in val if length is not None]
        bl = sum(known) / len(known) if known else None
        support = len(val) / count
        node = Clade(branch_length=bl, confidence=support)

        for child in key:
            # a tip already absorbed by a smaller split is no longer an orphan
            branch = orphans.pop(child, None)
            if branch is not None:
                node.clades.append(branch)

        # use a single tip name to label ancestral node
        newkey = tip_index[node.get_terminals()[0].name]
        orphans[newkey] = node

    return orphans.popitem()[1]