Пример #1
0
    def __init__(self, newick = None, text_array = None, \
                 fdist=clustvalidation.default_dist):
        """Set up a cluster node, optionally from a newick and array data.

        ``newick`` is handed to ``TreeNode.__init__``. A truthy
        ``text_array`` is passed to ``link_to_arraytable``. ``fdist`` is
        installed through ``set_distance_function`` only when ``newick``
        is truthy.
        """
        # Per the original note: the default distance is spearman_dist when
        # the scipy module is loaded, otherwise euclidean_dist.

        # The base initializer sets up the basic tree features and loads
        # the newick (if any).
        TreeNode.__init__(self, newick)

        # Private slots, all starting out unset.
        for private_name in ("_fdist", "_silhouette", "_intercluster_dist",
                             "_intracluster_dist", "_profile",
                             "_std_profile"):
            setattr(self, private_name, None)

        # Cluster-specific features
        for feature_name in ("intercluster_dist", "intracluster_dist",
                             "silhouette", "profile", "deviation"):
            self.features.add(feature_name)

        # Attach the array data, when given
        if text_array:
            self.link_to_arraytable(text_array)

        if newick:
            self.set_distance_function(fdist)
Пример #2
0
def validate_monophylies(tree: ete3.TreeNode, clades: dict, force_check=False):
    """Print a monophyly report for every clade and return a status code.

    ``clades`` maps a clade name to its expected leaf names. Each clade is
    tested with ``tree.check_monophyly(leaves, 'name')``. A clade that fails
    the test gets a detailed report and sets the return value to 1. With
    ``force_check`` true, every clade gets the detailed report and sets the
    return value to 1, even when it is monophyletic.

    Returns 0 when no clade took the detailed-report path, else 1.
    """
    status = 0
    for clade, leaves in clades.items():
        ismono, cladetype, _ = tree.check_monophyly(leaves, 'name')
        if ismono and not force_check:
            print('Clade %s: OK (%d leaves).' % (clade, len(leaves)))
            continue

        status = 1
        print('Clade %s: NO (%s):' % (clade, cladetype))

        # First node, in preorder, whose name begins with the clade name.
        candidate = next((node for node in tree.traverse('preorder')
                          if node.name.startswith(clade)), None)
        if candidate is None:
            print("   * Not found:  '^%s.*'" % clade)
        else:
            found_leaves = candidate.get_leaf_names()
            print('  * Found node %s' % candidate.name)
            print('    with extra leaves: ',
                  ' '.join(set(found_leaves).difference(leaves)))
            print('    and missing leaves: ',
                  ' '.join(set(leaves).difference(found_leaves)))

        mrca = tree.get_common_ancestor(leaves)
        print('  * MRCA is', mrca.name)
    return status
def recreate_tree(tree, num_layers=None, color=True):
    """Return a fresh tree with the same topology and node names as ``tree``.

    Only the name is copied onto each new node; other attributes of the
    source nodes are not carried over. Every new node is drawn as a sphere
    of size 10, and when ``color`` is true its ``fgcolor`` is taken from the
    source node's ``color`` attribute.

    The copy proceeds level by level and stops once ``num_layers`` levels
    have been processed (``None`` copies the whole tree).
    """
    def _styled_twin(source):
        # New node sharing only the name, with the display style applied.
        twin = TreeNode(name=source.name)
        twin.img_style['size'] = 10
        if color:
            twin.img_style['fgcolor'] = source.color
        twin.img_style['shape'] = 'sphere'
        return twin

    root_copy = _styled_twin(tree)
    # Pairs of (source node, its copy) on the level being expanded.
    frontier = [(tree, root_copy)]
    depth = 0
    while frontier:
        upcoming = []
        for source, duplicate in frontier:
            for child in source.children:
                child_copy = _styled_twin(child)
                duplicate.add_child(child_copy)
                upcoming.append((child, child_copy))
        frontier = upcoming
        depth += 1
        if num_layers is not None and depth == num_layers:
            break

    return root_copy
Пример #4
0
    def __init__(self, newick = None, text_array = None, \
                 fdist=clustvalidation.default_dist):
        """Create the node and, when requested, link array data to it.

        ``newick`` goes to ``TreeNode.__init__``; a truthy ``text_array`` is
        linked with ``link_to_arraytable``; ``fdist`` is applied with
        ``set_distance_function`` only if ``newick`` is truthy.
        """
        # Original note: the default distance is spearman_dist when the
        # scipy module is loaded, otherwise it is euclidean_dist.

        # Basic tree features are initialised, and the newick (if any) is
        # loaded, by the base class.
        TreeNode.__init__(self, newick)

        # Nothing has been computed or assigned yet.
        self._fdist = self._silhouette = None
        self._intercluster_dist = self._intracluster_dist = None
        self._profile = self._std_profile = None

        # Register the cluster-specific feature names.
        cluster_features = (
            "intercluster_dist",
            "intracluster_dist",
            "silhouette",
            "profile",
            "deviation",
        )
        for feature in cluster_features:
            self.features.add(feature)

        # Initialise the tree with array data
        if text_array:
            self.link_to_arraytable(text_array)

        if newick:
            self.set_distance_function(fdist)
Пример #5
0
 def compare(self, tree2, method='identity'):
     '''Compare this tree to ``tree2`` with the requested ``method``.

     Both objects must expose their ete tree as ``.tree``.

     * ``'identity'``: return True when the sorted lists of
       (sequence, frequency, parent sequence) over all nodes are equal.
     * ``'MRCA'``: for every pair of taxa (sequences of nodes of this tree
       with a truthy frequency), take the Hamming distance between the
       common-ancestor sequences in the two trees; return the summed
       distances divided by the summed ancestor-sequence lengths.
     * ``'RF'``: return the first element of ``robinson_foulds`` computed on
       deep copies in which every node with frequency > 0 gets an extra
       child carrying its sequence. If that call raises, it is retried
       with ``allow_dup=True``.

     Raises ValueError for any other ``method``.
     '''
     if method == 'identity':
         def node_records(root):
             # one (sequence, frequency, parent sequence) tuple per node
             return sorted((node.sequence, node.frequency,
                            node.up.sequence if node.up is not None else None)
                           for node in root.traverse())
         return node_records(self.tree) == node_records(tree2.tree)
     elif method == 'MRCA':
         # matrix of hamming distance of common ancestors of taxa
         # takes a true and inferred tree as CollapsedTree objects
         taxa = [
             node.sequence for node in self.tree.traverse()
             if node.frequency
         ]
         n_taxa = len(taxa)
         d = scipy.zeros(shape=(n_taxa, n_taxa))
         sum_sites = scipy.zeros(shape=(n_taxa, n_taxa))
         for i in range(n_taxa):
             # next(iterator) works on Python 2.6+ and 3; the iterator's
             # .next() method only exists on Python 2.
             nodei_true = next(
                 self.tree.iter_search_nodes(sequence=taxa[i]))
             nodei = next(tree2.tree.iter_search_nodes(sequence=taxa[i]))
             for j in range(i + 1, n_taxa):
                 nodej_true = next(
                     self.tree.iter_search_nodes(sequence=taxa[j]))
                 nodej = next(
                     tree2.tree.iter_search_nodes(sequence=taxa[j]))
                 MRCA_true = self.tree.get_common_ancestor(
                     (nodei_true, nodej_true)).sequence
                 MRCA = tree2.tree.get_common_ancestor(
                     (nodei, nodej)).sequence
                 d[i, j] = hamming_distance(MRCA_true, MRCA)
                 sum_sites[i, j] = len(MRCA_true)
         return d.sum() / sum_sites.sum()
     elif method == 'RF':
         tree1_copy = self.tree.copy(method='deepcopy')
         tree2_copy = tree2.tree.copy(method='deepcopy')
         for treex in (tree1_copy, tree2_copy):
             # list(...) snapshots the traversal because children are
             # added while looping
             for node in list(treex.traverse()):
                 if node.frequency > 0:
                     child = TreeNode()
                     child.add_feature('sequence', node.sequence)
                     node.add_child(child)
         rf_options = dict(attr_t1='sequence',
                           attr_t2='sequence',
                           unrooted_trees=True)
         try:
             return tree1_copy.robinson_foulds(tree2_copy, **rf_options)[0]
         except Exception:
             # Retry allowing duplicated attribute values; unlike a bare
             # ``except``, KeyboardInterrupt/SystemExit still propagate.
             return tree1_copy.robinson_foulds(tree2_copy,
                                               allow_dup=True,
                                               **rf_options)[0]
     else:
         raise ValueError('invalid distance method: ' + method)
Пример #6
0
def _collapse_tree_by_sequence_and_isotype(tree: ete3.TreeNode):
    """Merge, in place, descendants that match their parent's sequence and isotype.

    Works in two passes over ``tree.iter_descendants()`` and returns nothing;
    ``tree`` itself is modified.
    """
    # Pass 1: mark every edge. ``dist`` becomes a bool that is False (== 0)
    # only when both the sequence and the isotype equal the parent's.
    for node in tree.iter_descendants():
        node.dist = node.up.sequence != node.sequence or node.up.isotype != node.isotype
    # Pass 2: fold each unmarked node into its parent. The parent takes over
    # the node's abundance (added) and name (overwritten), then the node is
    # deleted from the tree.
    # NOTE(review): nodes are deleted while the traversal is running —
    # confirm iter_descendants tolerates this for the tree shapes used here.
    for node in tree.iter_descendants():
        if node.dist == 0:
            node.up.abundance += node.abundance
            node.up.name = node.name
            node.delete(prevent_nondicotomic=False)
Пример #7
0
def read_tree(infile, format, quiet=False):
    """Load a tree from ``infile`` and return it.

    When ``infile`` is ``'-'`` the newick text is the first line read from
    standard input; otherwise ``infile`` is handed to ``TreeNode`` as given.
    ``format`` is forwarded to ``TreeNode`` and quoted node names are
    enabled. Unless ``quiet`` is true, the number of leaves is written to
    standard error.
    """
    newick_source = sys.stdin.readlines()[0] if infile == '-' else infile
    tree = TreeNode(newick=newick_source, format=format, quoted_node_names=True)
    if quiet:
        return tree
    leaf_total = sum(1 for n in tree.traverse() if n.is_leaf())
    sys.stderr.write('number of leaves in input tree: {:,}\n'.format(leaf_total))
    return tree
Пример #8
0
 def simplify_tree(self, tree):
     """Simplify ``tree`` in place; always returns None.

     ``self._simplify_tree(tree)`` is called first (its result is unused).
     If the label of ``tree`` is one of 'Arg1', 'Arg2', 'Conn' or 'none',
     its children are replaced by ``self.get_leave_node(tree)`` and
     processing stops. Otherwise each child is either simplified
     recursively (when ``self.deeperthan1(child)`` is true) or wrapped in a
     new ``TreeNode`` that carries the child's label.
     """
     self._simplify_tree(tree)
     if tree.label in ('Arg1', 'Arg2', 'Conn', 'none'):
         tree.children = self.get_leave_node(tree)
         return
     for position, child in enumerate(tree.children):
         if not self.deeperthan1(child):
             # Shallow child: insert a wrapper node with the same label.
             wrapper = TreeNode()
             wrapper.children = [child]
             wrapper.label = child.label
             tree.children[position] = wrapper
         else:
             self.simplify_tree(child)
Пример #9
0
 def __init__(self,
              newick=None,
              format=0,
              dist=None,
              support=None,
              name=None):
     """Default init for the TreeClass.

     Forwards every argument, by keyword, to ``TreeNode.__init__``.
     """
     TreeNode.__init__(
         self, newick=newick, format=format, dist=dist, support=support,
         name=name)
Пример #10
0
    def DFS_get_tree(root, par_node):
        """Recursively fill ``par_node`` from the data returned by ``get_info(root)``.

        The first element of the result becomes the node's name. A
        three-element result (name, left, right) adds two new children and
        recurses into both, returning the pair of recursive results. Any
        other length (a one-element result being a leaf) returns None.
        """
        info = get_info(root)
        par_node.name = info[0]
        if len(info) == 3:
            _, left, right = info
            left_node, right_node = TreeNode(), TreeNode()
            par_node.add_child(left_node)
            par_node.add_child(right_node)
            return DFS_get_tree(left, left_node), DFS_get_tree(right, right_node)
        # Leaf (or unexpected result length): nothing further to build.
        return None
Пример #11
0
def isotype_tree(
    tree: ete3.TreeNode,
    newidmap: Dict[str, Dict[str, str]],
    isotype_names: Sequence[str],
    weight_matrix: Optional[Sequence[Sequence[float]]] = None,
) -> ete3.TreeNode:
    """Add isotypes to a copy of ``tree``, minimizing isotype switching and
    obeying switching order.

    * Adds observed isotypes to each observed node in the collapsed
      trees output by gctree inference. If cells with the same sequence
      but different isotypes are observed, then collapsed tree nodes
      must be 'exploded' into new nodes with the appropriate isotypes
      and abundances. Each unique sequence ID generated by gctree is
      prepended to its observed isotype, and a new `isotyped.idmap`
      mapping these new sequence IDs to original sequence IDs is
      written in the output directory.
    * Resolves isotypes of unobserved ancestral genotypes in a way
      that minimizes isotype switching and obeys isotype switching
      order. If observed isotypes of an observed internal node and its
      children violate switching order, then the observed internal node
      is replaced with an unobserved node with the same sequence, and
      the observed internal node is placed as a child leaf. This
      procedure always allows switching order conflicts to be resolved,
      and should usually increase isotype transitions required in the
      resulting tree.

    The input tree is not modified: all work happens on ``tree.copy()``.

    Args:
        tree: ete3 Tree
        newidmap: mapping of sequence IDs to isotypes, such as that output by :meth:`utils.explode_idmap`.
        isotype_names: list or other sequence of isotype names observed, in correct switching order.
        weight_matrix: optional; forwarded unchanged to ``_add_observed_isotypes``.
            NOTE(review): presumably per-transition weights between isotypes - confirm against that helper.

    Returns:
        A new ete3 Tree whose nodes have isotype annotations in the attribute ``isotype``.
        Node names in this tree also contain isotype names, and each
        descendant's ``dist`` is the Hamming distance to its parent's sequence.
    """
    tree = tree.copy()
    _add_observed_isotypes(tree,
                           newidmap,
                           isotype_names,
                           weight_matrix=weight_matrix)
    _disambiguate_isotype(tree)
    _collapse_tree_by_sequence_and_isotype(tree)
    # Append the isotype to every node name, separated by a space.
    for node in tree.traverse():
        node.name = str(node.name) + " " + str(node.isotype)
    # Recompute branch lengths from the sequences of the final tree.
    for node in tree.iter_descendants():
        node.dist = hamming_distance(node.up.sequence, node.sequence)
    return tree
def get_clade_count(tree:TreeNode, clades: List[str], alternate_names: Dict[str, str]) -> Dict[str, int]:
    """
    Count the leaves of [tree] that belong to each clade.

    A leaf belongs to the first prefix (clade name or alternate name) that
    its name starts with; prefixes are tried in insertion order, clade names
    first. Leaves matching no prefix are counted under "other".

    Args:

        - tree (TreeNode): the tree whose leaves are counted
        - clades (List[str]): List containing the clade names
            Clade names should occur at the start of the sequence name.
            ``None`` is treated as no clades.
        - alternate_names (Dict[str, str]): Maps alternate name found in sequence to desired name.
            ``None`` is treated as no alternate names.

    Returns:

        - Dict[str, int]: Maps the clade to the total number present.
    """
    def get_belonging(leaf: TreeNode, clades: Dict[str, str]):
        """
        Return the label of the first prefix matching the leaf name, else "other".
        """
        for prefix, label in clades.items():
            if leaf.name.startswith(prefix):
                return label
        return "other"

    # Callers such as get_qualifying_nodes default both arguments to None,
    # so neither may be assumed to be a real container.
    clade_dict = {} if clades is None else {clade: clade for clade in clades}
    if alternate_names is not None:
        clade_dict.update(alternate_names)

    leaf_clades = [get_belonging(leaf, clades=clade_dict) for leaf in tree.get_leaves()]
    serialize = pd.Series(data = leaf_clades, dtype = str)
    return serialize.value_counts().to_dict()
Пример #13
0
    def parameterised_test(mutDict, insertionDict, mutations, expected_output):
        """Check ``writeGenomeShortIndels`` output and that its inputs are left intact.

        A node carrying ``mutations`` is written to a ``MockFile``. The
        written data must equal ``expected_output``, and both dictionaries
        must compare equal to the copies taken before the call.
        """
        # Snapshots for the "updates and then de-updates" check below.
        mutations_before = mutDict.copy()
        insertions_before = insertionDict.copy()

        sink = MockFile()
        test_node = TreeNode(name="test_node")
        test_node.mutations = mutations
        genome_tree.writeGenomeShortIndels(
            node=test_node,
            file=sink,
            mutDict=mutDict,
            insertionDict=insertionDict,
        )

        # The genome tree applies the mutations and then reverts them, so
        # both dictionaries must be the same before and after printing.
        assert mutDict == mutations_before
        assert insertionDict == insertions_before
        assert sink.written_data == expected_output
Пример #14
0
 def initialize_pathogen_tree(self):
     """
     Start one pathogen lineage per host tip.

     Each lineage is a ``TreeNode`` named ``<host tip name>_P`` with
     ``dist=0`` that carries the tip's ``height`` and a ``host`` reference.
     Lineages whose host tip has height 0 go to ``self.extant_p``; the
     others go to ``self.not_yet_sampled_p``. Both lists are reset first.
     TODO: relax this assumption - needs some way to input
     """
     self.extant_p = []  # pathogen lineages that have not coalesced
     self.not_yet_sampled_p = []  # pathogen lineages higher in the tree
     for host_tip in self.hosttree.get_leaves():
         lineage = TreeNode(name=host_tip.name + '_P', dist=0)
         lineage.add_features(height=host_tip.height, host=host_tip)
         destination = (self.extant_p if host_tip.height == 0
                        else self.not_yet_sampled_p)
         destination.append(lineage)
Пример #15
0
def _convert_biotree_to_etetree(bio_tree):
    """Convert ``bio_tree`` into a ``TreeNode`` by round-tripping through newick.

    The tree is serialised with ``write_newick`` into an in-memory buffer,
    every ``Inner<digits>:`` label is reduced to a bare ``:``, and the
    resulting text is parsed by ``TreeNode``.
    """
    buffer = io.StringIO()
    write_newick([bio_tree], buffer)
    unlabeled_newick = re.sub("Inner[0-9]+:", ":", buffer.getvalue())
    return TreeNode(unlabeled_newick)
def add_tree_layer(tree, leaves, clusters, proportions, child_coords,
                   prop_filter):
    '''
    Add one more layer of cluster nodes below the current leaves.

    tree: tree that we want to add an additional layer to (returned as given)
    leaves: leaves of tree, looked up by parent id
    clusters: ids of the clusters in the child layer, read by position
              (clusters[0] .. clusters[len(clusters) - 1])
    proportions: nested dictionary containing id of child and id of parent and
                 the proportion of cells contained in the parent that are also
                 contained in the child
    child_coords: coordinate data for each child id, stored on the new node
                  as the feature ``coords``
    prop_filter: proportion that must be exceeded for an edge between
                 clusters to be created

    Returns the tuple (tree, dict mapping child id to its new node).
    '''
    child_nodes = {}
    for position in range(len(clusters)):
        cluster_id = clusters[position]
        fresh_node = TreeNode(name=cluster_id)
        child_nodes[cluster_id] = fresh_node
        # attach coordinate data and the id itself as features
        fresh_node.add_features(coords=child_coords[cluster_id])
        fresh_node.add_features(cluster_id=cluster_id)

    for cluster_id in proportions:
        # only the parent with the largest proportion is considered, so a
        # child is never attached to more than one parent
        shares = proportions[cluster_id]
        best_parent_id = max(shares, key=shares.get)
        if shares[best_parent_id] > prop_filter:
            leaves[best_parent_id].add_child(child_nodes[cluster_id])

    return tree, child_nodes
Пример #17
0
def layout_lift(node: TreeNode, levels: int = 3) -> None:
    """Layout implementation for a tree node

    The node name is shown (rotated by 270) only when ``int(node.e)`` is
    below ``levels`` or ``node.Hd`` equals "1"; otherwise an empty label is
    drawn. The foreground colour depends on ``float(node.u)`` and the
    size/shape on ``node.Hd``, ``node.Ch`` and ``node.Sq``.

    Parameters
    ----------
    node : TreeNode
        the root of the taxonomy tree / sub-tree
    levels : int
        a number of tree levels to draw

    Returns
    -------
    None
    """

    show_name = int(node.e) < levels or node.Hd == "1"
    label = TextFace(node.name if show_name else "", tight_text=True)
    label.rotation = 270
    node.add_face(label, column=0, position="branch-right")
    style = NodeStyle()

    # Colour bands: (0, .2] light green, (.2, .4] green, above .4 dark
    # green; everything else is red.
    u_value = float(node.u)
    if u_value > .4:
        style["fgcolor"] = "#004000"
    elif u_value > .2:
        style["fgcolor"] = "green"
    elif u_value > 0:
        style["fgcolor"] = "#90ee90"
    else:
        style["fgcolor"] = "red"

    if node.Hd == "0":
        style["size"] = 20
        style["shape"] = "square"
    else:
        style["size"] = 40
        # node.Ch is consulted, but both outcomes currently give a circle.
        style["shape"] = "circle" if node.Ch == "1" else "circle"

    if node.Sq == "1":
        style["shape"] = "circle"

    node.set_style(style)
Пример #18
0
    def p_toArbre(self):
        """Build and return the tree node for this program.

        The root is named "main()". Its three children, in order, are a
        node named ``str(self.sons[0])``, the result of
        ``self.sons[1].c_toArbre()`` and the result of
        ``self.sons[2].e_toArbre()``.
        """
        root = TreeNode()
        root.name = "main()"

        header = TreeNode()
        header.name = str(self.sons[0])

        branches = (header,
                    self.sons[1].c_toArbre(),
                    self.sons[2].e_toArbre())
        for branch in branches:
            root.add_child(branch)

        return root
Пример #19
0
def discover_children(object=None):
    """
    Discovers all children defined in the thrift_spec of an instance of a thrift auto-generated class.

    One treenode is created per value of ``object.obj.thrift_spec``. It is
    named after element 1 of the spec, carries the features ``t_parent``
    (``object.obj``), ``t_name`` (element 1) and ``t_type`` (element 2), and
    is added as a child of ``object``.

    :param object: The treenode object to search to discover the children.
    :return: The discovered children, wrapped in treenodes.
    """
    discovered = []
    for spec in object.obj.thrift_spec.values():
        field_name, field_type = spec[1], spec[2]
        child = TreeNode(name=field_name)
        child.add_features(t_parent=object.obj, t_name=field_name,
                           t_type=field_type)
        object.add_child(child)
        discovered.append(child)
    return discovered
Пример #20
0
    def __init__(self, newick=None, alignment=None, alg_format="fasta", \
                 sp_naming_function=_parse_species, format=0, **kargs):
        """Initialise the node, optionally loading a newick tree and an alignment.

        ``newick``, ``format`` and any extra keyword arguments are forwarded
        to ``TreeNode.__init__``. A truthy ``alignment`` is linked through
        ``link_to_alignment`` using ``alg_format``. ``sp_naming_function``
        is installed through ``set_species_naming_function`` only when
        ``newick`` is truthy.
        """

        # _update names?
        # Placeholder values set before the base initializer runs.
        self._name = "NoName"
        self._species = "Unknown"
        self._speciesFunction = None
        # Caution! native __init__ has to be called after setting
        # _speciesFunction to None!!
        TreeNode.__init__(self, newick=newick, format=format, **kargs)

        # This will be only executed after reading the whole tree,
        # because the argument 'alignment' is not passed to the
        # PhyloNode constructor during parsing
        if alignment:
            self.link_to_alignment(alignment, alg_format)
        if newick:
            self.set_species_naming_function(sp_naming_function)
Пример #21
0
    def __init__(self, newick=None, alignment=None, alg_format="fasta", \
                 sp_naming_function=_parse_species, format=0, **kargs):
        """Create the node; optionally parse ``newick`` and link ``alignment``.

        ``TreeNode.__init__`` receives ``newick``, ``format`` and the extra
        keyword arguments. ``link_to_alignment(alignment, alg_format)`` runs
        when ``alignment`` is truthy, and
        ``set_species_naming_function(sp_naming_function)`` runs when
        ``newick`` is truthy.
        """

        # _update names?
        # Defaults assigned ahead of the base initializer.
        self._name = "NoName"
        self._species = "Unknown"
        self._speciesFunction = None
        # Caution! native __init__ has to be called after setting
        # _speciesFunction to None!!
        TreeNode.__init__(self, newick=newick, format=format, **kargs)

        # This will be only executed after reading the whole tree,
        # because the argument 'alignment' is not passed to the
        # PhyloNode constructor during parsing
        if alignment:
            self.link_to_alignment(alignment, alg_format)
        if newick:
            self.set_species_naming_function(sp_naming_function)
def color_taxon(node:TreeNode, color_marker: str, offset: int):
    """
    Sets the NodeStyle for [node] from the color embedded in its name.

    The name is split on [color_marker]. If the marker does not occur, the
    node is left untouched. Otherwise the color is the last piece sliced as
    ``piece[:offset]``, a leading "#" is added when missing, and the result
    of ``node_style(color=...)`` is stored in ``node.img_style``.

    Args:

        - node(TreeNode): the leaf to set node_style for
        - color_marker(Str): the delineator to split taxon sequence name to extract
            color information. Color info should be the last thing right after [color_marker].

            Color should also be in Hex format either FFFFFF or #FFFFFF
        - offset(int): end index of the slice applied to the text after the
            last marker. NOTE(review): the earlier description said "how many
            characters off the back to chop off", which only matches a
            negative value - confirm what callers pass.
    """
    pieces = node.name.split(color_marker)
    if len(pieces) <= 1:
        return
    raw_color = pieces[-1][:offset]
    hex_color = raw_color if raw_color.startswith("#") else f"#{raw_color}"
    node.img_style = node_style(color=hex_color)
Пример #23
0
def isotype_parsimony(tree: ete3.TreeNode) -> float:
    """Sum :meth:`isotype_distance` over every edge of an isotyped tree.

    Each descendant of ``tree`` contributes the distance between its
    parent's isotype and its own.

    If no weight matrix was provided during isotyping of the tree, then
    the return value of this function is the number of isotype
    transitions along edges in the tree.
    """
    total = 0
    for node in tree.iter_descendants():
        total = total + isotype_distance(node.up.isotype, node.isotype)
    return total
def root_on(tree: TreeNode, clade: str, clade_details: List, clade_total:int, clades: List[str], alternate_names: Dict[str, str]) -> TreeNode:
    """
    Roots the given tree on clade

    The outgroup node is whatever ``get_max_ancestor`` returns for the given
    arguments. The tree is re-rooted in place with ``set_outgroup`` and the
    same tree object is returned.

    Args:

        - tree (TreeNode): The tree which will be rooted
        - clade (str): The clade to root tree on. Tree must contain clade.
        - clade_details (List): Contains [color, density, coverage] in that order
        - clade_total (int): The total number leaves in the tree belonging to [clade]
        - clades (List[str]): List containing the clade names
            Clade names should occur at the start of the sequence name.
        - alternate_names (Dict[str, str]): Maps alternate name found in sequence to desired name.

    Returns:

        - TreeNode: A tree with clade as the outgroup
    """
    tree.set_outgroup(
        get_max_ancestor(tree, clade, clade_details, clade_total, clades,
                         alternate_names))
    return tree
Пример #25
0
def layout_raw(node: TreeNode, tight_mode: bool = True) -> None:
    """Layout implementation for a tree node

    In tight mode the node name is split on single spaces and each segment
    gets its own rotated text face in consecutive columns. Otherwise the
    whole name is drawn as one face in column 0. The node is then styled
    as a black circle of size 20.

    Parameters
    ----------
    node : TreeNode
        the root of the taxonomy tree / sub-tree
    tight_mode : bool, default=True
        a mode to print node names more tightly

    Returns
    -------
    None
    """

    labels = node.name.split(' ') if tight_mode else [node.name]
    for column, text in enumerate(labels):
        face = TextFace(text, tight_text=True)
        face.rotation = 270
        node.add_face(face, column=column, position="branch-right")

    style = NodeStyle()
    style["fgcolor"] = "black"
    style["size"] = 20
    style["shape"] = "circle"
    node.set_style(style)
Пример #26
0
    def add_tree_to_distribution(self, tree):
        """
        Add the bipartition of a tree to the CCP distribution

        A root with exactly three children is first rewritten in place: its
        second and third children are detached and re-attached under a new
        intermediate node. If any node then has more than two children, a
        message is printed and the process exits with status 1.

        On the first observed tree (``self.nb_observation == 0``) every leaf
        name is registered through ``self.get_leaf_id``. Every node, in
        postorder, is then passed to ``self.add_tree_branch_to_distribution``
        and ``self.nb_observation`` is incremented.

        Takes:
            - tree (ete3.Tree): phylogenetic tree

        Raises:
            SystemExit: with code 1 when a multifurcation is found.
        """

        if len(tree.children) == 3:
            ## special unrooted case where the tree begins with a trifurcation ...
            ## we artificially remove the trifurcation to avoid future problems
            a = TreeNode()
            b = tree.children[1]
            c = tree.children[2]
            b.detach()
            c.detach()
            tree.add_child(a)
            a.add_child(b)
            a.add_child(c)

        for i in tree.traverse():
            if len(i.children) > 2:
                # Single-argument print(...) is valid on Python 2 and 3.
                print("multifurcation detected! Please provide bifurcating trees.")
                print("exiting now")
                # Same effect as exit(1), without relying on the site module.
                raise SystemExit(1)

        if self.nb_observation == 0:  ##no tree has been observed yet: add all the leaves
            for l in tree.get_leaf_names():
                self.get_leaf_id(l)  ##adds the leaves to the CCP

        for node in tree.traverse("postorder"):  ##for each branch of the tree
            self.add_tree_branch_to_distribution(node)

        self.nb_observation += 1

        return
Пример #27
0
    def simulate(self):
        '''
        Simulate a collapsed tree from ``self.params``.

        The existing ``tree`` data member is replaced by the simulation
        result and ``self`` is returned. Raises ValueError when
        ``self.params`` is None.
        '''
        if self.params is None:
            raise ValueError('params must be defined for simulation')

        # A LeavesAndClades simulation provides the number of clones
        # (self.c) and mutants (self.m) for the root of the collapsed tree.
        LeavesAndClades.simulate(self)
        root = TreeNode()
        self.tree = root
        root.add_feature('frequency', self.c)
        if self.m == 0:
            return self

        # One recursively simulated subtree per mutant, each attached at
        # distance 1.
        for _ in range(self.m):
            subtree = CollapsedTree(params=self.params,
                                    frame=self.frame).simulate().tree
            subtree.dist = 1
            self.tree.add_child(subtree)

        return self
Пример #28
0
    def coalesce_paths(self, child_paths, t0):
        """
        Create a new TreeNode and assign a given list of child nodes and its host node.

        Side effects: the new lineage is appended to ``self.extant_p``; each
        child is removed from ``self.extant_p``, appended to
        ``self.not_extant_p``, re-parented to the new lineage and given
        ``dist = t0 - height``.

        :param child_paths:  Exactly two TreeNodes in the pathogen tree; both
            must be in ``self.extant_p``, share the same host and have
            heights below ``t0``.
        :param t0:  Time of pathogen coalescence as height
        :return:  TreeNode object for the new pathogen lineage.
        :raises AssertionError:  If any of the preconditions above fails.
        """
        assert len(child_paths
                   ) == 2, 'Can only coalesce 2 pathogen lineages at a time'
        p1, p2 = child_paths

        assert p1 in self.extant_p and p2 in self.extant_p, 'Both pathogen lineages must be extant'
        assert p1.host == p2.host, 'Can only coalesce pathogen lineages in the same host'
        host = p1.host

        assert p1.height < t0 and p2.height < t0, \
            'Pathogen lineage heights %f %f cannot exceed coalescent event %f' % (p1.height, p2.height, t0)

        # create new pathogen lineage, named after its two children
        new_path = TreeNode(name='_'.join([x.name for x in child_paths]),
                            dist=0)
        new_path.add_features(host=host, height=t0)

        # cast child_paths as a List because ete3.Tree.children requires it
        # (the children are wired manually here, together with ``up`` below)
        new_path.children = list(child_paths)
        self.extant_p.append(new_path)

        # coalesced pathogen lineages are no longer extant
        for node in child_paths:
            node.up = new_path
            node.dist = t0 - node.height  # when node was created, we stored the height
            self.extant_p.remove(node)
            self.not_extant_p.append(node)

        return new_path
 def get_qualifying_nodes (tree: TreeNode,clade: Union[str, List[str]], clade_total: int, clade_details: List, clades: List[str] = None, alternate_names: Dict[str, str] = None, node_cache = None, ):
     """
     Collect the internal nodes whose density and coverage lie within bounds.

     For every non-leaf node the clade distribution is read from
     [node_cache] when one is given, otherwise computed with
     ``get_clade_count``. ``get_details`` turns it into (density, coverage).
     A node qualifies when density lies within
     ``clade_details[MIN_DENSITY]..clade_details[MAX_DENSITY]`` and coverage
     within ``clade_details[MIN_COV]..clade_details[MAX_COV]``, bounds
     included.

     Returns:

         - dict mapping each qualifying node to
           (density, coverage, density + coverage)
     """
     qualifying_list = {}
     for node in tree.traverse():
         if node.is_leaf():
             continue

         if node_cache is not None:
             distribution = node_cache[node]
         else:
             distribution = get_clade_count(node, clades= clades, alternate_names=alternate_names)

         density, coverage = get_details(distribution, clade, clade_total)
         if (clade_details[MIN_DENSITY] <= density <= clade_details[MAX_DENSITY]
                 and clade_details[MIN_COV] <= coverage <= clade_details[MAX_COV]):
             qualifying_list[node] = (density, coverage, density + coverage)
     return qualifying_list
Пример #30
0
def copy_forest(forest, features=None):
    """Return a copy of every tree in ``forest`` as a list of new trees.

    Each copied node receives the ``dist``, ``support`` and ``name`` of its
    source node, plus every feature in ``features`` that the source node
    has. When ``features`` is empty or None, the feature names of the first
    tree's root are used for the whole forest.

    An empty forest yields an empty list (previously this raised IndexError
    when no ``features`` were given).
    """
    if features:
        wanted = set(features)
    else:
        if len(forest) == 0:
            # No first tree to take the feature names from.
            return []
        wanted = set(forest[0].features)

    copied_forest = []
    for tree in forest:
        copied_tree = TreeNode()
        copied_forest.append(copied_tree)
        # Stack of (source node, copy) pairs still to be filled in.
        todo = [(tree, copied_tree)]
        while todo:
            n, copied_n = todo.pop()
            copied_n.dist = n.dist
            copied_n.support = n.support
            copied_n.name = n.name
            for f in wanted:
                if hasattr(n, f):
                    copied_n.add_feature(f, getattr(n, f))
            # add_child() with no argument creates the empty copy that is
            # filled in when the pair is popped
            for c in n.children:
                todo.append((c, copied_n.add_child()))
    return copied_forest
def color_taxons(tree: TreeNode, color_marker: str, offset:int):
    """
    Sets the node style for all nodes in [tree]

    Every node first receives a default ``node_style()``. Leaves are then
    handed to ``color_taxon`` with the same [color_marker] and [offset].

    Args:

        - tree(TreeNode): the input tree to set node_style for
        - color_marker(Str): the delineator to split taxon sequence name to extract
            color information. Color info should be the last thing right after [color_marker].

            Color should also be in Hex format either FFFFFF or #FFFFFF
        - offset(int): passed through to ``color_taxon`` unchanged.

    """
    for node in tree.traverse():
        node.img_style = node_style()
        if not node.is_leaf():
            continue
        color_taxon(node, color_marker=color_marker, offset= offset)
def get_node_details(tree: TreeNode, clades:List[str], alternate_names: Dict[str, str]) -> Dict[TreeNode, Dict[str, int]]:
    """
    Computes the clade distribution at each internal node of the tree.

    Leaves are skipped. Every other node visited by ``tree.traverse()`` is
    mapped to the result of ``get_clade_count`` for that node.

    Args

        - tree (TreeNode): The tree whose nodes are examined
        - clades (List[str]): List containing the clade names
            Clade names should occur at the start of the sequence name.
        - alternate_names (Dict[str, str]): Maps alternate name found in sequence to desired name.

    Returns:

        - Dict[TreeNode, Dict[str, int]]: Dictionary mapping a node to the clade_distribution under it.
    """
    return {
        node: get_clade_count(node, clades= clades, alternate_names=alternate_names)
        for node in tree.traverse()
        if not node.is_leaf()
    }
Пример #33
0
    def creation_by_words(self, words):
        """
        Creation of a tree based on separate words in the word list

        Each distinct entry is split into parts with the pattern
        ``[\\s-]+|:[\\\\/]{2}`` (reversed when ``self.is_reversed`` is true).
        The longest prefix of those parts already recorded in
        ``self.name2node`` is used as the attachment point, falling back to
        the tree root. The remaining parts are added as a chain of new
        nodes, each recorded in ``self.name2node`` under its space-joined
        full name, with ``dist`` one greater than its parent's.

        :type words: list
        :return: the newly created tree
        """
        # Creates an empty tree
        tree = Tree()
        tree.name = ""
        # Make sure there are no duplicates
        unique_words = set(words)
        # Populate tree
        for word in unique_words:
            # If no similar words exist, add it to the base of tree
            target = tree

            # Kept separate from the collection being iterated; the
            # original reused the name ``words`` for both.
            parts = split(r'[\s-]+|:[\\/]{2}', word)
            if self.is_reversed:
                parts = list(reversed(parts))

            # Find relatives in the tree, longest prefix first
            # (range instead of the Python 2-only xrange)
            root = ''
            pos = 0
            for pos in range(len(parts), -1, -1):
                root = ' '.join(parts[:pos])
                if root in self.name2node:
                    target = self.name2node[root]
                    break

            # Add new nodes as necessary
            fullname = root
            for wd in parts[pos:]:
                fullname = (fullname + ' ' + wd).strip()
                new_node = TreeNode(name=wd.strip(), dist=target.dist + 1)
                target.add_child(new_node)
                self.name2node[fullname] = new_node
                target = new_node

        return tree
Пример #34
0
 def __init__(self, newick=None, format=0, dist=None, support=None, name=None):
     """Default init for the TreeClass.

     All arguments are passed by keyword to ``TreeNode.__init__``.
     """
     TreeNode.__init__(self,
                       newick=newick,
                       format=format,
                       dist=dist,
                       support=support,
                       name=name)