Python Newick.Tree示例，Bio.Phylo.Newick.Tree Python示例

示例#1

0

显示文件

def parse(handle):
    """Iterate over the trees of a Nexus file as Newick.Tree objects.

    The handle is read with the existing ``Nexus.Nexus`` parser.  Every tree
    in its ``trees`` collection is then rebuilt, node by node, as a hierarchy
    of ``Newick.Clade`` objects wrapped in a ``Newick.Tree``.  Doing the
    conversion here means the Nexus module itself does not have to be
    modified yet.

    This is a generator: one ``Newick.Tree`` is yielded per Nexus tree.
    """
    nexus_data = Nexus.Nexus(handle)

    # NB: Once Nexus.Trees is modified to use Tree.Newick objects, this whole
    # conversion can be replaced by:
    #     return iter(nexus_data.trees)
    # Until then, convert the Nexus.Trees.Tree object hierarchy by hand.
    def build_clade(source_tree, source_node):
        # Convert the successors first (depth-first), then this node itself.
        children = []
        for succ_id in source_node.succ:
            children.append(build_clade(source_tree,
                                        source_tree.node(succ_id)))
        payload = source_node.data
        return Newick.Clade(
            branch_length=payload.branchlength,
            name=payload.taxon,
            clades=children,
            confidence=payload.support,
            comment=payload.comment,
        )

    for source_tree in nexus_data.trees:
        top_node = source_tree.node(source_tree.root)
        converted_root = build_clade(source_tree, top_node)
        yield Newick.Tree(
            root=converted_root,
            rooted=source_tree.rooted,
            name=source_tree.name,
            weight=source_tree.weight,
        )

示例#2

0

显示文件

文件： Taxonomy.py 项目： housw/Vahine

    def taxid2tree(self, taxid_list, out_fmt="newick"):
        """Build a tree from the lineage paths of a list of taxids.

        For every taxid the path returned by ``self.get_path`` is walked from
        leaf to root and one ``Newick.Clade`` is created per distinct path
        element (keyed by the element's string form).  All paths must start
        with the same element, which becomes the root of the tree.  The tree
        is serialised with ``bp.write`` and returned as text.

        Parameters:
            taxid_list: iterable of taxids accepted by ``self.get_path``.
            out_fmt: format name handed to ``bp.write``
                (newick / phyloxml ...).

        Returns:
            The serialised tree as a string.

        Raises:
            AssertionError: a path starts with a different root element than
                the first path.
            ValueError: no root node could be determined, e.g. because
                ``taxid_list`` is empty.
        """
        # Get the path of every taxid.  Each path is joined into a
        # ";"-separated string and split again below, which keeps the
        # original normalisation of the path elements.
        path_list = [";".join([str(item) for item in self.get_path(taxid)])
                     for taxid in taxid_list]

        nodes = {}  # data format {"node_name": Clade_object}
        root = None

        for i, path in enumerate(path_list):
            line = path.strip().split(";")
            if root is None:
                root = line[0]
            else:
                assert root == line[0], "The %d-th line is from a different root"%(i+1)

            # Walk from leaf to root so that every node is registered together
            # with the name of its parent.
            leaf2root = line[::-1]
            last_index = len(leaf2root) - 1

            for j, child_node in enumerate(leaf2root):
                if child_node in nodes:
                    # Already registered by an earlier path (or earlier in
                    # this one); the first parent seen wins.
                    continue
                # The root node is marked by being its own parent.
                if j == last_index:
                    parent_node = child_node
                else:
                    parent_node = leaf2root[j + 1]
                clade = Newick.Clade(name=child_node)
                # Temporary attribute, removed again once the tree is linked.
                clade.parent = parent_node
                nodes[child_node] = clade

        root_node = None
        for node_name, node_clade in nodes.items():
            if node_name == node_clade.parent:
                # The root node is the one whose parent is itself.
                root_node = node_clade
                print("root node is %s, constructing tree ..."%(str(node_name)))
            else:
                # Any other node is attached to its parent's clades.
                nodes[node_clade.parent].clades.append(node_clade)
            del node_clade.parent

        if root_node is None:
            # Previously this surfaced as an UnboundLocalError further down.
            raise ValueError("Cannot build a tree: no root node was found "
                             "(taxid_list is empty or the paths are cyclic)")

        tree = Newick.Tree(root=root_node)
        treeFile = StringIO()
        bp.write(tree, treeFile, out_fmt)
        return treeFile.getvalue()

示例#3

0

显示文件

文件： NewickIO.py 项目： jlaliberte10/Python_Primers

 def _parse_tree(self, text):
     """Wrap the clade parsed from ``text`` in a Newick.Tree.

     The text is handed to ``self._parse_subtree`` and the result is used
     as the root of the returned tree.
     """
     # XXX what global info do we have here? Any? Use **kwargs?
     root_clade = self._parse_subtree(text)
     return Newick.Tree(root=root_clade)

示例#4

0

显示文件

文件： NewickIO.py 项目： miaecle/biopython

    def _parse_tree(self, text):
        """Parse the text representation of one tree into a Tree object.

        The stripped text is split into tokens with the ``tokenizer`` pattern
        (defined elsewhere in this module) and the tokens are consumed left to
        right.  Parentheses and commas open, close and switch clades; labels,
        bracketed comments and ``:value`` tokens are stored on the clade that
        is current at that point.

        Returns a ``Newick.Tree`` built with ``root=root_clade`` and
        ``rooted=self.rooted``.

        Raises ``NewickError`` when:
          - ``self.process_clade`` returns a false value on a ``)`` token;
          - the counts of ``(`` and ``)`` tokens differ;
          - any token follows the terminating ``;``.

        The ``float()`` conversion of a ``:value`` token is not guarded, so a
        ``ValueError`` can propagate from it; whether the tokenizer can ever
        produce such a token cannot be seen from this method.
        """
        tokens = re.finditer(tokenizer, text.strip())

        # Local alias for the clade factory; called with no argument for a
        # root and with the parent clade for a child.
        new_clade = self.new_clade
        root_clade = new_clade()

        current_clade = root_clade
        # NOTE(review): this flag is assigned here and reset in several
        # branches below, but it is never read within this method.
        entering_branch_length = False

        # Parenthesis counters, compared after the loop.
        lp_count = 0
        rp_count = 0
        for match in tokens:
            token = match.group()

            if token.startswith("'"):
                # quoted label; the name is the token without its first and
                # last character
                current_clade.name = token[1:-1]

            elif token.startswith('['):
                # comment; stored without its first and last character
                current_clade.comment = token[1:-1]
                if self.comments_are_confidence:
                    # Try to use this comment as a numeric support value
                    current_clade.confidence = _parse_confidence(current_clade.comment)

            elif token == '(':
                # start a new clade, which is a child of the current clade
                current_clade = new_clade(current_clade)
                entering_branch_length = False
                lp_count += 1

            elif token == ',':
                # if the current clade is the root, then the external parentheses
                # are missing and a new root should be created
                if current_clade is root_clade:
                    root_clade = new_clade()
                    current_clade.parent = root_clade
                # start a new child clade at the same level as the current clade;
                # process_clade's return value is used as the shared parent
                parent = self.process_clade(current_clade)
                current_clade = new_clade(parent)
                entering_branch_length = False

            elif token == ')':
                # done adding children for this parent clade
                parent = self.process_clade(current_clade)
                if not parent:
                    # no parent to return to: more ')' than '(' so far
                    raise NewickError('Parenthesis mismatch.')
                current_clade = parent
                entering_branch_length = False
                rp_count += 1

            elif token == ';':
                # end of the tree; remaining tokens are checked after the loop
                break

            elif token.startswith(':'):
                # branch length or confidence, depending on the parser setting
                value = float(token[1:])
                if self.values_are_confidence:
                    current_clade.confidence = value
                else:
                    current_clade.branch_length = value

            elif token == '\n':
                # newline tokens carry no information
                pass

            else:
                # unquoted node label
                current_clade.name = token

        if not lp_count == rp_count:
            raise NewickError('Number of open/close parentheses do not match.')

        # if ; token broke out of for loop, there should be no remaining tokens
        # (if the loop ran to exhaustion, next() raises StopIteration as well)
        try:
            next_token = next(tokens)
            raise NewickError('Text after semicolon in Newick tree: %s'
                              % next_token.group())
        except StopIteration:
            pass

        # Finish the clade that was still open, then the root itself.
        self.process_clade(current_clade)
        self.process_clade(root_clade)
        return Newick.Tree(root=root_clade, rooted=self.rooted)

示例#5

0

显示文件

文件： NewickIO.py 项目： Pfiver/RNA-Seqlyze

 def _parse_tree(self, text, rooted):
     """Wrap the clade parsed from ``text`` in a Newick.Tree.

     The text is handed to ``self._parse_subtree``; the result becomes the
     root of the returned tree.

     NOTE(review): the ``rooted`` argument is accepted but never read; the
     tree's ``rooted`` value is taken from ``self.rooted`` instead.  Confirm
     this is intended.
     """
     # XXX Pass **kwargs along from Parser.parse?
     root_clade = self._parse_subtree(text)
     return Newick.Tree(root=root_clade, rooted=self.rooted)

示例#6

0

显示文件

文件： centrifuge_LCA.py 项目： housw/Vahine

    def path2newick(self, path2pathFile, node_fmt="taxid", out_fmt="newick"):
        """Convert a file of taxonomic paths into a tree file.

        Every non-blank line of the input file is a ";"-separated path whose
        elements should be taxonomic ids rather than scientific names: names
        can repeat across ranks, ids are unique.  One ``Newick.Clade`` is
        created per distinct path element, the clades are linked into a tree
        and the tree is written with ``bp.write`` next to the input file, as
        ``<input basename>2tree_<node_fmt>.<out_fmt>``.

        Parameters:
            path2pathFile: path of the input path file.
            node_fmt: "taxid" keeps the ids as node names; "sciName" replaces
                them through ``self.get_sciName``.
            out_fmt: format name handed to ``bp.write``
                (newick / phyloxml ...).

        Raises:
            AssertionError: the second field of a line differs from the
                second field of the first line, or ``node_fmt`` is neither
                "taxid" nor "sciName".
            ValueError: no root node could be determined, e.g. because the
                file holds no paths.
        """
        path, fileName = os.path.split(path2pathFile)
        basename = os.path.splitext(fileName)[0]
        outFile = os.path.join(path,
                               basename + "2tree_" + node_fmt + "." + out_fmt)

        nodes = {}  # data format {"node_name": Clade_object}
        root = None

        # Read the path file line by line; the handle is only needed here.
        with open(path2pathFile, "r") as pathFile:
            for i, raw_line in enumerate(pathFile):
                # Drop surrounding whitespace and any trailing ";".
                cleaned = raw_line.strip().rstrip(";").strip()
                if not cleaned:
                    # Blank (or ";"-only) lines carry no path; previously
                    # they failed with an IndexError on line[1].
                    continue
                line = cleaned.split(";")
                # NOTE(review): the consistency check uses the SECOND field,
                # while the tree root below is the FIRST field -- confirm
                # against the path file format.
                if root is None:
                    root = line[1]
                else:
                    assert root == line[
                        1], "The %d-th line is from a different root" % (i + 1)

                # Walk from leaf to root so that every node is registered
                # together with the name of its parent.
                leaf2root = line[::-1]
                last_index = len(leaf2root) - 1

                for j, child_node in enumerate(leaf2root):
                    if child_node in nodes:
                        # Already registered; the first parent seen wins.
                        continue
                    # The root node is marked by being its own parent.
                    if j == last_index:
                        parent_node = child_node
                    else:
                        parent_node = leaf2root[j + 1]
                    clade = Newick.Clade(name=child_node)
                    # Temporary attribute, removed once the tree is linked.
                    clade.parent = parent_node
                    nodes[child_node] = clade

        root_node = None
        for node_name, node_clade in nodes.items():
            if node_name == node_clade.parent:
                # The root node is the one whose parent is itself.
                root_node = node_clade
                print(node_clade)
                print("root node found!! ")
            else:
                # Any other node is attached to its parent's clades.
                nodes[node_clade.parent].clades.append(node_clade)
            del node_clade.parent

        if root_node is None:
            # Previously this surfaced as an UnboundLocalError further down.
            raise ValueError("Cannot build a tree: no root node was found in "
                             "%s" % path2pathFile)

        # transform between output node format
        if node_fmt != "taxid":
            assert node_fmt == "sciName", "The node_fmt should be taxid or sciName"
            # Convert taxid to sciName: every non-root node is renamed through
            # its parent's clade list, the root is renamed separately.
            for node in nodes.values():
                for child in node.clades:
                    if child:
                        child.name = self.get_sciName(child.name)
            root_node.name = self.get_sciName(root_node.name)
        tree = Newick.Tree(root=root_node)

        # write tree to file
        print('Writing %s tree to %s...' % (out_fmt, outFile))

        bp.write(tree, outFile, out_fmt)