def printTreeWrapper(rootNode, newickOutput=False, outputFile=None):
    """Output a tree either as a Newick string or through ``printTree``.

    Arguments:
    rootNode - root node of the tree to output
    newickOutput - when true, serialise the tree with ``newick.dumps``;
        otherwise print it with ``printTree`` between begin/end markers
    outputFile - path the Newick line is appended to; when None the
        Newick string goes to stdout. Only used when newickOutput is true.
    """
    if not newickOutput:
        print("Begin printing tree.")
        printTree(rootNode)
        print("End printing tree.")
        return

    if outputFile is None:
        print(newick.dumps(rootNode))
        return

    # Append mode: repeated calls accumulate one tree per line.
    with open(outputFile, 'a') as handle:
        handle.write(newick.dumps(rootNode) + "\n")
示例#2
0
 def test_no_lengths_equiv(self):
     """Newick written without branch lengths must equal the default
     output once its lengths have been blanked."""
     examples = (
         self.all_nodes_samples_example(),
         self.only_internal_samples_example(),
         self.mixed_node_samples_example(),
     )
     for ts in examples:
         for t in ts.trees():
             without_lengths = newick.loads(
                 t.newick(include_branch_lengths=False))[0]
             with_lengths = newick.loads(t.newick())[0]
             # Blank every length so both trees should serialise the same.
             for node in with_lengths.walk():
                 node.length = None
             expected = newick.dumps(without_lengths)
             assert expected == newick.dumps(with_lengths)
示例#3
0
文件: lib.py 项目: dvolk/arboreta
def rescale_newick(trees_str):
    """Rescale the branch lengths of every tree in a Newick string.

    Each length is multiplied by a fixed constant. A scaled length below
    0.1 becomes 0, a scaled length up to 1 is kept, and anything larger
    is replaced by its square root.

    Arguments:
    trees_str - one or more trees in Newick format

    Returns:
    The rescaled trees serialised back to a Newick string.
    """
    import math

    # NOTE(review): presumably the reference genome length in bases, so
    # that per-site distances become counts -- confirm with the caller.
    scale = 4411532

    trees = newick.loads(trees_str)

    for tree in trees:
        for n in tree.walk():
            # Store the scaled value first and branch on what the node
            # reports back, so the thresholds see the stored value.
            n.length = n.length * scale
            if n.length < 0.1:
                n.length = 0
            elif n.length <= 1:
                pass
            else:
                n.length = math.sqrt(n.length)

    return newick.dumps(trees)
示例#4
0
def main():
    """Just tests.

    Loads a species tree and one gene tree from hard-coded OrthoFinder
    result paths, prints the gene tree, and reports each duplication node
    that has a species-like side whose sibling passes
    ``node_with_deletion``.
    """
    # Example tree
    species_tree = read_tree(
        "../proteomes_repeats_removed/OrthoFinder/Results_Jan20/Species_Tree/SpeciesTree_rooted_node_labels.txt"
    )
    species_tree_nodes = get_all_nodes(species_tree,
                                       calculate_nnodes(species_tree))

    tree = read_tree(
        "../proteomes_repeats_removed/OrthoFinder/Results_Jan20/Resolved_Gene_Trees/OG0000028_tree.txt"
    )
    print(tree.ascii_art())

    gene_nodes = get_all_nodes(tree, calculate_nnodes(tree), [], [])
    backup = newick.dumps(tree)
    for node in gene_nodes:
        # NOTE(review): the reloaded tree is never read again; the nodes
        # being iterated still belong to the originally loaded tree.
        tree = newick.loads(backup)
        if not is_duplication(node):
            continue
        sides = node.descendants
        for idx in range(len(sides)):
            if not is_species_like(sides[idx], species_tree_nodes):
                continue
            # The "% 2" sibling lookup assumes the node has two children.
            if node_with_deletion(sides[idx], sides[(idx + 1) % 2]):
                print("it's good")
            # Only the first species-like side is examined.
            break
示例#5
0
def _set_outgroup(tree, taxa):
    """Root ``tree`` on the only taxon in ``taxa``, or on the common
    ancestor of all of them when there are several."""
    if len(taxa) == 1:
        tree.set_outgroup(taxa[0])
    else:
        tree.set_outgroup(tree.get_common_ancestor(taxa))


def reroot_tree(tree, outgroup):
    """Reroot the tree so that the outgroup is the outgroup.

    The tree is converted to an ete3 tree and rooted on the given taxa.
    If that fails for any reason, the tree is rooted on the complementary
    set of leaves instead.

    Arguments:
    tree - a tree in python newick format
    outgroup - a list of taxa that ought to make up the entirety of the
        outgroup

    Returns:
    rooted_tree - an ``ete3.Tree`` rooted on the requested outgroup.

    Requires:
    ete3
    """
    newick_string = newick.dumps(tree)
    tree = ete3.Tree(newick_string, format=1)
    print(tree)
    try:
        _set_outgroup(tree, outgroup)
    except Exception:
        # Presumably the outgroup's common ancestor is the current root,
        # which cannot itself be an outgroup; rooting on every other leaf
        # gives the same split. Catching Exception rather than using a
        # bare except lets KeyboardInterrupt and SystemExit propagate.
        taxa = [leaf.name for leaf in tree]
        ingroup = list(set(taxa) - set(outgroup))
        _set_outgroup(tree, ingroup)
    return tree
def _species_in_c2h(c2h):
    """Return the second tab-separated field of every line of ``c2h``
    that starts with 's', with adjacent repeats collapsed.

    Pure-Python equivalent of ``grep -E '^s' FILE | cut -f 2 | uniq``.
    """
    fields = []
    with open(c2h) as handle:
        for line in handle:
            if not line.startswith('s'):
                continue
            line = line.rstrip('\n')
            columns = line.split('\t')
            # Like cut, pass a line without any tab through unchanged.
            field = columns[1] if len(columns) > 1 else line
            # Like uniq, drop only *adjacent* duplicates.
            if not fields or fields[-1] != field:
                fields.append(field)
    return fields


def postorder_create(node, prefix, hal):
    """Append ``node`` and, recursively, its non-leaf children to a HAL
    file by running ``halAppendCactusSubtree``.

    For each node the files ``<prefix>-<name>.c2h`` and
    ``<prefix>-<name>.hal.fa`` are used. Every species listed in the c2h
    file that is neither the node nor one of its children is passed as an
    outgroup. The node itself is processed before its children.

    Arguments:
    node - tree node exposing ``name`` and ``descendants``
    prefix - path prefix of the per-node c2h / hal.fa files
    hal - path of the HAL file to append to

    Raises:
    RuntimeError - if the node has no name
    IOError/OSError - if the c2h file cannot be read
    subprocess.CalledProcessError - if halAppendCactusSubtree fails
    """
    if node.name is None:
        raise RuntimeError("Requires a tree with all ancestors labeled.")
    sys.stderr.write("working on node %r\n" % (node.name))
    c2h = prefix + '-' + node.name + '.c2h'
    hal_fa = prefix + '-' + node.name + '.hal.fa'
    # get outgroup list (everything in c2h except children / anc)
    local_names = set(n.name for n in node.descendants)
    local_names.add(node.name)
    outgroups = []
    # The c2h file is read directly rather than through a shell pipeline:
    # no path is interpolated into a shell command, and the names are
    # text, so they compare equal to node names and can be joined below.
    for species_field in _species_in_c2h(c2h):
        # strip ' marks on either side
        species = species_field[1:-1]
        if species not in local_names:
            outgroups.append(species)
    # get local newick string
    subtree = deepcopy(node)
    for child in subtree.descendants:
        child.descendants = []
    local_newick = dumps(subtree)
    # actually perform the addition
    cmd = ['halAppendCactusSubtree', c2h, hal_fa, local_newick, hal]
    if len(outgroups) > 0:
        cmd.extend(['--outgroups', ",".join(outgroups)])
    sys.stderr.write('Running command %r\n' % cmd)
    check_call(cmd)
    # recurse
    for child in node.descendants:
        if len(child.descendants) == 0:
            # Leaf
            continue
        postorder_create(child, prefix, hal)
示例#7
0
def test_dumps(*trees):
    """Each sample Newick string must survive a loads/dumps round trip
    unchanged. The ``trees`` arguments are not used."""
    samples = (
        '(,,(,));',
        '(A,B,(C,D));',
        '(A,B,(C,D)E)F;',
        '(:0.1,:0.2,(:0.3,:0.4):0.5);',
        '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
    )
    for text in samples:
        parsed = loads(text)
        assert text == dumps(parsed[0])
示例#8
0
 def test_dumps(self, *trees):
     """Each sample Newick string must survive a loads/dumps round trip
     unchanged. The ``trees`` arguments are not used."""
     samples = (
         '(,,(,));',
         '(A,B,(C,D));',
         '(A,B,(C,D)E)F;',
         '(:0.1,:0.2,(:0.3,:0.4):0.5);',
         '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
     )
     for text in samples:
         round_tripped = dumps(loads(text)[0])
         self.assertEqual(text, round_tripped)
示例#9
0
def test_dumps(*trees):
    """Serialising a parsed tree must reproduce the original Newick
    text for every sample. The ``trees`` arguments are not used."""
    cases = [
        '(,,(,));',
        '(A,B,(C,D));',
        '(A,B,(C,D)E)F;',
        '(:0.1,:0.2,(:0.3,:0.4):0.5);',
        '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
    ]
    for original in cases:
        first_tree = loads(original)[0]
        assert original == dumps(first_tree)
示例#10
0
 def test_dumps(self, *trees):
     """Serialising a parsed tree must reproduce the original Newick
     text for every sample. The ``trees`` arguments are not used."""
     cases = [
         '(,,(,));',
         '(A,B,(C,D));',
         '(A,B,(C,D)E)F;',
         '(:0.1,:0.2,(:0.3,:0.4):0.5);',
         '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;',
     ]
     for original in cases:
         first_tree = loads(original)[0]
         self.assertEqual(original, dumps(first_tree))
示例#11
0
    def sanitise_tree(self, tree, tree_type):
        """
        Makes any changes to a user-provided tree required to make
        it suitable for passing to BEAST.

        The supplied Newick string is parsed and checked for duplicate
        taxa and for containing every language in the analysis. Extra
        taxa are pruned (with an [INFO] message appended to
        ``self.messages``) and redundant nodes are removed. A "starting"
        tree then has its polytomies resolved; a "monophyly" tree has its
        branch lengths removed.

        :param tree: the tree as a Newick string
        :param tree_type: "starting" or "monophyly"; also used in the
            error and info messages
        :return: the sanitised tree as a Newick string
        :raises ValueError: if the tree cannot be parsed, contains
            duplicate taxa, or lacks languages present in the analysis
        """
        # Make sure tree can be parsed
        try:
            tree = newick.loads(tree)[0]
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not reported as a parse failure.
            raise ValueError("Could not parse %s tree.  Is it valid Newick?" %
                             tree_type)
        # Make sure starting tree contains no duplicate taxa
        tree_langs = tree.get_leaf_names()
        if len(set(tree_langs)) != len(tree_langs):
            dupes = set(l for l in tree_langs if tree_langs.count(l) > 1)
            dupestring = ",".join(
                "%s (%d)" % (d, tree_langs.count(d)) for d in dupes)
            raise ValueError("%s tree contains duplicate taxa: %s" %
                             (tree_type.capitalize(), dupestring))
        tree_langs = set(tree_langs)
        # Make sure languages in tree is a superset of languages in the analysis
        if not tree_langs.issuperset(self.languages):
            missing_langs = set(self.languages).difference(tree_langs)
            miss_string = ",".join(missing_langs)
            raise ValueError(
                "Some languages in the data are not in the %s tree: %s" %
                (tree_type, miss_string))
        # If the tree's language set is a proper superset, prune the tree to fit the analysis
        if tree_langs != self.languages:
            tree.prune_by_names(self.languages, inverse=True)
            self.messages.append(
                "[INFO] %s tree includes languages not present in any data set and will be pruned."
                % tree_type.capitalize())
        # Get the tree looking nice
        tree.remove_redundant_nodes()
        if tree_type == "starting":
            tree.resolve_polytomies()
        # Remove lengths for a monophyly tree
        if tree_type == "monophyly":
            for n in tree.walk():
                n._length = None
        # Sanity checks on the result (not input validation)
        if tree_type == "starting":
            assert all(len(n.descendants) in (0, 2) for n in tree.walk())
        assert len(tree.get_leaves()) == len(self.languages)
        assert all(l.name for l in tree.get_leaves())
        # Done
        return newick.dumps(tree)
示例#12
0
文件: lib.py 项目: oxfordmmm/arboreta
def relabel_newick(trees_str):
    """
    Relabel newick tree from guid to eartag.

    Every named node of every tree in ``trees_str`` gets the name
    returned by ``get_eartag``. The eartag list returned by each call is
    passed to the next one. Returns the relabelled trees as a Newick
    string.
    """
    parsed_trees = newick.loads(trees_str)
    seen_eartags = []

    for tree in parsed_trees:
        for node in tree.walk():
            if not node.name:
                # Unnamed nodes are left alone.
                continue
            node.name, seen_eartags = get_eartag(node.name, seen_eartags)

    return newick.dumps(parsed_trees)
示例#13
0
    def newick_tree_species(self):
        """
        Returns a Newick tree in which nodes named after a species code
        carry the species name instead.

        :return: Newick tree (string) with species for the current clade
        """
        code_to_name = dict((sp.code, sp.name) for sp in Species.query.all())

        root = newick.loads(self.newick_tree)[0]

        for code in code_to_name:
            match = root.get_node(code)
            if match is None:
                # This species code does not occur in the tree.
                continue
            match.name = code_to_name[code]

        return newick.dumps([root])
示例#14
0
    def _convert_to_phyloxml(self,
                             seq_id_to_seq_name: Dict[SequenceID,
                                                      str] = None) -> str:
        """Build a tree from ``self.nodes`` and return it serialised with
        ``dumps``.

        NOTE(review): despite the method name, the returned value comes
        from ``dumps`` (presumably newick's, i.e. a Newick string), not
        PhyloXML -- confirm against callers.

        A node with exactly one sequence is labelled with that sequence's
        name from ``seq_id_to_seq_name`` when a mapping is given, else
        with the sequence id's ``value``. Any other node is labelled
        ``Consensus <consensus_id>``. The root gets length "1"; other
        nodes get the difference between their own and their parent's
        ``mincomp`` root value.

        Side effect: writes and re-reads ``drzewko.xml`` in the current
        working directory.

        :param seq_id_to_seq_name: optional mapping from sequence id to
            display name
        :return: the serialised tree, or None when there are no nodes
        """
        if not self.nodes:
            return None

        newick_str = self._convert_to_newick(seq_id_to_seq_name)
        # NOTE(review): this PhyloXML round trip does not feed into the
        # returned value; it is kept only because it creates drzewko.xml.
        Phylo.write(Phylo.read(StringIO(newick_str), 'newick'), 'drzewko.xml',
                    'phyloxml')
        Phylo.PhyloXMLIO.read("drzewko.xml")
        sorted_nodes = sorted(self.nodes, key=lambda x: x.consensus_id)

        # Stack of (parent label, node) pairs, starting at the node with
        # the lowest consensus id.
        nodes_to_process = [(None, sorted_nodes[0])]
        newick_tree = None
        while nodes_to_process:
            node_parent_label, node = nodes_to_process.pop()

            if len(node.sequences_ids) == 1:
                seq_id = node.sequences_ids[0]
                if seq_id_to_seq_name:
                    label = seq_id_to_seq_name[seq_id]
                else:
                    label = seq_id.value
            else:
                # Same spelling in both modes; one branch used to emit
                # "Consenses".
                label = f"Consensus {node.consensus_id}"

            if node.parent_node_id is None:
                length = "1"
            else:
                parent_minComp = sorted_nodes[
                    node.parent_node_id].mincomp.root_value().value
                length = str((1 - parent_minComp) -
                             (1 - node.mincomp.root_value().value))

            newick_node = Node(name=label, length=length)

            if newick_tree is None:
                newick_tree = newick_node
            else:
                # Parents are found by label, so labels are assumed to be
                # unique within the tree.
                parent_node = newick_tree.get_node(node_parent_label)
                parent_node.add_descendant(newick_node)

            for child in node.children_nodes_ids:
                nodes_to_process.append((label, sorted_nodes[child]))

        return dumps(newick_tree)
def main():
    """Write all gene trees appropriate to a file.

    For every family in the first column of the orthogroups file
    (skipping the header line), the gene tree
    ``<gene_tree_dir>/<family><extention>`` is read. Each duplication node
    that has a species-like side whose sibling passes
    ``tm.node_with_deletion`` is written to the output file as
    ``family<TAB>node name``. Progress is logged to stderr; families
    whose tree cannot be read are skipped.
    """
    species_treefile, orthogroups_file, gene_tree_dir, outfile, extention = get_args()

    # species tree
    species_tree = tm.read_tree(species_treefile)
    sp_nnodes = tm.calculate_nnodes(species_tree)
    species_tree_nodes = tm.get_all_nodes(species_tree, sp_nnodes)

    # Gene trees: the family name is the first tab-separated column.
    lines = mod.get_file_data(orthogroups_file)
    candidates = [line.split("\t")[0] for line in lines[1:]]

    # The with-block closes (and flushes) the output even on error.
    with open(outfile, "w") as out:
        for family_name in candidates:
            sys.stderr.write(family_name + "\n")
            try:
                tree = tm.read_tree(gene_tree_dir + "/" + family_name + extention)
            except Exception:
                # Best effort: skip families without a readable tree, but
                # let KeyboardInterrupt and SystemExit through.
                sys.stderr.write(family_name + " not in candidates\n")
                continue

            nnodes = tm.calculate_nnodes(tree)
            sys.stderr.write("nodes = " + str(nnodes) + "\n")
            nodes = tm.get_all_nodes(tree, nnodes, [], [])
            backup = newick.dumps(tree)
            for node in nodes:
                sys.stderr.write("node = " + str(node) + "\n")
                # NOTE(review): the reloaded tree is never read again;
                # the nodes iterated here belong to the original tree.
                tree = newick.loads(backup)
                if not tm.is_duplication(node):
                    continue
                sys.stderr.write("is duplication\n")
                sides = node.descendants
                for i in range(len(sides)):
                    if tm.is_species_like(sides[i], species_tree_nodes):
                        # "% 2" assumes the node has two children.
                        if tm.node_with_deletion(sides[i], sides[(i + 1) % 2]):
                            out.write(family_name + "\t" + node.name + "\n")
                            sys.stderr.write("good node!\n\n")
                        # Only the first species-like side is examined.
                        break
示例#16
0
def __replace_ids(tree_string, conversion_table):
    """
    Swaps leaf identifiers in a newick string for their entries in the
    conversion table. Internal node names are dropped, and leaves without
    an entry keep their name.

    :param tree_string: tree in newick format
    :param conversion_table: dict with name conversion
    :return: converted tree, in newick format
    """
    parsed = newick.loads(tree_string.strip(), strip_comments=True)
    root = parsed[0]

    # Internal names are discarded here; they need to be replaced with
    # proper reconciliation.
    root.remove_internal_names()

    for leaf in root.get_leaves():
        current = leaf.name
        if current in conversion_table:
            leaf.name = conversion_table[current]

    return newick.dumps([root])
示例#17
0
文件: lib.py 项目: oxfordmmm/arboreta
def rescale_newick(trees_str):
    """Rescale branch lengths so the smallest non-zero length becomes 1.

    Every length in every tree is divided by the smallest non-zero
    length found across all trees and truncated to an integer.

    Arguments:
    trees_str - one or more trees in Newick format

    Returns:
    The rescaled trees serialised back to a Newick string.
    """
    trees = newick.loads(trees_str)

    # Smallest non-zero branch length; stays at the float maximum when
    # no tree has one, which turns every length into 0 below.
    lmin = float_info.max
    for tree in trees:
        for n in tree.walk():
            if n.length < lmin and n.length != 0:
                lmin = n.length

    factor = 1 / lmin
    for tree in trees:
        for n in tree.walk():
            n.length = int(n.length * factor)

    return newick.dumps(trees)
示例#18
0
    def handle_starting_tree(self):
        """
        Makes any changes to the user-provided starting tree required to make
        it suitable for passing to BEAST.

        If ``self.starting_tree`` names an existing file, it is replaced
        by that file's contents. A non-empty tree is then parsed, checked
        for duplicate taxa and for containing every language in the
        analysis, pruned to those languages (with an [INFO] message
        appended to ``self.messages``), stripped of redundant nodes and
        resolved to have no polytomies. The result is stored back in
        ``self.starting_tree`` as a Newick string.

        :raises ValueError: if the tree cannot be parsed, contains
            duplicate taxa, or lacks languages present in the analysis
        """
        # An empty or unset value cannot name a file; skip the lookup
        # rather than hand os.path.exists something that is not a path.
        if self.starting_tree and os.path.exists(self.starting_tree):
            with io.open(self.starting_tree, encoding="UTF-8") as fp:
                self.starting_tree = fp.read().strip()
        if not self.starting_tree:
            return
        # Make sure starting tree can be parsed
        try:
            tree = newick.loads(self.starting_tree)[0]
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not reported as a parse failure.
            raise ValueError("Could not parse starting tree.  Is it valid Newick?")
        # Make sure starting tree contains no duplicate taxa
        tree_langs = [n.name for n in tree.walk() if n.is_leaf]
        if len(set(tree_langs)) != len(tree_langs):
            # Report each duplicated taxon once, in order of first
            # appearance, instead of once per occurrence.
            dupes = []
            for l in tree_langs:
                if tree_langs.count(l) > 1 and l not in dupes:
                    dupes.append(l)
            dupestring = ",".join("%s (%d)" % (d, tree_langs.count(d)) for d in dupes)
            raise ValueError("Starting tree contains duplicate taxa: %s" % dupestring)
        tree_langs = set(tree_langs)
        # Make sure languages in tree is a superset of languages in the analysis
        if not tree_langs.issuperset(self.languages):
            missing_langs = set(self.languages).difference(tree_langs)
            miss_string = ",".join(missing_langs)
            raise ValueError("Some languages in the data are not in the starting tree: %s" % miss_string)
        # If the tree's language set is a proper superset, prune the tree to fit the analysis
        if tree_langs != self.languages:
            tree.prune_by_names(self.languages, inverse=True)
            self.messages.append("[INFO] Starting tree includes languages not present in any data set and will be pruned.")
        # Get the tree looking nice
        tree.remove_redundant_nodes()
        tree.resolve_polytomies()
        # Replace the starting_tree from the config with the new one
        self.starting_tree = newick.dumps(tree)
示例#19
0
def modify(file_name: str, tree: PhyTree):
    """Interactively add leaves to groups of ``tree``, then save it.

    Each round prints the tree's groups, reads a leaf name and a target
    group from stdin and calls ``tree.add_to_group``. If that raises
    ValueError the error is printed and the tree is restored from the
    structure captured just before the attempt. Rounds repeat until
    ``ask_if_finished()`` returns a true value; the tree is then saved
    to ``file_name``.

    :param file_name: path passed to ``tree.save``
    :param tree: the tree to modify in place
    """
    finished = False
    while not finished:
        print("Tree has following groups:")
        print(tree.get_nodes())
        print("Add leaf:")
        node = input()
        # search, not match: a forbidden character anywhere in the name
        # is invalid, not only one in first position.
        if re.search(r"[{} ]", node):
            print("Leaf name can not include {, }, and space")
            continue
        print(f"Add leaf '{node}' to group:")
        group = input()
        # The group must both start with "{" and end with "}".
        if not (group.startswith("{") and group.endswith("}")):
            print("Target group has to be enclosed by {} brackets")
            continue
        # Snapshot so a failed insertion can be rolled back.
        current_structure = dumps(tree.get_newick())
        try:
            tree.add_to_group(node, group)
        except ValueError as e:
            print(e)
            tree.parse_string(current_structure)
        finished = ask_if_finished()
    tree.save(file_name)
示例#20
0
def main():
    """
    Write appropriate trees to a file.

    For each tree:
    get the nodes of the tree
    look at the duplication nodes
    for each side:
    look at one side to see if it is species-like
    if it is, check if the other one is node with deletion
    if success, add to the file or whatever.

    The output file receives the selected tree file paths, one per line.
    """

    treedir, species_tree, out_file = get_args()
    species_tree = tm.read_tree(species_tree)
    sp_tree_nodes = tm.get_all_nodes(species_tree,
                                     tm.calculate_nnodes(species_tree), [], [])
    treefiles = glob.glob(treedir + "/*rooted")
    good_trees = []
    # NOTE(review): only the tree at index 16 is examined, which looks
    # like a debugging leftover; the docstring describes all of them
    # (``range(len(treefiles))``). Confirm before widening.
    for i in [16]:
        tree = tm.read_tree(treefiles[i])
        # NOTE(review): the node list is unused; the loop below walks
        # only the root's direct children.
        nodes = tm.get_all_nodes(tree, tm.calculate_nnodes(tree), [], [])
        backup = newick.dumps(tree)
        for node in tree.descendants:
            # NOTE(review): the reloaded tree is never read again.
            tree = newick.loads(backup)
            if not tm.is_duplication(node):
                continue
            for j in range(2):
                if tm.is_species_like(node.descendants[j], sp_tree_nodes):
                    print("species_like_side")
                    if tm.node_with_deletion(node.descendants[j],
                                             node.descendants[(j + 1) % 2]):
                        # A file is appended once per matching side, so
                        # it can appear more than once in the output.
                        good_trees.append(treefiles[i])
    # The with-block closes the file even if the write fails.
    with open(out_file, "w") as out:
        out.write("\n".join(good_trees))
示例#21
0
文件: lib.py 项目: dvolk/arboreta
def relabel_newick(trees_str):
    '''
    Relabel newick tree from guid to eartag

    The guid -> sample name map is fetched from the local API once and
    cached in the module-level ``guid_sample_name_map``. The sample
    name -> eartag map is fetched on every call. Every named node is
    renamed to the value returned by ``get_eartag``. Returns the
    relabelled trees as a Newick string; the input is echoed to stdout.
    '''
    global guid_sample_name_map

    print(trees_str)
    trees = newick.loads(trees_str)
    eartags = []

    # NOTE(review): neither request sets a timeout, so an unresponsive
    # API blocks this call indefinitely.
    if not guid_sample_name_map:
        guid_sample_name_map = requests.get(
            'http://127.0.0.1:5007/api/all_guid_sample_names').json()

    sample_name_eartag_map = requests.get(
        'http://127.0.0.1:5007/api/all_sample_names_eartags').json()

    for tree in trees:
        for node in tree.walk():
            if node.name:
                node.name, eartags = get_eartag(node.name, eartags,
                                                sample_name_eartag_map,
                                                guid_sample_name_map)

    return newick.dumps(trees)
示例#22
0
    def tree_stripped(self):
        """Return the first tree of ``self.data_newick`` as a Newick
        string with its lengths removed."""
        parsed = newick.loads(self.data_newick)
        root = parsed[0]
        root.remove_lengths()
        return newick.dumps([root])
示例#23
0
    def reconcile_trees(self):
        """Label the internal nodes of every tree and record pairwise
        sequence associations.

        For each node with exactly two children, the leaf sequences of
        both branches are mapped to species. ``phylo`` then supplies the
        clade and the duplication call, and the node is renamed
        ``<clade id>_<D|S>_<duplication consistency>``. When a clade was
        found, one association row per ordered pair of sequences across
        the two branches is queued. Rows are inserted in batches and the
        relabelled Newick string is stored in each tree's
        ``data_phyloxml``. Progress is printed to stdout.
        """
        print("\n1.====================Getting into function reconcile_trees")
        # Fetch required data from the database
        sequences = Sequence.query.all()
        clades = Clade.query.all()

        seq_to_species = {s.name: s.species.code for s in sequences}
        seq_to_id = {s.name: s.id for s in sequences}
        clade_to_species = {c.name: json.loads(c.species) for c in clades}
        clade_to_id = {c.name: c.id for c in clades}

        new_associations = []

        phyloxml_data = {}

        for t in self.trees:
            # Load tree from Newick string and start reconciling
            tree = newick.loads(t.data_newick)[0]
            print("\n3.=========================tree loaded ok")

            for node in tree.walk():
                if len(node.descendants) != 2:
                    if not node.is_binary:
                        print("\n5.================Non-Binary-node: " +
                              str(node.is_binary))
                        # sdash: report which tree is non-binary and is
                        # skipped, without stopping the reconciliation
                        # (the parsed node has neither id nor label).
                        print("Non-Binary tree: " + t.data_newick)
                    # Otherwise it is a leaf node and can be skipped
                    continue

                branch_one_seq = [
                    l.name.strip() for l in node.descendants[0].get_leaves()
                ]
                branch_two_seq = [
                    l.name.strip() for l in node.descendants[1].get_leaves()
                ]

                # Sequences unknown to the database contribute no species.
                branch_one_species = {
                    seq_to_species[s] for s in branch_one_seq
                    if s in seq_to_species
                }
                print("\n8.===============Branch-one-spp: " +
                      ', '.join(branch_one_species))
                # TODO: an empty set here may mean the sequence names in
                # the tree do not match those in seq_to_species.

                branch_two_species = {
                    seq_to_species[s] for s in branch_two_seq
                    if s in seq_to_species
                }
                print("\n9.===============Branch-two-spp: " +
                      ', '.join(branch_two_species))

                all_species = branch_one_species.union(branch_two_species)

                clade, _ = phylo.get_clade(all_species, clade_to_species)
                duplication = phylo.is_duplication(branch_one_species,
                                                   branch_two_species,
                                                   clade_to_species)

                duplication_consistency = None
                if duplication:
                    duplication_consistency = phylo.duplication_consistency(
                        branch_one_species, branch_two_species)

                tags = [
                    clade_to_id[clade] if clade is not None else 0,
                    'D' if duplication else 'S',
                    duplication_consistency if duplication else 0
                ]

                # "tag", not "t": keeps the tree loop variable unshadowed.
                node.name = '_'.join([str(tag) for tag in tags])

                if clade is None:
                    continue

                for seq_one in branch_one_seq:
                    for seq_two in branch_two_seq:
                        # Store the association in both directions.
                        for first, second in ((seq_one, seq_two),
                                              (seq_two, seq_one)):
                            new_associations.append({
                                'sequence_one_id': seq_to_id[first],
                                'sequence_two_id': seq_to_id[second],
                                'tree_id': t.id,
                                'clade_id': clade_to_id[clade],
                                'duplication': 1 if duplication else 0,
                                'duplication_consistency_score':
                                duplication_consistency
                            })

            # Flush in batches (checked once per tree) to bound memory.
            if len(new_associations) > 400:
                db.engine.execute(
                    SequenceSequenceCladeAssociation.__table__.insert(),
                    new_associations)
                new_associations = []

            # add newick tree to memory
            phyloxml_data[t.id] = newick.dumps([tree])

        # Insert the remainder; never issue an insert with no rows.
        if new_associations:
            db.engine.execute(
                SequenceSequenceCladeAssociation.__table__.insert(),
                new_associations)

        # Update PhyloXML data field for all trees
        for t in self.trees:
            if t.id in phyloxml_data:
                t.data_phyloxml = phyloxml_data[t.id]

        db.session.commit()
示例#24
0
def test_all_removal():
    """Removing names and lengths leaves only the topology."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_names()
    root.remove_lengths()
    assert dumps(root) == '((,(,)));'
示例#25
0
def test_length_removal():
    """remove_lengths() drops every branch length but keeps all names."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_lengths()
    without_lengths = dumps(root)
    assert without_lengths == '((B,(C,D)E)F)A;'
示例#26
0
def test_leaf_name_removal():
    """remove_leaf_names() clears leaf names but keeps internal names
    and all lengths."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_leaf_names()
    without_leaf_names = dumps(root)
    assert without_leaf_names == '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;'
示例#27
0
def test_length_removal():
    """After remove_lengths() the serialised tree carries names only."""
    parsed = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')
    tree = parsed[0]
    tree.remove_lengths()
    expected = '((B,(C,D)E)F)A;'
    assert dumps(tree) == expected
示例#28
0
 def test_leaf_name_removal(self):
     """remove_leaf_names() clears leaf names but keeps internal names
     and all lengths."""
     root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
     root.remove_leaf_names()
     self.assertEqual(dumps(root), '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;')
示例#29
0
def test_internal_name_removal():
    """remove_internal_names() clears names of non-leaf nodes only."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
    root.remove_internal_names()
    leaf_names_only = dumps(root)
    assert leaf_names_only == '((B:0.2,(C:0.3,D:0.4):0.5):0.1);'
示例#30
0
 def test_length_removal(self):
     """remove_lengths() drops every branch length but keeps all names."""
     root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
     root.remove_lengths()
     self.assertEqual(dumps(root), '((B,(C,D)E)F)A;')
示例#31
0
 def test_leaf_name_removal(self):
     """Leaf names disappear from the output; internal names and
     lengths stay."""
     parsed = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')
     tree = parsed[0]
     tree.remove_leaf_names()
     expected = '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;'
     self.assertEqual(dumps(tree), expected)
示例#32
0
    def reconcile_trees(self):
        """Label the internal nodes of every tree and record pairwise
        sequence associations.

        For each node with exactly two children, the leaf sequences of
        both branches are mapped to species. ``phylo`` then supplies the
        clade and the duplication call, and the node is renamed
        ``<clade id>_<D|S>_<duplication consistency>``. When a clade was
        found, one association row per ordered pair of sequences across
        the two branches is queued. Rows are inserted in batches and the
        relabelled Newick string is stored in each tree's
        ``data_phyloxml``.
        """
        # Fetch required data from the database
        sequences = Sequence.query.all()
        clades = Clade.query.all()

        seq_to_species = {s.name: s.species.code for s in sequences}
        seq_to_id = {s.name: s.id for s in sequences}
        clade_to_species = {c.name: json.loads(c.species) for c in clades}
        clade_to_id = {c.name: c.id for c in clades}

        new_associations = []

        phyloxml_data = {}

        for t in self.trees:
            # Load tree from Newick string and start reconciling
            tree = newick.loads(t.data_newick)[0]

            for node in tree.walk():
                if len(node.descendants) != 2:
                    if not node.is_binary:
                        # Warn about a non-binary node. The identifiers
                        # come from the database record ``t``: ``tree``
                        # is the parsed Newick node, on which the code
                        # never sets an id or label.
                        # NOTE(review): assumes the record has a
                        # ``label`` attribute; falls back to None.
                        print(
                            "[%d, %s] Skipping node... Can only reconcile binary nodes ..."
                            % (t.id, getattr(t, 'label', None)))
                    # Otherwise it is a leaf node and can be skipped
                    continue

                branch_one_seq = [
                    l.name.strip() for l in node.descendants[0].get_leaves()
                ]
                branch_two_seq = [
                    l.name.strip() for l in node.descendants[1].get_leaves()
                ]

                # Sequences unknown to the database contribute no species.
                branch_one_species = {
                    seq_to_species[s] for s in branch_one_seq
                    if s in seq_to_species
                }
                branch_two_species = {
                    seq_to_species[s] for s in branch_two_seq
                    if s in seq_to_species
                }

                all_species = branch_one_species.union(branch_two_species)

                clade, _ = phylo.get_clade(all_species, clade_to_species)
                duplication = phylo.is_duplication(branch_one_species,
                                                   branch_two_species,
                                                   clade_to_species)

                duplication_consistency = None
                if duplication:
                    duplication_consistency = phylo.duplication_consistency(
                        branch_one_species, branch_two_species)

                tags = [
                    clade_to_id[clade] if clade is not None else 0,
                    'D' if duplication else 'S',
                    duplication_consistency if duplication else 0
                ]

                # "tag", not "t": keeps the tree loop variable unshadowed.
                node.name = '_'.join([str(tag) for tag in tags])

                if clade is None:
                    continue

                for seq_one in branch_one_seq:
                    for seq_two in branch_two_seq:
                        # Store the association in both directions.
                        for first, second in ((seq_one, seq_two),
                                              (seq_two, seq_one)):
                            new_associations.append({
                                'sequence_one_id': seq_to_id[first],
                                'sequence_two_id': seq_to_id[second],
                                'tree_id': t.id,
                                'clade_id': clade_to_id[clade],
                                'duplication': 1 if duplication else 0,
                                'duplication_consistency_score':
                                duplication_consistency
                            })

            # Flush in batches (checked once per tree) to bound memory.
            if len(new_associations) > 400:
                db.engine.execute(
                    SequenceSequenceCladeAssociation.__table__.insert(),
                    new_associations)
                new_associations = []

            # add newick tree to memory
            phyloxml_data[t.id] = newick.dumps([tree])

        # Insert the remainder; never issue an insert with no rows.
        if new_associations:
            db.engine.execute(
                SequenceSequenceCladeAssociation.__table__.insert(),
                new_associations)

        # Update PhyloXML data field for all trees
        for t in self.trees:
            if t.id in phyloxml_data:
                t.data_phyloxml = phyloxml_data[t.id]

        db.session.commit()
示例#33
0
def test_leaf_name_removal():
    """Stripping leaf names keeps internal node names and branch lengths."""
    source = '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;'
    expected = '((:0.2,(:0.3,:0.4)E:0.5)F:0.1)A;'

    root = loads(source)[0]
    root.remove_leaf_names()
    serialized = dumps(root)

    assert serialized == expected
示例#34
0
def add_trees_general():
    """Create a TreeMethod and bulk-import newick trees from an uploaded archive.

    On POST a new ``TreeMethod`` is committed, then every regular file in the
    uploaded gzip-compressed tar archive is parsed as a newick tree and
    inserted into the ``Tree`` table in batches. The part of each member's
    name before the first ``.`` is looked up among the names of the gene
    families belonging to the selected gene family method; trees without a
    matching family are still inserted, with ``gf_id`` set to ``None``.

    Returns a redirect to ``admin.index`` after the import (or after a failed
    commit / failed validation); aborts with 405 for a non-POST request whose
    form validates.
    """
    # Local import: only needed for temp-file cleanup inside this view.
    import os

    form = AddGeneralTreesForm(request.form)

    if request.method == 'POST':
        new_method = TreeMethod()

        new_method.description = request.form.get('description')
        new_method.gene_family_method_id = request.form.get(
            'gene_family_method_id')

        db.session.add(new_method)

        try:
            # Commit to DB remainder
            db.session.commit()
        except Exception as _:
            db.session.rollback()
            flash('Failed to add TreeMethod to the DB!', 'danger')
            return redirect(url_for('admin.index'))

        # Get original gene family names (used to link trees to families)
        gfs = GeneFamily.query.filter(
            GeneFamily.method_id == new_method.gene_family_method_id).all()
        name_to_id = {gf.name: gf.id for gf in gfs}
        tree_data = request.files[form.general_tree_archive.name].read()

        fd, temp_path = mkstemp()

        try:
            # fdopen takes ownership of the descriptor returned by mkstemp,
            # so it is closed when the with-block exits (it used to leak).
            with os.fdopen(fd, 'wb') as tree_data_writer:
                tree_data_writer.write(tree_data)

            new_trees = []
            with tarfile.open(temp_path, mode='r:gz') as tf:
                for entry in tf:
                    # Directories, links, etc. carry no tree data and
                    # extractfile() returns None for them.
                    if not entry.isfile():
                        continue

                    name = entry.name
                    tree_string = str(
                        tf.extractfile(entry).read().decode('utf-8')).replace(
                            '\r', '').replace('\n', '')

                    # get the gene families original name from the filename
                    current_tree_name = str(name.split('.')[0])
                    gf_id = name_to_id.get(current_tree_name)

                    if gf_id is None:
                        # %s instead of %d: the method id may be a string
                        # (or None) and %d would raise TypeError.
                        print(
                            '%s: Family %s not found in gene families generated using method %s !'
                            % (name, current_tree_name,
                               new_method.gene_family_method_id))

                    tree = newick.loads(tree_string)[0]

                    new_trees.append({
                        "gf_id": gf_id,
                        "label": current_tree_name + "_tree",
                        "method_id": new_method.id,
                        "data_newick": tree_string,
                        "data_phyloxml": newick.dumps([tree])
                    })

                    # add 400 trees at the time, more can cause problems with some database engines
                    if len(new_trees) > 400:
                        db.engine.execute(Tree.__table__.insert(), new_trees)
                        new_trees = []

                # add the last set of trees; skip the call when nothing is
                # pending so no parameterless INSERT is issued
                if new_trees:
                    db.engine.execute(Tree.__table__.insert(), new_trees)

                flash('Added trees to DB.', 'success')
                return redirect(url_for('admin.index'))
        finally:
            # mkstemp never deletes the file it creates; clean up ourselves.
            os.remove(temp_path)
    else:
        if not form.validate():
            flash('Unable to validate data, potentially missing fields',
                  'danger')
            return redirect(url_for('admin.index'))
        else:
            abort(405)
示例#35
0
文件: nj.py 项目: Zargess/AlgBio
        for m in range(shape[0]):
            if m != i and m != j:
                newDist = 0.5 * (disMatrix[i, m] + disMatrix[j, m] -
                                 disMatrix[i, j])
                disMatrix[shape[0], m] = newDist
                disMatrix[m, shape[0]] = newDist
        shape = (shape[0] + 1, shape[1] + 1)

        disMatrix[i, :] = np.nan  #float("inf")
        disMatrix[j, :] = np.nan  #float("inf")
        disMatrix[:, i] = np.nan  #float("inf")
        disMatrix[:, j] = np.nan  #float("inf")

    keys = list(S.keys())
    dim = disMatrix[keys[0], keys[2]]
    dij = disMatrix[keys[0], keys[1]]
    djm = disMatrix[keys[1], keys[2]]
    gamma_v_i = 0.5 * (dij + dim - djm)
    gamma_v_j = 0.5 * (dij + djm - dim)
    gamma_v_m = 0.5 * (dim + djm - dij)

    S[keys[0]].length = gamma_v_i
    S[keys[1]].length = gamma_v_j
    S[keys[2]].length = gamma_v_m

    S["root"] = newick.Node.create(
        name="root", descendants=[S[keys[0]], S[keys[1]], S[keys[2]]])

    rename_recursive(S["root"])
    print(newick.dumps(S["root"]))
示例#36
0
 def test_length_removal(self):
     """Removing branch lengths leaves every node name in place."""
     root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]
     root.remove_lengths()
     self.assertEqual(dumps(root), '((B,(C,D)E)F)A;')
示例#37
0
def test_internal_name_removal():
    """Stripping internal names keeps leaf names and all branch lengths."""
    source = '((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;'
    expected = '((B:0.2,(C:0.3,D:0.4):0.5):0.1);'

    root = loads(source)[0]
    root.remove_internal_names()
    serialized = dumps(root)

    assert serialized == expected
示例#38
0
def tree_data(req,
              species_query,
              experiment_count=lambda s: s.count_experiments):
    """Build the newick tree and per-node metadata for a set of species.

    The species are grouped by kingdom, phylum, class, order, family and
    genus; one unnamed root node is created per kingdom and one named node
    per lower rank, with the species as leaves below their genus node.

    :param req: passed through unchanged to ``species_node`` and \
    ``coverage_data``.
    :param species_query: query object; ``.count()`` and \
    ``.order_by(...).options(...)`` are called on it.
    :param experiment_count: callable applied to each species; its result is \
    handed to ``species_node``. Defaults to reading ``s.count_experiments``.
    :return: dict with the keys ``count_leafs``, ``newick``, ``colormap``, \
    ``colormap2``, ``node_data`` and ``edgecolors``.
    """
    node_data = {}
    ntrees = []
    # Species tallies per family (colormap) and per class (colormap2).
    colormap = collections.Counter()
    colormap2 = collections.Counter()
    count_leafs = species_query.count()
    # NOTE(review): groupby below groups on the rank *names* while the query
    # is ordered by the corresponding sortkey columns; this assumes equal
    # names end up adjacent under that ordering -- confirm.
    species = species_query.order_by(
        Species.kingdom, Species.phylum_sortkey, Species.klass_sortkey,
        Species.order_sortkey, Species.family_sortkey, Species.genus_sortkey,
        Species.sortkey).options(
            joinedload(common.Language.valuesets).joinedload(
                common.ValueSet.values))
    # Nested dict phylum -> class -> order -> family -> genus -> species count.
    coverage = {}
    # (node id, rank label, sub-rank label) for every non-leaf node below
    # the kingdom level; consumed after the loops to build coverage data.
    nodes = []

    # NOTE(review): ngenus is incremented below but never read in this
    # function.
    ngenus = 0
    for kingdom, items1 in itertools.groupby(species, lambda s: s.kingdom):
        # One unnamed root per kingdom.
        node1 = newick.Node()
        for phylum, items2 in itertools.groupby(items1, lambda s: s.phylum):
            # Node ids are the rank names from phylum downwards joined by '_'.
            nid = '_'.join((phylum, ))
            nodes.append((nid, 'Phylum', 'classes'))
            if phylum not in coverage:
                coverage[phylum] = {}

            node2 = newick.Node(nid)
            for klass, items3 in itertools.groupby(items2, lambda s: s.klass):
                nid = '_'.join((phylum, klass))
                nodes.append((nid, 'Class', 'orders'))
                if klass not in coverage[phylum]:
                    coverage[phylum][klass] = {}

                node3 = newick.Node(nid)
                for order, items4 in itertools.groupby(items3,
                                                       lambda s: s.order):
                    nid = '_'.join((phylum, klass, order))
                    nodes.append((nid, 'Order', 'families'))
                    if order not in coverage[phylum][klass]:
                        coverage[phylum][klass][order] = {}

                    node4 = newick.Node(nid)
                    for family, items5 in itertools.groupby(
                            items4, lambda s: s.family):
                        nid = '_'.join((phylum, klass, order, family))
                        nodes.append((nid, 'Family', 'genera'))
                        if family not in coverage[phylum][klass][order]:
                            coverage[phylum][klass][order][family] = {}

                        node5 = newick.Node(nid)
                        for genus, items6 in itertools.groupby(
                                items5, lambda s: s.genus):
                            ngenus += 1
                            nid = '_'.join(
                                (phylum, klass, order, family, genus))
                            nodes.append((nid, 'Genus', 'species'))

                            # Materialize the group: it is iterated several
                            # times below, and a groupby sub-iterator can
                            # only be consumed once.
                            items6 = list(items6)
                            coverage[phylum][klass][order][family][
                                genus] = len(items6)

                            colormap.update([s.family for s in items6])
                            colormap2.update([s.klass for s in items6])
                            # Leaf names embed the species id as
                            # '<name with underscores>{__id__<id>}'.
                            node6 = newick.Node.create(
                                name=nid,
                                descendants=[
                                    newick.Node(
                                        '%s{__id__%s}' %
                                        (s.name.replace(' ', '_'), s.id))
                                    for s in items6
                                ])
                            node_data.update({
                                s.id: species_node(s, req, experiment_count(s))
                                for s in items6
                            })
                            node5.add_descendant(node6)
                        node4.add_descendant(node5)
                    node3.add_descendant(node4)
                node2.add_descendant(node3)
            node1.add_descendant(node2)
        ntrees.append(node1)

    # Add one coverage entry per recorded non-leaf node, keyed by node id.
    node_data.update({
        nid: coverage_data(req, nid, rank, subranks, coverage)
        for nid, rank, subranks in nodes
    })

    res = dict(
        count_leafs=count_leafs,
        newick=newick.dumps(ntrees),
        # Families, most frequent first, are paired with colors; each value
        # is (color, data URL of an icon named after the color with '#'
        # replaced by 'c').
        colormap={
            k[0]: (v, svg.data_url(svg.icon(v.replace('#', 'c'))))
            for k, v in zip(colormap.most_common(),
                            color.qualitative_colors(len(colormap)))
        },
        # Same for classes, with '#' replaced by 's'.
        # NOTE(review): the palette size is len(colormap), not
        # len(colormap2); zip() stops at the shorter sequence -- confirm
        # this is intentional.
        colormap2={
            k[0]: (v, svg.data_url(svg.icon(v.replace('#', 's'))))
            for k, v in zip(colormap2.most_common(),
                            color.qualitative_colors(len(colormap), set='tol'))
        },
        node_data=node_data)
    # Edge colors reuse the per-class colors (first tuple element only).
    res['edgecolors'] = {k: v[0] for k, v in res['colormap2'].items()}
    return res
示例#39
0
def test_all_removal():
    """Dropping both names and lengths leaves only the bare topology."""
    root = loads('((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;')[0]

    # Strip every label first, then every branch length.
    root.remove_names()
    root.remove_lengths()

    assert dumps(root) == '((,(,)));'