Пример #1
0
    def check(self, path, length):
        """Run FastTree on a FASTA file and sanity-check the tree it returns.

        ``path`` is the FASTA file passed to FastTree (together with
        ``nt=True``) and ``length`` is the number of records that file is
        expected to contain.  The check verifies that the command line
        round-trips through ``repr``/``eval``, that stderr begins with
        "FastTree", that the Newick output has at least one named clade
        with no duplicate names, and that every pair of adjacent terminals
        is a positive distance apart.
        """
        records = SeqIO.to_dict(SeqIO.parse(path, "fasta"))
        self.assertEqual(len(records), length)

        # The command line is built with keyword arguments; any filename
        # containing spaces should get quoted automatically.
        cline = _Fasttree.FastTreeCommandline(fasttree_exe, input=path, nt=True)
        self.assertEqual(str(eval(repr(cline))), str(cline))
        stdout_text, stderr_text = cline()
        self.assertTrue(stderr_text.strip().startswith("FastTree"))
        tree = Phylo.read(StringIO(stdout_text), "newick")

        def index_named_clades(phylo_tree):
            """Map each clade name to its clade, rejecting duplicate names."""
            by_name = {}
            for node in phylo_tree.find_clades():
                label = node.name
                if not label:
                    continue
                if label in by_name:
                    raise ValueError("Duplicate key: %s" % label)
                by_name[label] = node
            return by_name

        named_clades = index_named_clades(tree)
        self.assertGreater(len(named_clades), 0)

        def adjacent_terminal_distances(phylo_tree):
            """Return the distances between consecutive terminal clades."""
            leading, trailing = itertools.tee(
                phylo_tree.find_clades(terminal=True))
            next(trailing)  # Offset the second iterator by one terminal
            return [phylo_tree.distance(first, second)
                    for first, second in zip(leading, trailing)]

        for gap in adjacent_terminal_distances(tree):
            self.assertGreater(gap, 0.0)
Пример #2
0
    def phylogeny(self):
        """Build a tree with FastTree and copy it into ``self.qiime_out``.

        Runs the ``fasttree`` executable on
        ``self.multiple_sequence_alignment_2`` with the output file set to
        ``<dir_path>/temp/<time>_phylotree``, then copies that file to
        ``<qiime_out>/<time>_qiime_phylo.phy``.

        Sets ``self.phylogenetic_tree`` and ``self.qiime_phylo`` to those
        two paths.
        """
        self.phylogenetic_tree = os.path.join(self.dir_path, "temp",
                                              "%s_phylotree" % (self.time))
        cmd = _Fasttree.FastTreeCommandline(
            'fasttree',
            input=os.path.abspath(self.multiple_sequence_alignment_2),
            out=os.path.abspath(self.phylogenetic_tree))

        self.qiime_phylo = os.path.join(self.qiime_out,
                                        "%s_qiime_phylo.phy" % (self.time))
        # Calling the command-line object runs FastTree.  The returned
        # (stdout, stderr) pair is ignored because the tree is written to
        # the ``out`` file, which is what gets copied below.
        cmd()
        copyfile(self.phylogenetic_tree, self.qiime_phylo)
Пример #3
0
# Create a temporary FASTA file with a space in its name by converting the
# PHYLIP example alignment.  The ``with`` block guarantees the handle is
# closed even if the conversion raises.
temp_filename_with_spaces = "Clustalw/temp horses.fasta"
with open(temp_filename_with_spaces, "w") as handle:
    SeqIO.write(SeqIO.parse("Phylip/hennigian.phy", "phylip"), handle, "fasta")

# Run FastTree once on a file with an ordinary name and once on a file whose
# name contains a space.
for input_file in ["Quality/example.fasta", "Clustalw/temp horses.fasta"]:
    # Load the input records so their count can be reported below.
    input_records = SeqIO.to_dict(SeqIO.parse(input_file, "fasta"))
    print("")
    print("Calling fasttree on %s (with %i records)" \
          % (repr(input_file), len(input_records)))

    # Any filenames with spaces should get escaped with quotes automatically.
    # Using keyword arguments here.
    cline = _Fasttree.FastTreeCommandline(fasttree_exe, input=input_file, nt=True)
    # The command line object must survive a repr()/eval() round trip.
    assert str(eval(repr(cline)))==str(cline)

    # Run the program; stderr is expected to start with "FastTree".
    out, err = cline()
    assert err.strip().startswith("FastTree")

    print("")
    print("Checking generation of tree terminals")
    # The captured stdout is parsed as a Newick tree.
    tree = Phylo.read(StringIO(out), 'newick')

    def lookup_by_names(tree):
        names = {}
        for clade in tree.find_clades():
            if clade.name:
                if clade.name in names:
                    raise ValueError("Duplicate key: %s" % clade.name)
Пример #4
0
        # Per-cluster tree output paths: one for the protein tree and one
        # for the nucleotide (DNA) tree.
        protein_tree_output = protein_tree_folder + "/" + cluster + ".tre"
        nucleotide_tree_output = dna_tree_folder + "/" + cluster + ".tre"

        # Write the unaligned and aligned sequences for this cluster in
        # FASTA format.
        unaligned_DNA.writeToFile(dna_unaligned_folder + "/" + cluster +
                                  ".fna",
                                  format="fasta")
        unaligned_AA.writeToFile(protein_unaligned_folder + "/" + cluster +
                                 ".faa",
                                 format="fasta")
        aligned_DNA.writeToFile(aligned_dna_file, format="fasta")
        aligned_AA.writeToFile(aligned_aa_file, format="fasta")

        # Build FastTree command lines for both the DNA and the protein
        # alignments.  Both pass slow=True; only the DNA run passes nt=True.
        make_dna_tree = _Fasttree.FastTreeCommandline(
            cmd='FastTree',
            input=aligned_dna_file,
            out=nucleotide_tree_output,
            slow=True,
            nt=True)
        make_aa_tree = _Fasttree.FastTreeCommandline(cmd="FastTree",
                                                     input=aligned_aa_file,
                                                     out=protein_tree_output,
                                                     slow=True)

        # Run both commands; each call returns a pair that is unpacked
        # here as (out, err).
        dna_out, dna_err = make_dna_tree()
        aa_out, aa_err = make_aa_tree()

        #Make protein trees using FastTree
        #protein_tree = build_tree_fasttree(aligned_AA, PROTEIN, best_tree=True)
        #protein_tree_output.write(protein_tree.getNewick(with_distances=True))
        #protein_tree.writeToFile(protein_tree_output)
        #protein_tree_output.close()
Пример #5
0
    ("Quality/example.fasta", "temp_test.tree"),
    ("Clustalw/temp horses.fasta", "temp_test.tree"),
]:
    input_records = SeqIO.to_dict(SeqIO.parse(input_file, "fasta"),
                                  lambda rec: rec.id)
    if os.path.isfile(output_file):
        os.remove(output_file)
    print
    print "Calling fasttree on %s (with %i records)" \
          % (repr(input_file), len(input_records))
    print "using output file %s" % repr(output_file)

    # Any filenames with spaces should get escaped with quotes automatically.
    # Using keyword arguments here.
    cline = _Fasttree.FastTreeCommandline(fasttree_exe,
                                          input=input_file,
                                          out=output_file)
    assert str(eval(repr(cline))) == str(cline)

    #print cline
    out, err = cline()
    assert err.strip().startswith("FastTree")

    print
    print "Checking generation of tree terminals"
    tree = Phylo.read(output_file, 'newick')

    def lookup_by_names(tree):
        names = {}
        for clade in tree.find_clades():
            if clade.name: