Exemplo n.º 1
0
 def test_Dialign_simple(self):
     """Run Dialign on the first input file using default options."""
     # The input file is handed over as a constructor keyword argument.
     cline = DialignCommandline(dialign_exe, input=self.infile1)
     expected = dialign_exe + " Fasta/f002"
     self.assertEqual(str(cline), expected)
     out, err = cline()
     # The run must be silent on both streams ...
     self.assertEqual(err, "")
     self.assertEqual(out, "")
     # ... and the first output file must exist afterwards.
     produced = os.path.exists(self.outfile1)
     self.assertTrue(produced)
Exemplo n.º 2
0
 def test_Dialign_simple_with_MSF_output(self):
     """Run Dialign on the first input file with the msf switch enabled."""
     # Options are configured through attribute assignment in this test.
     cline = DialignCommandline(dialign_exe)
     cline.input = self.infile1
     cline.msf = True
     expected = dialign_exe + " -msf Fasta/f002"
     self.assertEqual(str(cline), expected)
     out, err = cline()
     # The run must be silent on both streams.
     self.assertEqual(err, "")
     self.assertEqual(out, "")
     # Both output files are expected to exist after the run.
     first_present = os.path.exists(self.outfile1)
     self.assertTrue(first_present)
     second_present = os.path.exists(self.outfile2)
     self.assertTrue(second_present)
Exemplo n.º 3
0
 def test_Dialign_simple_with_options(self):
     """Run Dialign on the first input file with max_link and stars set."""
     cline = DialignCommandline(dialign_exe)
     # Options go through set_parameter, named both with and without
     # the leading dash.
     for option, value in (("input", self.infile1),
                           ("-max_link", True),
                           ("stars", 4)):
         cline.set_parameter(option, value)
     expected = dialign_exe + " -max_link -stars 4 Fasta/f002"
     self.assertEqual(str(cline), expected)
     out, err = cline()
     # The run must be silent on both streams and leave the output file.
     self.assertEqual(err, "")
     self.assertEqual(out, "")
     produced = os.path.exists(self.outfile1)
     self.assertTrue(produced)
Exemplo n.º 4
0
 def test_Dialign_simple(self):
     """Simple round-trip through app with infile.

     Builds the command line from keyword arguments, runs it through
     Application.generic_run and checks the streams, the return code,
     the output file and the command line recorded on the result.
     """
     # Test using keyword arguments:
     cmdline = DialignCommandline(dialign_exe, input=self.infile1)
     self.assertEqual(str(cmdline), dialign_exe + " Fasta/f002")
     result, stdout, stderr = Application.generic_run(cmdline)
     # If there is a problem, the output can be very helpful to see,
     # so check this before looking at the return code:
     self.assertEqual(stderr.read(), "")
     self.assertEqual(stdout.read(), "")
     self.assertEqual(result.return_code, 0)
     # assertTrue replaces the deprecated assert_ alias.
     self.assertTrue(os.path.exists(self.outfile1))
     self.assertEqual(str(result._cl), str(cmdline))
Exemplo n.º 5
0
 def test_Dialign_simple_with_MSF_output(self):
     """Simple round-trip through app with infile, output MSF.

     Sets the options through properties, runs the command line through
     Application.generic_run and checks the streams, the return code,
     both output files and the command line recorded on the result.
     """
     cmdline = DialignCommandline(dialign_exe)
     # Test with properties
     cmdline.input = self.infile1
     cmdline.msf = True
     self.assertEqual(str(cmdline), dialign_exe + " -msf Fasta/f002")
     result, stdout, stderr = Application.generic_run(cmdline)
     self.assertEqual(stdout.read(), "")
     self.assertEqual(stderr.read(), "")
     self.assertEqual(result.return_code, 0)
     # assertTrue replaces the deprecated assert_ alias.
     self.assertTrue(os.path.exists(self.outfile1))
     self.assertTrue(os.path.exists(self.outfile2))
     self.assertEqual(str(result._cl), str(cmdline))
Exemplo n.º 6
0
 def test_Dialign_simple_with_options(self):
     """Simple round-trip through app with infile and options.

     Sets the options through set_parameter, runs the command line
     through Application.generic_run and checks the streams, the return
     code, the output file and the command line recorded on the result.
     """
     cmdline = DialignCommandline(dialign_exe)
     cmdline.set_parameter("input", self.infile1)
     cmdline.set_parameter("-max_link", True)
     cmdline.set_parameter("stars", 4)
     # The open parenthesis already continues the line; no backslash needed.
     self.assertEqual(str(cmdline), dialign_exe +
                      " -max_link -stars 4 Fasta/f002")
     result, stdout, stderr = Application.generic_run(cmdline)
     self.assertEqual(stderr.read(), "")
     self.assertEqual(stdout.read(), "")
     self.assertEqual(result.return_code, 0)
     # assertTrue replaces the deprecated assert_ alias.
     self.assertTrue(os.path.exists(self.outfile1))
     self.assertEqual(str(result._cl), str(cmdline))
Exemplo n.º 7
0
 def test_Dialign_complex_command_line(self):
     """Run Dialign with several switches and valued options combined."""
     cline = DialignCommandline(dialign_exe)
     # Options are applied in this order; the assertion below shows the
     # order in which they appear in the rendered command line.
     settings = (
         ("input", self.infile1),
         ("-nt", True),
         ("-thr", 4),
         ("stars", 9),
         ("-ow", True),
         ("mask", True),
         ("-cs", True),
     )
     for option, value in settings:
         cline.set_parameter(option, value)
     expected = dialign_exe + " -cs -mask -nt -ow -stars 9 -thr 4 Fasta/f002"
     self.assertEqual(str(cline), expected)
     out, err = cline()
     self.assertEqual(err, "")
     produced = os.path.exists(self.outfile1)
     self.assertTrue(produced)
     # With these options the tool is expected to report on stdout.
     self.assertTrue(out.startswith(" e_len = 633"))
Exemplo n.º 8
0
 def test_Dialign_complex_command_line(self):
     """Round-trip through app with complex command line.

     Sets several switches and valued options through set_parameter,
     runs the command line through Application.generic_run and checks
     stderr, the return code, the output file, the start of stdout and
     the command line recorded on the result.
     """
     cmdline = DialignCommandline(dialign_exe)
     cmdline.set_parameter("input", self.infile1)
     cmdline.set_parameter("-nt", True)
     cmdline.set_parameter("-thr", 4)
     cmdline.set_parameter("stars", 9)
     cmdline.set_parameter("-ow", True)
     cmdline.set_parameter("mask", True)
     cmdline.set_parameter("-cs", True)
     # The open parenthesis already continues the line; no backslash needed.
     self.assertEqual(str(cmdline), dialign_exe +
                      " -cs -mask -nt -ow -stars 9 -thr 4 Fasta/f002")
     result, stdout, stderr = Application.generic_run(cmdline)
     self.assertEqual(stderr.read(), "")
     self.assertEqual(result.return_code, 0)
     # assertTrue replaces the deprecated assert_ alias.
     self.assertTrue(os.path.exists(self.outfile1))
     self.assertTrue(stdout.read().startswith(" e_len = 633"))
     self.assertEqual(str(result._cl), str(cmdline))
from Bio.Align.Applications import DialignCommandline
# Build a Dialign command line for the input file example.fasta with the
# fn option set to "aligned" and the fa switch turned on.
dialign_cline = DialignCommandline(input="example.fasta",
                                   fn="aligned",
                                   fa=True)

# Printing the command line object shows the command it represents.
print(dialign_cline)

# Expected output (the input file name matches the one passed above):
# dialign2-2 -fa -fn aligned example.fasta
def run_multiple_sequence_alignment(records, workdir, msa):
    """Run a multiple sequence alignment with the tool named by ``msa``.

    The sequences are written to ``msa.fasta`` inside ``workdir``. The
    alignment is expected in ``msa.aln`` in the same directory, and tools
    that accept a guide tree file name are pointed at ``msa.dnd``. For
    MAFFT and ProbCons the alignment is taken from the tool's standard
    output and written to ``msa.aln`` by this function.

    The user can choose between emma, clustalw (old and busted), clustal
    omega (recommended for proteins and also uses HMM), MUSCLE or MAFFT
    (recommended for nucleotide data, and MUSCLE should be pretty fast),
    T-Coffee (good for distantly related sequences), PRANK, MSAProbs,
    ProbCons and Dialign.

    FUTURE: Add more iterative methods to improve runtime? Add HMMER?
    HHpred is also quite fast.

    Args:
        records: Sequence records, passed to ``SeqIO.write`` in FASTA
            format.
        workdir: Directory in which the input and output files are placed.
        msa: Case-insensitive algorithm name. Accepted spellings: emma,
            clustalo / clustal_omega / clustal-omega, t-coffee / t_coffee,
            muscle, mafft, clustalw / clustalw2, prank, msaprobs,
            probcons, dialign.

    Raises:
        ValueError: If ``msa`` is not one of the accepted names. The check
            runs before any file is written.
    """
    # Every accepted spelling mapped onto one canonical algorithm key.
    aliases = {
        "emma": "emma",
        "clustalo": "clustalo",
        "clustal_omega": "clustalo",
        "clustal-omega": "clustalo",
        "t-coffee": "t_coffee",
        "t_coffee": "t_coffee",
        "muscle": "muscle",
        "mafft": "mafft",
        "clustalw": "clustalw",
        "clustalw2": "clustalw",
        "prank": "prank",
        "msaprobs": "msaprobs",
        "probcons": "probcons",
        "dialign": "dialign",
    }
    algorithm = aliases.get(msa.lower())
    if algorithm is None:
        # Fail before touching the file system so that a typo in the
        # algorithm name leaves no stray files behind.
        raise ValueError(
            "Unsupported Multiple Sequence Alignment algorithm %r; "
            "supported algorithms are: %s"
            % (msa, ", ".join(sorted(set(aliases.values())))))

    # get filename for fasta file
    sequence_list_file = os.path.join(workdir, "msa.fasta")
    # write sequences
    SeqIO.write(records, sequence_list_file, "fasta")
    # prepare filenames for MSA output
    outfile = os.path.join(workdir, "msa.aln")
    treefile = os.path.join(workdir, "msa.dnd")

    # Prepare command line according to chosen algorithm
    if algorithm == "emma":  # output is fasta
        print("Aligning by emma")
        cmd = EmmaCommandline(sequence=sequence_list_file,
                              outseq=outfile,
                              dendoutfile=treefile)
    elif algorithm == "clustalo":
        print("Aligning by Clustal Omega")
        cmd = ClustalOmegaCommandline(infile=sequence_list_file,
                                      outfile=outfile,
                                      verbose=True,
                                      auto=True,
                                      guidetree_out=treefile,
                                      outfmt="clu",
                                      force=True)
    elif algorithm == "t_coffee":  # should output tree file automatically
        print("Aligning by T-Coffeee")
        cmd = TCoffeeCommandline(infile=sequence_list_file,
                                 output="clustalw",
                                 outfile=outfile)
    elif algorithm == "muscle":
        print("Aligning by MUSCLE")
        # clw=True is not passed here (an earlier variant that used it
        # is disabled).
        cmd = MuscleCommandline(input=sequence_list_file,
                                out=outfile,
                                tree2=treefile)
    elif algorithm == "mafft":  # probably gonna save tree as input.tree
        print("Aligning by MAFFT")
        cmd = MafftCommandline(input=sequence_list_file,
                               clustalout=True,
                               treeout=True)
    elif algorithm == "clustalw":
        print("Aligning by ClustalW2")
        cmd = ClustalwCommandline("clustalw",
                                  infile=sequence_list_file,
                                  outfile=outfile,
                                  tree=True,
                                  newtree=treefile)
    elif algorithm == "prank":
        # output is fasta, tree will be outputted to .dnd file?
        print("Aligning by PRANK")
        cmd = PrankCommandline(d=sequence_list_file,
                               o=outfile,
                               f=8,
                               showtree=True,
                               noxml=True)
    elif algorithm == "msaprobs":  # doesn't use a guide tree
        print("Aligning by MSAprobs")
        cmd = MSAProbsCommandline(infile=sequence_list_file,
                                  outfile=outfile,
                                  clustalw=True)
    elif algorithm == "probcons":
        print("Aligning by ProbCons")
        cmd = ProbconsCommandline(input=sequence_list_file, clustalw=True)
    else:  # algorithm == "dialign"
        # phylip tree should be created automatically, names are a mystery?
        # NOTE(review): Dialign may derive its output file names from fn
        # by appending extensions, so outfile might not exist under this
        # exact name -- TODO confirm.
        print("Aligning by Dialign")
        cmd = DialignCommandline(input=sequence_list_file, cw=True, fn=outfile)

    # Execute the command
    stdout, stderr = cmd()
    # For algorithms that don't have an option to save output to file,
    # capture the stdout
    if algorithm in ("mafft", "probcons"):
        with open(outfile, "w") as handle:
            handle.write(stdout)