示例#1
0
def clustal_align_protein(rec_1, rec_2, work_dir):
    """Align the two given proteins with clustalw.

    The two records are written to ``prot-start.fasta`` inside *work_dir*,
    clustalw is run on that file (sequence type ``PROTEIN``, output order
    ``INPUT``), and the resulting ``prot.aln`` file is parsed back in.

    Args:
        rec_1: First sequence record, passed to ``SeqIO.write``.
        rec_2: Second sequence record, passed to ``SeqIO.write``.
        work_dir: Directory in which the intermediate files are created.

    Returns:
        The alignment formatted as a FASTA string.
    """
    fasta_file = op.join(work_dir, "prot-start.fasta")
    align_file = op.join(work_dir, "prot.aln")

    # Close the handle explicitly so the FASTA data is guaranteed to be
    # flushed to disk before clustalw reads the file; relying on garbage
    # collection to do this is not safe on every interpreter.
    fasta_handle = open(fasta_file, "w")
    try:
        SeqIO.write((rec_1, rec_2), fasta_handle, "fasta")
    finally:
        fasta_handle.close()

    clustal_cl = Clustalw.MultipleAlignCL(fasta_file, command=CLUSTALW_BIN)
    clustal_cl.set_output(align_file, output_order="INPUT")
    clustal_cl.set_type("PROTEIN")
    Clustalw.do_alignment(clustal_cl)

    # Re-read the alignment from the output file and release the handle as
    # soon as parsing is finished, even if parsing fails.
    aln_handle = open(clustal_cl.output_file)
    try:
        alignment = AlignIO.read(aln_handle, "clustal")
    finally:
        aln_handle.close()

    sys.stderr.write("\tDoing clustalw alignment: %s\n" % clustal_cl)
    return alignment.format("fasta")
def Align_Results(OutputFileName, results_dir=None):
    """Run clustalw on a consensus FASTA file and return the alignment.

    The input is ``<results_dir>/<OutputFileName>.FASTA`` and the alignment
    is written to ``<results_dir>/<OutputFileName>.ALN``.  Both paths are
    printed before the alignment is run.

    Args:
        OutputFileName: Base name (without extension) shared by the input
            FASTA file and the output ALN file.
        results_dir: Directory holding the FASTA file and receiving the ALN
            file.  Defaults to the original hard-coded Consensus_Results
            directory, so existing callers are unaffected.

    Returns:
        The object returned by ``Clustalw.do_alignment`` for the command
        line (previously this was computed and then discarded).
    """
    import os

    from Bio import Clustalw
    from Bio.Clustalw import MultipleAlignCL

    if results_dir is None:
        results_dir = ("/users/rwbarrettemac/bioinformatics/pythonfolders/"
                       "FMDanalysisScript/FMDserotypingARRAY/"
                       "Consensus_Results")

    FileIN_Name = "%s/%s.FASTA" % (results_dir, OutputFileName)
    FileOUT_ALN = "%s/%s.ALN" % (results_dir, OutputFileName)
    print(FileIN_Name)
    print(FileOUT_ALN)

    cline = MultipleAlignCL(os.path.join(os.curdir, FileIN_Name))
    cline.set_output(FileOUT_ALN)

    # The command-line object only describes how to invoke clustalw; it is
    # not an open resource and has no close() method, so the former trailing
    # cline.close() call (an AttributeError) has been removed.
    return Clustalw.do_alignment(cline)
示例#3
0
    def align(self):
        """Align the stored sequences using CLUSTAL, storing the results.

        Does nothing when ``self.sequences`` is empty.  Otherwise the
        sequences are written to ``self.tmpFileName``, clustalw (located via
        ``self.clustalPath``) is run on that file, and the aligned records
        are stored on the instance:

        * ``self.alignments`` -- list of ``fasta.Record`` objects, one per
          aligned sequence, with ``title`` and ``sequence`` filled in.
        * ``self.alignmentLength`` -- length reported by the alignment.

        The temporary file is removed afterwards, including when the
        alignment step raises.
        """
        if len(self.sequences) == 0:
            return

        self.sequencesToFile(self.tmpFileName)
        try:
            commandLine = MultipleAlignCL(
                os.path.join(os.curdir, self.tmpFileName), self.clustalPath)
            alignment = Clustalw.do_alignment(commandLine)
            allRecords = alignment.get_all_seqs()
            length = alignment.get_alignment_length()

            alignmentStrings = []
            for record in allRecords:
                f = fasta.Record()
                f.title = record.description.strip()
                f.sequence = record.seq.tostring()
                alignmentStrings.append(f)
        finally:
            # Always clean up the temporary input file, so a failed clustalw
            # run does not leave it behind.
            os.remove(self.tmpFileName)

        self.alignments = alignmentStrings
        self.alignmentLength = length
示例#4
0
# biopython
from Bio.Alphabet import IUPAC
from Bio import Clustalw
from Bio.Clustalw import MultipleAlignCL
from Bio.Align import AlignInfo
from Bio.SubsMat import FreqTable

# Build the clustalw command line for the example FASTA file.
# clustalw is expected to be found on the PATH; when it is installed
# somewhere else, give MultipleAlignCL the full path to the executable
# as its second argument.
cline = MultipleAlignCL(os.path.join(os.curdir, 'opuntia.fasta'))
cline.set_output('test.aln')

# Run clustalw; the result comes back as an alignment object.
alignment = Clustalw.do_alignment(cline)

# Pull out the individual records and show the first one.
all_records = alignment.get_all_seqs()

print('description: %s' % (all_records[0].description,))
print('sequence: %s' % (all_records[0].seq,))

# Report how long the alignment is.
print('length %s' % (alignment.get_alignment_length(),))

print(alignment)

# Wrap the alignment so summary information can be derived from it.
summary_align = AlignInfo.SummaryInfo(alignment)
# Abort early when no clustalw executable was located.
# (clustalw_exe is assigned outside this section.)
if not clustalw_exe:
    raise MissingExternalDependencyError(\
        "Install clustalw or clustalw2 if you want to use Bio.Clustalw.")

#################################################################

print "Checking error conditions"
print "========================="

# Case 1: the input path does not exist, so running the alignment is
# expected to raise IOError.
# NOTE(review): the label printed below says "Empty file", but the check
# actually uses a path that is asserted not to exist.
print "Empty file"
input_file = "does_not_exist.fasta"
assert not os.path.isfile(input_file)
cline = MultipleAlignCL(input_file, command=clustalw_exe)
try:
    align = Clustalw.do_alignment(cline)
    # Reaching this line means no exception was raised, which is a failure.
    assert False, "Should have failed, returned %s" % repr(align)
except IOError, err:
    print "Failed (good)"
    # The error text differs between platforms and Python versions, so
    # several messages are accepted:
    # Python 2.3 on Windows gave (0, 'Error')
    # Python 2.5 on Windows gives [Errno 0] Error
    assert "Cannot open sequence file" in str(err) \
           or "not produced" in str(err) \
           or str(err) == "[Errno 0] Error" \
           or str(err) == "(0, 'Error')", str(err)

# Case 2: an input file that holds exactly one sequence.  The two asserts
# below confirm the fixture exists and contains a single FASTA record.
print
print "Single sequence"
input_file = "Fasta/f001"
assert os.path.isfile(input_file)
assert len(list(SeqIO.parse(input_file, "fasta"))) == 1