Пример #1
0
def clustal_align_protein(rec_1, rec_2, work_dir):
    """Align the two given proteins with clustalw.

    The two records are written to ``prot-start.fasta`` inside *work_dir*,
    aligned by the clustalw binary named by ``CLUSTALW_BIN`` (protein mode,
    output kept in input order), and the resulting ``prot.aln`` file is read
    back.

    Args:
        rec_1: first record handed to ``SeqIO.write``.
        rec_2: second record handed to ``SeqIO.write``.
        work_dir: directory in which the intermediate FASTA and alignment
            files are created.

    Returns:
        The alignment rendered as a FASTA-formatted string.
    """
    fasta_file = op.join(work_dir, "prot-start.fasta")
    align_file = op.join(work_dir, "prot.aln")

    # Close (and therefore flush) the input before the external program
    # reads it, instead of relying on garbage collection of the handle.
    fasta_handle = open(fasta_file, "w")
    try:
        SeqIO.write((rec_1, rec_2), fasta_handle, "fasta")
    finally:
        fasta_handle.close()

    clustal_cl = Clustalw.MultipleAlignCL(fasta_file, command=CLUSTALW_BIN)
    clustal_cl.set_output(align_file, output_order="INPUT")
    clustal_cl.set_type("PROTEIN")

    # Report the command before running it, so the message is still emitted
    # when clustalw fails.
    sys.stderr.write("\tDoing clustalw alignment: %s\n" % clustal_cl)
    Clustalw.do_alignment(clustal_cl)

    aln_handle = open(clustal_cl.output_file)
    try:
        alignment = AlignIO.read(aln_handle, "clustal")
    finally:
        aln_handle.close()
    return alignment.format("fasta")
Пример #2
0
 def loadAlignment( self, path ):
    """Read the .aln alignment file at `path` and store its data on self.

    Sets `allseq` (all sequences), `summary` (an AlignInfo.SummaryInfo of
    the alignment) and `l` (the alignment length), then calls
    insertLoadedBioAlignment().
    """
    parsed = Clustalw.parse_file( path )
    self.allseq = parsed.get_all_seqs()
    self.summary = AlignInfo.SummaryInfo(parsed)
    self.l = parsed.get_alignment_length()
    self.insertLoadedBioAlignment()
def Align_Results(OutputFileName, results_dir="/users/rwbarrettemac/bioinformatics/pythonfolders/FMDanalysisScript/FMDserotypingARRAY/Consensus_Results"):
    """Run ClustalW on a consensus FASTA file and write the .ALN result.

    Reads ``<results_dir>/<OutputFileName>.FASTA`` and asks ClustalW to write
    its alignment to ``<results_dir>/<OutputFileName>.ALN``.  Both paths are
    printed before the alignment is run.

    Args:
        OutputFileName: base name (without extension) shared by the input
            FASTA file and the output ALN file.
        results_dir: directory holding both files.  Defaults to the
            Consensus_Results folder that used to be hard-coded here.

    Returns:
        The alignment object produced by ``Clustalw.do_alignment``.
    """
    import os

    from Bio import Clustalw
    from Bio.Clustalw import MultipleAlignCL

    FileIN_Name = "%s/%s.FASTA" % (results_dir, OutputFileName)
    FileOUT_ALN = "%s/%s.ALN" % (results_dir, OutputFileName)
    print(FileIN_Name)
    print(FileOUT_ALN)

    # No clustalw path is passed, so the command line's default executable
    # is used.
    cline = MultipleAlignCL(os.path.join(os.curdir, FileIN_Name))
    cline.set_output(FileOUT_ALN)

    # The command-line object holds no open resource, so there is nothing
    # to close afterwards; hand the parsed alignment back to the caller
    # instead of discarding it.
    return Clustalw.do_alignment(cline)
Пример #4
0
    def loadAlignment( self, alignmentFile ):
        """Fill this object from the CLUSTAL .aln file `alignmentFile`.

        Records the alignment length and, for every sequence in the file,
        appends one fasta.Record holding the gapped sequence to
        self.alignments and one holding the same sequence with the "-"
        characters removed to self.sequences.
        """
        # NOTE: the CLUSTAL parser does not cope well with Windows line breaks.
        parsed = Clustalw.parse_file(alignmentFile)
        records = parsed.get_all_seqs()
        self.alignmentLength = parsed.get_alignment_length()

        for record in records:
            ungapped = fasta.Record()
            gapped = fasta.Record()

            ungapped.title = record.description
            gapped.title = record.description

            aligned_text = record.seq.tostring()
            gapped.sequence = aligned_text
            ungapped.sequence = aligned_text.replace("-","")

            self.alignments.append( gapped )
            self.sequences.append( ungapped )
Пример #5
0
    def align(self):
        """Align self.sequences using CLUSTAL, storing the results.

        Does nothing when there are no sequences.  Otherwise the sequences
        are written to self.tmpFileName via self.sequencesToFile, CLUSTAL
        (at self.clustalPath) is run on that file, and the outcome is stored
        as:

        * self.alignments -- one fasta.Record per aligned sequence, titled
          with the stripped record description;
        * self.alignmentLength -- the length of the alignment.

        The temporary input file is removed afterwards, including when the
        alignment step raises.
        """
        if not self.sequences:
            return

        self.sequencesToFile( self.tmpFileName )
        try:
            commandLine = MultipleAlignCL(os.path.join(os.curdir, self.tmpFileName), self.clustalPath)
            alignment = Clustalw.do_alignment(commandLine)
            allRecords = alignment.get_all_seqs()
            length = alignment.get_alignment_length()

            alignmentStrings = []
            for record in allRecords:
                f = fasta.Record()
                f.title = record.description.strip()
                f.sequence = record.seq.tostring()
                alignmentStrings.append( f )

            self.alignments = alignmentStrings
            self.alignmentLength = length
        finally:
            # Always clean up the temporary input, even if CLUSTAL or the
            # parsing of its output failed.
            os.remove(self.tmpFileName)
0
#!/usr/bin/env python
"""Example of generating a substitution matrix from an alignment.
"""
# standard library
import sys

# Biopython
from Bio import SubsMat
from Bio import Clustalw
from Bio.Alphabet import IUPAC
from Bio.Align import AlignInfo

# get an alignment object from a Clustalw alignment output
c_align = Clustalw.parse_file('protein.aln', IUPAC.protein)
summary_align = AlignInfo.SummaryInfo(c_align)

# get a replacement dictionary and accepted replacement matrix
# exclude all amino acids that aren't charged polar
replace_info = summary_align.replacement_dictionary(["G", "A", "V", "L", "I",
                                                     "M", "P", "F", "W", "S",
                                                     "T", "N", "Q", "Y", "C"])

my_arm = SubsMat.SeqMat(replace_info)

print replace_info

my_lom = SubsMat.make_log_odds_matrix(my_arm)

print 'log_odds_mat:', my_lom

my_lom.print_mat()
Пример #7
0
assert alignment[::-1][2].id == "mixed"

del alignment
del letters

print "testing reading and writing clustal format..."
test_dir = os.path.join(os.getcwd(), 'Clustalw')
test_names = ['opuntia.aln', 'cw02.aln']

test_files = []
for name in test_names:
    test_files.append(os.path.join(test_dir, name))

for test_file in test_files:
    # parse the alignment file and get an aligment object
    alignment = Clustalw.parse_file(test_file)

    # print the alignment back out
    print alignment

alignment = Clustalw.parse_file(os.path.join(test_dir, test_names[0]))

# test the base alignment stuff
print 'all_seqs...'
for seq_record in alignment:
    print 'description:', seq_record.description
    print 'seq:', repr(seq_record.seq)
print 'length:', alignment.get_alignment_length()

print 'Calculating summary information...'
align_info = AlignInfo.SummaryInfo(alignment)
Пример #8
0
# biopython
from Bio.Alphabet import IUPAC
from Bio import Clustalw
from Bio.Clustalw import MultipleAlignCL
from Bio.Align import AlignInfo
from Bio.SubsMat import FreqTable

# create the command line to run clustalw
# this assumes you've got clustalw somewhere on your path, otherwise
# you need to pass a second argument to MultipleAlignCL with the complete
# path to clustalw
cline = MultipleAlignCL(os.path.join(os.curdir, 'opuntia.fasta'))
cline.set_output('test.aln')

# actually perform the alignment and get back an alignment object
alignment = Clustalw.do_alignment(cline)

# get the records in the alignment
all_records = alignment.get_all_seqs()

print 'description:', all_records[0].description
print 'sequence:', all_records[0].seq

# get the length of the alignment
print 'length', alignment.get_alignment_length()

print alignment

# print out interesting information about the alignment
summary_align = AlignInfo.SummaryInfo(alignment)
Пример #9
0
    # Check Bio.AlignIO.read(...)
    alignment = AlignIO.read(handle=open(t_filename), format="clustal")
    assert isinstance(alignment, Alignment)
    assert compare(alignment, alignments[0])

    print "Using Bio.AlignIO.read(...)"
    #print "~" * 75
    #handle = StringIO()
    #AlignIO.write([alignment], handle, "clustal")
    #handle.seek(0)
    #print handle.read()
    #print "~" * 75

    print "Using Bio.Clustalw.parse_file(...)"
    c_alignment = Clustalw.parse_file(t_filename)
    assert isinstance(c_alignment, Alignment)
    assert isinstance(c_alignment, Clustalw.ClustalAlignment)

    #print "  Using Bio.Clustalw.parse_file(...)"
    #print "~" * 75
    #print c_alignment
    #print "~" * 75
    #print

    # Compare the two...
    assert compare(alignment, c_alignment)

    # Check Bio.AlignIO can read the Bio.Clustalw's string output
    n_alignment = AlignIO.read(StringIO(str(c_alignment)), "clustal")
    assert isinstance(alignment, Alignment)
Пример #10
0
assert alignment[::-1][2].id == "mixed"

del alignment
del letters

print "testing reading and writing clustal format..."
test_dir = os.path.join(os.getcwd(), 'Clustalw')
test_names = ['opuntia.aln', 'cw02.aln']

test_files = []
for name in test_names:
    test_files.append(os.path.join(test_dir, name))

for test_file in test_files:
    # parse the alignment file and get an aligment object
    alignment = Clustalw.parse_file(test_file)

    # print the alignment back out
    print alignment

alignment = Clustalw.parse_file(os.path.join(test_dir, test_names[0]))

# test the base alignment stuff
print 'all_seqs...'
all_seqs = alignment.get_all_seqs()
for seq_record in all_seqs:
    print 'description:', seq_record.description
    print 'seq:', repr(seq_record.seq)
print 'length:', alignment.get_alignment_length()

print 'Calculating summary information...'
# Nothing below can run without a clustalw executable.
if not clustalw_exe:
    raise MissingExternalDependencyError(
        "Install clustalw or clustalw2 if you want to use Bio.Clustalw.")

#################################################################

print "Checking error conditions"
print "========================="

# Aligning an input file that does not exist must raise IOError.
print "Empty file"
input_file = "does_not_exist.fasta"
assert not os.path.isfile(input_file)
cline = MultipleAlignCL(input_file, command=clustalw_exe)
try:
    align = Clustalw.do_alignment(cline)
except IOError, err:
    print "Failed (good)"
    # The wording of the error varies:
    #   Python 2.3 on Windows gave (0, 'Error')
    #   Python 2.5 on Windows gives [Errno 0] Error
    message = str(err)
    assert "Cannot open sequence file" in message \
           or "not produced" in message \
           or message in ("[Errno 0] Error", "(0, 'Error')"), message
else:
    assert False, "Should have failed, returned %s" % repr(align)

# Sanity-check the single-sequence input file before it is used.
print
print "Single sequence"
input_file = "Fasta/f001"
assert os.path.isfile(input_file)
assert len(list(SeqIO.parse(input_file, "fasta"))) == 1
Пример #12
0
#!/usr/bin/env python
"""Example of generating a substitution matrix from an alignment.
"""
# standard library
import sys

# Biopython
from Bio import SubsMat
from Bio import Clustalw
from Bio.Alphabet import IUPAC
from Bio.Align import AlignInfo

# get an alignment object from a Clustalw alignment output
c_align = Clustalw.parse_file("protein.aln", IUPAC.protein)
summary_align = AlignInfo.SummaryInfo(c_align)

# get a replacement dictionary and accepted replacement matrix
# exclude all amino acids that aren't charged polar
replace_info = summary_align.replacement_dictionary(
    ["G", "A", "V", "L", "I", "M", "P", "F", "W", "S", "T", "N", "Q", "Y", "C"]
)

my_arm = SubsMat.SeqMat(replace_info)

print (replace_info)

my_lom = SubsMat.make_log_odds_matrix(my_arm)

print "log_odds_mat:", my_lom

my_lom.print_mat()
Пример #13
0
    # Check Bio.AlignIO.read(...)
    alignment = AlignIO.read(handle=open(t_filename), format="clustal")
    assert isinstance(alignment, Alignment)
    assert compare(alignment, alignments[0])

    print "Using Bio.AlignIO.read(...)"
    #print "~" * 75
    #handle = StringIO()
    #AlignIO.write([alignment], handle, "clustal")
    #handle.seek(0)
    #print handle.read()
    #print "~" * 75

    print "Using Bio.Clustalw.parse_file(...)"
    c_alignment = Clustalw.parse_file(t_filename)
    assert isinstance(c_alignment, Alignment)
    assert isinstance(c_alignment, Clustalw.ClustalAlignment)

    #print "  Using Bio.Clustalw.parse_file(...)"
    #print "~" * 75
    #print c_alignment
    #print "~" * 75
    #print

    # Compare the two...
    assert compare(alignment, c_alignment)

    # Check Bio.AlignIO can read the Bio.Clustalw's string output
    n_alignment = AlignIO.read(StringIO(str(c_alignment)), "clustal")
    assert isinstance(alignment, Alignment)