示例#1
0
def blast_remos(r, db='nr'):
    """Uses blast to find remos in a genome"""
    from Bio.Blast import NCBIWWW, NCBIXML
    import cStringIO
    b_parser = NCBIXML.BlastParser()
    E_VALUE_THRESH = 0.04
    for s in r.get_aligned_sequences():
        for remo in r.get_remos_for(s):
            seq = remo.get_sequence_for(s.centre_sequence, False)
            print 'Blasting: %s...' % (seq[:60])
            result_handle = NCBIWWW.qblast('blastn', db, seq)
            blast_results = result_handle.read()
            blast_out = cStringIO.StringIO(blast_results)
            b_record = b_parser.parse(blast_out)
            for alignment in b_record.alignments:
                for hsp in alignment.hsps:
                    if hsp.expect < E_VALUE_THRESH:
                        print '****Alignment****'
                        print 'sequence:', alignment.title
                        print 'length:', alignment.length
                        print 'e value:', hsp.expect
                        print 'sbjct_start:', hsp.sbjct_start
                        print hsp.query[0:75] + '...'
                        print hsp.match[0:75] + '...'
                        print hsp.sbjct[0:75] + '...'
            break
        break
def blast(blastRootDirectory):
    if sys.platform == 'win32':
        blast_db = os.path.join(blastRootDirectory, 'blastDB.fasta')
    else:
        if not os.path.isdir('/tmp/BLAST'):
            print "making directory '/tmp/BLAST'"
            os.mkdir('/tmp/BLAST/')
        if not os.path.exists('/tmp/BLAST/formatdb'):
            shutil.copy(os.path.join(blastRootDirectory, 'formatdb'),
                        '/tmp/BLAST')
            print "copying 'formatdb' to '/tmp/BLAST/'"
        blast_db = os.path.join('/tmp/BLAST', 'blastDB.fasta')
    #print 'path to blastDB.fasta:', blast_db

    blast_file = os.path.join(blastRootDirectory, 'filetoblast.txt')
    #print 'path to filetoblast.txt:', blast_file

    if sys.platform == 'win32':
        blastall_name = 'Blastall.exe'
        blast_exe = os.path.join(blastRootDirectory, blastall_name)
    else:
        blastall_name = 'blastall'
        blast_exe = os.path.join(os.getcwd(), '../../BLAST/bin/',
                                 blastall_name)

    #print 'path to blastall:', blast_exe

    if sys.platform == 'win32':
        import win32api
        blast_db = win32api.GetShortPathName(blast_db)
        blast_file = win32api.GetShortPathName(blast_file)
        blast_exe = win32api.GetShortPathName(blast_exe)

    #cont = raw_input('blah')
    #try:
    blast_out, error_info = NCBIStandalone.blastall(blast_exe,
                                                    'blastp',
                                                    blast_db,
                                                    blast_file,
                                                    align_view=7)
    #except:
    #  f = open(blast_file, 'r')
    #  s = file.read()
    #  print s

    #print 'done BLASTing'

    print 'errors:', error_info.read()
    print 'blast output:', blast_out.read()

    b_parser = NCBIXML.BlastParser()
    #print 'got parser'

    b_record = b_parser.parse(blast_out)
    b_iterator = NCBIStandalone.Iterator(blast_out, b_parser)
    #print 'got iterator'
    results = []
    recordnumber = 0
    nonmatchingQueries = []
    while 1:
        recordnumber += 1
        b_record = b_iterator.next()

        if not b_record: break
        print 'query:', b_record.query
        if b_record is None:
            break
        e_value_thresh = 0.001
        print 'number of alignments:', len(b_record.alignments)
        significant = False
        for alignment in b_record.alignments:
            for hsp in alignment.hsps:
                if hsp.expect < e_value_thresh:
                    alignment.title = alignment.title.replace(">", "")
                    if b_record.query != alignment.title:
                        significant = True
                        print 'adding', b_record.query, 'and', alignment.title, 'to the list of matches'
                        results.append(
                            (b_record.query, alignment.title, hsp.expect))
        print b_record.query, significant
        if not significant:
            print 'adding', b_record.query, 'to the list of queries without matches'
            nonmatchingQueries.append(b_record.query)

    return nonmatchingQueries, results