Exemplo n.º 1
0
     prefs = {'evalue': 0.001, 'outfmt_pref': 6}
     print "blasting,",
     if blast_mode == 'n':
         local_blastn_2file(infile, dbfile_path, outfile, prefs)
     elif blast_mode == 'tx':
         local_tblastx_2file(infile, dbfile_path, outfile, prefs)
     elif blast_mode == 'tn':
         local_tblastn_2file(infile, dbfile_path, outfile, prefs)
 except Exception:
     print "failed to blast"
     break
 else:
     print "finding range,"
     try:
         # load blast results from file
         rec_array = read_array(outfile, blast_dtypes)
     except IOError:
         print "failed to load blast results"
         break
     except Exception:
         print "failed to load blast results (unknown error)"
         break
     else:
         print rec_array[0]
         # take first line (= best hit) only
         try:
             line = rec_array[0]
         except IndexError:
             print "empty blast results"
             break
         if line[8] < line[9]:
Exemplo n.º 2
0
from sys import argv
from Bio.SeqRecord import SeqRecord
from libs.common import read_array, blast_dtypes, load_fasta, write_fasta, ensure_dir

# Script setup driven by three command-line arguments:
#   argv[1] -- name of the sub-directory under "data/"
#   argv[2] -- name used as the stem of the input and output files
#   argv[3] -- capture span, converted to int below
data_dir = "data/" + argv[1] + "/"
# Input file for this name; it is loaded with read_array() further down.
main_in = data_dir + argv[2] + "_results.txt"
# Output path with the "_ctxt.fas" suffix (nothing is written to it in the
# visible portion of the script).
main_out = data_dir + argv[2] + "_ctxt.fas"
ctx_dir = data_dir + "context/"
capture_span = int(argv[3])

# NOTE(review): presumably creates the directory when it is missing --
# confirm against libs.common.ensure_dir. It is called with a list of paths.
ensure_dir([ctx_dir])

# Starts empty; not populated in the visible portion of the script.
records = []

# Load the results file using the blast_dtypes spec from libs.common.
rec_array = read_array(main_in, blast_dtypes)

# Description string built from the name (argv[2]) and the span exactly as
# typed on the command line (argv[3], not the int conversion).
descript = "Context of " + argv[2] + " (" + argv[3] + " bp either side)"

for line in rec_array:

    query = line[0]
    subject = line[1]

    print subject

    rev_flag = False
    if line[8] < line[9]:
        q_start, q_stop = line[8] - 1, line[9]
        rev_flag = False
    else:
Exemplo n.º 3
0
     prefs = {'evalue': 0.001, 'outfmt_pref': 6}
     print "blasting,",
     if blast_mode == 'n':
         local_blastn_2file(infile, dbfile_path, outfile, prefs)
     elif blast_mode == 'tx':
         local_tblastx_2file(infile, dbfile_path, outfile, prefs)
     elif blast_mode == 'tn':
         local_tblastn_2file(infile, dbfile_path, outfile, prefs)
 except Exception:
     print "failed to blast"
     break
 else:
     print "finding range,"
     try:
         # load blast results from file
         rec_array = read_array(outfile, blast_dtypes)
     except IOError:
         print "failed to load blast results"
         break
     except Exception:
         print "failed to load blast results (unknown error)"
         break
     else:
         print rec_array[0]
         # take first line (= best hit) only
         try:
             line = rec_array[0]
         except IndexError:
             print "empty blast results"
             break
         if line[8] < line[9]:
Exemplo n.º 4
0
from libs.common import from_dir, read_array, blast_dtypes, load_fasta, write_fasta

# Script setup driven by two command-line arguments:
#   argv[1] -- name of the sub-directory under "data/"
#   argv[2] -- integer threshold (idp) compared against column 2 of each row
data_dir = "data/" + argv[1] + "/"
blast_out_dir = "data/" + argv[1] + "/blast_out/"
idp = int(argv[2])

# Compiled results file; opened for writing, so an existing file is
# truncated. It is not closed in the visible portion of the script.
main_out = open(data_dir + "comp_results.txt", 'w')

# Starts empty; not used in the visible portion of the script.
records_dict = {}

# list files in blast results directory
# (names are filtered with a pattern that matches anything containing ".txt")
filenames = from_dir(blast_out_dir, re.compile(r'.*\.txt.*'))
for filename in filenames:
    # Per input file: counts rows whose per-query results file already
    # existed (see the else branch below).
    counter = 0
    # load text
    rec_array = read_array(blast_out_dir + filename, blast_dtypes)
    # parse lines
    for line in rec_array:
        # keep only rows whose column 2 value is strictly greater than idp
        # NOTE(review): column 2 is presumably the percent identity --
        # confirm against blast_dtypes.
        if line[2] > idp:
            query = line[0]
            subject = line[1]
            # write line to compiled results file (tab-separated, one row
            # per line)
            main_out.write("\t".join([str(item) for item in line]) + "\n")
            # Each query gets its own results file next to the compiled one.
            outfile = data_dir + query + "_results.txt"
            if not path.exists(outfile):
                # create file
                out_handle = open(outfile, 'w')
            else:
                # File already exists: count it and append instead.
                counter += 1
                out_handle = open(outfile, 'a')
            # NOTE(review): writing to and closing out_handle are not
            # visible in this excerpt -- confirm the handle is closed on
            # every iteration.
Exemplo n.º 5
0
from sys import argv
from Bio.SeqRecord import SeqRecord
from libs.common import read_array, blast_dtypes, load_fasta, write_fasta, ensure_dir

# Script setup driven by three command-line arguments:
#   argv[1] -- name of the sub-directory under "data/"
#   argv[2] -- name used as the stem of the input and output files
#   argv[3] -- capture span, converted to int below
data_dir = "data/"+argv[1]+"/"
# Input file for this name; it is loaded with read_array() further down.
main_in = data_dir+argv[2]+"_results.txt"
# Output path with the "_ctxt.fas" suffix (nothing is written to it in the
# visible portion of the script).
main_out = data_dir+argv[2]+"_ctxt.fas"
ctx_dir = data_dir+"context/"
capture_span = int(argv[3])

# NOTE(review): presumably creates the directory when it is missing --
# confirm against libs.common.ensure_dir. It is called with a list of paths.
ensure_dir([ctx_dir])

# Starts empty; not populated in the visible portion of the script.
records = []

# Load the results file using the blast_dtypes spec from libs.common.
rec_array = read_array(main_in, blast_dtypes)

# Description string built from the name (argv[2]) and the span exactly as
# typed on the command line (argv[3], not the int conversion).
descript = "Context of "+argv[2]+" ("+argv[3]+" bp either side)"

for line in rec_array:

    query = line[0]
    subject = line[1]

    print subject

    rev_flag = False
    if line[8] < line[9]:
        q_start, q_stop = line[8]-1, line[9]
        rev_flag = False
    else:
Exemplo n.º 6
0
                else:
                    # tblastn against the reference DB
                    infile = data_dir + 'temp.fas'
                    outfile = data_dir + 'temp.txt'
                    write_fasta(infile,
                                SeqRecord(rec.seq.translate(), id='temp'))
                    prefs = {'evalue': 0.001, 'outfmt_pref': 6}
                    try:
                        local_tblastn_2file(infile, db_path, outfile, prefs)
                    except Exception:
                        print "failed to blast"
                        exit()

                    # parse output -- take only first hit
                    try:
                        hit = read_array(outfile, blast_dtypes)[0]
                    except IndexError:
                        print '-',
                        # add record to DB as new symbol
                        sym_cnt += 1
                        symbol = 'N' + str(sym_cnt)
                        rec.id = symbol
                        symbolDB[symbol] = [rec]
                        g_vector.append(symbol)
                        init_DB = True
                    else:
                        if hit[2] > threshold:
                            print '+',
                            # add record to DB as secondary match
                            symbol = hit[1]
                            rec.id = symbol + '_' + str(
Exemplo n.º 7
0
from libs.common import from_dir, read_array, blast_dtypes, load_fasta, write_fasta

# Script setup driven by two command-line arguments:
#   argv[1] -- name of the sub-directory under "data/"
#   argv[2] -- integer threshold (idp) compared against column 2 of each row
data_dir = "data/"+argv[1]+"/"
blast_out_dir = "data/"+argv[1]+"/blast_out/"
idp = int(argv[2])

# Compiled results file; opened for writing, so an existing file is
# truncated. It is not closed in the visible portion of the script.
main_out = open(data_dir+"comp_results.txt", 'w')

# Starts empty; not used in the visible portion of the script.
records_dict = {}

# list files in blast results directory
# (names are filtered with a pattern that matches anything containing ".txt")
filenames = from_dir(blast_out_dir, re.compile(r'.*\.txt.*'))
for filename in filenames:
    # Per input file: counts rows whose per-query results file already
    # existed (see the else branch below).
    counter = 0
    # load text
    rec_array = read_array(blast_out_dir+filename, blast_dtypes)
    # parse lines
    for line in rec_array:
        # keep only rows whose column 2 value is strictly greater than idp
        # NOTE(review): column 2 is presumably the percent identity --
        # confirm against blast_dtypes.
        if line[2] > idp:
            query = line[0]
            subject = line[1]
            # write line to compiled results file (tab-separated, one row
            # per line)
            main_out.write("\t".join([str(item) for item in line])+"\n")
            # Each query gets its own results file next to the compiled one.
            outfile = data_dir+query+"_results.txt"
            if not path.exists(outfile):
                # create file
                out_handle = open(outfile, 'w')
            else:
                # File already exists: count it and append instead.
                counter +=1
                out_handle = open(outfile, 'a')
            # NOTE(review): writing to and closing out_handle are not
            # visible in this excerpt -- confirm the handle is closed on
            # every iteration.
Exemplo n.º 8
0
                else:
                    # tblastn against the reference DB
                    infile = data_dir+'temp.fas'
                    outfile = data_dir+'temp.txt'
                    write_fasta(infile, SeqRecord(rec.seq.translate(), id='temp'))
                    prefs = {'evalue': 0.001, 'outfmt_pref': 6}
                    try:
                        local_tblastn_2file(infile, db_path, outfile, prefs)
                    except Exception:
                        print "failed to blast"
                        exit()

                    # parse output -- take only first hit
                    try:
                        hit = read_array(outfile, blast_dtypes)[0]
                    except IndexError:
                        print '-',
                        # add record to DB as new symbol
                        sym_cnt +=1
                        symbol = 'N'+str(sym_cnt)
                        rec.id = symbol
                        symbolDB[symbol] = [rec]
                        g_vector.append(symbol)
                        init_DB = True
                    else:
                        if hit[2] > threshold:
                            print '+',
                            # add record to DB as secondary match
                            symbol = hit[1]
                            rec.id = symbol+'_'+str(len(symbolDB[symbol])+1)