def read_write_and_compare(self, source_file, source_format, out_file,
                           out_format, **kwargs):
     """Round-trip one QueryResult through a file and check it is unchanged.

     The result is read from ``source_file`` as ``source_format``, written to
     ``out_file`` as ``out_format`` and then read back from ``out_file``.  The
     original and the re-read object must satisfy ``compare_search_obj``.
     ``kwargs`` is forwarded unchanged to every SearchIO call.
     """
     expected = SearchIO.read(source_file, source_format, **kwargs)
     SearchIO.write(expected, out_file, out_format, **kwargs)
     actual = SearchIO.read(out_file, out_format, **kwargs)
     round_trip_ok = compare_search_obj(expected, actual)
     self.assertTrue(round_trip_ok)
 def read_write_and_compare(self, source_file, source_format, out_file,
                            out_format, **kwargs):
     """Check that a single QueryResult survives a write/read round trip.

     Reads one result from ``source_file`` (``source_format``), writes it to
     ``out_file`` (``out_format``), reads it back and asserts that
     ``compare_search_obj`` considers both objects equal.  Every SearchIO
     call receives the same ``kwargs``.
     """
     before = SearchIO.read(source_file, source_format, **kwargs)
     SearchIO.write(before, out_file, out_format, **kwargs)
     after = SearchIO.read(out_file, out_format, **kwargs)
     unchanged = compare_search_obj(before, after)
     self.assertTrue(unchanged)
 def parse_write_and_compare(self, source_file, source_format, out_file, out_format, **kwargs):
     """Compare parsed QueryResults after they have been written to a file.

     All results are parsed from ``source_file`` as ``source_format``,
     written to ``out_file`` as ``out_format`` and parsed back.  The test
     fails if the number of results changed or if any pair of results does
     not satisfy ``compare_search_obj``.  ``kwargs`` is forwarded unchanged
     to every SearchIO call.
     """
     source_qresults = list(SearchIO.parse(source_file, source_format, **kwargs))
     SearchIO.write(source_qresults, out_file, out_format, **kwargs)
     out_qresults = list(SearchIO.parse(out_file, out_format, **kwargs))
     # zip() stops at the shorter sequence, so without this check results
     # lost (or invented) during the round trip would go unnoticed.
     self.assertEqual(len(source_qresults), len(out_qresults))
     for source, out in zip(source_qresults, out_qresults):
         self.assertTrue(compare_search_obj(source, out))
 def parse_write_and_compare(self, source_file, source_format, out_file,
                             out_format, **kwargs):
     """Compare parsed QueryResults after they have been written to a file.

     Parses every result from ``source_file`` (``source_format``), writes
     them to ``out_file`` (``out_format``) and parses that file again.  The
     two lists must have the same length and each pair must satisfy
     ``compare_search_obj``.  Every SearchIO call receives the same
     ``kwargs``.
     """
     source_qresults = list(
         SearchIO.parse(source_file, source_format, **kwargs))
     SearchIO.write(source_qresults, out_file, out_format, **kwargs)
     out_qresults = list(SearchIO.parse(out_file, out_format, **kwargs))
     # Guard against zip() silently truncating to the shorter list, which
     # would hide results that went missing in the written file.
     self.assertEqual(len(source_qresults), len(out_qresults))
     for source, out in zip(source_qresults, out_qresults):
         self.assertTrue(compare_search_obj(source, out))
def start_queryResult_generator(inFile, fDic, work_sheet):
    """Split a multi-query BLAST XML file into per-hit files and log each query.

    ``SearchIO.parse`` is used so the records are stepped through one
    QueryResult at a time instead of being loaded into memory together.
    See http://biopython.org/DIST/docs/api/Bio.SearchIO.BlastIO-module.html

    For every query result:

    * with no hits, the whole result is written to
      ``fDic['topDir'] + fDic['noHit'] + 'Query_<n>_H_none.xml'``;
    * otherwise each hit is written to its own file, named by
      ``outputFileName`` from the query number, the hit and the hit's
      lowest-e-value HSP;
    * the result is passed to ``write_qr_to_ws`` for the work sheet.

    Finally ``build_ws_header`` is called with the largest hit count seen.

    Args:
        inFile: BLAST XML input accepted by ``SearchIO.parse``.
        fDic: mapping providing the ``'topDir'`` and ``'noHit'`` path parts.
        work_sheet: work sheet object handed to the work sheet helpers.

    Returns:
        The generator returned by ``SearchIO.parse``; it has been fully
        consumed by the time this function returns.
    """
    qGenerator = SearchIO.parse(inFile, 'blast-xml')
    max_hits = 0
    # Queries are numbered from 1 in messages and file names.
    for query_count, query_result in enumerate(qGenerator, start=1):
        print('Processing Query BLAST return ' + str(query_count))
        hits = query_result.hits
        # Track the widest result so the header can be extended to fit it.
        max_hits = max(max_hits, len(hits))
        if not hits:
            # Queries that had no hits go to the no-hit subfolder.
            filename = str(fDic['topDir'] + fDic['noHit'] + 'Query_'
                           + str(query_count) + '_H_none.xml')
            SearchIO.write(query_result, filename, 'blast-xml')
        else:
            for hit_index, hit in enumerate(hits):
                # min() returns the first HSP with the lowest e-value, the
                # same one a strict "<" scan from hsps[0] would pick.
                best_hsp = min(hit.hsps, key=lambda hsp: hsp.evalue)
                filename = str(fDic['topDir']
                               + outputFileName(query_count, hit, best_hsp))
                # Slice out a one-hit QueryResult so the hit is written as
                # its own record.
                SearchIO.write(query_result[hit_index:hit_index + 1],
                               filename, 'blast-xml')
        write_qr_to_ws(query_count, query_result, work_sheet)
    build_ws_header(work_sheet, max_hits)
    return qGenerator
示例#6
0
def MergeBLASTsAndWrite(results):
    """
    Combine the individual BLASTp outputs and write them to ``panoct.blast``.

    Each non-empty entry of ``results`` is a block of commented tabular BLAST
    text.  To make SearchIO parse the concatenation as one big "file", the
    last two lines of every block (the "# BLAST processed x queries" trailer
    etc.) are dropped before the blocks are joined.  The merged text is
    parsed as commented blast-tab and written back out as plain blast-tab
    (no comments) for PanOCT.
    """
    logging.info(
        "BLASTAll: Merging all-vs.-all results together and parsing into tabular format."
    )
    trimmed_blocks = []
    for result in results:
        if not result:
            continue
        lines = result.split("\n")
        # Drop the two trailing lines of this block before merging.
        trimmed_blocks.append("\n".join(lines[:-2]))
    merged = "\n".join(trimmed_blocks)
    merged_handle = cStringIO.StringIO(merged)
    parsed = SearchIO.parse(merged_handle, "blast-tab", comments=True)

    # Hand the merged results to PanOCT via a file in the working directory.
    logging.info("BLASTAll: Writing BLASTp results to file panoct.blast.")
    SearchIO.write(parsed, "panoct.blast", "blast-tab")
示例#7
0
    def setUp(self):
        """Create the temporary files the tests work with.

        * ``self.blast_xml``: BLAST XML holding two filtered copies of the
          result read from ``blast_in`` (HSPs with raw bit score >= 95, and
          HSPs with raw bit score strictly between 80 and 95).
        * ``self.query_double``: the content of ``blast_query`` written twice.
        * ``self.csv``, ``self.html``, ``self.pandas_dump``, ``self.json``:
          empty temporary files whose paths are stored on the instance.
        """
        xml_fd, xml_path = tempfile.mkstemp(prefix='rba_test_xml', suffix='_t21')
        parsed = SearchIO.read(blast_in, 'blast-xml')
        strong_copy = deepcopy(parsed)
        medium_copy = deepcopy(parsed)
        filtered_results = [
            strong_copy.hsp_filter(lambda hsp: hsp.bitscore_raw >= 95),
            medium_copy.hsp_filter(lambda hsp: 80 < hsp.bitscore_raw < 95),
        ]
        with os.fdopen(xml_fd, 'w') as xml_handle:
            SearchIO.write(filtered_results, xml_handle, 'blast-xml')
        self.blast_xml = xml_path

        fasta_fd, fasta_path = tempfile.mkstemp(prefix='rba_test_double_fasta',
                                                suffix='_t22')
        with os.fdopen(fasta_fd, 'w') as target, open(blast_query, 'r') as source:
            query_text = source.read()
            target.write('{}\n{}\n'.format(query_text, query_text))
        self.query_double = fasta_path

        # The remaining outputs only need a reserved path: create each file,
        # close its descriptor right away and remember where it lives.
        placeholders = (
            ('csv', '_t7.csv'),
            ('html', '_t8.html'),
            ('pandas_dump', '_t9.pandas_dump'),
            ('json', '_t10.json'),
        )
        for attribute, suffix in placeholders:
            descriptor, path = tempfile.mkstemp(prefix='rba_', suffix=suffix)
            os.close(descriptor)
            setattr(self, attribute, path)
示例#8
0
 def deleteFromAlignResult(self, projectPath, db, file, alignResult):
     """Remove one sequence from the stored BLAST XML results of a database.

     Walks ``projectPath/alignResult/db``.  A result file for which
     ``self.contains(name, target)`` is true is deleted outright, where
     ``target`` is ``file`` without its last three characters.  Every other
     file is parsed as BLAST XML, the hits whose ID equals ``target`` are
     removed, and the file is rewritten in place.

     Args:
         projectPath: root directory of the project.
         db: name of the database sub-directory.
         file: name of the sequence file being deleted.
         alignResult: name of the alignment-results sub-directory.
     """
     blastResultPath = projectPath + "/" + alignResult + "/" + db
     # NOTE(review): presumably strips a three-character extension such as
     # ".fa" -- confirm against callers.
     target = file[:-3]
     # Use the file names os.walk already provides; os.listdir() would also
     # return sub-directories, which can be neither opened nor os.remove()d.
     for base, _dirs, filenames in os.walk(blastResultPath):
         for name in filenames:
             filePath = base + '/' + name
             # If the name of the file being read contains the name of the
             # file to delete, delete it.
             if self.contains(name, target):
                 os.remove(filePath)
                 continue
             # Otherwise the alignment may be inside this result file.
             with open(filePath) as originalXml:
                 result = SearchIO.read(originalXml, "blast-xml")
             # Collect the positions first and pop from the back, so the
             # result is never mutated while it is being iterated and the
             # remaining indices stay valid.
             stale = [index for index, hit in enumerate(result.hits)
                      if hit[0].hit.id == target]
             # Only QueryResult.pop() removes a hit: the hits, hit_keys and
             # hsps properties return freshly built lists in Biopython, so
             # popping from them never changed the result.
             for index in reversed(stale):
                 result.pop(index)
             os.remove(filePath)
             SearchIO.write(result, filePath, 'blast-xml')
示例#9
0
 def write_filtered_xml(self):
     """Write ``self.filtered_list`` to ``self.output_file`` as BLAST XML."""
     print('Writing output file...')
     records = self.filtered_list
     destination = self.output_file
     SearchIO.write(records, destination, format='blast-xml')
示例#10
0
        def iterator(blast_dict):
            """Yield one QueryResult per entry, without black-listed hits.

            For each key of ``blast_dict`` the hits whose ``id`` is not in
            ``black_list`` are kept.  Entries left with no hits are skipped.
            """
            for entry in blast_dict:
                kept_hits = [
                    hit for hit in blast_dict[entry].hits
                    if hit.id not in black_list
                ]
                if not kept_hits:
                    continue
                yield QueryResult(hits=kept_hits, id=entry)

    elif args.mode == "both":

        def iterator(blast_dict):
            """Yield filtered QueryResults, dropping black-listed queries and hits.

            Entries whose key is in ``black_list`` are skipped entirely.  For
            the others, only hits whose ``id`` is not in ``black_list`` are
            kept, and entries left with no hits are skipped as well.
            """
            for entry in blast_dict:
                if entry in black_list:
                    continue
                kept_hits = [
                    hit for hit in blast_dict[entry].hits
                    if hit.id not in black_list
                ]
                if not kept_hits:
                    continue
                yield QueryResult(hits=kept_hits, id=entry)


# Index the input so iterator() can look results up by key
# (it iterates the keys and reads blast_dict[entry].hits).
blast_results = SearchIO.index(args.input, args.format)

# Write whatever the selected iterator() yields, in the same format as the input.
SearchIO.write(iterator(blast_results), args.output, args.format)
# NOTE(review): out_fd and in_fd are created outside this view, and the write
# above targets args.output rather than out_fd -- confirm they are still needed.
# NOTE(review): the sentinel "output" is not symmetrical with "stdin" below;
# possibly "stdout" was intended -- verify against the argument defaults.
if args.output != "output":
    out_fd.close()
if args.input != "stdin":
    in_fd.close()
示例#11
0
def __main__():
    """Keep the BLAST query results that have exactly one HSP and write them out.

    Command line: ``blast_file`` (required) is a BLAST text report; ``-o``
    optionally names the output file (default ``filtered_blast.txt``).

    Query results without hits are ignored.  Of the remaining ones, those
    whose hits contain exactly one HSP in total are written to the output
    file in blast-tab format; the others are counted as filtered out.

    The class of the objects being read is documented at
    http://biopython.org/DIST/docs/api/Bio.SearchIO._model.query.QueryResult-class.html

    Raises:
        SystemExit: if ``blast_file`` is not an existing file.
    """
    # Parse the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument('blast_file', type=str, help="A blast file to process")
    parser.add_argument('-o', type=str, help="A name for the output file")
    args = parser.parse_args()

    # Check for a valid file before doing any work.
    if not os.path.isfile(args.blast_file):
        sys.exit("\n%s is not a valid file!\n" % args.blast_file)

    output_file = "filtered_blast.txt" if args.o is None else args.o

    blast_data = []
    num_total_qresults = 0
    num_filtered_qresults = 0

    for qresult in SearchIO.parse(args.blast_file, 'blast-text'):
        # Queries with no hits are not counted at all.
        if not qresult.hits:
            continue
        num_total_qresults += 1

        # len(hit) is the number of HSPs in the hit, so this is the total
        # HSP count of the query, not its number of hits.
        hsps_in_qresult = sum(len(hit) for hit in qresult.hits)
        if hsps_in_qresult == 1:
            blast_data.append(qresult)
        else:
            num_filtered_qresults += 1

    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Filted out %d positions from %d total query results"
          % (num_filtered_qresults, num_total_qresults))

    # NOTE: the original author flagged this write as "not working just yet".
    with open(output_file, "w") as output_handle:
        count = SearchIO.write(blast_data, output_handle, 'blast-tab')
    print("Wrote out %s blast records to '%s'" % (count, output_file))
示例#12
0
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio import SearchIO


# BLAST database to search against.
humdb = "/mithril/Data/Pacbio/Aligned/151019_proc/blast/humiso_blast"

# Configure blastn; outfmt=5 is the XML report that is read back below.
blastn_cline = NcbiblastnCommandline(
    query="temp.fasta",
    db=humdb,
    gapopen=1,
    gapextend=2,
    word_size=9,
    reward=1,
    evalue=10,
    outfmt=5,
    out="try.xml",
)

# Run the search; the report itself goes to try.xml.
stdout, stderr = blastn_cline()

# Convert the single XML result to tabular format.
bres = SearchIO.read("try.xml", 'blast-xml')
SearchIO.write(bres, 'try.tsv', 'blast-tab')

# NOTE: this works, but the output is not what we need: BLAST alignments are
# pairwise, and what we probably want is a MAF.

示例#13
0
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio import SearchIO

# BLAST database to search against.
humdb = "/mithril/Data/Pacbio/Aligned/151019_proc/blast/humiso_blast"

# blastn options gathered in one place; outfmt=5 is the XML report that is
# read back below.
_blastn_options = {
    "query": "temp.fasta",
    "db": humdb,
    "gapopen": 1,
    "gapextend": 2,
    "word_size": 9,
    "reward": 1,
    "evalue": 10,
    "outfmt": 5,
    "out": "try.xml",
}
blastn_cline = NcbiblastnCommandline(**_blastn_options)

# Run the search; the report itself goes to try.xml.
stdout, stderr = blastn_cline()

# Convert the single XML result to tabular format.
bres = SearchIO.read("try.xml", 'blast-xml')
SearchIO.write(bres, 'try.tsv', 'blast-tab')

# NOTE: this works, but the output is not what we need: BLAST alignments are
# pairwise, and what we probably want is a MAF.