def _netblast_search(self, fastaRecord, excludelist=[], usecache=True): """ Blast against genbank over web """ ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## if self.options.ONEMISSING: orig_limit_query = self.options.limitquery genus, species = re.search(r'_([^_]+)_([^_]+)$', fastaRecord.title).groups() self.options.limitquery = "barcode[keyword] NOT %s %s[ORGN]" % ( genus, species) print "REFORMATTING LIMIT QUERY TO", self.options.limitquery ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## # Make a query to filter the returned results: if excludelist: entrezQuery = '(' + self.options.limitquery + ') NOT (uncultured[WORD] OR ' + '[ORGN] OR '.join( excludelist) + '[ORGN])' else: entrezQuery = '(' + self.options.limitquery + ') NOT uncultured[WORD]' ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## if self.options.ONEMISSING: self.options.limitquery = orig_limit_query ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## fileSuffix = '' for name in excludelist: l = re.split(r'\s+', name) for n in l: fileSuffix += n[0] if fileSuffix: fileSuffix = '_' + fileSuffix # File name used for blast cache blastFileName = os.path.join( self.options.blastcache, "%s.%d_%s%s.xml" % (fastaRecord.title, self.options.maxblasthits, self.options.minsignificance, fileSuffix)) if usecache and os.path.exists( blastFileName) and os.path.getsize(blastFileName) > 0: # Use cached blast result if excludelist: print "\n\t\tUsing cached Blast results (excluding %s)..." % ', '.join( excludelist), else: print "\n\t\tUsing cached Blast results...", sys.stdout.flush() else: # Make a query to filter the returned results: if excludelist: print "\n\t\tSearching database (excluding %s)..." % ', '.join( excludelist), else: print "\n\t\tSearching database...", sys.stdout.flush() fastaRecordFileName = os.path.join(self.options.project, utils.randomString(8)) fastaRecordFile = open(fastaRecordFileName, 'w') fastaRecordFile.write(str(fastaRecord)) fastaRecordFile.close() resultHandle = None if self.options.nolowcomplexfilter: filterOption = '-dust no' else: filterOption = '-dust yes' # FIXME: Check that this is an ok default... It is not the defalut in blastn if self.options.blastwordsize: wordSize = '-word_size %s' % self.options.blastwordsize else: wordSize = '' blastCmd = 'blastn -remote -outfmt 5 -db nt %s %s -evalue %s -max_target_seqs %s -entrez_query "%s" -query %s -out %s' \ % (wordSize, filterOption, self.options.minsignificance, self.options.maxblasthits, \ entrezQuery, fastaRecordFileName, blastFileName) for i in range(20): time.sleep(2 * i) error = utils.systemCall(blastCmd, stdout='IGNORE', stderr='IGNORE') try: # retval = os.system(blastCmd) # if retval != 0: # print "Netblast failed with return value %d. Trying again..." % retval error = utils.systemCall(blastCmd) if error or not os.path.exists( blastFileName) or os.path.getsize( blastFileName) == 0: print "Netblast failed. Trying again..." continue break except KeyboardInterrupt: sys.exit() except: print "Netblast failed. Trying again..." pass os.remove(fastaRecordFileName) # Read file from cache blastHandle = open(blastFileName, 'r') # Parse the result: try: blastRecord = NCBIXML.read(blastHandle) print "done.\n\t\t\t", sys.stdout.flush() except: blastRecord = None blastHandle.close() return blastRecord
def _netblast_search(self, fastaRecord, excludelist=[], usecache=True): """ Blast against genbank over web """ ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## if self.options.ONEMISSING: orig_limit_query = self.options.limitquery genus, species = re.search(r'_([^_]+)_([^_]+)$', fastaRecord.title).groups() self.options.limitquery = "barcode[keyword] NOT %s %s[ORGN]" % (genus, species) print "REFORMATTING LIMIT QUERY TO", self.options.limitquery ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## # Make a query to filter the returned results: if excludelist: entrezQuery = '(' + self.options.limitquery + ') NOT (uncultured[WORD] OR ' + '[ORGN] OR '.join(excludelist) + '[ORGN])' else: entrezQuery = '(' + self.options.limitquery + ') NOT uncultured[WORD]' ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## if self.options.ONEMISSING: self.options.limitquery = orig_limit_query ### THIS IS A HACK FOR TESTING PURPOSES TO REMOVED AGAIN ########################################## fileSuffix = '' for name in excludelist: l = re.split(r'\s+', name) for n in l: fileSuffix += n[0] if fileSuffix: fileSuffix = '_' + fileSuffix # File name used for blast cache blastFileName = os.path.join(self.options.blastcache, "%s.%d_%s%s.xml" % (fastaRecord.title, self.options.maxblasthits, self.options.minsignificance, fileSuffix)) if usecache and os.path.exists(blastFileName) and os.path.getsize(blastFileName)>0: # Use cached blast result if excludelist: print "\n\t\tUsing cached Blast results (excluding %s)..." % ', '.join(excludelist), else: print "\n\t\tUsing cached Blast results...", sys.stdout.flush() else: # Make a query to filter the returned results: if excludelist: print "\n\t\tSearching database (excluding %s)..." % ', '.join(excludelist), else: print "\n\t\tSearching database...", sys.stdout.flush() fastaRecordFileName = os.path.join(self.options.project, utils.randomString(8)) fastaRecordFile = open(fastaRecordFileName, 'w') fastaRecordFile.write(str(fastaRecord)) fastaRecordFile.close() resultHandle = None if self.options.nolowcomplexfilter: filterOption = '-dust no' else: filterOption = '-dust yes' # FIXME: Check that this is an ok default... It is not the defalut in blastn if self.options.blastwordsize: wordSize = '-word_size %s' % self.options.blastwordsize else: wordSize = '' blastCmd = 'blastn -remote -outfmt 5 -db nt %s %s -evalue %s -max_target_seqs %s -entrez_query "%s" -query %s -out %s' \ % (wordSize, filterOption, self.options.minsignificance, self.options.maxblasthits, \ entrezQuery, fastaRecordFileName, blastFileName) for i in range(20): time.sleep(2 * i) error = utils.systemCall(blastCmd, stdout='IGNORE', stderr='IGNORE') try: # retval = os.system(blastCmd) # if retval != 0: # print "Netblast failed with return value %d. Trying again..." % retval error = utils.systemCall(blastCmd) if error or not os.path.exists(blastFileName) or os.path.getsize(blastFileName) == 0: print "Netblast failed. Trying again..." continue break except KeyboardInterrupt: sys.exit() except: print "Netblast failed. Trying again..." pass os.remove(fastaRecordFileName) # Read file from cache blastHandle = open(blastFileName, 'r') # Parse the result: try: blastRecord = NCBIXML.read(blastHandle) print "done.\n\t\t\t", sys.stdout.flush() except: blastRecord = None blastHandle.close() return blastRecord