def dump_hmm_matches(fasta_file, hits_file, dbpath, port, scantype, idmap, args):
    """Search every query in ``fasta_file`` and write its hits to ``hits_file``.

    One tab-separated row is written per hit, with the columns listed in
    ``hits_header``.  A query for which ``search.iter_hits`` reports
    ``elapsed == -1`` gets a single row filled with ``ERROR``; a query with
    no hits gets a single row filled with ``-``.

    When ``args.resume`` is set and ``hits_file`` already exists, the first
    column of every non-comment line is collected and handed to
    ``search.iter_hits`` as ``skip``, and new rows are appended to the file
    instead of overwriting it.

    Unless ``args.no_file_comments`` is set, the call info and a header line
    are written before the rows and summary statistics after them.  A
    progress line is sent to stderr while the loop runs.

    Parameters
    ----------
    fasta_file, dbpath, port, scantype :
        Passed through unchanged to ``search.iter_hits``.
    hits_file :
        Path of the output table.
    idmap :
        Optional mapping; when truthy, each hit id is replaced by
        ``idmap[hit_id][0]`` in the output.
    args :
        Options object.  Attributes read here: ``resume``,
        ``no_file_comments``, ``translate``, ``qtype``, ``dbtype``,
        ``evalue``, ``score``, ``qcov``, ``Z``, ``maxhits``, ``maxseqlen``
        and ``cpu``.

    Returns
    -------
    None
    """
    hits_header = ("#query_name", "hit", "evalue", "sum_score", "query_length",
                   "hmmfrom", "hmmto", "seqfrom", "seqto", "query_coverage")
    filler_width = len(hits_header) - 1

    def rate(count, seconds):
        # A coarse clock can report a zero interval; never divide by it.
        return float(count) / seconds if seconds > 0 else 0.0

    # NOTE: all output goes through .write() rather than the print statement
    # so that this function parses under both Python 2 and Python 3.

    # Cache previous results if resuming is enabled
    visited = set()
    resuming = args.resume and pexists(hits_file)
    if resuming:
        sys.stdout.write("%s\n" % colorify(
            "Resuming previous run. Reading computed output from %s" % hits_file,
            'yellow'))
        # Read inside a context manager so the handle is released before the
        # same path is reopened for appending.
        with open(hits_file) as previous:
            for line in previous:
                if line.startswith('#'):
                    continue
                query_name = line.split('\t')[0].strip()
                if query_name:  # blank lines are not queries
                    visited.add(query_name)
        sys.stdout.write("%d queries skipped\n" % len(visited))

    # Real number of queries seen; stays 0 when the search yields nothing.
    n_queries = 0
    with open(hits_file, 'a' if resuming else 'w') as OUT:
        sys.stdout.write("%s\n" % colorify("Sequence mapping starts now!", 'green'))
        if not args.no_file_comments:
            OUT.write("%s\n" % get_call_info())
            OUT.write('# ' + '\t'.join(hits_header) + '\n')

        total_time = 0
        last_time = time.time()
        start_time = time.time()

        hit_stream = search.iter_hits(
            fasta_file, args.translate, args.qtype, args.dbtype, scantype,
            dbpath, port,
            evalue_thr=args.evalue,
            score_thr=args.score,
            qcov_thr=args.qcov,
            fixed_Z=args.Z,
            max_hits=args.maxhits,
            skip=visited,
            maxseqlen=args.maxseqlen,
            cpus=args.cpu)

        for qn, (name, elapsed, hits, querylen, _seq) in enumerate(hit_stream):
            n_queries = qn + 1

            if elapsed == -1:  # error occurred
                OUT.write('\t'.join([name] + ['ERROR'] * filler_width) + '\n')
            elif not hits:
                OUT.write('\t'.join([name] + ['-'] * filler_width) + '\n')
            else:
                for (hid, heval, hscore, hmmfrom, hmmto,
                     sqfrom, sqto, _domscore) in hits:
                    hitname = idmap[hid][0] if idmap else hid
                    row = [name, hitname, heval, hscore,
                           int(querylen), int(hmmfrom), int(hmmto),
                           int(sqfrom), int(sqto),
                           float(sqto - sqfrom) / querylen]
                    OUT.write('\t'.join(map(str, row)) + '\n')
            # Flush per query so a later resume sees every finished query.
            OUT.flush()

            # monitoring
            total_time += time.time() - last_time
            last_time = time.time()
            if qn and (qn % 25 == 0):
                sys.stderr.write("%s %s %0.2f q/s\n" % (
                    n_queries, total_time, rate(n_queries, total_time)))
                sys.stderr.flush()

        # Writes final stats
        elapsed_time = time.time() - start_time
        if not args.no_file_comments:
            OUT.write('# %d queries scanned\n' % n_queries)
            OUT.write('# Total time (seconds): %s\n' % elapsed_time)
            OUT.write('# Rate: %0.2f q/s\n' % rate(n_queries, elapsed_time))

    sys.stdout.write("%s\n" % colorify(
        " Processed queries:%s total_time:%s rate:%s" % (
            n_queries, elapsed_time,
            "%0.2f q/s" % rate(n_queries, elapsed_time)),
        'lblue'))
def dump_hmm_matches(fasta_file, hits_file, dbpath, port, scantype, idmap, args):
    """Search every query in ``fasta_file`` and write its hits to ``hits_file``.

    One tab-separated row is written per hit, with the columns listed in
    ``hits_header``.  A query for which ``search.iter_hits`` reports
    ``elapsed == -1`` gets a single row filled with ``ERROR``; a query with
    no hits gets a single row filled with ``-``.

    When ``args.resume`` is set and ``hits_file`` already exists, the first
    column of every non-comment line is collected and handed to
    ``search.iter_hits`` as ``skip``, and new rows are appended to the file
    instead of overwriting it.

    Unless ``args.no_file_comments`` is set, the call info and a header line
    are written before the rows and summary statistics after them.  A
    progress line is sent to stderr while the loop runs.

    Parameters
    ----------
    fasta_file, dbpath, port, scantype :
        Passed through unchanged to ``search.iter_hits``.
    hits_file :
        Path of the output table.
    idmap :
        Optional mapping; when truthy, each hit id is replaced by
        ``idmap[hit_id][0]`` in the output.
    args :
        Options object.  Attributes read here: ``resume``,
        ``no_file_comments``, ``translate``, ``qtype``, ``dbtype``,
        ``evalue``, ``score``, ``qcov``, ``Z``, ``maxhits``, ``maxseqlen``,
        ``cpu`` and ``temp_dir``.

    Returns
    -------
    None
    """
    hits_header = ("#query_name", "hit", "evalue", "sum_score", "query_length",
                   "hmmfrom", "hmmto", "seqfrom", "seqto", "query_coverage")
    filler_width = len(hits_header) - 1

    def rate(count, seconds):
        # A coarse clock can report a zero interval; never divide by it.
        return float(count) / seconds if seconds > 0 else 0.0

    # NOTE: all output goes through .write() rather than the print statement
    # so that this function parses under both Python 2 and Python 3.

    # Cache previous results if resuming is enabled
    visited = set()
    resuming = args.resume and pexists(hits_file)
    if resuming:
        sys.stdout.write("%s\n" % colorify(
            "Resuming previous run. Reading computed output from %s" % hits_file,
            'yellow'))
        # Read inside a context manager so the handle is released before the
        # same path is reopened for appending.
        with open(hits_file) as previous:
            for line in previous:
                if line.startswith('#'):
                    continue
                query_name = line.split('\t')[0].strip()
                if query_name:  # blank lines are not queries
                    visited.add(query_name)
        sys.stdout.write("%d queries skipped\n" % len(visited))

    # Real number of queries seen; stays 0 when there is nothing to loop over.
    n_queries = 0
    with open(hits_file, 'a' if resuming else 'w') as OUT:
        sys.stdout.write("%s\n" % colorify("Sequence mapping starts now!", 'green'))
        if not args.no_file_comments:
            OUT.write("%s\n" % get_call_info())
            OUT.write('# ' + '\t'.join(hits_header) + '\n')

        total_time = 0
        last_time = time.time()
        start_time = time.time()

        hit_stream = search.iter_hits(
            fasta_file, args.translate, args.qtype, args.dbtype, scantype,
            dbpath, port,
            evalue_thr=args.evalue,
            score_thr=args.score,
            qcov_thr=args.qcov,
            fixed_Z=args.Z,
            max_hits=args.maxhits,
            skip=visited,
            maxseqlen=args.maxseqlen,
            cpus=args.cpu,
            base_tempdir=args.temp_dir)

        for qn, (name, elapsed, hits, querylen, _seq) in enumerate(hit_stream):
            n_queries = qn + 1

            if elapsed == -1:  # error occurred
                OUT.write('\t'.join([name] + ['ERROR'] * filler_width) + '\n')
            elif not hits:
                OUT.write('\t'.join([name] + ['-'] * filler_width) + '\n')
            else:
                for (hid, heval, hscore, hmmfrom, hmmto,
                     sqfrom, sqto, _domscore) in hits:
                    hitname = idmap[hid][0] if idmap else hid
                    row = [name, hitname, heval, hscore,
                           int(querylen), int(hmmfrom), int(hmmto),
                           int(sqfrom), int(sqto),
                           float(sqto - sqfrom) / querylen]
                    OUT.write('\t'.join(map(str, row)) + '\n')
            # Flush per query so a later resume sees every finished query.
            OUT.flush()

            # monitoring
            total_time += time.time() - last_time
            last_time = time.time()
            if qn and (qn % 25 == 0):
                sys.stderr.write("%s %s %0.2f q/s\n" % (
                    n_queries, total_time, rate(n_queries, total_time)))
                sys.stderr.flush()

        # Writes final stats
        elapsed_time = time.time() - start_time
        if not args.no_file_comments:
            OUT.write('# %d queries scanned\n' % n_queries)
            OUT.write('# Total time (seconds): %s\n' % elapsed_time)
            OUT.write('# Rate: %0.2f q/s\n' % rate(n_queries, elapsed_time))

    sys.stdout.write("%s\n" % colorify(
        " Processed queries:%s total_time:%s rate:%s" % (
            n_queries, elapsed_time,
            "%0.2f q/s" % rate(n_queries, elapsed_time)),
        'lblue'))