def main():
    """Read input lines, write a SAM header, and emit the converted records.

    Steps, in order:
      1. Parse options with ``do_inputs()``.
      2. Point the module-level ``of`` handle at stdout or, when
         ``args.output`` ends in ``.bam``, at the stdin of a
         ``samtools view -Sb`` child process that writes that file.
      3. Read the reference FASTA into an upper-cased ``{name: sequence}``
         dict and write the ``@HD``/``@PG``/``@SQ`` header lines.
      4. Pass the input lines to ``do_buffer`` in batches of ``max_buffer``
         -- inline, or through a ``Pool`` when ``args.threads > 1`` -- and
         hand each result to ``do_out``.
      5. Remove ``args.tempdir`` unless ``args.specific_tempdir`` is set.

    Exits with status 1 when the output name is not a ``.bam`` file or when
    samtools reports a non-zero exit status.
    """
    args = do_inputs()
    # `of` is module-level; presumably do_out() writes through it -- verify.
    global of
    inf = None
    try:
        of = sys.stdout
        samtools = None
        if args.output:
            if not args.output.endswith('.bam'):
                sys.stderr.write(
                    "ERROR: stdout and .bam are the only valid output formats\n")
                # Non-zero status so callers can detect the failure.
                sys.exit(1)
            # Explicit argv list: an output path containing whitespace is
            # passed to samtools as a single argument.
            samtools = Popen(
                ['samtools', 'view', '-Sb', '-', '-o', args.output],
                stdin=PIPE)
            of = samtools.stdin

        inf = sys.stdin
        if args.input != '-':
            if args.input.endswith('.gz'):
                inf = gzip.open(args.input)
            else:
                inf = open(args.input)

        sys.stderr.write("reading reference genome\n")
        with open(args.reference) as ref_handle:
            ref = FastaData(ref_handle.read())
        shared = {}
        for chrom in sorted(ref.keys()):
            sys.stderr.write("reading " + chrom + "\n")
            shared[chrom] = ref[chrom].upper()
            # Drop the entry from `ref` once copied (presumably to release
            # the original sequence -- confirm against FastaData).
            ref.remove(chrom)
        sys.stderr.write("finished reading shared memory reference\n")

        sys.stderr.write("Now make the header\n")
        of.write("@HD\tVN:1.0\tSO:unknown\n")
        of.write("@PG\tID:SLR\n")
        for chrom in sorted(shared):
            of.write("@SQ\tSN:" + chrom + "\tLN:" + str(len(shared[chrom]))
                     + "\n")

        # Any thread count of 1 or less runs inline; previously a value
        # below 1 reached the pool branch with no pool defined.
        pool = Pool(processes=args.threads) if args.threads > 1 else None

        def submit(batch):
            """Process one batch inline or queue it on the worker pool."""
            if pool is None:
                do_out(do_buffer(batch, shared, args))
            else:
                # NOTE(review): the AsyncResult is discarded, so an exception
                # raised inside do_buffer in a worker is never reported.
                pool.apply_async(do_buffer, args=(batch, shared, args),
                                 callback=do_out)

        max_buffer = 1
        buffer = []
        for z, line in enumerate(inf, 1):
            if z % 1000 == 0:
                sys.stderr.write(str(z) + " \r")
            buffer.append(line)
            if len(buffer) >= max_buffer:
                submit(buffer)
                # Rebind rather than clear: the queued batch keeps its list.
                buffer = []
        if buffer:
            submit(buffer)

        if pool is not None:
            pool.close()
            pool.join()
        sys.stderr.write("\n")

        if samtools is not None:
            # communicate() closes the child's stdin and waits for it to exit.
            samtools.communicate()
            if samtools.returncode != 0:
                sys.stderr.write("ERROR: samtools exited with status "
                                 + str(samtools.returncode) + "\n")
                sys.exit(1)
        else:
            of.close()
    finally:
        if inf is not None and inf is not sys.stdin:
            inf.close()
        # Temporary working directory step 3 of 3 - Cleanup
        # Done in `finally` so the directory is also removed on early exit
        # or error.
        if not args.specific_tempdir:
            rmtree(args.tempdir)
def main():
    """Read input lines, write a SAM header, and emit the converted records.

    Steps, in order:
      1. Parse options with ``do_inputs()``.
      2. Point the module-level ``of`` handle at stdout or, when
         ``args.output`` ends in ``.bam``, at the stdin of a
         ``samtools view -Sb`` child process that writes that file.
      3. Read the reference FASTA into an upper-cased ``{name: sequence}``
         dict and write the ``@HD``/``@PG``/``@SQ`` header lines.
      4. Pass the input lines to ``do_buffer`` in batches of ``max_buffer``
         -- inline, or through a ``Pool`` when ``args.threads > 1`` -- and
         hand each result to ``do_out``.
      5. Remove ``args.tempdir`` unless ``args.specific_tempdir`` is set.

    Exits with status 1 when the output name is not a ``.bam`` file or when
    samtools reports a non-zero exit status.
    """
    args = do_inputs()
    # `of` is module-level; presumably do_out() writes through it -- verify.
    global of
    inf = None
    try:
        of = sys.stdout
        samtools = None
        if args.output:
            if not args.output.endswith('.bam'):
                sys.stderr.write(
                    "ERROR: stdout and .bam are the only valid output formats\n")
                # Non-zero status so callers can detect the failure.
                sys.exit(1)
            # Explicit argv list: an output path containing whitespace is
            # passed to samtools as a single argument.
            samtools = Popen(
                ['samtools', 'view', '-Sb', '-', '-o', args.output],
                stdin=PIPE)
            of = samtools.stdin

        inf = sys.stdin
        if args.input != '-':
            if args.input.endswith('.gz'):
                inf = gzip.open(args.input)
            else:
                inf = open(args.input)

        sys.stderr.write("reading reference genome\n")
        with open(args.reference) as ref_handle:
            ref = FastaData(ref_handle.read())
        shared = {}
        for chrom in sorted(ref.keys()):
            sys.stderr.write("reading " + chrom + "\n")
            shared[chrom] = ref[chrom].upper()
            # Drop the entry from `ref` once copied (presumably to release
            # the original sequence -- confirm against FastaData).
            ref.remove(chrom)
        sys.stderr.write("finished reading shared memory reference\n")

        sys.stderr.write("Now make the header\n")
        of.write("@HD\tVN:1.0\tSO:unknown\n")
        of.write("@PG\tID:SLR\n")
        for chrom in sorted(shared):
            of.write("@SQ\tSN:" + chrom + "\tLN:" + str(len(shared[chrom]))
                     + "\n")

        # Any thread count of 1 or less runs inline; previously a value
        # below 1 reached the pool branch with no pool defined.
        pool = Pool(processes=args.threads) if args.threads > 1 else None

        def submit(batch):
            """Process one batch inline or queue it on the worker pool."""
            if pool is None:
                do_out(do_buffer(batch, shared, args))
            else:
                # NOTE(review): the AsyncResult is discarded, so an exception
                # raised inside do_buffer in a worker is never reported.
                pool.apply_async(do_buffer, args=(batch, shared, args),
                                 callback=do_out)

        max_buffer = 1
        buffer = []
        for z, line in enumerate(inf, 1):
            if z % 1000 == 0:
                sys.stderr.write(str(z) + " \r")
            buffer.append(line)
            if len(buffer) >= max_buffer:
                submit(buffer)
                # Rebind rather than clear: the queued batch keeps its list.
                buffer = []
        if buffer:
            submit(buffer)

        if pool is not None:
            pool.close()
            pool.join()
        sys.stderr.write("\n")

        if samtools is not None:
            # communicate() closes the child's stdin and waits for it to exit.
            samtools.communicate()
            if samtools.returncode != 0:
                sys.stderr.write("ERROR: samtools exited with status "
                                 + str(samtools.returncode) + "\n")
                sys.exit(1)
        else:
            of.close()
    finally:
        if inf is not None and inf is not sys.stdin:
            inf.close()
        # Temporary working directory step 3 of 3 - Cleanup
        # Done in `finally` so the directory is also removed on early exit
        # or error.
        if not args.specific_tempdir:
            rmtree(args.tempdir)