示例#1
0
def main():
  """Stream input lines through do_buffer/do_out and emit SAM (or BAM) output.

  Steps, in order:
    1. Parse arguments with do_inputs().
    2. Choose the output sink: sys.stdout, or the stdin of a
       ``samtools view -Sb`` subprocess when args.output ends in '.bam'.
       Any other args.output value is an error (exit status 1).
    3. Choose the input: sys.stdin when args.input is '-', otherwise the
       named file (opened through gzip when it ends in '.gz').
    4. Load the reference from args.reference and keep an upper-cased copy
       of every sequence in a plain dict.
    5. Write the @HD, @PG and one @SQ header line per reference sequence.
    6. Hand input lines, in batches of max_buffer, to do_buffer; its result
       goes to do_out (directly, or as a Pool callback when args.threads > 1).
    7. Close the pool, the output and the input, then remove args.tempdir
       unless args.specific_tempdir is set.

  The module-level global ``of`` is (re)bound to the chosen output sink;
  presumably do_out writes through it -- verify against do_out.
  """
  #do our inputs
  args = do_inputs()
  global of
  of = sys.stdout
  p = None
  if args.output:
    if args.output.endswith('.bam'):
      # Build argv as a list so an output path containing whitespace is
      # passed to samtools as a single argument.
      cmd = ['samtools','view','-Sb','-','-o',args.output]
      p = Popen(cmd,stdin=PIPE)
      of = p.stdin
    else:
      sys.stderr.write("ERROR: stdout and .bam are the only valid output formats\n")
      # Exit non-zero so callers can detect the failure.
      sys.exit(1)
  inf = sys.stdin
  if args.input != '-':
    if args.input.endswith('.gz'):
      inf = gzip.open(args.input)
    else: inf = open(args.input)
  sys.stderr.write("reading reference genome\n")
  # Close the reference file as soon as its contents have been read.
  with open(args.reference) as ref_handle:
    ref = FastaData(ref_handle.read())
  #shared = manager.dict()
  shared = {}
  # sorted() materialises the key list, so removing from ref while looping
  # is safe; each sequence is dropped from ref once copied to limit memory.
  for name in sorted(ref.keys()):
    sys.stderr.write("reading "+name+"\n")
    shared[name] = ref[name].upper()
    ref.remove(name)
  sys.stderr.write("finished reading shared memory reference\n")
  sys.stderr.write("Now make the header\n")
  of.write("@HD\tVN:1.0\tSO:unknown\n")
  of.write("@PG\tID:SLR\n")
  for name in sorted(shared.keys()):
    of.write("@SQ\tSN:"+name+"\tLN:"+str(len(shared[name]))+"\n")

  # Anything other than "more than one thread" runs serially; this also
  # covers args.threads <= 0, which previously hit an undefined pool.
  use_pool = args.threads > 1
  pool = Pool(processes=args.threads) if use_pool else None

  def dispatch(batch):
    """Process one batch of input lines, serially or through the pool."""
    if use_pool:
      # NOTE: the AsyncResult is not kept, so an exception raised by
      # do_buffer inside a worker is not reported here.
      pool.apply_async(do_buffer,args=(batch,shared,args,),callback=do_out)
    else:
      do_out(do_buffer(batch,shared,args))

  batch = []
  max_buffer = 1
  z = 0
  for line in inf:
    z += 1
    if z%1000==0: sys.stderr.write(str(z)+"   \r")
    batch.append(line)
    if len(batch) >= max_buffer:
      dispatch(batch)
      # Rebind rather than clear: the dispatched list may still be queued.
      batch = []
  if batch:
    dispatch(batch)

  if use_pool:
    pool.close()
    pool.join()

  sys.stderr.write("\n")
  if p is not None:
    # communicate() closes the pipe to samtools and waits for it to finish.
    p.communicate()
  else: of.close()
  if inf is not sys.stdin:
    inf.close()

  # Temporary working directory step 3 of 3 - Cleanup
  if not args.specific_tempdir:
    rmtree(args.tempdir)
示例#2
0
def main():
    """Stream input lines through do_buffer/do_out and emit SAM (or BAM) output.

    Steps, in order:
      1. Parse arguments with do_inputs().
      2. Choose the output sink: sys.stdout, or the stdin of a
         ``samtools view -Sb`` subprocess when args.output ends in '.bam'.
         Any other args.output value is an error (exit status 1).
      3. Choose the input: sys.stdin when args.input is '-', otherwise the
         named file (opened through gzip when it ends in '.gz').
      4. Load the reference from args.reference and keep an upper-cased
         copy of every sequence in a plain dict.
      5. Write the @HD, @PG and one @SQ header line per reference sequence.
      6. Hand input lines, in batches of max_buffer, to do_buffer; its
         result goes to do_out (directly, or as a Pool callback when
         args.threads > 1).
      7. Close the pool, the output and the input, then remove
         args.tempdir unless args.specific_tempdir is set.

    The module-level global ``of`` is (re)bound to the chosen output sink;
    presumably do_out writes through it -- verify against do_out.
    """
    #do our inputs
    args = do_inputs()
    global of
    of = sys.stdout
    p = None
    if args.output:
        if args.output.endswith('.bam'):
            # Build argv as a list so an output path containing whitespace
            # is passed to samtools as a single argument.
            cmd = ['samtools', 'view', '-Sb', '-', '-o', args.output]
            p = Popen(cmd, stdin=PIPE)
            of = p.stdin
        else:
            sys.stderr.write(
                "ERROR: stdout and .bam are the only valid output formats\n")
            # Exit non-zero so callers can detect the failure.
            sys.exit(1)
    inf = sys.stdin
    if args.input != '-':
        if args.input.endswith('.gz'):
            inf = gzip.open(args.input)
        else:
            inf = open(args.input)
    sys.stderr.write("reading reference genome\n")
    # Close the reference file as soon as its contents have been read.
    with open(args.reference) as ref_handle:
        ref = FastaData(ref_handle.read())
    #shared = manager.dict()
    shared = {}
    # sorted() materialises the key list, so removing from ref while
    # looping is safe; each sequence is dropped from ref once copied.
    for name in sorted(ref.keys()):
        sys.stderr.write("reading " + name + "\n")
        shared[name] = ref[name].upper()
        ref.remove(name)
    sys.stderr.write("finished reading shared memory reference\n")
    sys.stderr.write("Now make the header\n")
    of.write("@HD\tVN:1.0\tSO:unknown\n")
    of.write("@PG\tID:SLR\n")
    for name in sorted(shared.keys()):
        of.write("@SQ\tSN:" + name + "\tLN:" + str(len(shared[name])) + "\n")

    # Anything other than "more than one thread" runs serially; this also
    # covers args.threads <= 0, which previously hit an undefined pool.
    use_pool = args.threads > 1
    pool = Pool(processes=args.threads) if use_pool else None

    def dispatch(batch):
        """Process one batch of input lines, serially or through the pool."""
        if use_pool:
            # NOTE: the AsyncResult is not kept, so an exception raised by
            # do_buffer inside a worker is not reported here.
            pool.apply_async(do_buffer,
                             args=(
                                 batch,
                                 shared,
                                 args,
                             ),
                             callback=do_out)
        else:
            do_out(do_buffer(batch, shared, args))

    batch = []
    max_buffer = 1
    z = 0
    for line in inf:
        z += 1
        if z % 1000 == 0: sys.stderr.write(str(z) + "   \r")
        batch.append(line)
        if len(batch) >= max_buffer:
            dispatch(batch)
            # Rebind rather than clear: the dispatched list may still be
            # queued for a worker.
            batch = []
    if batch:
        dispatch(batch)

    if use_pool:
        pool.close()
        pool.join()

    sys.stderr.write("\n")
    if p is not None:
        # communicate() closes the pipe to samtools and waits for it.
        p.communicate()
    else:
        of.close()
    if inf is not sys.stdin:
        inf.close()

    # Temporary working directory step 3 of 3 - Cleanup
    if not args.specific_tempdir:
        rmtree(args.tempdir)