def main(args):
    """Annotate the input entries against a reference genePred file.

    The reference file is loaded into the module-global ``txome`` and the
    input is then streamed through a process pool that applies
    ``annotate_line`` to every item produced by ``generate_tx(inf, args)``.
    Each non-empty result is written to the output as-is.

    Args:
        args: parsed options. Attributes read here:
            output    -- output path (gzip if it ends in ``.gz``); stdout
                         when empty/None.
            reference -- reference genePred path (gzip if it ends in ``.gz``).
            input     -- input path, or ``'-'`` for stdin.
            threads   -- number of worker processes for the pool.
    """
    of = sys.stdout
    if args.output:
        if re.search(r'\.gz$', args.output):
            of = gzip.open(args.output, 'w')
        else:
            of = open(args.output, 'w')

    # txome maps chromosome name -> list of range objects, each carrying its
    # GPD entry as payload.  It is a module global and is filled in *before*
    # the Pool is created -- presumably so that annotate_line can read it from
    # inside the worker processes (TODO confirm against annotate_line).
    global txome
    txome = {}
    if re.search(r'\.gz$', args.reference):
        rinf = gzip.open(args.reference)
    else:
        rinf = open(args.reference)
    sys.stderr.write("Reading in reference\n")
    z = 0
    try:
        for line in rinf:
            z += 1
            gpd = GPD(line)
            gpd.set_payload(z)  # payload is the 1-based reference line number
            if z % 100 == 0:
                sys.stderr.write(str(z) + "          \r")
            r = gpd.get_range()
            r.set_payload(gpd)
            txome.setdefault(gpd.value('chrom'), []).append(r)
    finally:
        rinf.close()
    sys.stderr.write(str(z) + "          \r")
    sys.stderr.write("\n")

    inf = sys.stdin
    if args.input != '-':
        if re.search(r'\.gz$', args.input):
            inf = gzip.open(args.input)
        else:
            inf = open(args.input)

    sys.stderr.write("annotating\n")
    p = Pool(processes=args.threads)
    try:
        results = p.imap(func=annotate_line,
                         iterable=generate_tx(inf, args),
                         chunksize=100)
        for res in results:
            if not res:
                continue
            of.write(res)
    finally:
        # Release the worker processes and the input handle even when
        # annotation fails.  On the success path every result has already
        # been consumed, so terminate() discards nothing.
        p.terminate()
        p.join()
        if inf is not sys.stdin:
            inf.close()
    of.close()
示例#2
0
def main(args):
  """Annotate the input entries against a reference genePred file.

  The reference file is loaded into the module-global ``txome`` and the
  input is then streamed through a process pool that applies
  ``annotate_line`` to every item produced by ``generate_tx(inf, args)``.
  Each non-empty result is written to the output as-is.

  Args:
    args: parsed options. Attributes read here:
      output    -- output path (gzip if it ends in ``.gz``); stdout when
                   empty/None.
      reference -- reference genePred path (gzip if it ends in ``.gz``).
      input     -- input path, or ``'-'`` for stdin.
      threads   -- number of worker processes for the pool.
  """
  of = sys.stdout
  if args.output:
    if re.search(r'\.gz$', args.output):
      of = gzip.open(args.output, 'w')
    else:
      of = open(args.output, 'w')

  # txome maps chromosome name -> list of range objects, each carrying its
  # GPD entry as payload.  It is a module global and is filled in *before*
  # the Pool is created -- presumably so that annotate_line can read it from
  # inside the worker processes (TODO confirm against annotate_line).
  global txome
  txome = {}
  if re.search(r'\.gz$', args.reference):
    rinf = gzip.open(args.reference)
  else:
    rinf = open(args.reference)
  sys.stderr.write("Reading in reference\n")
  z = 0
  try:
    for line in rinf:
      z += 1
      gpd = GPD(line)
      gpd.set_payload(z)  # payload is the 1-based reference line number
      if z % 100 == 0:
        sys.stderr.write(str(z) + "          \r")
      r = gpd.get_range()
      r.set_payload(gpd)
      txome.setdefault(gpd.value('chrom'), []).append(r)
  finally:
    rinf.close()
  sys.stderr.write(str(z) + "          \r")
  sys.stderr.write("\n")

  inf = sys.stdin
  if args.input != '-':
    if re.search(r'\.gz$', args.input):
      inf = gzip.open(args.input)
    else:
      inf = open(args.input)

  sys.stderr.write("annotating\n")
  p = Pool(processes=args.threads)
  try:
    results = p.imap(func=annotate_line,
                     iterable=generate_tx(inf, args),
                     chunksize=100)
    for res in results:
      if not res:
        continue
      of.write(res)
  finally:
    # Release the worker processes and the input handle even when annotation
    # fails.  On the success path every result has already been consumed, so
    # terminate() discards nothing.
    p.terminate()
    p.join()
    if inf is not sys.stdin:
      inf.close()
  of.close()
示例#3
0
def main(args):
    """Write the genePred entries of ``args.input`` as 12-column BED lines.

    An optional ``track`` header line is written first, followed by one line
    per entry read through ``GPDStream``.  Block sizes and block starts are
    emitted as comma-terminated lists.

    Args:
        args: parsed options. Attributes read here:
            output            -- output path (gzip if it ends in ``.gz``);
                                 stdout when empty/None.
            input             -- input path, or ``'-'`` for stdin
                                 (gzip if it ends in ``.gz``).
            color             -- one of blue/green/orange/purple/red; any
                                 other value falls back to black.
            noheader          -- suppress the ``track`` line when true.
            headername        -- explicit track name.
            headerdescription -- explicit track description.
            minintron         -- when set, each entry is rebuilt from
                                 ``smooth_gaps(minintron)`` before output.
            namefield         -- 1 to emit ``gene_name``, otherwise ``name``.
    """
    of = sys.stdout
    if args.output:
        if args.output[-3:] == '.gz':
            of = gzip.open(args.output, 'w')
        else:
            # A plain (non-.gz) output path used to be ignored, silently
            # sending everything to stdout.
            of = open(args.output, 'w')

    palette = {
        'blue': '67,162,202',
        'green': '49,163,84',
        'orange': '254,178,76',
        'purple': '136,86,167',
        'red': '240,59,32',
    }
    color = palette.get(args.color, '0,0,0')

    # set up the header if one is desired
    if not args.noheader:
        if args.headername:
            newname = args.headername
        elif args.input == '-':
            newname = 'STDIN'
        else:
            # Default track name: basename of the input path, with runs of
            # whitespace collapsed to underscores.
            m = re.search(r'([^/]+)$', args.input)
            newname = m.group(1) if m else 'longreads'
            newname = re.sub(r'\s+', '_', newname)
        description = newname + ' GenePred Entries'
        if args.headerdescription:
            description = args.headerdescription
        header = "track\tname=" + newname + "\t"
        header += 'description="' + description + '"' + "\t"
        header += 'itemRgb="On"'
        of.write(header + "\n")

    gpd_handle = sys.stdin
    if args.input != '-':
        if args.input[-3:] == '.gz':
            gpd_handle = gzip.open(args.input)
        else:
            gpd_handle = open(args.input)

    for gpd in GPDStream(gpd_handle):
        if args.minintron:
            gpd = GPD(gpd.smooth_gaps(args.minintron).get_gpd_line())
        exoncount = gpd.get_exon_count()
        starts = gpd.value('exonStarts')
        ends = gpd.value('exonEnds')
        tx_start = str(starts[0])
        tx_end = str(ends[exoncount - 1])
        if args.namefield == 1:
            name = gpd.value('gene_name')
        else:
            name = gpd.value('name')
        block_sizes = ''.join(
            str(ends[i] - starts[i]) + ',' for i in range(exoncount))
        block_starts = ''.join(
            str(starts[i] - starts[0]) + ',' for i in range(exoncount))
        # Joining the columns guarantees a tab after every field; the old
        # string concatenation dropped the tab after 'name', fusing it with
        # the score column.
        fields = [
            gpd.value('chrom'),
            tx_start,
            tx_end,
            name,
            '1000',  # fixed score
            gpd.value('strand'),
            tx_start,  # thick region spans the whole entry
            tx_end,
            color,
            str(exoncount),
            block_sizes,
            block_starts,
        ]
        of.write("\t".join(fields) + "\n")
    gpd_handle.close()
    of.close()
示例#4
0
def main(args):
    """Buffer input reads by chromosome and process each chromosome's batch.

    The reference genePred is loaded into a per-chromosome dict, the whole
    input is read into memory grouped by its third tab-separated column, and
    ``do_buffer`` is run once per chromosome that is present in the reference
    (in a process pool when ``args.threads > 1``, inline otherwise).  The
    lines returned by every ``do_buffer`` call are written to ``of``.

    Args:
        args: parsed options. Attributes read here:
            output    -- output path (gzip if it ends in ``.gz``).  When
                         empty/None the existing module-level ``of`` is used
                         as-is (NOTE(review): it must already be defined at
                         module level -- confirm).
            reference -- reference genePred path (gzip if it ends in ``.gz``).
            input     -- input path, or ``'-'`` for stdin.
            threads   -- number of worker processes; values <= 1 run inline.

    Raises:
        ValueError: if an input line has no non-empty third column.
    """
    global of
    if args.output:
        if re.search(r'\.gz$', args.output):
            of = gzip.open(args.output, 'w')
        else:
            of = open(args.output, 'w')

    # txome maps chromosome name -> list of range objects, each carrying its
    # GPD entry as payload.
    txome = {}
    if re.search(r'\.gz$', args.reference):
        rinf = gzip.open(args.reference)
    else:
        rinf = open(args.reference)
    sys.stderr.write("Reading in reference\n")
    z = 0
    try:
        for line in rinf:
            z += 1
            gpd = GPD(line)
            gpd.set_payload(z)  # payload is the 1-based reference line number
            if z % 100 == 0:
                sys.stderr.write(str(z) + "          \r")
            r = gpd.get_range()
            r.set_payload(gpd)
            txome.setdefault(gpd.value('chrom'), []).append(r)
    finally:
        rinf.close()
    sys.stderr.write(str(z) + "          \r")
    sys.stderr.write("\n")

    inf = sys.stdin
    if args.input != '-':
        if re.search(r'\.gz$', args.input):
            inf = gzip.open(args.input)
        else:
            inf = open(args.input)

    # Group the raw input lines by chromosome, remembering each line's
    # 1-based position in the input.
    z = 0
    chroms = {}
    third_column = re.compile(r'[^\t]*\t[^\t]*\t([^\t]+)')
    sys.stderr.write("Buffering reads\n")
    for line in inf:
        z += 1
        m = third_column.match(line)
        if m is None:
            raise ValueError(
                "could not read the chromosome (third column) from input "
                "line " + str(z))
        if z % 100 == 0:
            sys.stderr.write(str(z) + "      \r")
        chroms.setdefault(m.group(1), []).append([line, z])
    sys.stderr.write("\n")
    sys.stderr.write("Finished buffering reads\n")

    if args.threads > 1:
        p = Pool(processes=args.threads)
    results = []
    global chrtotal
    chrtotal = len(chroms)
    for chrom in chroms:
        # Reads on chromosomes absent from the reference are dropped.
        if chrom not in txome:
            continue
        if args.threads > 1:
            v = p.apply_async(do_buffer,
                              args=(chroms[chrom], {
                                  chrom: txome[chrom]
                              }, args),
                              callback=do_out)
            results.append(v)
        else:
            v = do_buffer(chroms[chrom], {chrom: txome[chrom]}, args)
            # Wrapped so the inline result offers the same .get() used for
            # async results below (NOTE(review): Queue is defined elsewhere
            # in the project -- confirm its semantics).
            results.append(Queue(v))
            do_out(v)
    if args.threads > 1:
        p.close()
        p.join()
    sys.stderr.write("\n")
    # Collect every result before writing anything.
    for res in [x.get() for x in results]:
        for oline in res:
            of.write(oline)
    inf.close()
    of.close()
def main(args):
  """Write the genePred entries of ``args.input`` as 12-column BED lines.

  An optional ``track`` header line is written first, followed by one line
  per entry read through ``GPDStream``.  Block sizes and block starts are
  emitted as comma-terminated lists.

  Args:
    args: parsed options. Attributes read here:
      output            -- output path (gzip if it ends in ``.gz``);
                           stdout when empty/None.
      input             -- input path, or ``'-'`` for stdin
                           (gzip if it ends in ``.gz``).
      color             -- one of blue/green/orange/purple/red; any other
                           value falls back to black.
      noheader          -- suppress the ``track`` line when true.
      headername        -- explicit track name.
      headerdescription -- explicit track description.
      minintron         -- when set, each entry is rebuilt from
                           ``smooth_gaps(minintron)`` before output.
      namefield         -- 1 to emit ``gene_name``, otherwise ``name``.
  """
  of = sys.stdout
  if args.output:
    if args.output[-3:] == '.gz':
      of = gzip.open(args.output, 'w')
    else:
      # A plain (non-.gz) output path used to be ignored, silently sending
      # everything to stdout.
      of = open(args.output, 'w')

  palette = {
    'blue': '67,162,202',
    'green': '49,163,84',
    'orange': '254,178,76',
    'purple': '136,86,167',
    'red': '240,59,32',
  }
  color = palette.get(args.color, '0,0,0')

  # set up the header if one is desired
  if not args.noheader:
    if args.headername:
      newname = args.headername
    elif args.input == '-':
      newname = 'STDIN'
    else:
      # Default track name: basename of the input path, with runs of
      # whitespace collapsed to underscores.
      m = re.search(r'([^/]+)$', args.input)
      newname = m.group(1) if m else 'longreads'
      newname = re.sub(r'\s+', '_', newname)
    description = newname + ' GenePred Entries'
    if args.headerdescription:
      description = args.headerdescription
    header = "track\tname=" + newname + "\t"
    header += 'description="' + description + '"' + "\t"
    header += 'itemRgb="On"'
    of.write(header + "\n")

  gpd_handle = sys.stdin
  if args.input != '-':
    if args.input[-3:] == '.gz':
      gpd_handle = gzip.open(args.input)
    else:
      gpd_handle = open(args.input)

  for gpd in GPDStream(gpd_handle):
    if args.minintron:
      gpd = GPD(gpd.smooth_gaps(args.minintron).get_gpd_line())
    exoncount = gpd.get_exon_count()
    starts = gpd.value('exonStarts')
    ends = gpd.value('exonEnds')
    tx_start = str(starts[0])
    tx_end = str(ends[exoncount - 1])
    if args.namefield == 1:
      name = gpd.value('gene_name')
    else:
      name = gpd.value('name')
    block_sizes = ''.join(
      str(ends[i] - starts[i]) + ',' for i in range(exoncount))
    block_starts = ''.join(
      str(starts[i] - starts[0]) + ',' for i in range(exoncount))
    # Joining the columns guarantees a tab after every field; the old string
    # concatenation dropped the tab after 'name', fusing it with the score
    # column.
    fields = [
      gpd.value('chrom'),
      tx_start,
      tx_end,
      name,
      '1000',  # fixed score
      gpd.value('strand'),
      tx_start,  # thick region spans the whole entry
      tx_end,
      color,
      str(exoncount),
      block_sizes,
      block_starts,
    ]
    of.write("\t".join(fields) + "\n")
  gpd_handle.close()
  of.close()