Exemplo n.º 1
0
def read_fastq(fastq_file,maxcnt):
  """Read at most ``maxcnt`` entries from a FASTQ file and summarise them.

  Args:
    fastq_file: Passed unchanged to ``GenericFastqFileReader``.
    maxcnt: Maximum number of entries to read and keep.

  Returns:
    A two-element list ``[entries, stats]``. ``entries`` holds the entry
    objects exactly as returned by the reader's ``read_entry()``. ``stats``
    is a dict with the keys:

    * ``qmin`` / ``qmax``: smallest / largest ``ord()`` value over every
      character of every entry's ``'quality'`` string, or ``None`` when no
      quality characters were seen.
    * ``lenmin`` / ``lenmax``: shortest / longest ``'seq'`` length
      (both ``0`` when no entries were read).
    * ``readcount``: number of entries kept.
    * ``basecount``: sum of all ``'seq'`` lengths.
  """
  gfr = GenericFastqFileReader(fastq_file)
  entries = []
  qseen = set()
  lenmin = float('inf')
  lenmax = 0
  bases = 0
  try:
    # Test the cap *before* reading: the previous post-read `ecnt > maxcnt`
    # check kept maxcnt + 1 entries and consumed one extra entry from the
    # reader only to throw it away.
    while len(entries) < maxcnt:
      e = gfr.read_entry()
      if not e: break
      slen = len(e['seq'])
      lenmin = min(lenmin, slen)
      lenmax = max(lenmax, slen)
      bases += slen
      qseen.update(ord(x) for x in e['quality'])
      entries.append(e)
  finally:
    # Release the reader even if an entry is malformed and raises mid-loop.
    gfr.close()
  if not entries:
    # Nothing was read; report 0 rather than leaking the inf sentinel.
    lenmin = 0
  stats = {
    # min()/max() raise ValueError on an empty set, so guard the empty case.
    'qmin': min(qseen) if qseen else None,
    'qmax': max(qseen) if qseen else None,
    'lenmin': lenmin,
    'lenmax': lenmax,
    'readcount': len(entries),
    'basecount': bases,
  }
  return [entries,stats]
def _collect_unique_reads(reader):
  """Drain ``reader`` into a name -> sequence dict, exiting on a duplicate name."""
  reads = {}
  try:
    while True:
      e = reader.read_entry()
      if not e: break
      name = e['name']
      if name in reads:
        sys.stderr.write("ERROR observed reads must be uniquely named\n")
        # Exit non-zero: a bare sys.exit() reports success (status 0) to the
        # calling shell even though this is an error path.
        sys.exit(1)
      reads[name] = e['seq']
  finally:
    # Runs on normal completion and on SystemExit alike. Only the FASTQ
    # reader's close() is visible in this file, so call it only if present.
    close = getattr(reader, 'close', None)
    if close is not None:
      close()
  return reads


def check_for_uniquely_named_reads(args):
  """Load reads from the configured file, requiring every read name to be unique.

  Args:
    args: Object with ``fastq_reads`` and ``fasta_reads`` attributes.
      ``fastq_reads`` takes precedence when both are truthy.

  Returns:
    A dict mapping each entry's ``'name'`` to its ``'seq'``. Empty when
    neither attribute is set.

  Raises:
    SystemExit: With status 1, after writing an error to stderr, when a
      read name occurs more than once.
  """
  if args.fastq_reads:
    return _collect_unique_reads(GenericFastqFileReader(args.fastq_reads))
  if args.fasta_reads:
    return _collect_unique_reads(GenericFastaFileReader(args.fasta_reads))
  return {}
Exemplo n.º 3
0
def _collect_unique_reads(reader):
    """Drain ``reader`` into a name -> sequence dict, exiting on a duplicate name."""
    reads = {}
    try:
        while True:
            e = reader.read_entry()
            if not e:
                break
            name = e['name']
            if name in reads:
                sys.stderr.write("ERROR observed reads must be uniquely named\n")
                # Exit non-zero: a bare sys.exit() reports success (status 0)
                # to the calling shell even though this is an error path.
                sys.exit(1)
            reads[name] = e['seq']
    finally:
        # Runs on normal completion and on SystemExit alike. The readers'
        # API is not visible here, so call close() only if it is present.
        close = getattr(reader, 'close', None)
        if close is not None:
            close()
    return reads


def check_for_uniquely_named_reads(args):
    """Load reads from the configured file, requiring every read name to be unique.

    Args:
        args: Object with ``fastq_reads`` and ``fasta_reads`` attributes.
            ``fastq_reads`` takes precedence when both are truthy.

    Returns:
        A dict mapping each entry's ``'name'`` to its ``'seq'``. Empty when
        neither attribute is set.

    Raises:
        SystemExit: With status 1, after writing an error to stderr, when a
            read name occurs more than once.
    """
    if args.fastq_reads:
        return _collect_unique_reads(GenericFastqFileReader(args.fastq_reads))
    if args.fasta_reads:
        return _collect_unique_reads(GenericFastaFileReader(args.fasta_reads))
    return {}