def inspect_reads(fastq_files, output_prefix, quals):
    """
    Uncompress reads, rename reads, and convert quality scores to
    'sanger' format.

    One output FASTQ file is written per input file, named
    '<output_prefix>_<N>.fq' (N is 1-based).  Reads are renamed to
    '@<fragment_number>/<mate_number>' so paired files stay in sync.

    Returns config.JOB_SUCCESS on success, or config.JOB_ERROR on
    failure (partial output files are removed).
    """
    # setup iterators over the (possibly compressed) input files,
    # yielding one 4-line FASTQ record at a time
    filehandles = [open_compressed(f) for f in fastq_files]
    fqiters = [parse_lines(f, numlines=4) for f in filehandles]
    output_files = [(output_prefix + "_%d.fq" % (x+1))
                    for x in xrange(len(fastq_files))]
    outfhs = [open(f, "w") for f in output_files]
    qual_func = get_qual_conversion_func(quals)
    linenum = 0
    try:
        while True:
            # advance all mates in lockstep; StopIteration ends the loop
            pelines = [it.next() for it in fqiters]
            for i, lines in enumerate(pelines):
                # rename read using fragment number (mate tag /1, /2, ...)
                lines[0] = "@%d/%d" % (linenum, i+1)
                # ignore redundant header
                lines[2] = "+"
                # convert quality score to sanger
                lines[3] = qual_func(lines[3])
                print >>outfhs[i], '\n'.join(lines)
            linenum += 1
    except StopIteration:
        pass
    except Exception:
        # BUGFIX: was a bare 'except:' that discarded the error details
        # and also leaked every open file handle on the error path
        logging.exception("Unexpected error during FASTQ file processing")
        for fh in filehandles:
            fh.close()
        # close outputs before removing them
        for fh in outfhs:
            fh.close()
        for f in output_files:
            if os.path.exists(f):
                os.remove(f)
        return config.JOB_ERROR
    for fh in filehandles:
        fh.close()
    # BUGFIX: output handles were never closed, risking truncated/buffered
    # data when the caller reads the files back
    for fh in outfhs:
        fh.close()
    logging.debug("Inspected %d fragments" % (linenum))
    return config.JOB_SUCCESS
def process_input_reads(fastq_files, output_prefix, quals, trim5, trim3):
    """
    Uncompress reads, rename reads, trim read ends, and convert quality
    scores to 'sanger' format.

    One output FASTQ file is written per input file, named
    '<output_prefix>_<N>.fq' (N is 1-based).  Reads are renamed to
    '@<fragment_number>/<mate_number>' (fragment numbers start at 1),
    and the original read-1 name for each fragment is appended to
    '<output_prefix>.txt' so renamed reads can be mapped back.
    'trim5'/'trim3' bases are removed from the 5'/3' ends, always
    leaving at least one base.

    Returns config.JOB_SUCCESS on success, or config.JOB_ERROR on
    failure (partial output files are removed).
    """
    # setup file iterators for input fastq files (one 4-line record at a time)
    infhs = [open_compressed(f) for f in fastq_files]
    fqiters = [parse_lines(f, numlines=4) for f in infhs]
    # setup output files
    output_files = [(output_prefix + "_%d.fq" % (x + 1))
                    for x in xrange(len(fastq_files))]
    outfhs = [open(f, "w") for f in output_files]
    read_name_file = output_prefix + ".txt"
    read_name_fh = open(read_name_file, 'w')
    # get quality score conversion function
    qual_func = get_qual_conversion_func(quals)
    linenum = 1
    try:
        while True:
            pelines = [it.next() for it in fqiters]
            # get read1 first line of fq record, and remove "@" symbol
            read1_name = pelines[0][0][1:]
            # remove whitespace and/or read number tags /1 or /2
            read1_name = read1_name.split()[0].split("/")[0]
            # write to read name database
            print >> read_name_fh, read1_name
            # convert reads
            for i, lines in enumerate(pelines):
                # rename read using line number
                lines[0] = "@%d/%d" % (linenum, i + 1)
                # ignore redundant header
                lines[2] = "+"
                # trim read, keeping at least one base after the 5' trim
                total_length = len(lines[1])
                pos3p = max(trim5 + 1, total_length - trim3)
                lines[1] = lines[1][trim5:pos3p]
                lines[3] = lines[3][trim5:pos3p]
                # convert quality score to sanger
                lines[3] = qual_func(lines[3])
                print >> outfhs[i], '\n'.join(lines)
            linenum += 1
    except StopIteration:
        pass
    except Exception:
        # BUGFIX: was a bare 'except:' that discarded the error details
        # and leaked the input file handles on the error path
        logging.exception("Unexpected error during FASTQ file processing")
        for fh in infhs:
            fh.close()
        for fh in outfhs:
            fh.close()
        read_name_fh.close()
        for f in output_files:
            if os.path.exists(f):
                os.remove(f)
        if os.path.exists(read_name_file):
            os.remove(read_name_file)
        return config.JOB_ERROR
    # cleanup
    for fh in infhs:
        fh.close()
    for fh in outfhs:
        fh.close()
    read_name_fh.close()
    # BUGFIX: linenum starts at 1 and is incremented after each fragment,
    # so the processed-fragment count is linenum - 1
    logging.debug("Inspected %d fragments" % (linenum - 1))
    return config.JOB_SUCCESS
# NOTE(review): this is a near-identical duplicate of the
# process_input_reads definition above and shadows it at import time;
# one of the two copies should be deleted.
def process_input_reads(fastq_files, output_prefix, quals, trim5, trim3):
    """
    Uncompress reads, rename reads, trim read ends, and convert quality
    scores to 'sanger' format.

    One output FASTQ file is written per input file, named
    '<output_prefix>_<N>.fq' (N is 1-based).  Reads are renamed to
    '@<fragment_number>/<mate_number>' (fragment numbers start at 1),
    and the original read-1 name for each fragment is appended to
    '<output_prefix>.txt' so renamed reads can be mapped back.
    'trim5'/'trim3' bases are removed from the 5'/3' ends, always
    leaving at least one base.

    Returns config.JOB_SUCCESS on success, or config.JOB_ERROR on
    failure (partial output files are removed).
    """
    # setup file iterators for input fastq files (one 4-line record at a time)
    infhs = [open_compressed(f) for f in fastq_files]
    fqiters = [parse_lines(f, numlines=4) for f in infhs]
    # setup output files
    output_files = [(output_prefix + "_%d.fq" % (x+1))
                    for x in xrange(len(fastq_files))]
    outfhs = [open(f, "w") for f in output_files]
    read_name_file = output_prefix + ".txt"
    read_name_fh = open(read_name_file, 'w')
    # get quality score conversion function
    qual_func = get_qual_conversion_func(quals)
    linenum = 1
    try:
        while True:
            pelines = [it.next() for it in fqiters]
            # get read1 first line of fq record, and remove "@" symbol
            read1_name = pelines[0][0][1:]
            # remove whitespace and/or read number tags /1 or /2
            read1_name = read1_name.split()[0].split("/")[0]
            # write to read name database
            print >>read_name_fh, read1_name
            # convert reads
            for i, lines in enumerate(pelines):
                # rename read using line number
                lines[0] = "@%d/%d" % (linenum, i+1)
                # ignore redundant header
                lines[2] = "+"
                # trim read, keeping at least one base after the 5' trim
                total_length = len(lines[1])
                pos3p = max(trim5+1, total_length - trim3)
                lines[1] = lines[1][trim5:pos3p]
                lines[3] = lines[3][trim5:pos3p]
                # convert quality score to sanger
                lines[3] = qual_func(lines[3])
                print >>outfhs[i], '\n'.join(lines)
            linenum += 1
    except StopIteration:
        pass
    except Exception:
        # BUGFIX: was a bare 'except:' that discarded the error details
        # and leaked the input file handles on the error path
        logging.exception("Unexpected error during FASTQ file processing")
        for fh in infhs:
            fh.close()
        for fh in outfhs:
            fh.close()
        read_name_fh.close()
        for f in output_files:
            if os.path.exists(f):
                os.remove(f)
        if os.path.exists(read_name_file):
            os.remove(read_name_file)
        return config.JOB_ERROR
    # cleanup
    for fh in infhs:
        fh.close()
    for fh in outfhs:
        fh.close()
    read_name_fh.close()
    # BUGFIX: linenum starts at 1 and is incremented after each fragment,
    # so the processed-fragment count is linenum - 1
    logging.debug("Inspected %d fragments" % (linenum - 1))
    return config.JOB_SUCCESS