def convert_fastq(fq, ofq, out_lnum=4, out_baseQ=33, tickon=10000):
    '''Copy every read of fastq file `fq` into `ofq` in the requested layout.

    The input's line count per record and quality base are detected with
    preprocess_radtag_lane.get_fastq_properties; each read is then re-emitted
    through preprocess_radtag_lane.as_fq_line using `out_baseQ` and `out_lnum`.

    fq        -- path of the fastq file to read (opened with smartopen)
    ofq       -- path of the fastq file to write (opened with smartopen, 'w')
    out_lnum  -- lines-per-record value passed to as_fq_line (default 4)
    out_baseQ -- quality base value passed to as_fq_line (default 33)
    tickon    -- a progress line is written to stderr every `tickon` reads

    Returns None. Both file handles are closed before returning, including
    when reading or writing raises.
    '''
    nreads = preprocess_radtag_lane.get_read_count(fq)
    lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq)

    fh = preprocess_radtag_lane.smartopen(fq)
    try:
        ofh = preprocess_radtag_lane.smartopen(ofq, 'w')
        try:
            for i in xrange(nreads):
                if i % tickon == 0:
                    # '\r' plus the trailing comma keeps the progress report
                    # on a single, continuously overwritten stderr line
                    print >> sys.stderr, '\r%s / %s (%0.1f%%)' % (i, nreads, (float(i) / nreads) * 100),
                n, s, qs = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
                # qs_to_q is given the *input* base; presumably it decodes the
                # quality string into scores -- confirm against its definition
                q = qs_to_q(qs, baseQ)
                ofh.write(preprocess_radtag_lane.as_fq_line(n, s, q, out_baseQ, out_lnum))
        finally:
            # closing guarantees buffered output reaches disk
            ofh.close()
    finally:
        fh.close()

    # terminate the '\r' progress line
    print >> sys.stderr, '\n'
def convert_fastq(fq, ofq, out_lnum=4, out_baseQ=33, tickon=10000):
    '''Re-write fastq file `fq` as `ofq` with the given record layout.

    Arguments:
      fq        : input fastq path; read count, lines-per-record and quality
                  base are queried from preprocess_radtag_lane helpers
      ofq       : output fastq path, opened for writing via smartopen
      out_lnum  : forwarded to preprocess_radtag_lane.as_fq_line (default 4)
      out_baseQ : forwarded to preprocess_radtag_lane.as_fq_line (default 33)
      tickon    : progress is reported on stderr every `tickon` reads

    Exactly get_read_count(fq) reads are consumed from the input. The input
    and output handles are always closed, even if a read or write fails, so
    the output is flushed and no descriptors are leaked. Returns None.
    '''
    nreads = preprocess_radtag_lane.get_read_count(fq)
    lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq)

    fh = preprocess_radtag_lane.smartopen(fq)
    try:
        ofh = preprocess_radtag_lane.smartopen(ofq, 'w')
        try:
            for i in xrange(nreads):
                if i % tickon == 0:
                    # trailing comma suppresses the newline so '\r' can
                    # rewind and overwrite the previous progress report
                    print >> sys.stderr, '\r%s / %s (%0.1f%%)' % (
                        i, nreads, (float(i) / nreads) * 100),
                n, s, qs = preprocess_radtag_lane.next_read_from_fh(fh, lnum)
                # NOTE(review): qs_to_q receives the detected input base;
                # assumed to yield the quality form as_fq_line expects
                ofh.write(
                    preprocess_radtag_lane.as_fq_line(
                        n, s, qs_to_q(qs, baseQ), out_baseQ, out_lnum))
        finally:
            ofh.close()
    finally:
        fh.close()

    # finish the in-place progress line
    print >> sys.stderr, '\n'
for paired end, argv: cutsite,fq1,fq2,outfile1,outfile2 ''' import preprocess_radtag_lane import os, sys barcode_len = 5 tick = 10000 #update progress every this-many reads if __name__ == "__main__": if len(sys.argv) == 4: cutsite, fq, outfile = sys.argv[1:] rc = preprocess_radtag_lane.get_read_count(fq) lnum, baseQ = preprocess_radtag_lane.get_fastq_properties(fq) fh = preprocess_radtag_lane.smartopen(fq) ofh = preprocess_radtag_lane.smartopen(outfile, 'w') found = 0 for i in range(rc): if i > 0 and i % tick == 0: print >> sys.stderr, '\r%s / %s (%0.1f%%) found %s (%0.1f%%)' % \ (i,rc,(float(i)/rc)*100,found,(float(found)/i)*100), n, s, q = preprocess_radtag_lane.next_read_from_fh(fh, lnum) if s[barcode_len:barcode_len + len(cutsite)] == cutsite: line = preprocess_radtag_lane.as_fq_line(n, s, q, None, lnum) ofh.write(line) found += 1 ofh.close()
# Positional input files. NOTE(review): nargs='+' and the help text allow for a
# variable number of files, but the check below requires exactly two.
parser.add_argument('infiles',nargs='+',help='2 fastq files corresponding to reads from a single lane/index, and optionally read 2 sequences for that lane/index')

opts = parser.parse_args()

# flowcell and lane are mandatory; they are used below to select the adapter type
if opts.flowcell is None or opts.lane is None:
    raise ValueError, '--flowcell and --lane (and --index as appropriate) must be specified'

if len(opts.infiles) != 2: #PE
    errstr = '2 input files must be specified; got %s ' % len(opts.infiles)
    raise ValueError, errstr

#check fq4-33
# Each input must be 4-line, base-33 fastq; anything else is converted first.
for fq in opts.infiles:
    print >> sys.stderr, '\nfile: %s' % fq
    lnum,baseQ = preprocess_radtag_lane.get_fastq_properties(fq)
    print >> sys.stderr, 'lnum: %s\nbaseQ: %s' % (lnum,baseQ)
    if not (lnum == 4 and baseQ == 33):
        # name for the untouched original encodes its detected format,
        # e.g. <base>.fq<lnum>-<baseQ><ext>
        fqbase,fqext = fq_splitext(fq)
        prev_fq = '%s.fq%s-%s%s' % (fqbase,lnum,baseQ,fqext)
        print >> sys.stderr, 'must be 4-line, base 33 fastq to proceed; convert\nnew file will be %s\noriginal kept as %s\n' % (fq,prev_fq)
        # presumably moves the original to prev_fq and writes the converted
        # file at fq, as the message above states -- confirm in the helper
        save_previous_and_covert(prev_fq,fq)

# Look up the adapter set for this flowcell/lane/index and pull the two
# adapter sequences stored under its 'r1' and 'r2' keys.
adapterstype = get_adapterstype(opts.flowcell,opts.lane,opts.index)
adaptseq = get_adaptseq()
adaptA,adaptB = adaptseq[adapterstype]['r1'],adaptseq[adapterstype]['r2']
print >> sys.stderr, 'use adapterstype: %s\nadaptA: %s\nadaptB: %s' % (adapterstype,adaptA,adaptB)

#run seqprep
# a user-supplied --seqprep_base takes precedence when given
if opts.seqprep_base:
    sp_base = opts.seqprep_base