def __main__():
    """Write a uniform random sub-sample of a FASTQ file as a bzip2 file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input FASTQ file (opened with ``helper._open_file``)
        2. path of the bzip2-compressed output file to create
        3. number of reads to keep (sub-sample count)

    The input is read twice: once to count the reads, once to pick them.
    Reads are picked by sequential selection sampling, so exactly the
    requested number of reads is written and every read has the same
    chance of being kept, wherever it sits in the file.

    Exits with status -1 (after printing the module docstring) when an
    argument is missing or the count is not an integer, and exits with an
    error message when the count is negative, exceeds the number of reads,
    or the output file cannot be created.
    """
    try:
        fastqFile = sys.argv[1]
        outFile = sys.argv[2]
        subCnt = int(sys.argv[3])  # sub sample count
    except (IndexError, ValueError):
        print(__doc__)
        sys.exit(-1)

    ## first pass: count the number of reads in the FASTQ file
    read_cnt = 0
    ffh = helper._open_file(fastqFile)
    try:
        for rec in SeqIO.parse(ffh, 'fastq'):
            read_cnt += 1
    finally:
        ffh.close()
    # the doubled space reproduces the historical "print a, b" output
    print('Number of reads in FASTQ:  %s' % read_cnt)

    # explicit checks instead of assert, which is stripped under "python -O"
    if subCnt < 0:
        sys.exit('%d (sub-sample count) should not be negative' % subCnt)
    if subCnt > read_cnt:
        sys.exit('%d (sub-sample count) should be less than total read count %d'
                 % (subCnt, read_cnt))

    ## outfile directory check for creating the new file
    try:
        subFile = bz2.BZ2File(outFile, 'wb')
    except (IOError, OSError) as error:
        sys.exit(error)

    cnt, sub_cnt = 0, 0
    print('Writing compressed file...')
    try:
        ffh = helper._open_file(fastqFile)
        try:
            for rec in SeqIO.parse(ffh, 'fastq'):
                cnt += 1
                # Selection sampling: keep this read with probability
                # (reads still needed) / (reads not yet scanned, including
                # this one). random.random() is < 1.0, so once the two
                # numbers are equal every remaining read is kept.
                reads_left = read_cnt - cnt + 1
                if random.random() * reads_left < subCnt - sub_cnt:
                    sub_cnt += 1
                    subFile.write(rec.format('fastq'))
                if subCnt == sub_cnt:
                    print('...done')
                    break
        finally:
            ffh.close()
    finally:
        subFile.close()

    print('Number of reads scanned:  %s' % cnt)
    print('Number of reads in:  %s' % sub_cnt)
def __main__():
    """Write a uniform random sub-sample of a FASTQ file as a bzip2 file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the input FASTQ file (opened with ``helper._open_file``)
        2. path of the bzip2-compressed output file to create
        3. number of reads to keep (sub-sample count)

    The input is read twice: once to count the reads, once to pick them.
    Reads are picked by sequential selection sampling, so exactly the
    requested number of reads is written and every read has the same
    chance of being kept, wherever it sits in the file.

    Exits with status -1 (after printing the module docstring) when an
    argument is missing or the count is not an integer, and exits with an
    error message when the count is negative, exceeds the number of reads,
    or the output file cannot be created.
    """
    try:
        fastqFile = sys.argv[1]
        outFile = sys.argv[2]
        subCnt = int(sys.argv[3])  # sub sample count
    except (IndexError, ValueError):
        print(__doc__)
        sys.exit(-1)

    ## first pass: count the number of reads in the FASTQ file
    read_cnt = 0
    ffh = helper._open_file(fastqFile)
    try:
        for rec in SeqIO.parse(ffh, "fastq"):
            read_cnt += 1
    finally:
        ffh.close()
    # the doubled space reproduces the historical "print a, b" output
    print("Number of reads in FASTQ:  %s" % read_cnt)

    # explicit checks instead of assert, which is stripped under "python -O"
    if subCnt < 0:
        sys.exit("%d (sub-sample count) should not be negative" % subCnt)
    if subCnt > read_cnt:
        sys.exit("%d (sub-sample count) should be less than total read count %d"
                 % (subCnt, read_cnt))

    ## outfile directory check for creating the new file
    try:
        subFile = bz2.BZ2File(outFile, "wb")
    except (IOError, OSError) as error:
        sys.exit(error)

    cnt, sub_cnt = 0, 0
    print("Writing compressed file...")
    try:
        ffh = helper._open_file(fastqFile)
        try:
            for rec in SeqIO.parse(ffh, "fastq"):
                cnt += 1
                # Selection sampling: keep this read with probability
                # (reads still needed) / (reads not yet scanned, including
                # this one). random.random() is < 1.0, so once the two
                # numbers are equal every remaining read is kept.
                reads_left = read_cnt - cnt + 1
                if random.random() * reads_left < subCnt - sub_cnt:
                    sub_cnt += 1
                    subFile.write(rec.format("fastq"))
                if subCnt == sub_cnt:
                    print("...done")
                    break
        finally:
            ffh.close()
    finally:
        subFile.close()

    print("Number of reads scanned:  %s" % cnt)
    print("Number of reads in:  %s" % sub_cnt)
def __main__():
    """Print per-record and summary length statistics for a FASTA file.

    Command-line arguments (read from ``sys.argv``):
        1. path of the FASTA file (opened with ``helper._open_file``)

    For every record the id and sequence length are printed, followed by
    the number of distinct ids, the longest and shortest entries and the
    integer average length. Records sharing an id overwrite each other in
    the summary (the last one wins). The longest/shortest/average lines
    are skipped when the file contains no records.

    Exits with status -1 (after printing the module docstring) when the
    file name argument is missing.
    """
    try:
        fa_name = sys.argv[1]
    except IndexError:
        print(__doc__)
        sys.exit(-1)

    seq_info = dict()
    fah = helper._open_file(fa_name)
    try:
        for rec in SeqIO.parse(fah, "fasta"):
            seq_len = len(rec.seq)
            seq_info[rec.id] = seq_len
            print('%s %s' % (rec.id, seq_len))
    finally:
        fah.close()

    print('')
    # the doubled space reproduces the historical "print a, b" output
    print('Number of FASTA entries:  %s' % len(seq_info))

    # An empty file has no longest/shortest entry and would divide by zero.
    if seq_info:
        # max()/min() return the first extreme item in iteration order,
        # the same entry a stable sort would have put first.
        long_one = max(seq_info.items(), key=itemgetter(1))
        print('Long contig length (bp):  %s %s' % (long_one[0], long_one[1]))

        short_one = min(seq_info.items(), key=itemgetter(1))
        print('Short contig length (bp):  %s %s' % (short_one[0], short_one[1]))

        flength = sum(seq_info.values())
        # floor division keeps the integer average of the original
        # Python 2 "/" on two ints
        print('Average length of FASTA contig (bp):  %s'
              % (flength // len(seq_info)))
    print('')