# Alignment-pipeline driver chunk: parse a key=value config file, derive the
# intermediate filenames, split the paired fastq inputs, and launch BWA
# alignments.
# NOTE(review): `conf`, `base`, `numseq`, `prefix`, `seqleft`, `seqright`,
# `rgid`, `rgsm`, `rglb`, `rgpl`, `rgpu` are used as dict keys but are not
# defined in this chunk -- presumably module-level constants holding the
# config key strings; confirm against the rest of the file.
opt = dict()
with open(conf) as handle:
    for line in handle.readlines():
        # Skip "//" comment lines and blank lines.
        # NOTE(review): `is not ""` tests object identity, not equality -- it
        # only happens to work via CPython string interning and should be
        # `!= ""`; left unchanged in this documentation-only pass.
        if not line.startswith("//") and line.rstrip() is not "":
            val = line.split("=")
            # key is everything before the first '='; value is the second
            # field with its trailing newline stripped
            opt[val[0]] = val[1].rstrip()

# filename intermediates
saileft = opt[base] + ".left.sai"
sairight = opt[base] + ".right.sai"
samfile = opt[base] + ".sam"
bamclean = opt[base] + ".clean.bam"
bamfile = opt[base] + ".bam"
bamindex = opt[base] + ".bai"

''' create objects from utility libraries '''
seq_obj = seq_util.SplitFile(opt[numseq])
bwa_obj = align_util.BWA(opt[prefix])
st_obj = samtool_util.Use()

''' split large fastq file into multiple smaller fastq files '''
left_files = seq_obj.split_fastq(opt[seqleft])
right_files = seq_obj.split_fastq(opt[seqright])

''' do alignments '''
# Read-group header line handed to BWA (ID/SM/LB/PL/PU fields, tab-separated).
rg = "@RG\tID:" + opt[rgid] + "\tSM:" + opt[rgsm] + "\tLB:" + opt[
    rglb] + "\tPL:" + opt[rgpl] + "\tPU:" + opt[rgpu]
samfiles = bwa_obj.multifastq_call(left_files, right_files, rg, opt[base])

''' groom sam files () '''
cmd = "groom_sam.py"
bamfiles = list()
jobids = list()
# NOTE(review): the body of this loop lies beyond this chunk -- truncated here.
for eachfile in samfiles:
#!/usr/bin/env python import seq_util import sys try: fastq = sys.argv[1] num_seq = sys.argv[2] except IOError as (errno,strerror): print "usage: break_fastq.py fastq_file number_of_seqs_per_file" seq_obj = seq_util.SplitFile(num_seq) files = seq_obj.split_fastq(fastq) for produce in files: print produce
import seq_util import sys lineperseq = 4 try: fastq = sys.argv[1] num_seq = sys.argv[2] # buffer length, default 100Mb try: buff_len = int(sys.argv[3]) except: buff_len = 100000000 # option to clobber file or leave there and skip try: noclobber = bool(sys.argv[4]) # if defined, translates to true except: noclobber = False except IOError as (errno, strerror): print "usage: break_fastq.py fastq_file number_of_seqs_per_file buffer_length_bytes clobber_file (optional) " seq_obj = seq_util.SplitFile(num_seq, lineperseq) files = seq_obj.split_fastq_buffer(fastq, buff_len, noclobber) for produce in files: print produce