def _bowtie_command(template, bowtie_path, out_dir, fname, index,
                    reads_dir, reads_name, sam_dir, sam_name, file_log):
    """Fill one bowtie command template for a single file and alignment stage."""
    return template % (
        bowtie_path,
        out_dir, fname,          # --un  <out_dir><fname>_nomatch.fastq
        out_dir, fname,          # --max <out_dir><fname>_multi.fastq
        out_dir, fname,          # --al  <out_dir><fname>_match.fastq
        index,
        reads_dir, reads_name,   # reads to align
        sam_dir, sam_name,       # SAM output
        file_log, file_log)      # stdout and stderr go to the same log file


def run_align(inputs, paths_in, paths_out):  # all arguments = dict
    """Bowtie align.

    Builds four bowtie shell commands per file (ladder, tRNA, rRNA, then
    chromosome index) and hands each stage's command list to
    ``ribo_util.subprocess_wf``. Every stage after the first reads the
    ``_nomatch.fastq`` file written by the previous stage.

    Args:
        inputs: dict read for 'run_bowtie', 'files' and 'cores'. Files whose
            '-trimmed.fastq' is missing are removed from inputs['files']
            in place.
        paths_in: dict read for 'path_bowtie' and the 'btindex_*' entries.
        paths_out: dict read for 'path_filter', 'path_ladder', 'path_trna',
            'path_rrna', 'path_chr', 'path_temp' and 'path_log'. Values are
            used as string prefixes, so they are expected to end with a
            path separator.

    Returns:
        None.
    """
    run = inputs['run_bowtie']
    files = inputs['files']
    # Forwarded to ribo_util.subprocess_wf; each bowtie command is itself
    # launched with a hard-coded "-p 2".
    threads = inputs['cores']

    if not files:
        print("There are no files")
        return

    # Both templates are identical apart from the stderr redirection, so they
    # are built once, outside the per-file loop.
    template = (
        '%s -v 2 -y -m 1 -a --best --strata -S -p 2 --un '
        '%s%s_nomatch.fastq --max %s%s_multi.fastq --al %s%s_match.fastq %s '
        '%s%s %s%s 1>>%s '
    )
    bowtie_1 = template + '2>%s'   # first stage: stderr rewrites the log
    bowtie = template + '2>>%s'    # later stages: stderr appends to the log

    ladder = []
    tRNA = []
    rRNA = []
    chromosome = []

    # Iterate over a snapshot: inputs['files'] is mutated below, and removing
    # from a list while iterating it skips the element after each removal.
    for fname in list(files):
        if run != 'yes':
            # Alignment is switched off: only report whether output exists.
            if os.path.exists(paths_out['path_chr'] + fname + '_match.SAM'):
                print(fname + " has been aligned")
            else:
                print("ERROR: " + fname + " has not been aligned, change run settings")
            continue

        if not os.path.exists(paths_out['path_filter'] + fname + '-trimmed.fastq'):
            print("ERROR: " + fname + " has no filtered file, has been removed from analysis")
            inputs['files'].remove(fname)
            continue

        file_log = paths_out['path_log'] + fname + '_bowtie'
        unmatched = fname + '_nomatch.fastq'

        # first, align to ladder index to subtract
        ladder.append(_bowtie_command(
            bowtie_1, paths_in['path_bowtie'],
            paths_out['path_ladder'], fname, paths_in['btindex_ladder'],
            paths_out['path_filter'], fname + '-trimmed.fastq',
            paths_out['path_temp'], fname + '_ladder_match.SAM',
            file_log))

        # second, align to tRNA index to subtract
        tRNA.append(_bowtie_command(
            bowtie, paths_in['path_bowtie'],
            paths_out['path_trna'], fname, paths_in['btindex_trna'],
            paths_out['path_ladder'], unmatched,
            paths_out['path_temp'], fname + '_tRNA_match.SAM',
            file_log))

        # third, align to the rRNA index
        rRNA.append(_bowtie_command(
            bowtie, paths_in['path_bowtie'],
            paths_out['path_rrna'], fname, paths_in['btindex_rrna'],
            paths_out['path_trna'], unmatched,
            paths_out['path_temp'], fname + '_rRNA_match.SAM',
            file_log))

        # then align to the chr index; this SAM is kept in path_chr
        chromosome.append(_bowtie_command(
            bowtie, paths_in['path_bowtie'],
            paths_out['path_chr'], fname, paths_in['btindex_chr'],
            paths_out['path_rrna'], unmatched,
            paths_out['path_chr'], fname + '_match.SAM',
            file_log))

    print("\n------ALIGN------")
    print('\nFiles to align: ' + ', '.join(files))
    print("\n\tStarted Bowtie alignment at " + str(datetime.now()))

    # Stages run strictly in order because each consumes the previous output.
    ribo_util.subprocess_wf(ladder, threads)
    print("\tFinished ladder removal at " + str(datetime.now()))
    ribo_util.subprocess_wf(tRNA, threads)
    print("\tFinished tRNA removal at " + str(datetime.now()))
    ribo_util.subprocess_wf(rRNA, threads)
    print("\tFinished rRNA removal at " + str(datetime.now()))
    ribo_util.subprocess_wf(chromosome, threads)
    print("\tFinished chromosome alignment at " + str(datetime.now()))
    print("\tCOMPLETED ALIGNING")

    return
def run_filter(inputs, paths_in, paths_out):  # all arguments = dict
    """Filter reads using skewer.

    Builds one skewer shell command per input file, records the filter
    settings through ``ribo_util.analysis_log`` and runs the commands with
    ``ribo_util.subprocess_wf``.

    Args:
        inputs: dict read for 'files', 'run_filtering', 'minlength',
            'maxlength', 'phred_cutoff', 'linker' and 'threads'. Files with
            no FASTQ input are removed from inputs['files'] in place.
        paths_in: dict read for 'path_fastq' (used as a string prefix).
        paths_out: dict read for 'path_filter' and 'path_log' (used as
            string prefixes).

    Returns:
        The (possibly mutated) ``inputs`` dict, or None when
        inputs['files'] is empty.
    """
    files = inputs['files']
    run = inputs['run_filtering']
    minlength = inputs['minlength']
    maxlength = inputs['maxlength']
    phred_cutoff = inputs['phred_cutoff']
    linker = inputs['linker']
    # Forwarded to skewer's -t option; the commands themselves are submitted
    # with subprocess_wf(filtering, 1).
    threads = inputs['threads']

    filtering = []
    log_data = {}

    if not files:
        print("There are no files")
        return

    # Iterate over a snapshot: inputs['files'] is mutated below, and removing
    # from a list while iterating it skips the element after each removal.
    for fname in list(files):
        file_in = paths_in['path_fastq'] + fname
        file_out = paths_out['path_filter'] + fname
        file_log = paths_out['path_log'] + fname + '_filter'

        if run != 'yes':
            # Filtering is switched off: only report whether output exists.
            if os.path.exists(file_out + '-trimmed.fastq'):
                print(fname + " has been filtered")
            else:
                print("ERROR: " + fname + " has not been filtered, change run setting")
            continue

        if not os.path.exists(file_in):
            print("ERROR: " + fname + " has no FASTQ file, has been removed from analysis")
            inputs['files'].remove(fname)
            continue

        command_to_run = 'skewer -x %s -Q %d -l %d -L %d -o %s --quiet -t %d %s 1>>%s 2>%s' % (
            linker, phred_cutoff, minlength, maxlength,
            file_out, threads, file_in, file_log, file_log)

        # Add filter parameters to log:
        log_data['settings'] = {
            'linker': linker,
            'phred_cutoff': phred_cutoff,
            'minlength': minlength,
            'maxlength': maxlength,
        }
        # NOTE(review): the settings are logged under 'ribo_density' rather
        # than a filter-specific label -- confirm this is intended.
        log_function = 'ribo_density'
        ribo_util.analysis_log(fname, log_function, log_data, paths_in, paths_out)

        filtering.append(command_to_run)

    print("-----FILTER-----")
    print('\nFiles to filter: ' + ', '.join(files))
    print("Filter parameters are: \nmin length = %s \nmax length = %s \nphred cutoff = %s " % (
        minlength, maxlength, phred_cutoff))
    print("\n\tStarted filtering at " + str(datetime.now()))

    ribo_util.subprocess_wf(filtering, 1)

    print("\tFinished filtering at " + str(datetime.now()))
    print("\tCOMPLETED FILTERING")

    return inputs
def run_filter(inputs, paths_in, paths_out):  # all arguments = dict
    """Filter reads using skewer, with optional UMI-aware length limits.

    Builds one skewer shell command per input file, records the filter
    settings through ``ribo_util.analysis_log`` and runs the commands with
    ``ribo_util.subprocess_wf``. When inputs['run_filter_UMI'] is 'yes' the
    length limits are widened by 10 and the skewer output prefix gets a
    '_UMI' suffix.

    Args:
        inputs: dict read for 'files', 'run_filtering', 'run_filter_UMI',
            'minlength', 'maxlength', 'phred_cutoff', 'linker' and
            'threads'. Files with no FASTQ input are removed from
            inputs['files'] in place.
        paths_in: dict read for 'path_fastq' (used as a string prefix).
        paths_out: dict read for 'path_filter' and 'path_log' (used as
            string prefixes).

    Returns:
        The (possibly mutated) ``inputs`` dict, or None when
        inputs['files'] is empty.
    """
    files = inputs['files']
    run = inputs['run_filtering']
    minlength = inputs['minlength']
    maxlength = inputs['maxlength']
    phred_cutoff = inputs['phred_cutoff']
    linker = inputs['linker']
    # Forwarded to skewer's -t option; the commands themselves are submitted
    # with subprocess_wf(filtering, 1).
    threads = inputs['threads']

    filtering = []
    log_data = {}

    # If using Unique Molecular Index (UMI) in library prep, skewer will not
    # remove the UMI, so it is removed manually afterwards. The skewer output
    # file carries a UMI suffix to identify it.
    if inputs['run_filter_UMI'] == 'yes':
        # UMI adds 10 nt to read
        minlength = minlength + 10
        maxlength = maxlength + 10
        UMI = '_UMI'
    else:
        UMI = ''

    # nothing to do when no file names were specified
    if not files:
        print("There are no files")
        return

    # Iterate over a snapshot: inputs['files'] is mutated below, and removing
    # from a list while iterating it skips the element after each removal.
    for fname in list(files):
        file_in = paths_in['path_fastq'] + fname
        file_out = paths_out['path_filter'] + fname + UMI
        file_log = paths_out['path_log'] + fname + '_filter'

        # if skewer filtering isn't needed, only report and skip
        if run != 'yes':
            if os.path.exists(file_out + '-trimmed.fastq'):
                print(fname + " has been filtered")
            else:
                print("ERROR: " + fname + " has not been filtered, change run setting")
            continue

        # report a missing input file, drop it, and continue to the next file
        if not os.path.exists(file_in):
            print("ERROR: " + fname + " has no FASTQ file, has been removed from analysis")
            inputs['files'].remove(fname)
            continue

        # make command string
        command_to_run = 'skewer -x %s -Q %d -l %d -L %d -o %s --quiet -t %d %s 1>>%s 2>%s' % (
            linker, phred_cutoff, minlength, maxlength,
            file_out, threads, file_in, file_log, file_log)

        # Add filter parameters to log (lengths include the UMI adjustment):
        log_data['settings'] = {
            'linker': linker,
            'phred_cutoff': phred_cutoff,
            'minlength': minlength,
            'maxlength': maxlength,
        }
        # NOTE(review): the settings are logged under 'ribo_density' rather
        # than a filter-specific label -- confirm this is intended.
        log_function = 'ribo_density'
        ribo_util.analysis_log(fname, log_function, log_data, paths_in, paths_out)

        filtering.append(command_to_run)

    # print start time and run skewer
    print("-----FILTER-----")
    print('\nFiles to filter: ' + ', '.join(files))
    print("Filter parameters are: \nmin length = %s \nmax length = %s \nphred cutoff = %s " % (
        minlength, maxlength, phred_cutoff))
    print("\n\tStarted filtering at " + str(datetime.now()))

    ribo_util.subprocess_wf(filtering, 1)

    print("\tFinished filtering at " + str(datetime.now()))
    print("\tCOMPLETED FILTERING")

    return inputs