def run_spades(sample_info, input_seqdir, output_maindir):
    """Write and submit a SPAdes assembly job script, one command per sample.

    For every sample whose cleaned forward and reverse FASTQ files are both
    readable, a ``spades.py`` command is appended to ``s01_spades_script.sh``
    and the sample is flagged ``'Y'`` at index 1; otherwise it is flagged
    ``'N'``. A non-empty script is handed to ``util.run_sbatch_script`` and
    the outcome is evaluated by ``check_output``.

    Args:
        sample_info: Mapping of sample id to a mutable status sequence;
            index 1 is updated in place.
        input_seqdir: Directory holding ``<id>_FW_CLEAN.fastq`` and
            ``<id>_RV_CLEAN.fastq``.
        output_maindir: Directory receiving the script file and the
            ``r01_spadesAssembly`` output sub-directory.

    Returns:
        The value returned by ``check_output``.

    Raises:
        SystemExit: If the output directory cannot be created, or if no
            sample produced a command (empty script).
    """
    logger.info("Step: Running Spades assembly")
    output_subdir = os.path.join(output_maindir, "r01_spadesAssembly")
    if not os.path.isdir(output_subdir):
        try:
            os.mkdir(output_subdir)
        except EnvironmentError:
            sys.exit("CRITICAL ERROR: Unable to create the directory")

    script_file = os.path.join(output_maindir, "s01_spades_script.sh")
    spades_kmers = config.spades_kmers
    spades_opts = config.spades_opts

    # The context manager guarantees the script is flushed and closed even
    # if building a command raises.
    with open(script_file, 'w') as script:
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for sample_id, data in sample_info.items():
            logger.debug("data = {}".format(str(data)))
            fw_readfile = os.path.join(input_seqdir,
                                       str(sample_id) + "_FW_CLEAN.fastq")
            rv_readfile = os.path.join(input_seqdir,
                                       str(sample_id) + "_RV_CLEAN.fastq")
            readable = (os.access(fw_readfile, os.R_OK)
                        and os.access(rv_readfile, os.R_OK))
            if not readable:
                # Plain error: there is no active exception to attach, and
                # either of the two files may be the one at fault.
                logger.error(
                    "Either file {} or {} is missing or is not readable"
                    .format(fw_readfile, rv_readfile))
                sample_info[sample_id][1] = "N"
                continue

            spades_dir_final = os.path.join(output_subdir, str(sample_id))
            cmd = "spades.py -k {} {} --pe2-1 {} --pe2-2 {} -o {}".format(
                spades_kmers, spades_opts, fw_readfile, rv_readfile,
                spades_dir_final)
            logger.debug("cmd = {}".format(str(cmd)))
            sample_info[sample_id][1] = 'Y'
            script.write(cmd + "\n")

    if os.stat(script_file).st_size == 0:
        logger.error(
            "The script file {} is empty. Exiting....".format(script_file))
        sys.exit(1)

    sbatch_params = '--mem=30000M --cpus-per-task=8'
    util.run_sbatch_script(script_file, 1, output_maindir, sbatch_params)
    sample_info = check_output(sample_info, output_subdir, 'spades')
    logger.info("sample_info = {}".format(str(sample_info)))
    return sample_info
def fix_paired_reads(sample_info, input_seqdir, output_maindir, step_num,
                     temp_dir):
    """Write and submit a ``repair.sh`` job script that re-pairs reads.

    For every sample flagged ``'Y'`` at index 4, a ``repair.sh`` command is
    written to ``s05_fix_pairedend.sh`` and the sample is flagged ``'Y'`` at
    index 5; all other samples are flagged ``'N'`` at index 5. A non-empty
    script is handed to ``util.run_sbatch_script`` and the outcome is
    evaluated by ``check_output``.

    Args:
        sample_info: Mapping of sample id to a mutable status sequence;
            index 4 is read and index 5 is updated in place.
        input_seqdir: Directory containing the per-sample
            ``<id>_<suffix>_TRIMMED_PAIRED_FILTERED/<id>_clean.fq`` inputs.
        output_maindir: Directory receiving the script file and the
            ``r05_clean_reads`` output sub-directory.
        step_num: Step identifier forwarded to ``check_output``.
        temp_dir: Forwarded unchanged to ``util.run_sbatch_script``.

    Returns:
        The value returned by ``check_output``.

    Raises:
        SystemExit: If the output directory cannot be created, or if no
            sample produced a command (empty script).
    """
    logger.info("Step 5: Fix paired end reads")
    f_suffix = config_opts.general['file_suffix_fw']
    r_suffix = config_opts.general['file_suffix_rv']
    output_subdir = os.path.join(output_maindir, "r05_clean_reads")
    script_file = os.path.join(output_maindir, "s05_fix_pairedend.sh")
    if not os.path.isdir(output_subdir):
        try:
            os.mkdir(output_subdir)
        except EnvironmentError:
            sys.exit("CRITICAL ERROR: Unable to create the directory")

    logger.debug("Step 5 sample info =>{}".format(str(sample_info)))

    # The context manager closes the script even if a command fails to build.
    with open(script_file, 'w') as script:
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for sample_id, data in sample_info.items():
            if data[4] != 'Y':
                sample_info[sample_id][5] = 'N'
                continue

            sid = str(sample_id)
            fwin = os.path.join(
                input_seqdir,
                "{}_{}_TRIMMED_PAIRED_FILTERED".format(sid, str(f_suffix)),
                sid + "_clean.fq")
            rvin = os.path.join(
                input_seqdir,
                "{}_{}_TRIMMED_PAIRED_FILTERED".format(sid, str(r_suffix)),
                sid + "_clean.fq")
            fwout = os.path.join(
                output_subdir,
                "{}_{}_CLEAN.fastq".format(sid, str(f_suffix)))
            rvout = os.path.join(
                output_subdir,
                "{}_{}_CLEAN.fastq".format(sid, str(r_suffix)))
            singletons = os.path.join(
                output_subdir, "{}_SINGLETON_CLEAN.fastq".format(sid))
            cmd = ("repair.sh ow=t in={in_fw} in2={in_rv} out={out_fw} "
                   "out2={out_rv} outs={out_sl} repair=t").format(
                       in_fw=fwin, in_rv=rvin, out_fw=fwout, out_rv=rvout,
                       out_sl=singletons)
            logger.debug("cmd = {}".format(str(cmd)))
            sample_info[sample_id][5] = 'Y'
            script.write(cmd + '\n')

    if os.stat(script_file).st_size == 0:
        # No active exception here, so no exc_info is attached.
        logger.error(
            "The script file {} is empty. Exiting....".format(script_file))
        sys.exit(1)

    sbatch_params = '--mem=30000M'
    util.run_sbatch_script(script_file, 1, output_maindir, sbatch_params,
                           temp_dir)
    sample_info = check_output(sample_info, output_subdir, step_num)
    logger.debug("sample info = {}".format(str(sample_info)))
    return sample_info
def filter_human_reads(sample_info, input_seqdir, output_maindir, step_num,
                       temp_dir):
    """Write and submit a deconseq job script that filters human reads.

    For every sample flagged ``'Y'`` at index 3, one script line is written
    to ``s04_run_deconseq.sh`` that runs deconseq on the forward file and
    then on the reverse file. Samples not flagged ``'Y'`` are marked ``'N'``
    at index 4. A non-empty script is handed to ``util.run_sbatch_script``
    and the outcome is evaluated by ``check_output``.

    Args:
        sample_info: Mapping of sample id to a mutable status sequence;
            index 3 is read and index 4 is set to ``'N'`` for skipped samples.
        input_seqdir: Directory containing the
            ``<id>_<suffix>_TRIMMED_PAIRED.fastq`` inputs.
        output_maindir: Directory receiving the script file and the
            ``r04_filtered_reads`` output sub-directory.
        step_num: Step identifier forwarded to ``check_output``.
        temp_dir: Forwarded unchanged to ``util.run_sbatch_script``.

    Returns:
        The value returned by ``check_output``.

    Raises:
        SystemExit: If no deconseq path is configured, the output directory
            cannot be created, or no sample produced a command.
    """
    logger.info("Step 4: Filter human reads using deconseq")
    f_suffix = config_opts.general['file_suffix_fw']
    r_suffix = config_opts.general['file_suffix_rv']
    output_subdir = os.path.join(output_maindir, "r04_filtered_reads")
    script_file = os.path.join(output_maindir, "s04_run_deconseq.sh")

    deconseq_path = config_opts.deconseq['deconseq_path']
    if not deconseq_path:
        # The config value is the only source for the path; without it every
        # generated command would start with an empty program name.
        logger.error("Deconseq path is not set in the config file "
                     "(deconseq section, option deconseq_path)")
        sys.exit(1)
    deconseq_db = config_opts.deconseq['deconseq_db']

    if not os.path.isdir(output_subdir):
        try:
            os.mkdir(output_subdir)
        except EnvironmentError:
            sys.exit("CRITICAL ERROR: Unable to create the directory")

    # The context manager closes the script even if a command fails to build.
    with open(script_file, 'w') as script:
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for sample_id, data in sample_info.items():
            if data[3] != 'Y':
                sample_info[sample_id][4] = 'N'
                continue

            sid = str(sample_id)
            fwin = os.path.join(
                input_seqdir,
                "{}_{}_TRIMMED_PAIRED.fastq".format(sid, str(f_suffix)))
            rvin = os.path.join(
                input_seqdir,
                "{}_{}_TRIMMED_PAIRED.fastq".format(sid, str(r_suffix)))
            fwout = os.path.join(
                output_subdir,
                "{}_{}_TRIMMED_PAIRED_FILTERED".format(sid, str(f_suffix)))
            rvout = os.path.join(
                output_subdir,
                "{}_{}_TRIMMED_PAIRED_FILTERED".format(sid, str(r_suffix)))
            # Forward and reverse files are filtered by two sequential
            # deconseq invocations on the same script line.
            one_run = "{} -f {} -o {} -dbs {} -id {}"
            cmd = " ; ".join([
                one_run.format(deconseq_path, fwin, fwout, deconseq_db, sid),
                one_run.format(deconseq_path, rvin, rvout, deconseq_db, sid),
            ])
            logger.debug("cmd = {}".format(str(cmd)))
            script.write(cmd + '\n')

    if os.stat(script_file).st_size == 0:
        # No active exception here, so no exc_info is attached.
        logger.error(
            "The script file {} is empty. Exiting....".format(script_file))
        sys.exit(1)

    sbatch_params = '--mem=60000M'
    util.run_sbatch_script(script_file, 1, output_maindir, sbatch_params,
                           temp_dir)
    sample_info = check_output(sample_info, output_subdir, step_num)
    logger.debug("sample info = {}".format(str(sample_info)))
    return sample_info
def trim_reads(sample_info, input_seqdir, output_maindir, step_num, temp_dir):
    """Write and submit a Trimmomatic paired-end trimming job script.

    One ``trimmomatic PE`` command per sample is written to
    ``s03_run_trimmomatic.sh``. The adapter FASTA is looked up under
    ``<trimmomatic home>/share/adapters`` according to the configured
    ``adapt`` value (``nextera`` or ``truseq``). A non-empty script is handed
    to ``util.run_sbatch_script`` and the outcome is evaluated by
    ``check_output``.

    Args:
        sample_info: Mapping keyed by sample id; every sample gets a command.
        input_seqdir: Directory containing the ``<id>_<suffix>.fastq`` inputs.
        output_maindir: Directory receiving the script file and the
            ``r03_trimmed_reads`` output sub-directory.
        step_num: Step identifier forwarded to ``check_output``.
        temp_dir: Forwarded unchanged to ``util.run_sbatch_script``.

    Returns:
        The value returned by ``check_output``.

    Raises:
        SystemExit: If trimmomatic is not found, the adapter setting is
            missing or unknown, the output directory cannot be created, or
            the script ends up empty.
    """
    logger.info("Step 3: Run Trimmomatic")
    f_suffix = config_opts.general['file_suffix_fw']
    r_suffix = config_opts.general['file_suffix_rv']
    output_subdir = os.path.join(output_maindir, "r03_trimmed_reads")
    script_file = os.path.join(output_maindir, "s03_run_trimmomatic.sh")

    # Test the lookup result before str(): str(None) is the truthy "None",
    # which would hide a missing executable.
    trim_exe = util.find_exe_in_path("trimmomatic")
    if not trim_exe:
        logger.error("Trimmomatic not installated or in the PATH environemnt")
        sys.exit(1)
    trim_path = os.path.dirname(os.path.realpath(str(trim_exe)))
    trim_home = os.path.dirname(os.path.realpath(trim_path))
    logger.debug("trimmomatic home dir = {}".format(trim_home))

    adapter_files = {
        "nextera": 'NexteraPE-PE.fa',
        "truseq": 'TruSeq3-PE-2.fa',
    }
    adapt = config_opts.trimmomatic['adapt']
    if not adapt:
        logger.error(
            "Invalid trimmommatic adapter information provided! "
            "Modify the adapter information under trimmomatic section in "
            "td_config.cfg file. Available options = nextera|truseq")
        sys.exit(1)
    logger.debug('trimmomatic adapter information = {}'.format(adapt))
    if adapt not in adapter_files:
        logger.error(
            "Invalid trimmommatic adapter information provided! Exiting..")
        sys.exit(1)
    adapt_path = str(
        os.path.join(trim_home, "share", "adapters", adapter_files[adapt]))
    logger.debug("trimmomatic adapt = {}".format(str(adapt_path)))

    leading = config_opts.trimmomatic['leading']
    trailing = config_opts.trimmomatic['trailing']
    illuminaclip = config_opts.trimmomatic['illuminaclip']
    slidingwindow = config_opts.trimmomatic['sliding_window']
    minlen = config_opts.trimmomatic['minlen']

    if not os.path.isdir(output_subdir):
        try:
            os.mkdir(output_subdir)
        except EnvironmentError:
            sys.exit("CRITICAL ERROR: Unable to create the directory")

    logger.debug("sample_info = {}".format(str(sample_info)))

    # Trimming parameters are the same for every sample; build them once.
    trim_steps = [
        "ILLUMINACLIP:" + str(adapt_path) + ":" + str(illuminaclip),
        "LEADING:" + str(leading),
        "TRAILING:" + str(trailing),
        "SLIDINGWINDOW:" + str(slidingwindow),
        "MINLEN:" + str(minlen),
    ]

    # The context manager closes the script even if a command fails to build.
    with open(script_file, 'w') as script:
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for sample_id, data in sample_info.items():
            logger.debug("data = {}".format(str(data)))
            sid = str(sample_id)
            fwfile = os.path.join(
                input_seqdir, "{}_{}.fastq".format(sid, str(f_suffix)))
            rvfile = os.path.join(
                input_seqdir, "{}_{}.fastq".format(sid, str(r_suffix)))
            paired_fw = os.path.join(
                output_subdir,
                "{}_{}_TRIMMED_PAIRED.fastq".format(sid, str(f_suffix)))
            paired_rv = os.path.join(
                output_subdir,
                "{}_{}_TRIMMED_PAIRED.fastq".format(sid, str(r_suffix)))
            unpaired_fw = os.path.join(
                output_subdir,
                "{}_{}_TRIMMED_UNPAIRED.fastq".format(sid, str(f_suffix)))
            unpaired_rv = os.path.join(
                output_subdir,
                "{}_{}_TRIMMED_UNPAIRED.fastq".format(sid, str(r_suffix)))
            # JAVA_ARGS sets the amount of memory required (set to 4G).
            cmd = " ".join(
                ['JAVA_ARGS="-Xmx4096m"', "trimmomatic", "PE", "-phred33",
                 fwfile, rvfile, paired_fw, unpaired_fw, paired_rv,
                 unpaired_rv] + trim_steps)
            logger.debug("cmd = {}".format(str(cmd)))
            script.write(cmd + '\n')

    if os.stat(script_file).st_size == 0:
        # No active exception here, so no exc_info is attached.
        logger.error(
            "The script file {} is empty. Exiting....".format(script_file))
        sys.exit(1)

    sbatch_params = '--mem=16000M'
    util.run_sbatch_script(script_file, 1, output_maindir, sbatch_params,
                           temp_dir)
    sample_info = check_output(sample_info, output_subdir, step_num)
    logger.debug("sample info = {}".format(str(sample_info)))
    return sample_info
def quality_check(sample_info, input_seqdir, output_maindir, step_num,
                  temp_dir):
    """Run fastqc on every sample, then summarise the reports with multiqc.

    One ``fastqc`` command per sample is written to
    ``s0<step_num>-1_run_fastqc.sh`` and submitted through
    ``util.run_sbatch_script``. When that job reports ``'SUCCESS'``, a
    ``multiqc`` command is written to ``s0<step_num>-2_run_multiqc.sh``,
    submitted the same way, and the outcome is evaluated by ``check_output``.

    Args:
        sample_info: Mapping keyed by sample id; every sample gets a command.
        input_seqdir: Directory containing the ``<id>_*.fastq`` inputs.
        output_maindir: Directory receiving the script files and the
            ``r0<step_num>_fastqc_output`` sub-directory.
        step_num: Step number used in file names and forwarded to
            ``check_output``.
        temp_dir: Forwarded unchanged to ``util.run_sbatch_script``.

    Returns:
        The value returned by ``check_output``.

    Raises:
        SystemExit: If the output directory cannot be created, the fastqc
            script is empty, or the fastqc job does not report ``'SUCCESS'``.
    """
    step = str(step_num)
    logger.info("Step: " + step + " Quality check using fastqc")
    output_subdir = os.path.join(output_maindir,
                                 "r0" + step + "_fastqc_output")
    script_file = os.path.join(output_maindir,
                               "s0" + step + "-1_run_fastqc.sh")
    if not os.path.isdir(output_subdir):
        try:
            os.mkdir(output_subdir)
        except EnvironmentError:
            sys.exit("CRITICAL ERROR: Unable to create the directory")

    # The context manager closes the script even if a command fails to build.
    with open(script_file, 'w') as script:
        for sample_id in sample_info:
            # str() keeps non-string sample ids from raising TypeError.
            pattern = os.path.join(input_seqdir,
                                   str(sample_id) + "_*.fastq")
            cmd = "fastqc -o %s %s" % (str(output_subdir), str(pattern))
            logger.debug("cmd = {}".format(str(cmd)))
            script.write(cmd + '\n')

    if os.stat(script_file).st_size == 0:
        # No active exception here, so no exc_info is attached.
        logger.error(
            "The script file {} is empty. Exiting....".format(script_file))
        sys.exit(1)

    sbatch_params = '--mem=16000M '
    job_status = util.run_sbatch_script(script_file, 1, output_maindir,
                                        sbatch_params, temp_dir)
    logger.info("job status = {}".format(str(job_status)))
    if job_status != 'SUCCESS':
        # Plain error: there is no active exception to attach.
        logger.error('Error occurred at quality check step')
        sys.exit(1)

    cmd2 = "multiqc --interactive -m fastqc -f -o {} {}".format(
        str(os.path.join(output_subdir, "zz_multiqc")), str(output_subdir))
    logger.debug("cmd2 = {}".format(str(cmd2)))
    script_file2 = os.path.join(output_maindir,
                                "s0" + step + "-2_run_multiqc.sh")
    with open(script_file2, 'w') as script2:
        script2.write(cmd2 + '\n')

    sbatch_params = '--mem=16000M '
    cmd_status = util.run_sbatch_script(script_file2, 1, output_maindir,
                                        sbatch_params, temp_dir)
    logger.debug("cmd_status = {}".format(str(cmd_status)))
    sample_info = check_output(sample_info, output_subdir, step_num)
    logger.debug("sample info = {}".format(str(sample_info)))
    return sample_info
#cnts=$(echo $(cat $i | wc -l)/4 | bc); echo -e "${i}\t${cnts}" sample_info[id][1] = 'Y' fh1.write(cmd + "\n") else: logger.exception( "Either file {} is missing or is not readable".format( rv_readfile)) sample_info[id][1] = "N" continue fh1.close() if os.stat(script_file).st_size > 0: sbatch_params = '--mem=16000M' job_status = util.run_sbatch_script(script_file, 10, output_maindir, sbatch_params, temp_dir) logger.info("job status = {}".format(str(job_status))) #sample_info = check_output(sample_info, output_subdir, step_num) #pass else: logger.error( "The script file {} is empty. Exiting....".format(script_file), exc_info=True) sys.exit(1) #if len(missing_ids) > 0: # logger.info("missing_ids = " + str(missing_ids)) logger.debug("sample_info = {}".format(str(sample_info))) return sample_info
def run_velvet(sample_info, input_seqdir, output_maindir):
    """Write and submit a VelvetOptimiser job script, one command per sample.

    For every sample whose cleaned forward and reverse FASTQ files are both
    readable, a ``VelvetOptimiser.pl`` command is appended to
    ``s02_velvet_script.sh`` and the sample is flagged ``'Y'`` at index 1;
    otherwise it is flagged ``'N'``. A non-empty script is handed to
    ``util.run_sbatch_script`` and the outcome is evaluated by
    ``check_output``.

    Args:
        sample_info: Mapping of sample id to a mutable status sequence;
            index 1 is updated in place.
        input_seqdir: Directory holding ``<id>_FW_CLEAN.fastq`` and
            ``<id>_RV_CLEAN.fastq``.
        output_maindir: Directory receiving the script file and the
            ``r02_velvetAssembly`` output sub-directory.

    Returns:
        The value returned by ``check_output``.

    Raises:
        SystemExit: If the output directory cannot be created, or if no
            sample produced a command (empty script).
    """
    logger.info("Step: Running velvet assembly")
    output_subdir = os.path.join(output_maindir, "r02_velvetAssembly")
    if not os.path.isdir(output_subdir):
        try:
            os.mkdir(output_subdir)
        except EnvironmentError:
            sys.exit("CRITICAL ERROR: Unable to create the directory")

    script_file = os.path.join(output_maindir, "s02_velvet_script.sh")
    velvet_kmer_start = config.velvet_kmer_start
    velvet_kmer_end = config.velvet_kmer_end
    # NOTE(review): config.velvet_kmer_step is not passed to VelvetOptimiser
    # here; confirm whether a k-mer step option is wanted.
    velvet_opts = '-t 8 --verbose'

    # The context manager guarantees the script is flushed and closed even
    # if building a command raises.
    with open(script_file, 'w') as script:
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for sample_id, data in sample_info.items():
            logger.debug("data = {}".format(str(data)))
            fw_readfile = os.path.join(input_seqdir,
                                       str(sample_id) + "_FW_CLEAN.fastq")
            rv_readfile = os.path.join(input_seqdir,
                                       str(sample_id) + "_RV_CLEAN.fastq")
            readable = (os.access(fw_readfile, os.R_OK)
                        and os.access(rv_readfile, os.R_OK))
            if not readable:
                # Plain error: there is no active exception to attach, and
                # either of the two files may be the one at fault.
                logger.error(
                    "Either file {} or {} is missing or is not readable"
                    .format(fw_readfile, rv_readfile))
                sample_info[sample_id][1] = "N"
                continue

            velvet_dir_final = os.path.join(output_subdir, str(sample_id))
            velvet_prefix = str(config.velvet_prefix + "_" + str(sample_id))
            cmd = ("VelvetOptimiser.pl -s {} -e {} "
                   "-f '-fastq -shortPaired -separate {} {}' "
                   "{} -d {} -p {}").format(
                       velvet_kmer_start, velvet_kmer_end, fw_readfile,
                       rv_readfile, velvet_opts, velvet_dir_final,
                       velvet_prefix)
            logger.debug("cmd = {}".format(str(cmd)))
            sample_info[sample_id][1] = 'Y'
            script.write(cmd + "\n")

    if os.stat(script_file).st_size == 0:
        logger.error(
            "The script file {} is empty. Exiting....".format(script_file))
        sys.exit(1)

    sbatch_params = '--cpus-per-task=8 --mem=30000M'
    util.run_sbatch_script(script_file, 1, output_maindir, sbatch_params)
    sample_info = check_output(sample_info, output_subdir, 'velvet')
    logger.info("sample_info = {}".format(str(sample_info)))
    return sample_info