def fastx_barcode_splitter(seqs_str, output_file_format,
                           fastx_barcode_splitter_cmd, preprocess_prov,
                           err_log, stat_log):
    '''
    Run the fastx barcode splitter over in-memory read data.

    seqs_str                   -- raw sequence data piped to the splitter's stdin
    output_file_format         -- unused here; kept for interface compatibility
    fastx_barcode_splitter_cmd -- shell command line to execute
    preprocess_prov            -- provenance list; the command is appended to it
    err_log, stat_log          -- paths of the error / statistics log files

    Returns (globaldict, preprocess_prov), where globaldict maps each
    barcode name to its sequence string, parsed from the splitter's
    stdout of the form "barcode1=>seqs|barcode2=>seqs|...".
    '''
    preprocess_prov.append(fastx_barcode_splitter_cmd)
    globaldict = {}
    # shell=True: the command arrives as a single shell string.
    P1 = Popen(fastx_barcode_splitter_cmd, stdin=PIPE, stdout=PIPE,
               stderr=PIPE, shell=True)
    try:
        std_out, std_err = P1.communicate(seqs_str)
        exit_code = P1.returncode
        yap_log.write_log(fastx_barcode_splitter_cmd, "", exit_code,
                          std_err, err_log, stat_log)
        # stdout format: "<barcode>=><sequences>|<barcode>=><sequences>|..."
        for entry in std_out.replace(" ", "").split("|"):
            if entry != '':
                splited_S = entry.split("=>")
                globaldict[splited_S[0]] = splited_S[1]
    except Exception as e:
        # BUG FIX: the original called the bare name write_data(), which is
        # undefined in this scope and raised NameError instead of logging.
        yap_file_io.write_data(str(e), err_log)
    # Trailing separators so consecutive log entries stay readable.
    yap_file_io.write_data("\n", err_log)
    yap_file_io.write_data("\n", stat_log)
    return globaldict, preprocess_prov
def run_fastq_screen(inp_files_list, fastq_screen_cmd):
    '''
    Run the fastq_screen command for one input pair and log the outcome.

    inp_files_list   -- [read1_path, read2_path, file_base_name]
    fastq_screen_cmd -- command template; the 'output_directory' and
                        'pipe1' placeholders are substituted before running
    Returns prov, a provenance list containing the executed command.
    '''
    prov = []
    file_base_name = inp_files_list[2]
    err_log = wd.err_log_path + "/" + file_base_name + "_fastqscreen_err.log"
    stat_log = wd.stat_log_path + "/" + file_base_name + "_fastqscreen_stat.log"
    fastq_screen_cmd += inp_files_list[0] + " " + inp_files_list[1] + " "
    fastq_screen_cmd = fastq_screen_cmd.replace(
        'output_directory',
        wd.workflow_output_path + "/" + file_base_name + "/" +
        "no_barcode_specified" + "/" + "preprocess_output")
    fastq_screen_cmd = fastq_screen_cmd.replace('pipe1', '')
    fastq_screen_cmd += " "
    str_out = "*" * 50 + "FASTQSCREEN STARTED" + "\t" + str(
        time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n"
    yap_file_io.write_data(str_out, err_log)
    yap_file_io.write_data(str_out, stat_log)
    # BUG FIX: shell='False' is a non-empty (truthy) string, so the shell
    # was actually being used; make the real intent explicit -- a single
    # command string requires shell=True.
    prm = Popen(fastq_screen_cmd, stderr=PIPE, shell=True)
    std_out, std_err = prm.communicate()
    exit_code = prm.returncode
    prov.append(fastq_screen_cmd)
    yap_log.write_log(fastq_screen_cmd, file_base_name, exit_code, std_err,
                      err_log, stat_log)
    str_out = "*" * 50 + "FASTQSCREEN FINISHED" + "\t" + str(
        time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n"
    yap_file_io.write_data(str_out, err_log)
    yap_file_io.write_data(str_out, stat_log)
    return prov
def fastx_barcode_splitter(seqs_str, output_file_format,
                           fastx_barcode_splitter_cmd, preprocess_prov,
                           err_log, stat_log):
    '''
    Run the fastx barcode splitter over in-memory read data.

    NOTE(review): this is a byte-for-byte duplicate of an earlier
    definition in this file and silently re-binds the name; consider
    removing one copy.

    Returns (globaldict, preprocess_prov), where globaldict maps each
    barcode name to its sequence string, parsed from the splitter's
    stdout of the form "barcode1=>seqs|barcode2=>seqs|...".
    '''
    preprocess_prov.append(fastx_barcode_splitter_cmd)
    globaldict = {}
    # shell=True: the command arrives as a single shell string.
    P1 = Popen(fastx_barcode_splitter_cmd, stdin=PIPE, stdout=PIPE,
               stderr=PIPE, shell=True)
    try:
        std_out, std_err = P1.communicate(seqs_str)
        exit_code = P1.returncode
        yap_log.write_log(fastx_barcode_splitter_cmd, "", exit_code,
                          std_err, err_log, stat_log)
        # stdout format: "<barcode>=><sequences>|<barcode>=><sequences>|..."
        for entry in std_out.replace(" ", "").split("|"):
            if entry != '':
                splited_S = entry.split("=>")
                globaldict[splited_S[0]] = splited_S[1]
    except Exception as e:
        # BUG FIX: the original called the bare name write_data(), which is
        # undefined in this scope and raised NameError instead of logging.
        yap_file_io.write_data(str(e), err_log)
    # Trailing separators so consecutive log entries stay readable.
    yap_file_io.write_data("\n", err_log)
    yap_file_io.write_data("\n", stat_log)
    return globaldict, preprocess_prov
def run_fastq_screen(inp_files_list, fastq_screen_cmd):
    '''
    Run the fastq_screen command for one input pair and log the outcome.

    NOTE(review): duplicate re-definition of an earlier function in this
    file; consider removing one copy.

    inp_files_list   -- [read1_path, read2_path, file_base_name]
    fastq_screen_cmd -- command template; the 'output_directory' and
                        'pipe1' placeholders are substituted before running
    Returns prov, a provenance list containing the executed command.
    '''
    prov = []
    file_base_name = inp_files_list[2]
    err_log = wd.err_log_path + "/" + file_base_name + "_fastqscreen_err.log"
    stat_log = wd.stat_log_path + "/" + file_base_name + "_fastqscreen_stat.log"
    fastq_screen_cmd += inp_files_list[0] + " " + inp_files_list[1] + " "
    fastq_screen_cmd = fastq_screen_cmd.replace(
        'output_directory',
        wd.workflow_output_path + "/" + file_base_name + "/" +
        "no_barcode_specified" + "/" + "preprocess_output")
    fastq_screen_cmd = fastq_screen_cmd.replace('pipe1', '')
    fastq_screen_cmd += " "
    str_out = "*" * 50 + "FASTQSCREEN STARTED" + "\t" + str(
        time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n"
    yap_file_io.write_data(str_out, err_log)
    yap_file_io.write_data(str_out, stat_log)
    # BUG FIX: shell='False' is a non-empty (truthy) string, so the shell
    # was actually being used; a single command string requires shell=True.
    prm = Popen(fastq_screen_cmd, stderr=PIPE, shell=True)
    std_out, std_err = prm.communicate()
    exit_code = prm.returncode
    prov.append(fastq_screen_cmd)
    yap_log.write_log(fastq_screen_cmd, file_base_name, exit_code, std_err,
                      err_log, stat_log)
    str_out = "*" * 50 + "FASTQSCREEN FINISHED" + "\t" + str(
        time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n"
    yap_file_io.write_data(str_out, err_log)
    yap_file_io.write_data(str_out, stat_log)
    return prov
def create_thread(thread_name, ss, fifo_name, err_log, stat_log):
    '''
    Write sequence data ss into the file or FIFO fifo_name (append mode).

    Runs as the target of thread.start_new_thread; when fifo_name is a
    FIFO the open() blocks until the reader (the aligner) opens the other
    end. Errors are logged, never raised, so the worker thread dies quietly.
    '''
    try:
        # BUG FIX: the original left the handle open (flush/close depended
        # on refcount GC); a context manager guarantees flush and close.
        # Moving open() inside the try also logs open() failures instead
        # of killing the thread silently.
        with open(fifo_name, 'a') as op1:
            op1.writelines(ss)
    except Exception as e:
        yap_log.write_log("Threading " + thread_name, fifo_name,
                          'EXCEPTION', str(e), err_log, stat_log)
def create_thread(thread_name, ss, fifo_name, err_log, stat_log):
    '''
    Write sequence data ss into the file or FIFO fifo_name (append mode).

    NOTE(review): duplicate re-definition of an earlier function in this
    file; consider removing one copy.

    Errors are logged, never raised, so the worker thread dies quietly.
    '''
    try:
        # BUG FIX: the original left the handle open (flush/close depended
        # on refcount GC); a context manager guarantees flush and close.
        with open(fifo_name, 'a') as op1:
            op1.writelines(ss)
    except Exception as e:
        yap_log.write_log("Threading " + thread_name, fifo_name,
                          'EXCEPTION', str(e), err_log, stat_log)
def align_func(aligner_cmd,aligner_out_basename,err_log,stat_log): ''' function to execute aligner command through subprocess ''' try: pbow = Popen(aligner_cmd,stdout=PIPE,stderr=PIPE,shell=True,close_fds=True) std_out, std_err = pbow.communicate() exit_code = pbow.returncode seqs_str1 = '' seqs_str2 = '' yap_log.write_log(aligner_cmd, aligner_out_basename,exit_code, std_err, err_log, stat_log) return std_out except Exception as e: print e
def run_postprocess_nontee(postprocess_compare_arr, workflow_prov, err_log, stat_log): '''Executes command through subrocess, writes log information''' cmd = postprocess_compare_arr[1] try: std_out = '' std_err = '' prun = Popen(cmd, stdout=PIPE, stderr=PIPE, shell='True') std_out, std_err = prun.communicate() exit_code = prun.returncode yap_log.write_log(str(cmd), '', str(exit_code), std_err, err_log, stat_log) if cmd != '': workflow_prov.append(cmd) except Exception as e: print "Error : while running postprocess command ", cmd, "\n" print e yap_log.write_log(str(cmd), '', '', str(e), err_log, stat_log) return workflow_prov
def align_func(aligner_cmd, aligner_out_basename, err_log, stat_log): ''' function to execute aligner command through subprocess ''' try: pbow = Popen(aligner_cmd, stdout=PIPE, stderr=PIPE, shell=True, close_fds=True) std_out, std_err = pbow.communicate() exit_code = pbow.returncode seqs_str1 = '' seqs_str2 = '' yap_log.write_log(aligner_cmd, aligner_out_basename, exit_code, std_err, err_log, stat_log) return std_out except Exception as e: print e
def run_postprocess_nontee( postprocess_compare_arr, workflow_prov, err_log, stat_log): '''Executes command through subrocess, writes log information''' cmd = postprocess_compare_arr[1] try: std_out = '' std_err = '' prun = Popen(cmd, stdout=PIPE, stderr=PIPE, shell='True') std_out, std_err = prun.communicate() exit_code = prun.returncode yap_log.write_log(str(cmd), '', str(exit_code), std_err, err_log, stat_log) if cmd != '': workflow_prov.append(cmd) except Exception as e: print "Error : while running postprocess command ", cmd, "\n" print e yap_log.write_log(str(cmd), '', '', str(e), err_log, stat_log) return workflow_prov
def run_aligner( seqs_str1, seqs_str2, fname, chunk_number, myrank, workflow_prov, err_log, stat_log): ''' Runs alignment for chunk data or file, polishes commands for input/output paths and creates pipes. ''' aligner_out_str = '' p1 = [] n_cmd = len(wd.aligner_cmd_arr) for i in range(0, n_cmd): scratch_temp_dir = wd.yap_temp_user_dir cmd_type = wd.aligner_cmd_arr[i][0] cmd_meta_data = wd.aligner_cmd_arr[i][1] temp_arr = wd.aligner_cmd_arr[i][2] aligner_cmd_name = temp_arr[0][0] aligner_cmd = temp_arr[0][1] aligner_dir_path, file_name = os.path.split(fname) aligner_cmd = aligner_cmd.replace("output_directory", aligner_dir_path) aligner_cmd = aligner_cmd.replace("output_file", fname) aligner_cmd = aligner_cmd.replace("sample_name", file_name) aligner_cmd = aligner_cmd.replace("input_files_path", wd.input_files_path) aligner_cmd_tmp = '' pipe_var1 = '' pipe_var2 = '' pipe1_basename = '' pipe2_basename = '' if aligner_cmd.find("pipe1") != -1: pipe_var1 = yap_tools.find_variable("pipe1", aligner_cmd) if aligner_cmd.find("pipe2") != -1: pipe_var2 = yap_tools.find_variable("pipe2", aligner_cmd) if wd.data_distribution_method == "file_based": if pipe_var1 != '': aligner_cmd = aligner_cmd.replace( pipe_var1, " " + seqs_str1 + " ", 1) if pipe_var2 != '': aligner_cmd = aligner_cmd.replace( pipe_var2, " " + seqs_str2 + " ", 1) aligner_out_str = align_func(aligner_cmd,fname,err_log,stat_log) else: pipe1_basename = pipe_var1.replace('pipe1',scratch_temp_dir + "/" + file_name + wd.job_id + "_" + wd.random_id + '_pipe_' + str(chunk_number)+ "_" + str(i) + "_1") pipe2_basename = pipe_var2.replace('pipe2',scratch_temp_dir + "/" + file_name + wd.job_id + "_" + wd.random_id + '_pipe_' + str(chunk_number)+ "_" + str(i) + "_2") if pipe_var1 != '': aligner_cmd = aligner_cmd.replace(pipe_var1, " " + pipe1_basename + " ", 1) if pipe_var2 != '': aligner_cmd = aligner_cmd.replace(pipe_var2, " " + pipe2_basename + " ") if pipe_var1 != '' and pipe_var2 != '': if os.path.exists(pipe1_basename) 
!= True: os.mkfifo(pipe1_basename) try: thread.start_new_thread(create_thread,("thread1",seqs_str1,pipe1_basename,err_log,stat_log)) except: print "Error: unable to start thread1" if os.path.exists(pipe2_basename) != True: os.mkfifo(pipe2_basename) try: thread.start_new_thread(create_thread,("thread2",seqs_str2,pipe2_basename,err_log,stat_log)) except: print "Error: unable to start thread2" aligner_out_str = align_func(aligner_cmd,fname,err_log,stat_log) os.unlink(pipe1_basename) os.unlink(pipe2_basename) elif pipe_var1 != '' and pipe_var2 == '': if os.path.exists(pipe1_basename) != True: os.mkfifo(pipe1_basename) try: thread.start_new_thread(create_thread,("thread1",seqs_str1,pipe1_basename,err_log,stat_log)) except: print "Error: unable to start thread1" aligner_out_str = align_func(aligner_cmd,fname,err_log,stat_log) os.unlink(pipe1_basename) elif pipe_var2 != '' and pipe_var1 == '': if os.path.exists(pipe2_basename) != True: os.mkfifo(pipe2_basename) try: thread.start_new_thread(create_thread,("thread2",seqs_str2,pipe2_basename,err_log,stat_log)) except: print "Error: unable to start thread" aligner_out_str = align_func(aligner_cmd,fname,err_log,stat_log) os.unlink(pipe2_basename) else: pbow = Popen(aligner_cmd,stdout=PIPE,stderr=PIPE,shell='True',close_fds='True') aligner_out_str, std_err = pbow.communicate() exit_code = pbow.returncode yap_log.write_log( aligner_cmd, fname, exit_code, std_err, err_log, stat_log) if aligner_cmd_name != '': workflow_prov.append(aligner_cmd) alignment_outfile_pos = 0 format_ext = '' alignment_file_ext = '' while alignment_outfile_pos != -1: aligner_output_filename = '' alignment_outfile_pos = aligner_cmd.rfind(fname) for jj in range(alignment_outfile_pos, len(aligner_cmd)): if aligner_cmd[jj] != ' ': aligner_output_filename += aligner_cmd[jj] else: break aligner_output_filename_base, alignment_file_ext = os.path.splitext( aligner_output_filename) if alignment_file_ext == '.gz' or alignment_file_ext == 'bz2': 
aligner_output_filename_base, format_ext = os.path.splitext( aligner_output_filename_base) if format_ext == '.sam' or alignment_file_ext == '.sam': alignment_outfile_pos = -1 elif format_ext == '.bam' or alignment_file_ext == '.bam': alignment_outfile_pos = -1 else: aligner_cmd_tmp = aligner_cmd[0:alignment_outfile_pos] aligner_cmd = aligner_cmd_tmp #treat tophat as exception; search if the file has been created and pass for sorting #this is because tophat's output filename cannot be customized according YAP output structure if re.search('tophat', aligner_cmd_name) is not None: if os.path.exists(aligner_dir_path + "/accepted_hits.bam"): aligner_output_filename = aligner_dir_path + "/accepted_hits.bam" if os.path.exists(aligner_dir_path + "/accepted_hits.sam"): aligner_output_filename = aligner_dir_path + "/accepted_hits.sam" #After alignment pass data for sorting sort_alignment_output(chunk_number,aligner_cmd_name,aligner_cmd,aligner_output_filename,workflow_prov,err_log,stat_log) return aligner_out_str, workflow_prov
def execute_merge_alignment( final_output_name, sort_input_files_arr, file_type, file_name, barcode, sort_files_cmd, workflow_prov, err_log, stat_log): ''' Executes merge data commands for alignment output data. ''' sort_cmd_input = '' sort_input_files_new_arr = [] if file_type != "sam": if len(sort_input_files_arr) > 0: if len(sort_input_files_arr) == 1: os.rename(sort_input_files_arr[0], final_output_name) workflow_prov.append( 'RENAMED FILE ' + sort_input_files_arr[0] + ' TO ' + final_output_name) else: for z in range(0, len(sort_input_files_arr)): sort_cmd_input += sort_input_files_arr[z].strip("\n") + " " if wd.alignment_sort_order == "unsorted": sort_files_cmd = "samtools cat -o " + \ final_output_name + ' ' + sort_cmd_input else: sort_files_cmd = sort_files_cmd + ' ' + \ final_output_name + ' ' + sort_cmd_input str_out = "*" * 50 + "MERGE ALIGNMENT STARTED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) pmerge = Popen(sort_files_cmd, stdout=PIPE, stderr=PIPE, shell='True') std_out, std_err = pmerge.communicate() exit_code = pmerge.returncode yap_log.write_log(sort_files_cmd, str(sort_input_files_arr).lstrip( '[').rstrip(']'), exit_code, std_err, err_log, stat_log) str_out = "*" * 50 + "MERGE ALIGNMENT FINISHED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) if sort_files_cmd != '': workflow_prov.append(sort_files_cmd) if exit_code != 0: if file_name == '': print "Error: chunk merge sort failed for barcode=", barcode, "\n" else: print "Error: chunks merge sort failed for Filename=", file_name, "barcode=", barcode, "\n" for z in range(0, len(sort_input_files_arr)): os.remove(sort_input_files_arr[z]) else: if len(sort_input_files_arr) > 0: if len(sort_input_files_arr) == 1: os.rename(sort_input_files_arr[0], 
final_output_name) workflow_prov.append( 'RENAMED FILE ' + sort_input_files_arr[0] + ' TO ' + final_output_name) else: str_out = "*" * 50 + "MERGE ALIGNMENT STARTED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) for z in range(0, len(sort_input_files_arr)): sam_file_name = sort_input_files_arr[z] sam_file_name_base, ext = os.path.splitext(sam_file_name) sam_to_bam_cmd = "samtools view -bhS " + \ sam_file_name + " -o " + sam_file_name_base + ".bam" pconv = Popen( sam_to_bam_cmd, stdout=PIPE, stderr=PIPE, shell='True') std_out, std_err = pconv.communicate() exit_code = pconv.returncode yap_log.write_log( sam_to_bam_cmd, final_output_name, exit_code, std_err, err_log, stat_log) std_out = "" std_err = "" exit_code = 0 if exit_code != 0: print " Sam to bam conversion failed" sort_input_files_new_arr.append( sam_file_name_base + '.bam') os.remove(sam_file_name) for z in range(0, len(sort_input_files_new_arr)): sort_cmd_input += sort_input_files_new_arr[ z].strip("\n") + " " if wd.alignment_sort_order == "unsorted": sort_files_cmd = "samtools cat -o - " + sort_cmd_input + \ " | samtools view -h - -o " + final_output_name else: sort_files_cmd = sort_files_cmd + ' - ' + ' ' + sort_cmd_input + \ " | samtools view -h - -o " + final_output_name std_out = '' std_err = '' pmerge = Popen( sort_files_cmd, stdout=PIPE, stderr=PIPE, shell='False') std_out, std_err = pmerge.communicate() exit_code = pmerge.returncode if sort_files_cmd != '': workflow_prov.append(sort_files_cmd) if exit_code != 0: if file_name == '': print "Error: chunk merge sort failed for barcode=", barcode, "\n" else: print "Error: chunks merge sort failed for Filename=", file_name, "barcode=", barcode, "\n" yap_log.write_log(sort_files_cmd, str(sort_input_files_arr).lstrip( '[').rstrip(']'), exit_code, std_err, err_log, stat_log) str_out = "*" * 50 + "MERGE ALIGNMENT FINISHED" + "\t" + 
str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) for z in range(0, len(sort_input_files_new_arr)): os.remove(sort_input_files_new_arr[z])
def run_aligner(seqs_str1, seqs_str2, fname, chunk_number, myrank, workflow_prov, err_log, stat_log): ''' Runs alignment for chunk data or file, polishes commands for input/output paths and creates pipes. ''' aligner_out_str = '' p1 = [] n_cmd = len(wd.aligner_cmd_arr) for i in range(0, n_cmd): scratch_temp_dir = wd.yap_temp_user_dir cmd_type = wd.aligner_cmd_arr[i][0] cmd_meta_data = wd.aligner_cmd_arr[i][1] temp_arr = wd.aligner_cmd_arr[i][2] aligner_cmd_name = temp_arr[0][0] aligner_cmd = temp_arr[0][1] aligner_dir_path, file_name = os.path.split(fname) aligner_cmd = aligner_cmd.replace("output_directory", aligner_dir_path) aligner_cmd = aligner_cmd.replace("output_file", fname) aligner_cmd = aligner_cmd.replace("sample_name", file_name) aligner_cmd = aligner_cmd.replace("input_files_path", wd.input_files_path) aligner_cmd_tmp = '' pipe_var1 = '' pipe_var2 = '' pipe1_basename = '' pipe2_basename = '' if aligner_cmd.find("pipe1") != -1: pipe_var1 = yap_tools.find_variable("pipe1", aligner_cmd) if aligner_cmd.find("pipe2") != -1: pipe_var2 = yap_tools.find_variable("pipe2", aligner_cmd) if wd.data_distribution_method == "file_based": if pipe_var1 != '': aligner_cmd = aligner_cmd.replace(pipe_var1, " " + seqs_str1 + " ", 1) if pipe_var2 != '': aligner_cmd = aligner_cmd.replace(pipe_var2, " " + seqs_str2 + " ", 1) aligner_out_str = align_func(aligner_cmd, fname, err_log, stat_log) else: pipe1_basename = pipe_var1.replace( 'pipe1', scratch_temp_dir + "/" + file_name + wd.job_id + "_" + wd.random_id + '_pipe_' + str(chunk_number) + "_" + str(i) + "_1") pipe2_basename = pipe_var2.replace( 'pipe2', scratch_temp_dir + "/" + file_name + wd.job_id + "_" + wd.random_id + '_pipe_' + str(chunk_number) + "_" + str(i) + "_2") if pipe_var1 != '': aligner_cmd = aligner_cmd.replace(pipe_var1, " " + pipe1_basename + " ", 1) if pipe_var2 != '': aligner_cmd = aligner_cmd.replace(pipe_var2, " " + pipe2_basename + " ") if pipe_var1 != '' and pipe_var2 != '': if 
os.path.exists(pipe1_basename) != True: os.mkfifo(pipe1_basename) try: thread.start_new_thread( create_thread, ("thread1", seqs_str1, pipe1_basename, err_log, stat_log)) except: print "Error: unable to start thread1" if os.path.exists(pipe2_basename) != True: os.mkfifo(pipe2_basename) try: thread.start_new_thread( create_thread, ("thread2", seqs_str2, pipe2_basename, err_log, stat_log)) except: print "Error: unable to start thread2" aligner_out_str = align_func(aligner_cmd, fname, err_log, stat_log) os.unlink(pipe1_basename) os.unlink(pipe2_basename) elif pipe_var1 != '' and pipe_var2 == '': if os.path.exists(pipe1_basename) != True: os.mkfifo(pipe1_basename) try: thread.start_new_thread( create_thread, ("thread1", seqs_str1, pipe1_basename, err_log, stat_log)) except: print "Error: unable to start thread1" aligner_out_str = align_func(aligner_cmd, fname, err_log, stat_log) os.unlink(pipe1_basename) elif pipe_var2 != '' and pipe_var1 == '': if os.path.exists(pipe2_basename) != True: os.mkfifo(pipe2_basename) try: thread.start_new_thread( create_thread, ("thread2", seqs_str2, pipe2_basename, err_log, stat_log)) except: print "Error: unable to start thread" aligner_out_str = align_func(aligner_cmd, fname, err_log, stat_log) os.unlink(pipe2_basename) else: pbow = Popen(aligner_cmd, stdout=PIPE, stderr=PIPE, shell='True', close_fds='True') aligner_out_str, std_err = pbow.communicate() exit_code = pbow.returncode yap_log.write_log(aligner_cmd, fname, exit_code, std_err, err_log, stat_log) if aligner_cmd_name != '': workflow_prov.append(aligner_cmd) alignment_outfile_pos = 0 format_ext = '' alignment_file_ext = '' while alignment_outfile_pos != -1: aligner_output_filename = '' alignment_outfile_pos = aligner_cmd.rfind(fname) for jj in range(alignment_outfile_pos, len(aligner_cmd)): if aligner_cmd[jj] != ' ': aligner_output_filename += aligner_cmd[jj] else: break aligner_output_filename_base, alignment_file_ext = os.path.splitext( aligner_output_filename) if 
alignment_file_ext == '.gz' or alignment_file_ext == 'bz2': aligner_output_filename_base, format_ext = os.path.splitext( aligner_output_filename_base) if format_ext == '.sam' or alignment_file_ext == '.sam': alignment_outfile_pos = -1 elif format_ext == '.bam' or alignment_file_ext == '.bam': alignment_outfile_pos = -1 else: aligner_cmd_tmp = aligner_cmd[0:alignment_outfile_pos] aligner_cmd = aligner_cmd_tmp #treat tophat as exception; search if the file has been created and pass for sorting #this is because tophat's output filename cannot be customized according YAP output structure if re.search('tophat', aligner_cmd_name) is not None: if os.path.exists(aligner_dir_path + "/accepted_hits.bam"): aligner_output_filename = aligner_dir_path + "/accepted_hits.bam" if os.path.exists(aligner_dir_path + "/accepted_hits.sam"): aligner_output_filename = aligner_dir_path + "/accepted_hits.sam" #After alignment pass data for sorting sort_alignment_output(chunk_number, aligner_cmd_name, aligner_cmd, aligner_output_filename, workflow_prov, err_log, stat_log) return aligner_out_str, workflow_prov
def execute_merge_alignment(final_output_name, sort_input_files_arr, file_type, file_name, barcode, sort_files_cmd, workflow_prov, err_log, stat_log): ''' Executes merge data commands for alignment output data. ''' sort_cmd_input = '' sort_input_files_new_arr = [] if file_type != "sam": if len(sort_input_files_arr) > 0: if len(sort_input_files_arr) == 1: os.rename(sort_input_files_arr[0], final_output_name) workflow_prov.append('RENAMED FILE ' + sort_input_files_arr[0] + ' TO ' + final_output_name) else: for z in range(0, len(sort_input_files_arr)): sort_cmd_input += sort_input_files_arr[z].strip("\n") + " " if wd.alignment_sort_order == "unsorted": sort_files_cmd = "samtools cat -o " + \ final_output_name + ' ' + sort_cmd_input else: sort_files_cmd = sort_files_cmd + ' ' + \ final_output_name + ' ' + sort_cmd_input str_out = "*" * 50 + "MERGE ALIGNMENT STARTED" + "\t" + str( time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) pmerge = Popen(sort_files_cmd, stdout=PIPE, stderr=PIPE, shell='True') std_out, std_err = pmerge.communicate() exit_code = pmerge.returncode yap_log.write_log( sort_files_cmd, str(sort_input_files_arr).lstrip('[').rstrip(']'), exit_code, std_err, err_log, stat_log) str_out = "*" * 50 + "MERGE ALIGNMENT FINISHED" + "\t" + str( time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) if sort_files_cmd != '': workflow_prov.append(sort_files_cmd) if exit_code != 0: if file_name == '': print "Error: chunk merge sort failed for barcode=", barcode, "\n" else: print "Error: chunks merge sort failed for Filename=", file_name, "barcode=", barcode, "\n" for z in range(0, len(sort_input_files_arr)): os.remove(sort_input_files_arr[z]) else: if len(sort_input_files_arr) > 0: if len(sort_input_files_arr) == 1: os.rename(sort_input_files_arr[0], 
final_output_name) workflow_prov.append('RENAMED FILE ' + sort_input_files_arr[0] + ' TO ' + final_output_name) else: str_out = "*" * 50 + "MERGE ALIGNMENT STARTED" + "\t" + str( time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) for z in range(0, len(sort_input_files_arr)): sam_file_name = sort_input_files_arr[z] sam_file_name_base, ext = os.path.splitext(sam_file_name) sam_to_bam_cmd = "samtools view -bhS " + \ sam_file_name + " -o " + sam_file_name_base + ".bam" pconv = Popen(sam_to_bam_cmd, stdout=PIPE, stderr=PIPE, shell='True') std_out, std_err = pconv.communicate() exit_code = pconv.returncode yap_log.write_log(sam_to_bam_cmd, final_output_name, exit_code, std_err, err_log, stat_log) std_out = "" std_err = "" exit_code = 0 if exit_code != 0: print " Sam to bam conversion failed" sort_input_files_new_arr.append(sam_file_name_base + '.bam') os.remove(sam_file_name) for z in range(0, len(sort_input_files_new_arr)): sort_cmd_input += sort_input_files_new_arr[z].strip( "\n") + " " if wd.alignment_sort_order == "unsorted": sort_files_cmd = "samtools cat -o - " + sort_cmd_input + \ " | samtools view -h - -o " + final_output_name else: sort_files_cmd = sort_files_cmd + ' - ' + ' ' + sort_cmd_input + \ " | samtools view -h - -o " + final_output_name std_out = '' std_err = '' pmerge = Popen(sort_files_cmd, stdout=PIPE, stderr=PIPE, shell='False') std_out, std_err = pmerge.communicate() exit_code = pmerge.returncode if sort_files_cmd != '': workflow_prov.append(sort_files_cmd) if exit_code != 0: if file_name == '': print "Error: chunk merge sort failed for barcode=", barcode, "\n" else: print "Error: chunks merge sort failed for Filename=", file_name, "barcode=", barcode, "\n" yap_log.write_log( sort_files_cmd, str(sort_input_files_arr).lstrip('[').rstrip(']'), exit_code, std_err, err_log, stat_log) str_out = "*" * 50 + "MERGE ALIGNMENT FINISHED" + "\t" + str( 
time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n" yap_file_io.write_data(str_out, err_log) yap_file_io.write_data(str_out, stat_log) for z in range(0, len(sort_input_files_new_arr)): os.remove(sort_input_files_new_arr[z])