Exemplo n.º 1
0
def fastx_barcode_splitter(
        seqs_str,
        output_file_format,
        fastx_barcode_splitter_cmd,
        preprocess_prov,
        err_log,
        stat_log):
    '''
    Run the barcode splitter command and collect its output per barcode.

    seqs_str is fed to the command on stdin. The command's stdout is
    stripped of spaces and parsed as "|"-separated records of the form
    "<barcode>=><sequences>".

    Parameters:
        seqs_str: data written to the command's stdin.
        output_file_format: not used by this function.
        fastx_barcode_splitter_cmd: shell command line to execute.
        preprocess_prov: list that the executed command is appended to.
        err_log, stat_log: log destinations handed to yap_log / yap_file_io.

    Returns:
        (globaldict, preprocess_prov) where globaldict maps each barcode
        to its sequence string. If anything fails while running or
        parsing, the error text is written to err_log and the entries
        parsed so far are returned.
    '''
    preprocess_prov.append(fastx_barcode_splitter_cmd)
    globaldict = {}
    P1 = Popen(fastx_barcode_splitter_cmd, stdin=PIPE,
               stdout=PIPE, stderr=PIPE, shell=True)
    try:
        std_out, std_err = P1.communicate(seqs_str)
        exit_code = P1.returncode
        yap_log.write_log(fastx_barcode_splitter_cmd, "",
                          exit_code, std_err, err_log, stat_log)
        for record in std_out.replace(" ", "").split("|"):
            if record != '':
                # A record without "=>" raises IndexError here, which is
                # reported through the handler below.
                fields = record.split("=>")
                globaldict[fields[0]] = fields[1]
    except Exception as e:
        # Use the module-qualified writer like the rest of the file; the
        # bare name write_data is not guaranteed to be in scope.
        yap_file_io.write_data(str(e), err_log)
    yap_file_io.write_data("\n", err_log)
    yap_file_io.write_data("\n", stat_log)
    return globaldict, preprocess_prov
Exemplo n.º 2
0
def run_fastq_screen(inp_files_list, fastq_screen_cmd):
    '''
    Run the fastq screen command and log its outcome.

    Parameters:
        inp_files_list: sequence whose items 0 and 1 are appended to the
            command line and whose item 2 is the file base name used for
            log file names and the output directory.
        fastq_screen_cmd: command template; the 'output_directory' and
            'pipe1' placeholders are substituted before execution.

    Returns:
        A one-element list containing the command line that was executed.
    '''
    prov = []
    file_base_name = inp_files_list[2]
    err_log = wd.err_log_path + "/" + file_base_name + "_fastqscreen_err.log"
    stat_log = wd.stat_log_path + "/" + file_base_name + "_fastqscreen_stat.log"
    output_dir = (wd.workflow_output_path + "/" + file_base_name + "/" +
                  "no_barcode_specified" + "/" + "preprocess_output")

    fastq_screen_cmd += inp_files_list[0] + " " + inp_files_list[1] + " "
    fastq_screen_cmd = fastq_screen_cmd.replace('output_directory', output_dir)
    fastq_screen_cmd = fastq_screen_cmd.replace('pipe1', '')
    fastq_screen_cmd += " "

    str_out = "*" * 50 + "FASTQSCREEN STARTED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n"
    yap_file_io.write_data(str_out, err_log)
    yap_file_io.write_data(str_out, stat_log)

    # The command is a single string with arguments, so it has to go through
    # the shell. The previous shell='False' was a non-empty (truthy) string
    # and therefore already behaved as shell=True.
    prm = Popen(fastq_screen_cmd, stderr=PIPE, shell=True)
    # stdout is not piped, so only stderr is captured here.
    _, std_err = prm.communicate()
    exit_code = prm.returncode
    prov.append(fastq_screen_cmd)
    yap_log.write_log(fastq_screen_cmd, file_base_name,
                      exit_code, std_err, err_log, stat_log)

    str_out = "*" * 50 + "FASTQSCREEN FINISHED" + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n"
    yap_file_io.write_data(str_out, err_log)
    yap_file_io.write_data(str_out, stat_log)
    return prov
Exemplo n.º 3
0
def create_thread(thread_name, ss, fifo_name, err_log, stat_log):
    '''
    Append data to a file or FIFO and close it afterwards.

    Parameters:
        thread_name: label used in the log entry if writing fails.
        ss: iterable of strings passed to writelines().
        fifo_name: path of the file or FIFO, opened in append mode.
        err_log, stat_log: log destinations handed to yap_log.write_log.

    Errors raised while writing or closing are logged, not re-raised.
    An error raised by open() itself still propagates to the caller.
    '''
    op1 = open(fifo_name, 'a')
    try:
        # Close the handle deterministically so that a process reading the
        # FIFO sees end-of-file as soon as the data has been written.
        with op1:
            op1.writelines(ss)
    except Exception as e:
        yap_log.write_log("Threading " + thread_name, fifo_name, 'EXCEPTION', str(e), err_log, stat_log)
Exemplo n.º 4
0
def align_func(aligner_cmd, aligner_out_basename, err_log, stat_log):
    '''
    Execute an aligner command through a shell subprocess.

    Parameters:
        aligner_cmd: shell command line to execute.
        aligner_out_basename: name passed to yap_log.write_log for the
            log entry.
        err_log, stat_log: log destinations handed to yap_log.write_log.

    Returns:
        The command's captured stdout, or None if an exception occurred
        (the exception is printed and not re-raised).
    '''
    try:
        pbow = Popen(aligner_cmd, stdout=PIPE, stderr=PIPE, shell=True, close_fds=True)
        std_out, std_err = pbow.communicate()
        exit_code = pbow.returncode
        yap_log.write_log(aligner_cmd, aligner_out_basename, exit_code, std_err, err_log, stat_log)
        return std_out
    except Exception as e:
        print(e)
        return None
Exemplo n.º 5
0
def run_postprocess_nontee(
        postprocess_compare_arr,
        workflow_prov,
        err_log,
        stat_log):
    '''
    Execute a postprocess command through a shell subprocess and log it.

    Parameters:
        postprocess_compare_arr: sequence whose item 1 is the command.
        workflow_prov: list that a non-empty command is appended to after
            it has run.
        err_log, stat_log: log destinations handed to yap_log.write_log.

    Returns:
        workflow_prov. Exceptions raised while running the command are
        printed and logged, not re-raised.
    '''
    cmd = postprocess_compare_arr[1]
    try:
        # shell must be the boolean True; the previous 'True' string only
        # worked because any non-empty string is truthy.
        prun = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        std_out, std_err = prun.communicate()
        exit_code = prun.returncode
        yap_log.write_log(str(cmd), '', str(exit_code), std_err, err_log, stat_log)
        if cmd != '':
            workflow_prov.append(cmd)
    except Exception as e:
        # Single-argument print keeps the exact output of the former
        # comma-separated print statement.
        print("Error : while running postprocess command  %s \n" % (cmd,))
        print(e)
        yap_log.write_log(str(cmd), '', '', str(e), err_log, stat_log)
    return workflow_prov
Exemplo n.º 6
0
def run_aligner(
        seqs_str1,
        seqs_str2,
        fname,
        chunk_number,
        myrank,
        workflow_prov,
        err_log,
        stat_log):
    '''
    Run every configured aligner command for one chunk of data or one file.

    For each entry of wd.aligner_cmd_arr the command template is filled in
    (output directory, output file, sample name, input path), its
    "pipe1"/"pipe2" variables are resolved, the command is executed, and
    the alignment output file named in the command is handed to
    sort_alignment_output.

    Parameters:
        seqs_str1, seqs_str2: in "file_based" mode, text substituted
            directly into the command for pipe1/pipe2; otherwise data that
            is streamed to the aligner through named pipes.
        fname: output file path; its directory and base name are used for
            the template substitutions.
        chunk_number: used in the FIFO names and passed on for sorting.
        myrank: not used by this function.
        workflow_prov: list that executed commands are appended to.
        err_log, stat_log: log destinations.

    Returns:
        (aligner_out_str, workflow_prov) where aligner_out_str is the
        stdout of the last aligner command that was run.
    '''
    aligner_out_str = ''
    for i, cmd_entry in enumerate(wd.aligner_cmd_arr):
        scratch_temp_dir = wd.yap_temp_user_dir
        temp_arr = cmd_entry[2]
        aligner_cmd_name = temp_arr[0][0]
        aligner_cmd = temp_arr[0][1]
        aligner_dir_path, file_name = os.path.split(fname)
        aligner_cmd = aligner_cmd.replace("output_directory", aligner_dir_path)
        aligner_cmd = aligner_cmd.replace("output_file", fname)
        aligner_cmd = aligner_cmd.replace("sample_name", file_name)
        aligner_cmd = aligner_cmd.replace("input_files_path", wd.input_files_path)

        pipe_var1 = ''
        pipe_var2 = ''
        if "pipe1" in aligner_cmd:
            pipe_var1 = yap_tools.find_variable("pipe1", aligner_cmd)
        if "pipe2" in aligner_cmd:
            pipe_var2 = yap_tools.find_variable("pipe2", aligner_cmd)

        if wd.data_distribution_method == "file_based":
            # The inputs are substituted straight into the command line.
            if pipe_var1 != '':
                aligner_cmd = aligner_cmd.replace(
                    pipe_var1, " " + seqs_str1 + " ", 1)
            if pipe_var2 != '':
                aligner_cmd = aligner_cmd.replace(
                    pipe_var2, " " + seqs_str2 + " ", 1)
            aligner_out_str = align_func(aligner_cmd, fname, err_log, stat_log)
        else:
            # The inputs are streamed through FIFOs in the scratch directory,
            # each fed by its own writer thread.
            pipe_prefix = (scratch_temp_dir + "/" + file_name + wd.job_id +
                           "_" + wd.random_id + '_pipe_' + str(chunk_number) +
                           "_" + str(i) + "_")
            pipe1_basename = pipe_var1.replace('pipe1', pipe_prefix + "1")
            pipe2_basename = pipe_var2.replace('pipe2', pipe_prefix + "2")

            fifo_jobs = []
            if pipe_var1 != '':
                aligner_cmd = aligner_cmd.replace(pipe_var1, " " + pipe1_basename + " ", 1)
                fifo_jobs.append(("thread1", seqs_str1, pipe1_basename))
            if pipe_var2 != '':
                # NOTE(review): unlike pipe1 this replaces every occurrence
                # (no count argument); kept as in the original.
                aligner_cmd = aligner_cmd.replace(pipe_var2, " " + pipe2_basename + " ")
                fifo_jobs.append(("thread2", seqs_str2, pipe2_basename))

            if fifo_jobs:
                fifo_paths = []
                try:
                    for thread_label, data, fifo_path in fifo_jobs:
                        if not os.path.exists(fifo_path):
                            os.mkfifo(fifo_path)
                        fifo_paths.append(fifo_path)
                        try:
                            thread.start_new_thread(
                                create_thread,
                                (thread_label, data, fifo_path, err_log, stat_log))
                        except Exception:
                            print("Error: unable to start " + thread_label)
                    aligner_out_str = align_func(aligner_cmd, fname, err_log, stat_log)
                finally:
                    # Remove the FIFOs even if the aligner step raised.
                    for fifo_path in fifo_paths:
                        os.unlink(fifo_path)
            else:
                # No pipe variables in the command: run it directly.
                pbow = Popen(aligner_cmd, stdout=PIPE, stderr=PIPE, shell=True, close_fds=True)
                aligner_out_str, std_err = pbow.communicate()
                exit_code = pbow.returncode
                yap_log.write_log(aligner_cmd, fname, exit_code, std_err, err_log, stat_log)

        if aligner_cmd_name != '':
            workflow_prov.append(aligner_cmd)

        # Find the alignment output file in the command: scan occurrences of
        # fname from the right until one ends in .sam/.bam (optionally
        # followed by .gz/.bz2). aligner_cmd is truncated before each
        # rejected occurrence, and that truncated command is what is passed
        # on for sorting.
        # NOTE(review): when fname does not occur at all, rfind returns -1
        # and the scan starts at the last character; kept as in the original.
        alignment_outfile_pos = 0
        format_ext = ''
        alignment_file_ext = ''
        while alignment_outfile_pos != -1:
            aligner_output_filename = ''
            alignment_outfile_pos = aligner_cmd.rfind(fname)
            for jj in range(alignment_outfile_pos, len(aligner_cmd)):
                if aligner_cmd[jj] != ' ':
                    aligner_output_filename += aligner_cmd[jj]
                else:
                    break
            aligner_output_filename_base, alignment_file_ext = os.path.splitext(
                aligner_output_filename)
            # os.path.splitext returns the extension with its leading dot,
            # so the compressed-file check must compare against '.bz2'.
            if alignment_file_ext == '.gz' or alignment_file_ext == '.bz2':
                aligner_output_filename_base, format_ext = os.path.splitext(
                    aligner_output_filename_base)
            if format_ext == '.sam' or alignment_file_ext == '.sam':
                alignment_outfile_pos = -1
            elif format_ext == '.bam' or alignment_file_ext == '.bam':
                alignment_outfile_pos = -1
            else:
                aligner_cmd = aligner_cmd[0:alignment_outfile_pos]

        # Treat tophat as an exception: look for the file it created and pass
        # that on for sorting, because tophat's output filename cannot be
        # customized to the YAP output structure.
        if re.search('tophat', aligner_cmd_name) is not None:
            if os.path.exists(aligner_dir_path + "/accepted_hits.bam"):
                aligner_output_filename = aligner_dir_path + "/accepted_hits.bam"
            if os.path.exists(aligner_dir_path + "/accepted_hits.sam"):
                aligner_output_filename = aligner_dir_path + "/accepted_hits.sam"

        # After alignment, pass the data on for sorting.
        sort_alignment_output(chunk_number, aligner_cmd_name, aligner_cmd, aligner_output_filename, workflow_prov, err_log, stat_log)
    return aligner_out_str, workflow_prov
Exemplo n.º 7
0
def _write_merge_banner(label, err_log, stat_log):
    '''Write a timestamped banner line for label to both log files.'''
    str_out = "*" * 50 + label + "\t" + str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime())) + "*" * 50 + "\n"
    yap_file_io.write_data(str_out, err_log)
    yap_file_io.write_data(str_out, stat_log)


def _report_merge_failure(file_name, barcode):
    '''Print the merge failure message (same text as the original prints).'''
    if file_name == '':
        print("Error: chunk merge sort failed for barcode= %s \n" % (barcode,))
    else:
        print("Error: chunks  merge sort failed for Filename= %s barcode= %s \n"
              % (file_name, barcode))


def execute_merge_alignment(
        final_output_name,
        sort_input_files_arr,
        file_type,
        file_name,
        barcode,
        sort_files_cmd,
        workflow_prov,
        err_log,
        stat_log):
    '''
    Merge alignment output files into final_output_name.

    Nothing happens for an empty input list. A single input file is simply
    renamed. Several inputs are merged with "samtools cat" when
    wd.alignment_sort_order is "unsorted", otherwise with sort_files_cmd.
    For file_type "sam" each input is first converted to BAM and the
    merged stream is converted back with "samtools view -h".

    Parameters:
        final_output_name: path of the merged output file.
        sort_input_files_arr: list of input file paths; they are removed
            after the merge (the converted BAM files too, for "sam").
        file_type: "sam" selects the convert-merge-convert path.
        file_name, barcode: used only in the failure message.
        sort_files_cmd: merge command prefix for sorted data.
        workflow_prov: list that executed commands / renames are appended to.
        err_log, stat_log: log destinations.

    Returns:
        None.
    '''
    if len(sort_input_files_arr) == 0:
        return
    if len(sort_input_files_arr) == 1:
        os.rename(sort_input_files_arr[0], final_output_name)
        workflow_prov.append(
            'RENAMED FILE ' +
            sort_input_files_arr[0] +
            ' TO ' +
            final_output_name)
        return

    input_listing = str(sort_input_files_arr).lstrip('[').rstrip(']')

    if file_type != "sam":
        sort_cmd_input = ''.join(
            name.strip("\n") + " " for name in sort_input_files_arr)
        if wd.alignment_sort_order == "unsorted":
            sort_files_cmd = "samtools cat -o " + \
                final_output_name + ' ' + sort_cmd_input
        else:
            sort_files_cmd = sort_files_cmd + ' ' + \
                final_output_name + ' ' + sort_cmd_input
        _write_merge_banner("MERGE ALIGNMENT STARTED", err_log, stat_log)
        pmerge = Popen(sort_files_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        std_out, std_err = pmerge.communicate()
        exit_code = pmerge.returncode
        yap_log.write_log(sort_files_cmd, input_listing,
                          exit_code, std_err, err_log, stat_log)
        _write_merge_banner("MERGE ALIGNMENT FINISHED", err_log, stat_log)
        if sort_files_cmd != '':
            workflow_prov.append(sort_files_cmd)
        if exit_code != 0:
            _report_merge_failure(file_name, barcode)
        # NOTE(review): inputs are removed even when the merge failed,
        # as in the original; confirm this is intended.
        for name in sort_input_files_arr:
            os.remove(name)
        return

    # SAM input: convert every chunk to BAM, merge, convert back to SAM.
    _write_merge_banner("MERGE ALIGNMENT STARTED", err_log, stat_log)
    sort_input_files_new_arr = []
    for sam_file_name in sort_input_files_arr:
        sam_file_name_base, ext = os.path.splitext(sam_file_name)
        bam_file_name = sam_file_name_base + ".bam"
        sam_to_bam_cmd = "samtools view -bhS " + \
            sam_file_name + " -o " + bam_file_name
        pconv = Popen(sam_to_bam_cmd, stdout=PIPE, stderr=PIPE, shell=True)
        std_out, std_err = pconv.communicate()
        exit_code = pconv.returncode
        yap_log.write_log(
            sam_to_bam_cmd,
            final_output_name,
            exit_code,
            std_err,
            err_log,
            stat_log)
        # The exit code used to be reset to 0 right before this check, so a
        # failed conversion was never reported.
        if exit_code != 0:
            print(" Sam to bam conversion failed")
        sort_input_files_new_arr.append(bam_file_name)
        os.remove(sam_file_name)

    sort_cmd_input = ''.join(
        name.strip("\n") + " " for name in sort_input_files_new_arr)
    if wd.alignment_sort_order == "unsorted":
        sort_files_cmd = "samtools cat -o - " + sort_cmd_input + \
            " | samtools view -h - -o " + final_output_name
    else:
        sort_files_cmd = sort_files_cmd + ' - ' + ' ' + sort_cmd_input + \
            " | samtools view -h - -o " + final_output_name
    # The command contains a pipe, so it needs the shell; the previous
    # shell='False' string was truthy and already ran it through the shell.
    pmerge = Popen(sort_files_cmd, stdout=PIPE, stderr=PIPE, shell=True)
    std_out, std_err = pmerge.communicate()
    exit_code = pmerge.returncode
    if sort_files_cmd != '':
        workflow_prov.append(sort_files_cmd)
    if exit_code != 0:
        _report_merge_failure(file_name, barcode)
    yap_log.write_log(sort_files_cmd, input_listing,
                      exit_code, std_err, err_log, stat_log)
    _write_merge_banner("MERGE ALIGNMENT FINISHED", err_log, stat_log)
    for name in sort_input_files_new_arr:
        os.remove(name)