def _make_dir(path):
    """Create directory *path* (and any missing parents) without a shell.

    Best effort, like the ``mkdir`` shell command this replaces: a directory
    that already exists is accepted silently; any other failure is reported
    on stderr and execution continues. *path* is used literally, so spaces
    or shell metacharacters in it are safe.
    """
    import errno
    import sys

    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno == errno.EEXIST and os.path.isdir(path):
            return
        sys.stderr.write(
            "mkdir: cannot create directory '%s': %s\n" % (path, err.strerror))


def _make_sample_tree(sample_dir):
    """Create *sample_dir* and, for every barcode, its stage sub-directories.

    Each key of ``wd.barcode_dict`` gets ``<sample_dir>/<barcode>`` holding
    ``postprocess_output``, ``preprocess_output`` and ``aligner_output``.
    """
    _make_dir(sample_dir)
    for barcode in wd.barcode_dict:
        barcode_dir = sample_dir + "/" + barcode
        _make_dir(barcode_dir)
        for stage in ("postprocess_output", "preprocess_output",
                      "aligner_output"):
            _make_dir(barcode_dir + "/" + stage)


def initialize_dir_struct():
    """Build the on-disk output layout for the current workflow.

    Reads its configuration from the module-level ``wd`` object and creates,
    in order:

    * the temporary workspace (``yap_tools.create_dir``), the workflow
      output, temp and consolidated-output directories;
    * when ``wd.regroup_output == 'yes'``, one tree per entry of
      ``wd.regroup_arr`` (named by the entry's element 0);
    * one tree per entry of ``wd.inp_files_list`` (named by the entry's
      element 2), while writing a ``name => elem0,elem1`` line for it to
      ``wd.sample_track_log``;
    * the log, error-log and stat-log directories plus a ``<name>_log_temp``
      directory for every input entry under the latter two.

    Directory creation is best effort (see ``_make_dir``); failure to open
    the tracking log still raises.
    """
    print("Initializing YAP...")
    print("Creating output directory structure")

    # create temporary workspace directory
    yap_tools.create_dir(wd.yap_temp_user_dir)
    _make_dir(wd.workflow_output_path)
    _make_dir(wd.temp_dir_path)
    _make_dir(wd.consolidated_output)

    # create output directory for regrouped samples
    if wd.regroup_output == 'yes':
        _make_dir(wd.regroup_output_path)
        for regroup_entry in wd.regroup_arr:
            regroup_title = regroup_entry[0]
            _make_sample_tree(wd.regroup_output_path + "/" + regroup_title)

    # create log file to track the regroup samples; only text is written,
    # and the context manager closes the file even if a write fails
    with open(wd.sample_track_log, 'w') as track_log:
        for group in wd.inp_files_list:
            sample_name = group[2]
            track_log.write(
                sample_name + "\t=>\t" + group[0] + "," + group[1] + "\n")
            _make_sample_tree(wd.workflow_output_path + "/" + sample_name)

    _make_dir(wd.log_path)
    _make_dir(wd.err_log_path)
    _make_dir(wd.stat_log_path)
    for group in wd.inp_files_list:
        sample_name = group[2]
        _make_dir(wd.err_log_path + "/" + sample_name + "_log_temp")
        _make_dir(wd.stat_log_path + "/" + sample_name + "_log_temp")
def _make_dir(path):
    """Create directory *path* (and any missing parents) without a shell.

    Best effort, like the ``mkdir`` shell command this replaces: a directory
    that already exists is accepted silently; any other failure is reported
    on stderr and execution continues. *path* is used literally, so spaces
    or shell metacharacters in it are safe.
    """
    import errno
    import sys

    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno == errno.EEXIST and os.path.isdir(path):
            return
        sys.stderr.write(
            "mkdir: cannot create directory '%s': %s\n" % (path, err.strerror))


def _make_sample_tree(sample_dir):
    """Create *sample_dir* and, for every barcode, its stage sub-directories.

    Each key of ``wd.barcode_dict`` gets ``<sample_dir>/<barcode>`` holding
    ``postprocess_output``, ``preprocess_output`` and ``aligner_output``.
    """
    _make_dir(sample_dir)
    for barcode in wd.barcode_dict:
        barcode_dir = sample_dir + "/" + barcode
        _make_dir(barcode_dir)
        for stage in ("postprocess_output", "preprocess_output",
                      "aligner_output"):
            _make_dir(barcode_dir + "/" + stage)


def initialize_dir_struct():
    """Build the on-disk output layout for the current workflow.

    Reads its configuration from the module-level ``wd`` object and creates,
    in order:

    * the temporary workspace (``yap_tools.create_dir``), the workflow
      output, temp and consolidated-output directories;
    * when ``wd.regroup_output == 'yes'``, one tree per entry of
      ``wd.regroup_arr`` (named by the entry's element 0);
    * one tree per entry of ``wd.inp_files_list`` (named by the entry's
      element 2), while writing a ``name => elem0,elem1`` line for it to
      ``wd.sample_track_log``;
    * the log, error-log and stat-log directories plus a ``<name>_log_temp``
      directory for every input entry under the latter two.

    Directory creation is best effort (see ``_make_dir``); failure to open
    the tracking log still raises.
    """
    print("Initializing YAP...")
    print("Creating output directory structure")

    # create temporary workspace directory
    yap_tools.create_dir(wd.yap_temp_user_dir)
    _make_dir(wd.workflow_output_path)
    _make_dir(wd.temp_dir_path)
    _make_dir(wd.consolidated_output)

    # create output directory for regrouped samples
    if wd.regroup_output == 'yes':
        _make_dir(wd.regroup_output_path)
        for regroup_entry in wd.regroup_arr:
            regroup_title = regroup_entry[0]
            _make_sample_tree(wd.regroup_output_path + "/" + regroup_title)

    # create log file to track the regroup samples; only text is written,
    # and the context manager closes the file even if a write fails
    with open(wd.sample_track_log, 'w') as track_log:
        for group in wd.inp_files_list:
            sample_name = group[2]
            track_log.write(
                sample_name + "\t=>\t" + group[0] + "," + group[1] + "\n")
            _make_sample_tree(wd.workflow_output_path + "/" + sample_name)

    _make_dir(wd.log_path)
    _make_dir(wd.err_log_path)
    _make_dir(wd.stat_log_path)
    for group in wd.inp_files_list:
        sample_name = group[2]
        _make_dir(wd.err_log_path + "/" + sample_name + "_log_temp")
        _make_dir(wd.stat_log_path + "/" + sample_name + "_log_temp")
exit() #communicate all the workflows to all processors workflow_struct = comm.bcast(workflow_struct, root=0) #loop over each workflow for data processing for wk in range(1, len(workflow_struct)): #store the current workflow details into a dictionary workflow_config_dict = workflow_struct[wk] #make workflow configuration variables global workflow_obj=wd.workflow_dictionary() workflow_obj.make_global(workflow_config_dict) #summary file to store all the workflow provenance details f_summary_file = wd.workflow_output_path + "/" + wd.workflow_name + "_workflow_summary.txt" basecount_metrics_flag = '' file_basecount_dict=wd.file_basecount_dict #create temp directory, this is local for everynode yap_tools.create_dir(wd.yap_temp_user_dir) if (myrank == 0): #create output directory structure yap_init.initialize_dir_struct() # printing analysis summary yap_print_info.print_info() str_out= "-"*20 +" PROVENANCE "+ "-"*20 +"\n\n" yap_file_io.write_data(str_out,f_summary_file) comm.barrier() if wd.run_preprocess_analysis == "yes": #if preprocess is set to 'yes', perform initial qc commands for i in range(0, len(wd.preprocess_cmd_arr)): preprocess_cmd_name = wd.preprocess_cmd_arr[i][2][0][0] preprocess_cmd = wd.preprocess_cmd_arr[i][2][0][1] if re.search('calculate_basecount_metrics', preprocess_cmd_name) is not None: basecount_metrics_flag = 'True'