def __init__(self, config, key=-1, seq_run=None, **kwargs):
    """Initialize the illuminate QC process for a sequencing run.

    Args:
        config: Config object providing ``get(section, option)``.
        key: Database key for this process; -1 marks a placeholder instance.
        seq_run: Sequencing run whose output is to be analyzed.  When None,
            nothing is initialized (presumably a reload/placeholder path --
            TODO confirm against QsubProcess conventions).
    """
    if seq_run is not None:
        self.flowcell_key = seq_run.flowcell_key
        input_dir = seq_run.output_dir
        # Logs live under the hiseq run-log area, named after the run directory.
        output_dir = os.path.join(config.get("Common_directories", "hiseq_run_log"),
                                  os.path.basename(seq_run.output_dir))
        QsubProcess.__init__(self, config, key=key, output_dir=output_dir,
                             input_dir=input_dir, process_name="illuminate", **kwargs)
        # Destination file for the run-level QC metrics produced by this step.
        self.run_qc_metrics_file = os.path.join(output_dir,
                                                config.get("Filenames", "run_qc_metrics"))
def __init__(self, config, key=-1, pipeline_config=None, process_name='md5_check_sum',
             pipeline=None, input_dir=None, sample_key=None, **kwargs):
    """Initialize the md5 checksum process for a sample's fastq directory.

    Two construction paths are supported: via a ``pipeline`` object (which
    supplies input dir, sample key and flowcell key), or via explicit
    ``input_dir`` + ``sample_key``.  If neither is given, nothing happens.

    Args:
        config: Config object for QsubProcess.
        key: Database key for this process; -1 marks a placeholder instance.
        pipeline_config: Unused here; kept for signature compatibility with
            sibling process classes.
        process_name: Name registered with QsubProcess.
        pipeline: Pipeline providing ``input_dir``, ``sample_key`` and
            ``flowcell_key``.
        input_dir: Directory holding the files to checksum (fallback path).
        sample_key: Sample identifier used to name the checksum file.
    """
    if pipeline is not None:
        self.flowcell_key = pipeline.flowcell_key
        self.md5_file = os.path.join(pipeline.input_dir,
                                     pipeline.sample_key + "_checksum.txt")
        QsubProcess.__init__(self, config, key=key, output_dir=pipeline.input_dir,
                             input_dir=pipeline.input_dir,
                             process_name=process_name, **kwargs)
    elif input_dir is not None and sample_key is not None:
        # Fallback path: no flowcell_key is recorded on this branch.
        self.md5_file = os.path.join(input_dir, sample_key + "_checksum.txt")
        QsubProcess.__init__(self, config, key=key, output_dir=input_dir,
                             input_dir=input_dir,
                             process_name=process_name, **kwargs)
def __init__(self, config, key=-1, pipeline_config=None, prev_step=None,
             process_name='index_report', pipeline=None, flowcell_key=None, **kwargs):
    """Initialize the undetermined-index report process.

    Args:
        config: Config object for QsubProcess.
        key: Database key for this process; -1 marks a placeholder instance.
        pipeline_config: Unused here; kept for signature compatibility with
            sibling process classes.
        prev_step: Previous pipeline step; its output dir anchors this one.
        process_name: Name registered with QsubProcess.
        pipeline: Pipeline object supplying ``flowcell_key``.
        flowcell_key: Explicit flowcell key (fallback construction path).
    """
    if prev_step is not None:
        # NOTE(review): this branch dereferences pipeline.flowcell_key, so
        # callers passing prev_step must also pass pipeline -- TODO confirm.
        self.flowcell_key = pipeline.flowcell_key
        output_dir = os.path.join(prev_step.output_dir, "Undetermined_indices")
        QsubProcess.__init__(self, config, key=key, output_dir=output_dir,
                             input_dir="None", process_name=process_name, **kwargs)
    elif flowcell_key is not None:
        self.flowcell_key = flowcell_key
        QsubProcess.__init__(self, config, key=key, input_dir="None",
                             process_name=process_name, **kwargs)
def __init__(self, config, key=-1, prev_step=None, pipeline=None, split_by_lane=True,
             split_by_index_length=True, process_name="casava", **kwargs):
    """Initialize the casava (bcl-to-fastq) process.

    In addition to initializing, other steps are completed: the sample sheet
    is located, validated, copied, optionally partitioned by lane and by
    index length (one casava task per partition), and per-partition base
    masks are derived from runParameters.xml.

    Args:
        config: Config object for QsubProcess.
        key: Database key for this process; -1 marks a placeholder instance.
        prev_step: Previous pipeline step; when None, nothing is initialized.
        pipeline: Pipeline supplying input/output dirs, sample sheet and keys.
        split_by_lane: Partition the sample sheet per lane (speed-up,
            especially for high throughput).
        split_by_index_length: Partition per index length (prevents casava
            from breaking when pooled samples have different index lengths).
        process_name: Name registered with QsubProcess.

    Raises:
        SampleSheetFormatException: If no sample sheet file can be found
            (an email is sent first).
    """
    if prev_step is not None:
        input_dir = os.path.join(pipeline.output_dir, "Data/Intensities/BaseCalls")
        # Final output dir is nested under the pipeline output dir and named
        # after it -- presumably a convention of downstream steps; TODO confirm.
        output_dir = os.path.join(pipeline.output_dir,
                                  os.path.basename(pipeline.output_dir))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        if pipeline.sample_sheet is None:
            original_sample_sheet_file = os.path.join(pipeline.input_dir, "SampleSheet.csv")
        else:
            original_sample_sheet_file = pipeline.sample_sheet
        # Check that the original sample sheet exists; notify and fail if not.
        if not os.path.isfile(original_sample_sheet_file):
            send_missing_sample_sheet_email(original_sample_sheet_file)
            raise SampleSheetFormatException("No sample sheet found: " + str(original_sample_sheet_file))
        sample_sheet_obj_list = SampleSheetObjList(sample_sheet_file=original_sample_sheet_file)
        # Copy sample sheet to final output dir.
        sample_sheet_obj_list.list[0].sample_sheet_table.__write_file__(os.path.join(output_dir, "SampleSheet.csv"))
        self.merged = True
        split_categories = []
        self.split_by_lane = split_by_lane
        if split_by_lane:
            # Split by lane (speed up especially for high throughput).
            sample_sheet_obj_list = sample_sheet_obj_list.__partition_sample_sheet_objects__("Lane")
            split_categories.append("Lane")
            self.merged = False
        self.split_by_index_length = split_by_index_length
        if split_by_index_length:
            # Split by index length (prevents casava from breaking when pooled
            # samples have different index lengths).
            for sample_sheet_obj in sample_sheet_obj_list.list:
                sample_sheet_obj.__attach_max_column_number__("Index")
            sample_sheet_obj_list = sample_sheet_obj_list.__partition_sample_sheet_objects__("Index", use_length=True)
            split_categories.append("Index_length")
            self.merged = False
        number_tasks = len(sample_sheet_obj_list.list)
        temporary_output_directories = sample_sheet_obj_list.__create_meta_directories_and_write_files__(
            os.path.join(output_dir, "split"), split_categories)
        # Per-task fields are stored as ':'-delimited strings.
        self.temporary_output_dir = ":".join(temporary_output_directories)
        sample_sheets = [os.path.join(d, "SampleSheet.csv") for d in temporary_output_directories]
        self.sample_sheet = ":".join(sample_sheets)
        sample_sheet_obj_list.__attach_masks__(
            run_parameters_path=os.path.join(pipeline.input_dir, "runParameters.xml"))
        # Commas would collide with the ':'-joined storage format, so each
        # mask's commas are replaced with '-'.
        masks = [re.sub(',', '-', sheet_obj.__get_meta_datum__("mask"))
                 for sheet_obj in sample_sheet_obj_list.list]
        self.mask = ":".join(masks)
        QsubProcess.__init__(self, config, key=key, output_dir=output_dir,
                             input_dir=input_dir, number_tasks=number_tasks,
                             process_name=process_name, **kwargs)
        self.flowcell_key = pipeline.flowcell_key
        self.seq_run_key = pipeline.seq_run_key
def __init__(self, config, sample_keys=None, number=None, key=-1, flowcell=None,
             input_dir=None, base_output_dir=None, output_dir=None, date=None,
             time=None, process_name='flowcell_report', complete_file=None, **kwargs):
    """Initialize a flowcell statistics report.

    Args:
        config: Config object for QsubProcess.
        sample_keys: Sample identifiers covered by this report.
        number: Report sequence number for this flowcell.
        key: Database key for this process; -1 marks a placeholder instance
            (placeholders skip writing the sample list files).
        flowcell: Flowcell object; a dummy one is created when None.
        input_dir: Input directory passed through to QsubProcess.
        base_output_dir: Root under which per-flowcell report dirs are made.
        output_dir: Explicit output dir; overrides the derived one.
        date: Report date (YYYYMMDD); defaults to "now" at call time.
        time: Report time (HH:MM:SS); defaults to "now" at call time.
        process_name: Name registered with QsubProcess.
        complete_file: Marker file path; derived from ``number`` when None.

    Raises:
        Exception: If ``flowcell`` is not a Flowcell instance.
    """
    # BUG FIX: the original signature used strftime(...) directly as the
    # default for date/time, which Python evaluates once at import time --
    # every report created later shared the stale import-time timestamp.
    # Using a None sentinel computes the timestamp at call time instead.
    if date is None:
        date = strftime("%Y%m%d", localtime())
    if time is None:
        time = strftime("%H:%M:%S", localtime())
    if flowcell is None:
        flowcell = Flowcell(config, key="dummy_flowcell_key")
    if flowcell.__class__.__name__ != "Flowcell":
        raise Exception("Trying to start a flowcell statistics reports object on a non-flowcell.")
    if output_dir is None:
        if base_output_dir is None:
            base_output_dir = config.get('Common_directories', 'flowcell_reports')
        self.output_dir = os.path.join(base_output_dir,
                                       flowcell.key + "_reports", str(number))
    else:
        self.output_dir = output_dir
    if complete_file is None:
        self.complete_file = os.path.join(self.output_dir,
                                          "report_" + str(number) + ".complete")
    else:
        self.complete_file = complete_file
    QsubProcess.__init__(self, config, key=key, input_dir=input_dir,
                         base_output_dir=base_output_dir, output_dir=self.output_dir,
                         date=date, time=time, process_name=process_name,
                         complete_file=self.complete_file, **kwargs)
    self.flowcell_key = flowcell.key
    self.sample_keys = "" if sample_keys is None else ";".join(sample_keys)
    self.number = number
    # List of samples from the project; only written for real (non-placeholder)
    # processes, i.e. when self.key != -1 after QsubProcess init.
    self.all_samples_file = os.path.join(self.output_dir, 'all_samples.ls')
    if self.key != -1:
        write_list_file(sample_keys, self.all_samples_file,
                        original_list_file=config.get('Filenames', 'all_samples'))
    self.current_samples_file = os.path.join(self.output_dir, 'current_samples.ls')
    if self.key != -1:
        write_list_file(sample_keys, self.current_samples_file)
    # Output files produced by the report.
    self.full_report = os.path.join(self.output_dir, 'all_samples_report.csv')
    self.current_report = os.path.join(self.output_dir, 'current_samples_report.csv')
    self.concordance_jpeg = os.path.join(self.output_dir, 'concordance_vs_depth.jpeg')
    self.dbsnp_jpeg = os.path.join(self.output_dir, 'dbsnp_vs_depth.jpeg')
    self.greater_than_10x_jpeg = os.path.join(self.output_dir, 'greater_than_10x_vs_depth.jpeg')
    self.zero_coverage_jpeg = os.path.join(self.output_dir, 'zero_coverage_vs_depth.jpeg')
    self.hethomratio_jpeg = os.path.join(self.output_dir, 'hethomratio_vs_depth.jpeg')
    self.reads_jpeg = os.path.join(self.output_dir, 'reads_vs_depth.jpeg')
    self.report_pdf = os.path.join(self.output_dir, self.flowcell_key + '_report.pdf')
    # Flag to keep track of whether the report has been sent.
    self.report_sent = False
def __init__(self, config, key=-1, input_dir=None, process_name='dnanexus_upload',
             pipeline_config=None, pipeline=None, **kwargs):
    """Initialize the DNAnexus upload process object.

    Args:
        config: Config object for QsubProcess.
        key: Database key for this process; -1 marks a placeholder instance.
        input_dir: Unused directly; QsubProcess receives pipeline.input_dir.
        process_name: Name registered with QsubProcess.
        pipeline_config: Pipeline config providing ``safe_get``; when None,
            nothing is initialized.
        pipeline: Pipeline supplying ``input_dir`` and ``flowcell_key``.
            NOTE(review): dereferenced whenever pipeline_config is given --
            TODO confirm callers always pass both.
    """
    if pipeline_config is not None:
        output_name = os.path.basename(pipeline.input_dir)
        output_dir = os.path.join(
            pipeline_config.safe_get("Common_directories", "dnanexus_storage"),
            output_name)
        QsubProcess.__init__(self, config, key=key, input_dir=pipeline.input_dir,
                             output_dir=output_dir, process_name=process_name, **kwargs)
        self.flowcell_key = pipeline.flowcell_key
        # rstrip('/') so basename works even when the dir has a trailing slash.
        flowcell_dir = os.path.basename(pipeline.input_dir.rstrip('/'))
        self.run_qc_metrics_path = os.path.join(
            config.get('Common_directories', 'hiseq_run_log'),
            flowcell_dir, "run_qc_metrics.txt")
        if not os.path.isfile(self.run_qc_metrics_path):
            # Send an email that the run qc metrics file is missing.
            subject = "Missing run_qc_metrics for " + self.flowcell_key
            message = "The run qc metrics file in the following path is missing:\n\t" + self.run_qc_metrics_path
            message += "\nUploading to DNANexus failed."
            recipients = pipeline_config.safe_get("Email", "standard_recipients")
            send_email(subject, message, recipients)
        self.flowcell_dir_name = os.path.basename(self.input_dir)
        self.hiseq_run_log_dir = os.path.join(
            config.get("Common_directories", "hiseq_run_log"),
            self.flowcell_dir_name)
        # Look at other objects for how to get things from the sys config.
        self.upload_failed = False
def __init__(self, config, key=int(-1), rm_dir=None, process_name='clean', **kwargs):
    """Set up a cleanup process targeting rm_dir; no-op when rm_dir is None."""
    if rm_dir is None:
        return
    QsubProcess.__init__(self, config, key=key, input_dir=rm_dir,
                         process_name=process_name, **kwargs)
def __init__(self, config, key=-1, output_dir=None, input_dir=None,
             process_name='generic_copy', **kwargs):
    """Set up a generic copy process; no-op when input_dir is None."""
    if input_dir is None:
        return
    QsubProcess.__init__(self, config, key=key, output_dir=output_dir,
                         input_dir=input_dir, process_name=process_name, **kwargs)