Exemplo n.º 1
0
 def __init__(self,config,key=-1,input_dir=None,run_qc_metrics_dir=None,flowcell=None,process_name='dnanexusuploadpipeline',running_location='Speed',pipeline_config=None,**kwargs):
     """
     Initializes the DNANexus upload pipeline and emails the standard
     recipients that uploading has been initiated.

     config -- system configuration object passed to GenericPipeline
     key -- database key for this pipeline record (default -1)
     input_dir -- flowcell directory to upload; doubles as the output_dir
     flowcell -- object whose .key identifies the flowcell (must not be
         None when input_dir is given)
     pipeline_config -- config providing the Email/standard_recipients value
     """
     # When input_dir is None nothing is initialized; presumably the object
     # is being restored from the database elsewhere -- TODO confirm.
     if input_dir is not None:
         GenericPipeline.__init__(self,config,key=key,input_dir=input_dir,output_dir=input_dir,process_name=process_name,**kwargs)
         # Input and output directories are deliberately the same: the
         # upload reads from (and records against) the flowcell directory.
         self.input_dir = input_dir
         self.output_dir = input_dir
         self.flowcell_key = flowcell.key  # AttributeError if flowcell is None
         self.running_location = running_location
         self.storage_needed = 0
         # Notify the standard recipients that the upload has started.
         subject = "Uploading " + self.flowcell_key + " to DNANexus."
         message = "Uploading has been initiated."
         recipients = pipeline_config.safe_get("Email","standard_recipients")
         send_email(subject,message,recipients)
Exemplo n.º 2
0
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Checks whether the DNANexus upload has completed.

     Returns True when the process is already marked complete, or when the
     stderr file exists and is empty (no errors).  Returns False while the
     stderr file is missing (job not finished) or on the first detection of
     a non-empty stderr, in which case an error email is sent once.
     """
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     if not os.path.isfile(self.stderr):
         return False
     # A non-empty stderr file signals an upload error.  Report it only
     # once (upload_failed latches).  os.path.getsize replaces the original
     # os.stat(...)[6] magic index -- index 6 is st_size.
     if os.path.getsize(self.stderr) != 0 and self.upload_failed is False:
         subject = "DNANexus uploading error for " + self.flowcell_key
         message = "DNANexus uploading has encountered an error.  This error is detailed here:\n\t" + self.stderr
         message += "\nThe process has been halted, and the qsub script may be found here:\n\t" + self.qsub_file 
         recipients = configs["pipeline"].safe_get("Email","standard_recipients")
         send_email(subject,message,recipients)
         self.upload_failed = True
         return False
     # NOTE(review): once upload_failed is True a non-empty stderr falls
     # through to True here -- looks intentional (stop re-reporting), but
     # it does mark a failed upload "complete"; confirm.
     return True
Exemplo n.º 3
0
 def __is_complete__(self,configs,storage_device):
     """
     Check the complete file of the backup process, retry copying files
     where the keys for the input and output files are not the
     same, and handles notifications (if any).
     """
     # Already recorded as complete at the generic-process level.
     if GenericProcess.__is_complete__(self):
         return True
     # The completion marker has not been written yet: still running.
     if not os.path.isfile(self.complete_file):
         return False
     failed_files = self.__failed_files__(configs['pipeline'])
     if not failed_files:
         return True
     # Some files failed to copy.  Past the retry threshold we notify,
     # then relaunch the qsub job restricted to the failed files.
     if self.retry >= configs['pipeline'].get('Backup','retry_threshold'):
         send_email(self.__generate_repeated_error_text__(configs,failed_files))
     self.__fill_qsub_file__(configs,r_list=failed_files)
     self.__launch__(configs,storage_device)
     self.retry += 1
     return False
Exemplo n.º 4
0
 def __send_reports__(self,config,mockdb):
     """
     For reports that have generated but not been sent,
     this script attaches the appropriate plots and tables
     and sends the email.

     The "last" report (sample number 16 for RapidRun, 64 for
     HighThroughputRun) additionally appends the completed samples to the
     all-samples list and finishes this object.  Returns 1.
     """
     numbers = config.get('Flowcell_reports','numbers').split(',')
     for number in numbers:
         flowcell_report_key = getattr(self,'flowcell_report_' + str(number) + '_key')
         if flowcell_report_key is None:
             continue
         report = mockdb['FlowcellStatisticReport'].objects[flowcell_report_key]
         if report.report_sent is True: #If the report is already sent, next.
             continue
         if not report.__is_complete__(): #If the qsub script is still running, next.
             continue
         # The two run types share identical "last report" handling; the
         # original duplicated this block per run type.
         is_last_report = ((self.sequencing_run_type == 'RapidRun' and str(number) == '16') or
                           (self.sequencing_run_type == 'HighThroughputRun' and str(number) == '64'))
         if is_last_report:
             recipients = config.get('Flowcell_reports','last_recipients')
             subject, body = report.__generate_flowcell_report_text__(config,mockdb,report_type="last_report")
             #Add samples to the all sample list
             sample_keys = self.__completed_samples_list__(mockdb)
             write_list_file(sample_keys,config.get('Filenames','all_samples'),original_list_file=config.get('Filenames','all_samples'))
             self.__finish__()
         else:
             recipients = config.get('Flowcell_reports','subset_recipients')
             subject, body = report.__generate_flowcell_report_text__(config,mockdb,report_type="subset_report")
         # Attach the pdf plus the full and current tables.
         files = [report.report_pdf, report.full_report, report.current_report]
         send_email(subject,body,recipients=recipients,files=files)
         report.__finish__()
         report.report_sent = True
     return 1
Exemplo n.º 5
0
    def __init__(self,config,key=int(-1),input_dir=None,process_name='dnanexus_upload',pipeline_config=None,pipeline=None,**kwargs):
        """
        Initializes the upload process object.

        config -- system configuration object passed to QsubProcess
        key -- database key for this process record (default -1)
        pipeline -- parent pipeline providing input_dir and flowcell_key
        pipeline_config -- config providing storage and email settings;
            when None, nothing is initialized (presumably a database
            reload -- TODO confirm)

        NOTE(review): the original body mixed tabs and spaces on two
        lines; indentation has been normalized to spaces.
        """
        if pipeline_config is not None:
            output_name = os.path.basename(pipeline.input_dir)
            output_dir = os.path.join(pipeline_config.safe_get("Common_directories","dnanexus_storage"),output_name)
            QsubProcess.__init__(self,config,key=key,input_dir=pipeline.input_dir,output_dir=output_dir,process_name=process_name,**kwargs)
            self.flowcell_key = pipeline.flowcell_key
            # rstrip('/') so basename yields the directory name even when
            # the pipeline's input_dir carries a trailing slash.
            flowcell_dir = os.path.basename(pipeline.input_dir.rstrip('/'))
            self.run_qc_metrics_path = os.path.join(config.get('Common_directories','hiseq_run_log'),flowcell_dir + "/run_qc_metrics.txt")
            if not os.path.isfile(self.run_qc_metrics_path):
                #Send an email that run qc metrics file is missing.
                subject = "Missing run_qc_metrics for " + self.flowcell_key
                message = "The run qc metrics file in the following path is missing:\n\t" + self.run_qc_metrics_path
                message += "\nUploading to DNANexus failed." 
                recipients = pipeline_config.safe_get("Email","standard_recipients")
                send_email(subject,message,recipients)
            self.flowcell_dir_name = os.path.basename(self.input_dir)
            self.hiseq_run_log_dir = os.path.join(config.get("Common_directories","hiseq_run_log"),self.flowcell_dir_name) #Look at other object to how to get things from the sys config.
            self.upload_failed = False
Exemplo n.º 6
0
 def __launch__(self,configs,storage_device,node_list=None):
     """
     Checks to make sure there is enough storage.  If
     not, sends email.  If so, sends the job to SGE and 
     records pertinent information.

     Returns True when the job was submitted, False when storage is full
     or unavailable (with at most one notification per condition).
     """
     #If storage device is full, send a notification and abort.
     if storage_device.__is_full__(configs['pipeline'].get('Storage','required_fastq_size')):
         send_email(self.__generate_full_error_text__(configs,storage_device))
         return False
     #This differs from the previous check by the fact that the previous does not
     #account for jobs that are currently being copied.  This error is not as 
     #restrictive due to the fact that the required_fastq_size should be larger than
     #the actual fastq size thus leaving additional storage once complete.
     # fail_reported latches so the storage email is only sent once;
     # "is False" matches the comparison style used elsewhere in this file.
     if not storage_device.__is_available__(configs['pipeline'].get('Storage','required_fastq_size')) and self.fail_reported is False:
         send_email(self.__generate_storage_error_text__(configs,storage_device))
         self.fail_reported = True
         return False
     if node_list is None:
         node_list = configs['pipeline'].get('Backup','nodes')
     SampleQsubProcess.__launch__(self,configs['system'],node_list=node_list,queue_name='single')
     return True
Exemplo n.º 7
0
 def __archive_sequencing_run_data__(self,configs,input_dir,output_dir):
     """
     This archives the pertinent sequencing run data (a small amount, 
     which is why it is not delegated to qsub) that comes directly
     from the HiSeq machines.

     Copies the InterOp directory and the three run metadata files from
     input_dir to output_dir.  Returns True when everything was found;
     otherwise records the missing paths, emails a report (once, latched
     by flowcell_content_reported) and returns False.
     """
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
     missing_paths = []
     sub_dirs = ["InterOp"]
     self.flowcell_content_found = True
     # Older records may lack the reported flag; default it to False.
     if not hasattr(self,"flowcell_content_reported") or self.flowcell_content_reported is None:
         self.flowcell_content_reported = False
     for sub_dir in sub_dirs:
         if os.path.isdir(os.path.join(input_dir,sub_dir)):
             # Only copy when the destination does not already exist
             # (copytree fails on an existing directory).
             if not os.path.exists(os.path.join(output_dir,sub_dir)):
                 shutil.copytree(os.path.join(input_dir,sub_dir),os.path.join(output_dir,sub_dir))
         else:
             missing_paths.append(os.path.join(input_dir,sub_dir))
             self.flowcell_content_found = False
     # Renamed from "file", which shadowed the builtin.
     file_names = ["First_Base_Report.htm","RunInfo.xml","runParameters.xml"]
     for file_name in file_names:
         if os.path.isfile(os.path.join(input_dir,file_name)):
             shutil.copy(os.path.join(input_dir,file_name),output_dir)
         else:
             missing_paths.append(os.path.join(input_dir,file_name))
             self.flowcell_content_found = False
     if not self.flowcell_content_found:
         if not self.flowcell_content_reported:
             message = "The flowcell "+self.flowcell_key+" has finished copying over but is missing the following paths:\n"
             message += "\n".join(missing_paths)
             message += "\nPlease check.\n\n"
             recipients = configs["system"].get('Email','recipients_for_fastq')
             send_email("Missing flowcell data.",message,recipients=recipients)  
             self.flowcell_content_reported = True
         return False
     return True
Exemplo n.º 8
0
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Due to the inclusion of sub-processes (bclto fastq pipeline, illuminate, and launched pipelines),
     this function contains the logic to check to makes sure all of these processes
     have completed successfully.
     """
     if configs["system"].get("Logging","debug") is "True":
         print "Checking to see if seq run is complete (and advancing post-seq run pipeline)"
     if not os.path.isfile(self.complete_file):
         if configs["system"].get("Logging","debug") is "True":
             print "    Missing complete file " + self.complete_file
         return False
     if not hasattr(self,"interop_archived") or self.interop_archived is False:
         output_name = os.path.basename(self.output_dir)
         if not self.__archive_sequencing_run_data__(configs,self.output_dir,os.path.join(configs["system"].get('Common_directories','hiseq_run_log'),output_name)):
             if configs["system"].get("Logging","debug") is "True":
                 print "    Sequencing run data not archived (InterOp and so forth)."
             return False
     if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None or not hasattr(self,"illuminate_key") or self.illuminate_key is None:
         if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None:
             self.__start_bcltofastq_pipeline__(configs,mockdb)
             if configs["system"].get("Logging","debug") is "True":
                 print "    Starting bcltofastq pipeline."
         if not hasattr(self,"illuminate_key") or self.illuminate_key is None:
             self.__launch_illuminate__(configs,mockdb)
             if configs["system"].get("Logging","debug") is "True":
                 print "    Starting illuminate."
         return False
     illuminate = mockdb['Illuminate'].__get__(configs['system'],self.illuminate_key)
     if not illuminate.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
         if configs["system"].get("Logging","debug") is "True":
             print "    Illuminate not done"
         return False
     bcl2fastq_pipeline = mockdb['BclToFastqPipeline'].__get__(configs['system'],self.bcltofastq_pipeline_key)
     if not bcl2fastq_pipeline.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
         if configs["system"].get("Logging","debug") is "True":
             print "    bcltofastq not done"
         return False
     if not hasattr(self,"generic_copy_key") or self.generic_copy_key is None:
         self.__launch_archive_fastq__(configs,mockdb)
         if configs["system"].get("Logging","debug") is "True":
             print "    Launching archive"
         return False
     archive = mockdb['GenericCopy'].__get__(configs['system'],self.generic_copy_key)
     if archive.__is_complete__(*args,**kwargs):
         if not disk_usage(self.fastq_archive) > 30000000:
             if not hasattr(self,'fastq_archive_reported') or self.fastq_archive_reported is None:
                 message = "The flowcell "+self.flowcell_key+" has finished casava, but the archive is not as large as expected.\n"
                 message += "\nPlease check.\n\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Flowcell size problem.",message,recipients=recipients)  
                 self.fastq_archive_reported = True
             return False
         fastq_check = check_fastq_output(self.fastq_archive)
         if fastq_check["md5"] == [] and fastq_check["fastqc"] == [] and fastq_check["index"] is True and fastq_check["sample_sheet"] is True:
             if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None:
                 message = "Just informing you of the completion of the flowcell.\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("The fastq have been successully generated for " + self.flowcell_key + ".",message,recipients=recipients)  
                 self.fastq_check_report = True
         else:              
             if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None:
                 message = "Report detailing the issues with the flowcell directory for flowcell " + self.flowcell_key + ".\n"
                 if not fastq_check["sample_sheet"] is True:
                     message += "Sample sheet missing from " + self.archive_fastq + ".\n"
                 else:
                     if not fastq_check["index"]:
                         message += "Index counts not generated.\n"
                     if len(fastq_check["fastqc"]) != 0:
                         message += "The following directories do not have fastqc results:"
                         message += "\n\t".join(fastq_check["fastqc"]) + "\n"
                     if len(fastq_check["md5"]) != 0:
                         message += "The following directories do not have md5 checksums:"
                     message += "\n\t".join(fastq_check["md5"]) + "\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Problem with fastq generation for " + self.flowcell_key + ".",message,recipients=recipients)  
                 self.fastq_check_report = True
             return False
             
         if not hasattr(self,"generic_clean_key") or self.generic_clean_key is None:
             if hasattr(self,'fastq_archive_reported') and self.fastq_archive_reported is True:
                 message = "The flowcell "+self.flowcell_key+" has finished casava, and is now big enough.\n"
                 message += "\nContinuing.\n\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Flowcell size problem resolved.",message,recipients=recipients)  
             self.__launch_clean__(configs,mockdb)
         self.__link_to_web_portal__(configs['system'])
         if configs["system"].get("Logging","debug") is "True":
             print "  Filling stats"
         flowcell = mockdb['Flowcell'].__get__(configs['system'],self.flowcell_key)
         machine = mockdb['HiSeqMachine'].__get__(configs['system'],self.machine_key)
         fill_demultiplex_stats(configs['system'],mockdb,self.output_dir,flowcell,machine)
             #return False
     else:
         if configs["system"].get("Logging","debug") is "True":
             print "    Fastq archive not complete"
         return False     
     clean = mockdb['GenericClean'].__get__(configs['system'],self.generic_clean_key)
     if clean.__is_complete__(*args,**kwargs):
         self.__finish__(*args,**kwargs)
         return True
     return False
Exemplo n.º 9
0
def send_missing_sample_sheet_email(sample_sheet_file):
    """
    Emails the hard-coded recipients that the given sample sheet file is
    missing and that Casava therefore cannot run.

    sample_sheet_file -- path of the missing sample sheet.

    Bug fix: the original referenced an undefined name ``sample_sheet``,
    which raised NameError on every call.
    """
    message = str(sample_sheet_file)+" is missing.  Casava cannot run.\n"
    send_email("Missing "+str(sample_sheet_file),message,recipients='[email protected],[email protected],[email protected]')