# Example #1
# 0
def things_to_do_if_initializing_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None,combine_projects=True):
    """
    Scan source_dir for sample directories and, for every sample whose
    configured pipeline matches pipeline_name, register a new pipeline
    object in mockdb.  The owning SequencingRun record is created on a
    best-effort basis when it is not already known.

    Returns 1 on completion (legacy success flag).
    """
    # Bug fix: this previously tested `combine_project` (undefined name) and
    # subscript-assigned into `sample_dirs` before it existed.
    if combine_projects:
        # All samples are grouped under a single placeholder project.
        sample_dirs = {"dummy_project": list_sample_dirs(source_dir)}
    else:
        sample_dirs = list_project_sample_dirs(source_dir)
    target_config = MyConfigParser()
    target_config.read(configs["system"].get("Filenames","target_config"))
    for project in sample_dirs:
        for sample in sample_dirs[project]:
            running_location = identify_running_location_with_most_currently_available(configs,storage_devices)
            parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0])
            if base_output_dir is None:
                base_output_dir = configs['pipeline'].get('Common_directories','archive_directory')
            automation_parameters_config = MyConfigParser()
            automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
            description_dict = parse_description_into_dictionary(parsed['description'])
            # The pipeline key comes from the explicit 'Pipeline' entry when
            # present, otherwise from the last '_'-separated description piece.
            if 'Pipeline' in description_dict:
                pipeline_key = description_dict['Pipeline']
            else:
                description_pieces = parsed['description'].split('_')
                pipeline_key = description_pieces[-1]
            pipeline_name_for_sample = automation_parameters_config.safe_get("Pipeline",pipeline_key)
            if pipeline_name_for_sample != pipeline_name:
                continue
            mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_config=configs["pipeline"],project=parsed['project_name'],pipeline_key=pipeline_key,**parsed)
            flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
            if parsed['flowcell'].key in flowcell_dict:
                seq_run = flowcell_dict[parsed['flowcell'].key]
            else:
                try:
                    base_dir = get_sequencing_run_base_dir(source_dir)
                    [date,machine_key,run_number,side,flowcell_key] = parse_sequencing_run_dir(base_dir)
                    machine = mockdb['HiSeqMachine'].__get__(configs['system'],machine_key)
                    run_type = determine_run_type(base_dir)
                    # Bug fix: `flowcell` was previously an undefined name here;
                    # the flowcell object comes from the parsed sample sheet
                    # (see the membership test above).
                    flowcell = parsed['flowcell']
                    seq_run = mockdb['SequencingRun'].__new__(configs['system'],flowcell,machine,date,run_number,output_dir=base_dir,side=side,run_type=run_type)
                    fill_demultiplex_stats(configs['system'],mockdb,seq_run.output_dir,flowcell,machine)
                except Exception:
                    # Best-effort: failure to create the SequencingRun record is
                    # deliberately non-fatal for sample registration.  Kept the
                    # swallow-and-continue behavior but narrowed the bare except.
                    pass
    return 1
# Example #2
# 0
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Due to the inclusion of sub-processes (bclto fastq pipeline, illuminate, and launched pipelines),
     this function contains the logic to check to makes sure all of these processes
     have completed successfully.
     """
     if configs["system"].get("Logging","debug") is "True":
         print "Checking to see if seq run is complete (and advancing post-seq run pipeline)"
     if not os.path.isfile(self.complete_file):
         if configs["system"].get("Logging","debug") is "True":
             print "    Missing complete file " + self.complete_file
         return False
     if not hasattr(self,"interop_archived") or self.interop_archived is False:
         output_name = os.path.basename(self.output_dir)
         if not self.__archive_sequencing_run_data__(configs,self.output_dir,os.path.join(configs["system"].get('Common_directories','hiseq_run_log'),output_name)):
             if configs["system"].get("Logging","debug") is "True":
                 print "    Sequencing run data not archived (InterOp and so forth)."
             return False
     if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None or not hasattr(self,"illuminate_key") or self.illuminate_key is None:
         if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None:
             self.__start_bcltofastq_pipeline__(configs,mockdb)
             if configs["system"].get("Logging","debug") is "True":
                 print "    Starting bcltofastq pipeline."
         if not hasattr(self,"illuminate_key") or self.illuminate_key is None:
             self.__launch_illuminate__(configs,mockdb)
             if configs["system"].get("Logging","debug") is "True":
                 print "    Starting illuminate."
         return False
     illuminate = mockdb['Illuminate'].__get__(configs['system'],self.illuminate_key)
     if not illuminate.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
         if configs["system"].get("Logging","debug") is "True":
             print "    Illuminate not done"
         return False
     bcl2fastq_pipeline = mockdb['BclToFastqPipeline'].__get__(configs['system'],self.bcltofastq_pipeline_key)
     if not bcl2fastq_pipeline.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
         if configs["system"].get("Logging","debug") is "True":
             print "    bcltofastq not done"
         return False
     if not hasattr(self,"generic_copy_key") or self.generic_copy_key is None:
         self.__launch_archive_fastq__(configs,mockdb)
         if configs["system"].get("Logging","debug") is "True":
             print "    Launching archive"
         return False
     archive = mockdb['GenericCopy'].__get__(configs['system'],self.generic_copy_key)
     if archive.__is_complete__(*args,**kwargs):
         if not disk_usage(self.fastq_archive) > 30000000:
             if not hasattr(self,'fastq_archive_reported') or self.fastq_archive_reported is None:
                 message = "The flowcell "+self.flowcell_key+" has finished casava, but the archive is not as large as expected.\n"
                 message += "\nPlease check.\n\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Flowcell size problem.",message,recipients=recipients)  
                 self.fastq_archive_reported = True
             return False
         fastq_check = check_fastq_output(self.fastq_archive)
         if fastq_check["md5"] == [] and fastq_check["fastqc"] == [] and fastq_check["index"] is True and fastq_check["sample_sheet"] is True:
             if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None:
                 message = "Just informing you of the completion of the flowcell.\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("The fastq have been successully generated for " + self.flowcell_key + ".",message,recipients=recipients)  
                 self.fastq_check_report = True
         else:              
             if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None:
                 message = "Report detailing the issues with the flowcell directory for flowcell " + self.flowcell_key + ".\n"
                 if not fastq_check["sample_sheet"] is True:
                     message += "Sample sheet missing from " + self.archive_fastq + ".\n"
                 else:
                     if not fastq_check["index"]:
                         message += "Index counts not generated.\n"
                     if len(fastq_check["fastqc"]) != 0:
                         message += "The following directories do not have fastqc results:"
                         message += "\n\t".join(fastq_check["fastqc"]) + "\n"
                     if len(fastq_check["md5"]) != 0:
                         message += "The following directories do not have md5 checksums:"
                     message += "\n\t".join(fastq_check["md5"]) + "\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Problem with fastq generation for " + self.flowcell_key + ".",message,recipients=recipients)  
                 self.fastq_check_report = True
             return False
             
         if not hasattr(self,"generic_clean_key") or self.generic_clean_key is None:
             if hasattr(self,'fastq_archive_reported') and self.fastq_archive_reported is True:
                 message = "The flowcell "+self.flowcell_key+" has finished casava, and is now big enough.\n"
                 message += "\nContinuing.\n\n"
                 recipients = configs["system"].get('Email','recipients_for_fastq')
                 send_email("Flowcell size problem resolved.",message,recipients=recipients)  
             self.__launch_clean__(configs,mockdb)
         self.__link_to_web_portal__(configs['system'])
         if configs["system"].get("Logging","debug") is "True":
             print "  Filling stats"
         flowcell = mockdb['Flowcell'].__get__(configs['system'],self.flowcell_key)
         machine = mockdb['HiSeqMachine'].__get__(configs['system'],self.machine_key)
         fill_demultiplex_stats(configs['system'],mockdb,self.output_dir,flowcell,machine)
             #return False
     else:
         if configs["system"].get("Logging","debug") is "True":
             print "    Fastq archive not complete"
         return False     
     clean = mockdb['GenericClean'].__get__(configs['system'],self.generic_clean_key)
     if clean.__is_complete__(*args,**kwargs):
         self.__finish__(*args,**kwargs)
         return True
     return False
import sys
import ConfigParser
from mockdb.initiate_mockdb import initiate_mockdb, save_mockdb
from processes.parsing import parse_sequencing_run_dir
from demultiplex_stats.fill_demultiplex_stats import fill_demultiplex_stats


# Driver: rebuild demultiplex statistics for the sequencing-run directory
# given as the first command-line argument, then persist the mock database.
run_dir = sys.argv[1]

cfg = ConfigParser.ConfigParser()
cfg.read('/mnt/iscsi_space/zerbeb/qc_pipeline_project/qc_pipeline/config/qc.cfg')

db = initiate_mockdb(cfg)

# The run directory name encodes the run metadata; only the machine and
# flowcell keys are needed to look up their records.
date, machine_key, run_number, side, flowcell_key = parse_sequencing_run_dir(run_dir)
hiseq_machine = db['HiSeqMachine'].__get__(cfg, machine_key)
run_flowcell = db['Flowcell'].__get__(cfg, flowcell_key)
fill_demultiplex_stats(cfg, db, run_dir, run_flowcell, hiseq_machine)

save_mockdb(cfg, db)