def merge_flowcell_casava_results(flowcell_dirs, output_dir, *args, **kwargs):
    """
    Merges the samples in multiple flowcell directories.
    """
    sample_dirs_dict = list_sample_dirs(flowcell_dirs)
    sample_sheet_obj_list = SampleSheetObjList()
    sample_sheet_obj_list.__load_sample_sheets_from_sample_directories__(sample_dirs_dict)
    merge_casava_fastq_directories(sample_sheet_obj_list, output_dir, meta_data_prefix=["FCID"])
    return
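# Illustrative usage (the directory paths below are hypothetical): merge the
# CASAVA results for the same samples from two flowcells into one output tree,
# tagging each merged sample sheet's meta-data with the originating FCID.
#
#   merge_flowcell_casava_results(
#       ["/runs/FCA1XX/Unaligned", "/runs/FCB2XX/Unaligned"],
#       "/archive/merged_samples")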
def __finish__(self, *args, **kwargs):
    """
    Finishes the bcltofastq pipeline.  This is separated out so that
    multiple directories can be consolidated into a single email, and
    so that specific pipelines can override it.
    """
    problem_dirs = []
    sample_dirs = list_sample_dirs(self.output_dir.split(":"))
    for sample in sample_dirs:
        for sample_dir in sample_dirs[sample]:
            # disk_usage() appears to report kilobytes, so the 200000
            # threshold corresponds to the 200MB named in the message below.
            if int(disk_usage(sample_dir)) < 200000:
                problem_dirs.append(sample_dir)
    if len(problem_dirs) > 0:
        message = "The following directories are smaller than 200MB:\n"
        for problem_dir in problem_dirs:
            message += "\t" + problem_dir + "\n"
        message += "Please check.\n"
        #send_email("Small sample directory",message,recipients='[email protected],[email protected]')
    GenericPipeline.__finish__(self, *args, **kwargs)
    return 1
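# disk_usage() above is assumed to be one of the pipeline's script helpers,
# returning a size in kilobytes.  A minimal stand-in with that contract,
# useful for exercising __finish__ in isolation, might look like this sketch:
import os

def disk_usage(directory):
    """Return the total size of all files under directory, in kilobytes."""
    total_bytes = 0
    for root, dirs, files in os.walk(directory):
        for name in files:
            path = os.path.join(root, name)
            if os.path.isfile(path):
                total_bytes += os.path.getsize(path)
    return total_bytes // 1024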
def things_to_do_if_initializing_pipeline_with_input_directory(configs, storage_devices, mockdb, source_dir, pipeline_name=None, base_output_dir=None, combine_projects=True):
    """
    Registers a new pipeline object for every sample under source_dir whose
    sample sheet description resolves to pipeline_name, and back-fills the
    SequencingRun record when it is missing.
    """
    if combine_projects:
        sample_dirs = {"dummy_project": list_sample_dirs(source_dir)}
    else:
        sample_dirs = list_project_sample_dirs(source_dir)
    target_config = MyConfigParser()
    target_config.read(configs["system"].get("Filenames", "target_config"))
    for project in sample_dirs:
        for sample in sample_dirs[project]:
            running_location = identify_running_location_with_most_currently_available(configs, storage_devices)
            parsed = parse_sample_sheet(configs['system'], mockdb, sample_dirs[project][sample][0])
            if base_output_dir is None:
                base_output_dir = configs['pipeline'].get('Common_directories', 'archive_directory')
            automation_parameters_config = MyConfigParser()
            automation_parameters_config.read(configs["system"].get("Filenames", "automation_config"))
            description_dict = parse_description_into_dictionary(parsed['description'])
            if 'Pipeline' in description_dict:
                pipeline_key = description_dict['Pipeline']
            else:
                # Fall back to the last underscore-delimited piece of the description.
                description_pieces = parsed['description'].split('_')
                pipeline_key = description_pieces[-1]
            pipeline_name_for_sample = automation_parameters_config.safe_get("Pipeline", pipeline_key)
            if not pipeline_name_for_sample == pipeline_name:
                continue
            mockdb[pipeline_name].__new__(configs['system'], input_dir=sample_dirs[project][sample][0], pipeline_config=configs["pipeline"], project=parsed['project_name'], pipeline_key=pipeline_key, **parsed)
            flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
            if parsed['flowcell'].key in flowcell_dict:
                seq_run = flowcell_dict[parsed['flowcell'].key]
            else:
                try:
                    base_dir = get_sequencing_run_base_dir(source_dir)
                    [date, machine_key, run_number, side, flowcell_key] = parse_sequencing_run_dir(base_dir)
                    machine = mockdb['HiSeqMachine'].__get__(configs['system'], machine_key)
                    run_type = determine_run_type(base_dir)
                    seq_run = mockdb['SequencingRun'].__new__(configs['system'], parsed['flowcell'], machine, date, run_number, output_dir=base_dir, side=side, run_type=run_type)
                    fill_demultiplex_stats(configs['system'], mockdb, seq_run.output_dir, parsed['flowcell'], machine)
                except:
                    pass
    return 1
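# A self-contained sketch of the description-to-pipeline-key fallback used
# above: when the parsed description dictionary carries no explicit 'Pipeline'
# entry, the last underscore-delimited piece of the raw description is treated
# as the pipeline key.  (The helper name and example description are
# hypothetical, for illustration only.)
def pipeline_key_from_description(description, description_dict=None):
    """Return the pipeline key embedded in a sample sheet description."""
    if description_dict is not None and 'Pipeline' in description_dict:
        return description_dict['Pipeline']
    return description.split('_')[-1]

# e.g. pipeline_key_from_description("JonesLab_Exome") returns "Exome"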
from processes.hiseq.sample_sheet import SampleSheetObjList
from processes.hiseq.scripts import list_sample_dirs
import argparse

if __name__ == '__main__':
    #Handle arguments
    parser = argparse.ArgumentParser(description='Test various functions in this folder that require multiple modules')
    parser.add_argument('--load_samples_sample_sheets', dest="samples_dir", type=str, help='Test the loading of sample sheets by sample.  Provide a path; all sub-directories under it are scanned for SampleSheet.csv.')
    parser.add_argument('--column_values', dest="values_dir", type=str, help='Test the column values function by returning a list of samples in all of the sample sheets.  Provide a path; all sub-directories under it are scanned for SampleSheet.csv.')
    parser.add_argument('--merge_to_single', dest="merge_dir", type=str, help='Test the merge all sample sheet objects function by returning a single sample sheet.  Provide a path; all sub-directories under it are scanned for SampleSheet.csv.')
    parser.add_argument('--filter_by_sample', dest="filter_dir", type=str, help='Test the filter sample sheet object function by printing multiple sample sheet objects.  Provide a path; all sub-directories under it are scanned for SampleSheet.csv.')
    args = parser.parse_args()
    sample_sheet_obj_list = SampleSheetObjList()
    if args.samples_dir:
        sample_dirs_dict = list_sample_dirs([args.samples_dir])
        sample_sheet_obj_list.__load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        sample_sheet_obj_list.__print__()
    if args.values_dir:
        sample_dirs_dict = list_sample_dirs([args.values_dir])
        sample_sheet_obj_list.__load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        print str(sample_sheet_obj_list.__get_column_values__("SampleID"))
    if args.merge_dir:
        sample_dirs_dict = list_sample_dirs([args.merge_dir])
        sample_sheet_obj_list.__load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        new_sample_sheet_obj_list = sample_sheet_obj_list.__merge_all_sample_sheet_objects__()
        new_sample_sheet_obj_list.__print__(print_meta_data=False)
    if args.filter_dir:
        sample_dirs_dict = list_sample_dirs([args.filter_dir])
        sample_sheet_obj_list.__load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        sample_ids = sample_sheet_obj_list.__get_column_values__("SampleID")
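# Example invocations of this test script (the script name and paths are
# hypothetical; each flag scans the given tree for SampleSheet.csv files):
#
#   python test_sample_sheet_functions.py --load_samples_sample_sheets /seq/runs/FCA1XX
#   python test_sample_sheet_functions.py --column_values /seq/runs/FCA1XX
#   python test_sample_sheet_functions.py --merge_to_single /seq/runs/FCA1XX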