def merge_flowcell_casava_results(flowcell_dirs,output_dir,*args,**kwargs):
    """
    Merge the per-sample CASAVA results found in several flowcell
    directories into a single output directory.
    """
    # Map each sample name to the directories that contain it.
    sample_dirs_dict = list_sample_dirs(flowcell_dirs)
    # Build one sample-sheet object per discovered sample directory.
    sheet_list = SampleSheetObjList()
    sheet_list.__load_sample_sheets_from_sample_directories__(sample_dirs_dict)
    # Merge the fastq directories, prefixing merged entries with the flowcell id.
    merge_casava_fastq_directories(sheet_list,output_dir,meta_data_prefix=["FCID"])
    return
Exemplo n.º 2
0
 def __finish__(self,*args,**kwargs):
     """
     Finishes the bcltofastq pipeline.  Kept separate so that multiple
     output directories can be consolidated into a single email and so
     that specific pipelines can isolate this step.
     """
     sample_dirs = list_sample_dirs(self.output_dir.split(":"))
     # Collect every sample directory whose reported disk usage is
     # below the 200000 threshold (flagged as "less than 200MB" below).
     problem_dirs = [sample_dir
                     for sample in sample_dirs
                     for sample_dir in sample_dirs[sample]
                     if int(disk_usage(sample_dir)) < 200000]
     if problem_dirs:
         message = "The following directory(ies) is(are) less than 200MB:\n"
         for problem_dir in problem_dirs:
             message += "\t" + problem_dir + "\n"
         message += "Please check.\n"
         # Notification is currently disabled.
         #send_email("Small sample directory",message,recipients='[email protected],[email protected]')  
     GenericPipeline.__finish__(self,*args,**kwargs)
     return 1
Exemplo n.º 3
0
def things_to_do_if_initializing_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None,combine_projects=True):
    """
    Scans source_dir for sample directories, parses each sample's sheet,
    and registers a new pipeline object in mockdb for every sample whose
    configured pipeline key resolves to pipeline_name.  Also makes sure a
    SequencingRun object exists for the sample's flowcell.

    Returns 1 unconditionally.
    """
    # Bug fix: the original tested the undefined name `combine_project`
    # and assigned into `sample_dirs` before the dict existed.
    if combine_projects:
        sample_dirs = {"dummy_project": list_sample_dirs(source_dir)}
    else:
        sample_dirs = list_project_sample_dirs(source_dir)
    target_config = MyConfigParser()
    target_config.read(configs["system"].get("Filenames","target_config"))
    for project in sample_dirs:
        for sample in sample_dirs[project]:
            running_location = identify_running_location_with_most_currently_available(configs,storage_devices)
            parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0])
            if base_output_dir is None:
                base_output_dir = configs['pipeline'].get('Common_directories','archive_directory')
            automation_parameters_config = MyConfigParser()
            automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
            description_dict = parse_description_into_dictionary(parsed['description'])
            # The pipeline key comes from an explicit 'Pipeline' entry when
            # present, otherwise from the last '_'-separated description token.
            if 'Pipeline' in description_dict:
                pipeline_key = description_dict['Pipeline']
            else:
                description_pieces = parsed['description'].split('_')
                pipeline_key = description_pieces[-1]
            pipeline_name_for_sample = automation_parameters_config.safe_get("Pipeline",pipeline_key)
            if not pipeline_name_for_sample == pipeline_name:
                continue
            mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_config=configs["pipeline"],project=parsed['project_name'],pipeline_key=pipeline_key,**parsed)
            flowcell = parsed['flowcell']
            # Bug fix: this dict was built twice in a row; once is enough.
            flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
            if flowcell.key in flowcell_dict:
                seq_run = flowcell_dict[flowcell.key]
            else:
                try:
                    base_dir = get_sequencing_run_base_dir(source_dir)
                    [date,machine_key,run_number,side,flowcell_key] = parse_sequencing_run_dir(base_dir)
                    machine = mockdb['HiSeqMachine'].__get__(configs['system'],machine_key)
                    run_type = determine_run_type(base_dir)
                    # Bug fix: the original referenced the undefined name
                    # `flowcell`; use the object parsed from the sample sheet.
                    seq_run = mockdb['SequencingRun'].__new__(configs['system'],flowcell,machine,date,run_number,output_dir=base_dir,side=side,run_type=run_type)
                    fill_demultiplex_stats(configs['system'],mockdb,seq_run.output_dir,flowcell,machine)
                except Exception:
                    # Best-effort SequencingRun registration; stay broad but
                    # no longer swallow SystemExit/KeyboardInterrupt as the
                    # original bare `except:` did.
                    pass
    return 1
Exemplo n.º 4
0
from processes.hiseq.sample_sheet import SampleSheetObjList
from processes.hiseq.scripts import list_sample_dirs
import argparse

if __name__ == '__main__':
    #Handle arguments
    # Command-line harness for exercising SampleSheetObjList helpers.
    # Each flag takes a directory; every sub-directory containing a
    # SampleSheet.csv is loaded before the corresponding test runs.
    parser = argparse.ArgumentParser(description='Test various functions in this functions in this folder that require multiple modules')
    parser.add_argument('--load_samples_sample_sheets', dest="samples_dir", type=str, help='Test the loading of sample sheets by sample by providing the path for under which all sub-directories are evaluated for SampleSheet.csv.')
    parser.add_argument('--column_values', dest="values_dir", type=str, help='Test the column values function by returning a list of samples in all of the sample sheets by providing the path for under which all sub-directories are evaluated for SampleSheet.csv.')
    parser.add_argument('--merge_to_single', dest="merge_dir", type=str, help='Test the merge all sample sheet objects function by returning a single sample sheet by providing the path for under which all sub-directories are evaluated for SampleSheet.csv.')
    parser.add_argument('--filter_by_sample', dest="filter_dir", type=str, help='Test the filter sample sheet object by printing multiple sample sheet objects after providing the path for under which all sub-directories are evaluated for SampleSheet.csv.')

    args = parser.parse_args()
    # NOTE(review): a single shared list is reused by every branch below,
    # so sheets loaded by an earlier flag are still present when a later
    # flag runs — confirm this accumulation is intentional.
    sample_sheet_obj_list = SampleSheetObjList()
    if args.samples_dir:
        sample_dirs_dict = list_sample_dirs([args.samples_dir])
        sample_sheet_obj_list. __load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        sample_sheet_obj_list.__print__()
    if args.values_dir:
        sample_dirs_dict = list_sample_dirs([args.values_dir])
        sample_sheet_obj_list. __load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        # Python 2 print statement; this script is not Python 3 compatible.
        print str(sample_sheet_obj_list.__get_column_values__("SampleID"))
    if args.merge_dir:
        sample_dirs_dict = list_sample_dirs([args.merge_dir])
        sample_sheet_obj_list. __load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        new_sample_sheet_obj_list = sample_sheet_obj_list.__merge_all_sample_sheet_objects__()
        new_sample_sheet_obj_list.__print__(print_meta_data=False)
    if args.filter_dir:
        sample_dirs_dict = list_sample_dirs([args.filter_dir])
        sample_sheet_obj_list. __load_sample_sheets_from_sample_directories__(sample_dirs_dict)
        # presumably sample_ids feeds a per-sample filter step below — the
        # rest of this branch is outside the visible portion of the file.
        sample_ids = sample_sheet_obj_list.__get_column_values__("SampleID")