Пример #1
0
def maintain_sequencing_run_objects(config,mockdb):
    """
    Adds SequencingRun objects for run directories that are not yet in the database.

    Lists the directories under the configured hiseq output directory
    ('Common_directories', 'hiseq_output'), parses each directory name, and
    compares the parsed flowcell keys against the flowcell keys already present
    in mockdb['SequencingRun'].  Every flowcell found on disk but not in the
    database is passed, with its directory, to add_sequencing_run_object.

    Directories whose names cannot be parsed (or that parse to a date of None)
    are ignored.  When the 'Logging'/'debug' option equals the string "True",
    the set of new flowcells is printed.

    Always returns 1.
    """
    hiseq_output = config.get('Common_directories','hiseq_output')
    # Created up front so that an empty or entirely unparsable listing yields
    # an empty mapping instead of an unbound local further down.
    monitoring_dirs_keyed_by_flowcell = {}
    for run_dir in list_monitoring_dirs(hiseq_output):
        try:
            date, _, _, _, flowcell_key = parse_sequencing_run_dir(run_dir)
        except Exception:
            # Not a recognisable sequencing run directory; skip it.
            continue
        if date is None:
            continue
        # If several directories share a flowcell, the last one listed wins.
        monitoring_dirs_keyed_by_flowcell[flowcell_key] = run_dir
    db_flowcells = set(mockdb['SequencingRun'].__attribute_value_to_key_dict__('flowcell_key').keys())
    new_flowcells = set(monitoring_dirs_keyed_by_flowcell.keys()).difference(db_flowcells)
    # Compare by value: identity ("is") against a string literal is unreliable.
    if config.get("Logging","debug") == "True":
        print("new flowcells: "+ str(new_flowcells))
    for new_flowcell in new_flowcells:
        add_sequencing_run_object(config,mockdb,input_dir=monitoring_dirs_keyed_by_flowcell[new_flowcell])
    return 1
Пример #2
0
def add_sequencing_run_object(config,mockdb,input_dir,output_dir=None,sample_sheet=None):
    """
    Creates a SequencingRun object for input_dir and puts it in the 'Running' state.

    The directory name is parsed into date, machine key, run number, side and
    flowcell key.  The machine and flowcell objects are fetched from
    mockdb['HiSeqMachine'] and mockdb['Flowcell'] via __get__, the run type is
    determined from the directory, and a new object is created through
    mockdb['SequencingRun'].__new__.  output_dir is passed on as the
    fastq_archive argument and sample_sheet is passed through unchanged.

    Always returns 1.
    """
    date, machine_key, run_number, side, flowcell_key = parse_sequencing_run_dir(input_dir)
    hiseq_machine = mockdb['HiSeqMachine'].__get__(config,key=machine_key)
    run_flowcell = mockdb['Flowcell'].__get__(config,key=flowcell_key)
    detected_run_type = determine_run_type(input_dir)
    new_run = mockdb['SequencingRun'].__new__(
        config,
        input_dir=input_dir,
        fastq_archive=output_dir,
        flowcell=run_flowcell,
        machine=hiseq_machine,
        date=date,
        run_number=run_number,
        side=side,
        run_type=detected_run_type,
        sample_sheet=sample_sheet,
    )
    new_run.state = 'Running'
    return 1
Пример #3
0
def _file_ctime_or_none(path):
    """Return the ctime of path as a whole-second local datetime, or None if path is not a file."""
    if not os.path.isfile(path):
        return None
    # Truncate to whole seconds to keep the resolution the former
    # time.ctime()/strptime() round trip produced, without depending on the
    # process locale for day and month names.
    return datetime.datetime.fromtimestamp(int(os.path.getctime(path)))


def fill_run_stats_from_directory_name(config,mockdb,directory,add=True):
    """
    Records begin/end timestamps for the sequencing run described by directory.

    The directory name is parsed into date, machine key, run number, side and
    flowcell key.  The begin and end timestamps are the ctimes of the
    'basecalling_initialized' and 'basecalling_complete' files (names taken
    from the 'Filenames' config section) inside the matching run directory
    under the configured hiseq output directory; a missing file gives None.

    If add is False and SequencingRun objects already exist for the flowcell,
    each of them has its timestamps, flowcell key and machine key overwritten.
    Otherwise a new SequencingRun object is created and set to 'Complete'.

    Always returns 1.
    """
    [date,machine_key,run_number,side,flowcell_key] = parse_sequencing_run_dir(directory)
    last_name = date + "_" + machine_key + "_" + run_number + "_" + side + flowcell_key
    run_dir = os.path.join(config.get('Common_directories','hiseq_output'),last_name)
    start_timestamp = _file_ctime_or_none(os.path.join(run_dir,config.get('Filenames','basecalling_initialized')))
    end_timestamp = _file_ctime_or_none(os.path.join(run_dir,config.get('Filenames','basecalling_complete')))
    machine = mockdb['HiSeqMachine'].__get__(config,key=machine_key)
    flowcell = mockdb['Flowcell'].__get__(config,key=flowcell_key)
    seq_run_flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
    # "== False" is kept deliberately so that add=None still takes the create branch.
    if add == False and flowcell_key in seq_run_flowcell_dict:
        # A flowcell may map to several runs; all of them are updated.
        for seq_run in seq_run_flowcell_dict[flowcell_key]:
            seq_run.begin_timestamp = start_timestamp
            seq_run.end_timestamp = end_timestamp
            seq_run.flowcell_key = flowcell.key
            seq_run.machine_key = machine.key
    else:
        seq_run = mockdb['SequencingRun'].__new__(config,flowcell=flowcell,machine=machine,date=date,run_number=run_number,output_dir=directory,side=side,begin_timestamp=start_timestamp,end_timestamp=end_timestamp)
        seq_run.state = 'Complete'
    return 1
        string += "," + flowcell.key 
        string += "," + machine.key
        string += "," + number
        print string
    return 1

if __name__ == "__main__":
    # Command-line entry point: either report the demultiplex statistics for a
    # casava results directory or store them in the mock database.
    parser = argparse.ArgumentParser(
        description='Places the statistics in Demultiplex_Stats.html into the database',
    )
    parser.add_argument(
        'directory',
        type=str,
        help='The directory name where the casava results are placed.',
    )
    parser.add_argument(
        '--report',
        dest='report',
        action='store_true',
        default=False,
        help='Prints data insead of inputting into database',
    )
    parser.add_argument(
        '--add_seq_run',
        dest='add_seq_run',
        action='store_true',
        default=False,
        help='Adds a completed sequencing run for the flowcell',
    )
    args = parser.parse_args()

    # Load the system configuration and the mock database built from it.
    config = ConfigParser.ConfigParser()
    config.read('/home/sequencing/src/pipeline_project/pipeline/config/ihg_system.cfg')
    mockdb = initiate_mockdb(config)

    # Resolve the flowcell and machine objects named by the directory.
    date, machine_key, run_number, side, flowcell_key = parse_sequencing_run_dir(args.directory)
    flowcell = mockdb['Flowcell'].__get__(config,key=flowcell_key)
    machine = mockdb['HiSeqMachine'].__get__(config,key=machine_key)

    if args.report:
        report_demultiplex_stats(config,mockdb,args.directory,flowcell,machine)
    else:
        if args.add_seq_run:
            # Use the first existing run for this flowcell; create one only
            # when the lookup raises KeyError.
            try:
                flowcell_seq_run_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
                seq_run = flowcell_seq_run_dict[flowcell.key][0]
            except KeyError:
                run_type = determine_run_type(args.directory)
                seq_run = mockdb['SequencingRun'].__new__(
                    config,
                    flowcell=flowcell,
                    machine=machine,
                    date=date,
                    run_number=run_number,
                    output_dir=args.directory,
                    side=side,
                    run_type=run_type,
                )
            seq_run.__finish__()
        fill_demultiplex_stats(config,mockdb,args.directory,flowcell,machine)
        save_mockdb(config,mockdb)