def analyze_coh(ifo_list,ifo_data,ifo_to_do,tmplt_job,insp_job,df_job,\
    prev_df,dag,exttrigInjections,usertag=None,inspinjNode=None,\
    runSplitBank=False,sb_job=None,sbBankFile=None,sbNumBanks=None,\
    runSpinChecker=False,sc_job=None):
  """
  Analyze the data from a network of IFOs coherently. Since the way we treat
  all this data is the same, this function is the same for any combination of
  interferometers. Returns the chunks analyzed, keyed by IFO name.

  ifo_list = the list of IFOs to analyze coherently
  ifo_data = the master science segs, keyed by IFO name
  ifo_to_do = the science segments we need to analyze, keyed by IFO name
  tmplt_job = if no sbBankFile is given: template bank job we should use
  insp_job = the condor job that we should use to analyze data
  df_job = the condor job to find the data
  prev_df = the previous LSCdataFind job that was executed
  dag = the DAG to attach the nodes to
  exttrigInjections = a two-element list specifying the range of injections
                      for the external trigger case
  usertag = the usertag to add to the job names
  inspinjNode = the inspinj node to be added as a parent to inspirals
  runSplitBank = split the template bank into sbNumBanks smaller banks
  sb_job = the condor job to use for the bank splitting
  sbBankFile = a pre-made template bank to use instead of generating one
  sbNumBanks = the number of banks to split the template bank into
  runSpinChecker = run the spin checker on each (split) bank
  sc_job = the condor job to use for the spin checker
  """
  data_opts = {}
  type = {}
  channel = {}
  ifo_char = ''

  for ifo_name in ifo_list:
    ifo_char += ifo_name
    if ifo_name == 'G1':
      data_opts['G1'] = 'geo-data'
      try:
        type['G1'] = cp.get('input','geo-type')
      except:
        type['G1'] = None
      channel['G1'] = cp.get('input','geo-channel')
    elif ifo_name == 'V1':
      data_opts['V1'] = 'virgo-data'
      try:
        type['V1'] = cp.get('input','virgo-type')
      except:
        type['V1'] = None
      channel['V1'] = cp.get('input','virgo-channel')
    else:
      data_opts[ifo_name] = 'ligo-data'
      try:
        type[ifo_name] = cp.get('input','ligo-type')
        if (type[ifo_name] == 'RDS_R_L4') or ('RDS_C' in type[ifo_name]) or \
            ('DMT_C' in type[ifo_name]) or ('LDAS_C' in type[ifo_name]):
          type[ifo_name] = ifo_name + '_' + type[ifo_name]
      except:
        type[ifo_name] = None
      channel[ifo_name] = cp.get('input','ligo-channel')

    # see if we are using calibrated data
    if cp.has_section(data_opts[ifo_name]) and \
        cp.has_option(data_opts[ifo_name],'calibrated-data'):
      calibrated = True
    else:
      calibrated = False

  # prepare the injection file name template for the external trigger case,
  # e.g. HL-INJECTION_<usertag>_<injnum>-<start>-<duration>.xml
  if ifo_data:
    exttrigStart = ifo_data[ifo_name][0].start()
    exttrigDuration = ifo_data[ifo_name][-1].end() - exttrigStart
    injectionFileTemplate = "HL-INJECTION_%%s-%d-%d.xml" % \
        (exttrigStart, exttrigDuration)

  # initialise the per-IFO lists of analyzed chunks once, so that every
  # chunk analyzed below is accumulated rather than overwritten
  chunks_analyzed = {}
  for ifo_name in ifo_list:
    chunks_analyzed[ifo_name] = []

  # loop over the master science segments
  for seg in ifo_data[ifo_name]:
    # loop over the master analysis chunks in the science segment
    for chunk in seg:
      done_this_chunk = False
      # now loop over all the data that we need to filter
      for seg_to_do in ifo_to_do[ifo_name]:
        # if the current chunk is in one of the segments we need to filter
        if not done_this_chunk and inspiral.overlap_test(chunk,seg_to_do):
          # make sure we only filter the master chunk once
          done_this_chunk = True

          if not sbBankFile:
            # Determine template bank file name
            ifo_name = cp.get('templatebank-meta','bank-ifo')
            tb_node = inspiral.TmpltBankNode(tmplt_job)
            tb_node.set_start(chunk.start())
            tb_node.set_end(chunk.end())
            tb_node.set_ifo(ifo_name)
            tb_node.set_vds_group(ifo_name[0] + str(chunk.start()))
            tb_node.set_user_tag((usertag.split('_')[0]) + '_DATAFIND')
            os.symlink("../datafind/" + tb_node.get_output(),\
                tb_node.get_output())
            sbBankFile = tb_node.get_output()

          # Set up the bank splitting
          if runSplitBank:
            sb_node = inspiral.SplitBankNode(sb_job)
            sb_node.set_bank(sbBankFile)
            sb_node.set_num_banks(sbNumBanks)
            sbOutBanks = sb_node.get_output()
            dag.add_node(sb_node)
          else:
            sbNumBanks = 1

          scSpinBank = []
          scNoSpinBank = []
          scNodes = []
          if runSpinChecker:
            for bank in range(sbNumBanks):
              sc_node = inspiral.PTFSpinCheckerNode(sc_job)
              scNodes.append(sc_node)
              sc_node.set_start(chunk.start())
              sc_node.set_end(chunk.end())
              sc_node.set_ifo(ifo_char)
              if runSplitBank:
                sc_node.set_bank(sbOutBanks[bank])
                sc_node.add_parent(sb_node)
                scSpinBank.append(sbOutBanks[bank].replace('.xml',\
                    '_spin.xml'))
                scNoSpinBank.append(sbOutBanks[bank].replace('.xml',\
                    '_nospin.xml'))
                sc_node.set_ifo_tag("FIRST_" + str(bank))
              else:
                sc_node.set_ifo_tag("FIRST")
                scSpinBank.append(sc_node.get_output_base + '_spin.xml.gz')
                scNoSpinBank.append(sc_node.get_output_base + '_nospin.xml.gz')
              sc_node.set_spin_output(scSpinBank[bank])
              sc_node.set_nospin_output(scNoSpinBank[bank])
              dag.add_node(sc_node)

          if doExtTrig:
            for inj in range(exttrigInjections[0], exttrigInjections[1] + 1):
              # XXX: ensure output is added to list of output files
              exttrigUserTag = usertag + "_" + str(inj)
              injectionFile = injectionFileTemplate % exttrigUserTag
              for bank in range(sbNumBanks):
                insp = setup_coh_inspiral(ifo_name,ifo_char,insp_job,\
                    runSplitBank,calibrated,runSpinChecker,chunk,dag,bank,\
                    scSpinBank,scNoSpinBank,sbOutBanks,scNodes,sb_node,\
                    exttrigUserTag)
                insp.set_injections(injectionFile)
          elif doSlides:
            slide_vector = setup_timeslides(ifo_analyze)
            num_slides = len(slide_vector)
            for slide in range(int(num_slides)):
              vector = slide_vector[slide]
              slidesUserTag = usertag + "_" + "slide" + "_" + \
                  str('_'.join(map(str,[str(key) + "_" + str(vector[key])\
                  for key in vector.keys()])))
              for bank in range(sbNumBanks):
                insp = setup_coh_inspiral(ifo_name,ifo_char,insp_job,\
                    runSplitBank,calibrated,runSpinChecker,chunk,dag,bank,\
                    scSpinBank,scNoSpinBank,sbOutBanks,scNodes,sb_node,\
                    slidesUserTag)
                for key in vector.keys():
                  insp.add_var_opt(key.lower() + '-slide', vector[key])
          else:
            for bank in range(sbNumBanks):
              insp = setup_coh_inspiral(ifo_name,ifo_char,insp_job,\
                  runSplitBank,calibrated,runSpinChecker,chunk,dag,bank,\
                  scSpinBank,scNoSpinBank,sbOutBanks,scNodes,sb_node,usertag)

          # store this chunk in the list of filtered data
          for ifo_name in ifo_list:
            chunks_analyzed[ifo_name].append(AnalyzedIFOData(chunk,insp,0))

  return chunks_analyzed
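
# Example call (a hedged usage sketch, not executed here): analyze_coh relies
# on module-level globals set up elsewhere in this script (cp, opts, doExtTrig,
# doSlides, ifo_analyze), and the job objects, segment dictionaries and
# numbers named below are assumptions about that surrounding setup code, not
# definitions made here.
#
#   chunks_analyzed = analyze_coh(['H1','L1','V1'], ifo_data, ifo_to_do,
#       tmplt_job, insp_job, df_job, prev_df, dag,
#       exttrigInjections=[1,100], usertag=usertag,
#       runSplitBank=True, sb_job=sb_job, sbNumBanks=10,
#       runSpinChecker=True, sc_job=sc_job)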
    job_segs.append(segments.segment(seg_start, seg_start + length))
    analysis_segs.append(segments.segment(seg_start + overlap / 2,
        seg_start + length - overlap / 2))
    seg_start += length - overlap

  # Get the last one that goes to the end of the segment
  if seg_start < seg_end:
    job_segs.append(segments.segment(seg_end - length, seg_end))
    analysis_segs.append(segments.segment(seg_start + overlap / 2, seg_end))

  for seg, analysis_seg in zip(job_segs, analysis_segs):
    # create the template bank job
    bank = inspiral.TmpltBankNode(tmplt_job)
    bank.set_start(seg[0])
    bank.set_end(seg[1])
    bank.set_ifo(ifo)
    bank.set_cache(df.get_output())
    bank.add_parent(df)
    dag.add_node(bank)

    # split the template bank up into smaller banks
    split = inspiral.SplitBankNode(split_job)
    split.set_bank(bank.get_output())
    split.set_num_banks(cp.get('splitbank', 'number-of-banks'))
    split.add_parent(bank)
    dag.add_node(split)

    # create the inspiral jobs to do the analysis
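
# Note on the job/analysis segmentation constructed above (a worked example
# with illustrative numbers, not values read from any ini file): with
# length = 2048 and overlap = 256, successive job segments step by
# length - overlap = 1792 s, e.g. [0, 2048), [1792, 3840), ..., while the
# matching analysis segments trim overlap / 2 = 128 s from each side, e.g.
# [128, 1920), [1920, 3712), ..., so adjacent analysis segments tile the
# science segment without gaps or double counting. A final job segment
# covering [seg_end - length, seg_end) is appended if the stepping stops
# short of the end of the science segment.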
def analyze_ifo(ifo_name,ifo_data,ifo_to_do,tmplt_job,insp_job,df_job,\
    prev_df,dag,usertag=None,inspinjNode=None,insp_ckpt_job=None):
  """
  Analyze the data from a single IFO. Since the way we treat all this data is
  the same, this function is the same for all interferometers. Returns the
  last LSCdataFind job that was executed and the chunks analyzed.

  ifo_name = the name of the IFO
  ifo_data = the master science segs
  ifo_to_do = the science segments we need to analyze
  tmplt_job = if not FixedBank: template bank job we should use
  insp_job = the condor job that we should use to analyze data
  df_job = the condor job to find the data
  prev_df = the previous LSCdataFind job that was executed
  dag = the DAG to attach the nodes to
  usertag = the usertag to add to the job names
  inspinjNode = the inspinj node to be added as a parent to inspirals
  insp_ckpt_job = a checkpoint restore job for the inspiral code
  """
  # add the non veto inspiral options
  if cp.has_section('no-veto-inspiral'):
    insp_job.add_ini_opts(cp,'no-veto-inspiral')

  # add the ifo specific options
  if cp.has_section(ifo_name.lower() + '-inspiral'):
    insp_job.add_ini_opts(cp,ifo_name.lower() + '-inspiral')
  if cp.has_section(ifo_name.lower() + '-tmpltbank'):
    tmplt_job.add_ini_opts(cp,ifo_name.lower() + '-tmpltbank')

  # we may use a fixed bank specified in the ini file
  try:
    FixedBank = cp.get('input','fixed-bank')
    print "For %s we use bank %s" % (ifo_name, FixedBank)
  except:
    FixedBank = None

  # get datatype info from config file
  data_opts, type, channel = inspiralutils.get_data_options(cp,ifo_name)

  if cp.has_section('tmpltbank-1'):
    tmplt_job.add_ini_opts(cp,'tmpltbank-1')
  if cp.has_section(data_opts):
    tmplt_job.add_ini_opts(cp,data_opts)
    insp_job.add_ini_opts(cp,data_opts)

  tmplt_job.set_channel(channel)
  insp_job.set_channel(channel)

  # see if we are using calibrated data
  if cp.has_section(data_opts) and cp.has_option(data_opts,'calibrated-data'):
    calibrated = True
    print "we use calibrated data for", ifo_name
  else:
    calibrated = False

  # prepare the injection filename
  if ifo_data:
    injStart = ifo_data[0].start()
    injDuration = ifo_data[-1].end() - injStart
    injectionFileTemplate = "HL-INJECTION_%%s-%d-%d.xml" % \
        (injStart, injDuration)

  chunks_analyzed = []

  # loop over the master science segments
  for seg in ifo_data:
    # loop over the master analysis chunks in the science segment
    for chunk in seg:
      done_this_chunk = False
      # now loop over all the data that we need to filter
      for seg_to_do in ifo_to_do:
        # if the current chunk is in one of the segments we need to filter
        if not done_this_chunk and inspiral.overlap_test(chunk,seg_to_do):
          # make sure we only filter the master chunk once
          done_this_chunk = True

          # make sure we have done one and only one datafind for the segment
          if not opts.read_cache:
            if not seg.get_df_node():
              df = pipeline.LSCDataFindNode(df_job)
              if not opts.disable_dag_categories:
                df.set_category('datafind')
              if not opts.disable_dag_priorities:
                df.set_priority(100)
              df.set_observatory(ifo_name[0])
              # add a padding time to the start of the datafind call
              # (but don't change the datafind output name)
              if ifo_name == 'G1':
                dfsect = 'geo-data'
              elif ifo_name == 'V1':
                dfsect = 'virgo-data'
              else:
                dfsect = 'ligo-data'
              if cp.has_option(dfsect,
                  ifo_name.lower() + '-datafind-start-padding'):
                padding = cp.get(dfsect,
                    ifo_name.lower() + '-datafind-start-padding')
              else:
                padding = 0.
              df.set_start(seg.start(),padding)
              df.set_end(seg.end())
              seg.set_df_node(df)
              if type:
                df.set_type(type)
              if prev_df and opts.disable_dag_categories:
                df.add_parent(prev_df)
              if opts.datafind:
                dag.add_node(df)
              prev_df = df
          else:
            prev_df = None

          # make a template bank job for the master chunk
          bank = inspiral.TmpltBankNode(tmplt_job)
          if not opts.disable_dag_categories:
            bank.set_category('tmpltbank')
          if not opts.disable_dag_priorities:
            bank.set_priority(1)
          bank.set_start(chunk.start())
          bank.set_end(chunk.end())
          bank.set_ifo(ifo_name)
          bank.set_vds_group(ifo_name[0] + str(chunk.start()))
          if not opts.read_cache:
            bank.set_cache(df.get_output())
          else:
            bank.set_cache(cp.get('datafind',ifo_name + "-cache"))
          if not calibrated:
            bank.calibration()
          if opts.datafind:
            bank.add_parent(df)
          if (opts.template_bank and not FixedBank):
            dag.add_node(bank)

          # make an inspiral job for the master chunk
          insp = inspiral.InspiralNode(insp_job)
          if not opts.disable_dag_categories:
            insp.set_category('inspiral1')
          if not opts.disable_dag_priorities:
            insp.set_priority(2)
          if usertag:
            insp.set_user_tag(usertag.split('_CAT')[0])
          insp.set_start(chunk.start())
          insp.set_end(chunk.end())
          insp.set_trig_start(chunk.trig_start())
          insp.set_trig_end(chunk.trig_end())
          insp.set_ifo(ifo_name)
          insp.set_ifo_tag("FIRST")
          insp.set_vds_group(ifo_name[0] + str(chunk.start()))
          if not opts.read_cache:
            insp.set_cache(df.get_output())
          else:
            insp.set_cache(cp.get('datafind',ifo_name + "-cache"))
          if not calibrated:
            insp.calibration()
          if FixedBank:
            insp.set_bank(FixedBank)
          else:
            insp.set_bank(bank.get_output())
          if opts.datafind:
            insp.add_parent(df)
          if inspinjNode and opts.inspinj:
            insp.add_parent(inspinjNode)
          if (opts.template_bank and not FixedBank):
            insp.add_parent(bank)
          if opts.inspiral:
            dag.add_node(insp)

          if opts.data_checkpoint:
            # make an inspiral checkpoint restore job
            insp_job.set_universe("vanilla")
            insp.set_data_checkpoint()
            insp.set_post_script(cp.get('condor','checkpoint-post-script'))
            insp.add_post_script_arg(
                os.path.join(os.getcwd(),insp.get_checkpoint_image()))
            insp_ckpt = inspiral.InspiralCkptNode(insp_ckpt_job)
            insp_ckpt.set_output(insp.get_output())
            insp_ckpt.set_injections(insp.get_injections())
            insp_ckpt.set_checkpoint_image(insp.get_checkpoint_image())
            if cp.has_option('pipeline','condor-c-site'):
              # additional requirements to launch the job on a remote pool
              insp_ckpt_job.set_universe("grid")
              insp_ckpt.set_grid_start("pegasuslite")
              insp_ckpt.add_pegasus_profile("condor","grid_resource",
                  "condor %s" % cp.get('pipeline','condor-c-site'))
              insp_ckpt.add_pegasus_profile("condor","+remote_jobuniverse","5")
              insp_ckpt.add_pegasus_profile("condor",
                  "+remote_requirements","True")
              insp_ckpt.add_pegasus_profile("condor",
                  "+remote_ShouldTransferFiles","True")
              insp_ckpt.add_pegasus_profile("condor",
                  "+remote_WhenToTransferOutput","ON_EXIT")
              insp_ckpt.add_pegasus_profile("condor",
                  "+remote_TransferInputFiles",
                  '"' + insp.get_checkpoint_image() + '"')
              insp_ckpt.add_pegasus_profile("condor","+remote_PeriodicRelease",
                  '( JobStatus == 5 && HoldReasonCode == 13 && NumSystemHolds < 3 )')
            else:
              insp_ckpt_job.set_universe("vanilla")
            insp_ckpt.add_parent(insp)
            if opts.inspiral:
              dag.add_node(insp_ckpt)
            # ensure output is added to list of output files
            output = insp_ckpt.get_output()
            # store this chunk in the list of filtered data
            chunks_analyzed.append(AnalyzedIFOData(chunk,insp_ckpt))
          else:
            # XXX: ensure output is added to list of output files
            output = insp.get_output()
            # store this chunk in the list of filtered data
            chunks_analyzed.append(AnalyzedIFOData(chunk,insp))

  return tuple([prev_df,chunks_analyzed])
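
# Example call (a hedged usage sketch, not executed here): analyze_ifo also
# depends on the module-level cp and opts objects; the jobs, segment lists and
# DAG named below are assumptions about the surrounding setup code. The
# returned tuple carries the last LSCdataFind node so that successive per-IFO
# calls can pass it back in as prev_df and chain their datafind jobs.
#
#   prev_df, h1_chunks = analyze_ifo('H1', h1_data, h1_to_do, tmplt_job,
#       insp_job, df_job, prev_df, dag, usertag=usertag,
#       inspinjNode=inspinj_node, insp_ckpt_job=insp_ckpt_job)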