def make_segments_plot(workflow, seg_files, out_dir, tags=[]): make_analysis_dir(out_dir) node = PlotExecutable(workflow.cp, 'plot_segments', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_list_opt('--segment-files', seg_files) node.new_output_file_opt(workflow.analysis_time, '.html', '--output-file') workflow += node
def make_snrifar_plot(workflow, bg_file, out_dir, tags=[]): make_analysis_dir(out_dir) node = PlotExecutable(workflow.cp, 'plot_snrifar', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_opt('--trigger-file', bg_file) node.new_output_file_opt(bg_file.segment, '.png', '--output-file') workflow += node
def setup_postprocessing_preparation(workflow, triggerFiles, output_dir,
                                     tags=[], **kwargs):
    """
    This function aims to be the gateway for preparing the output of the
    coincidence and/or matched-filtering stages of the workflow for
    calculation of the significance of triggers and any rate statements that
    are to be made. In practice this normally means combining output files,
    performing any clustering and performing mapping between triggers and
    simulations where needed.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    triggerFiles : pycbc.workflow.core.FileList
        A FileList of the trigger files that are used as input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    postProcPreppedFiles : pycbc.workflow.core.FileList
        A list of files that can be used as input for the post-processing
        stage.
    """
    logging.info("Entering post-processing preparation module.")
    make_analysis_dir(output_dir)

    # Parse for options in .ini file
    postProcPrepMethod = workflow.cp.get_opt_tags("workflow-postprocprep",
                                                  "postprocprep-method", tags)

    # Scope here for adding different options/methods here. For now we only
    # have the single_stage ihope method which consists of converting the
    # ligolw_thinca output xml into one file, clustering, performing injection
    # finding and putting everything into one SQL database.
    if postProcPrepMethod == "PIPEDOWN_WORKFLOW":
        # If you want the intermediate output files, call this directly
        postProcPreppedFiles, _, _, _ = setup_postprocprep_pipedown_workflow(
            workflow, triggerFiles, output_dir, tags=tags, **kwargs)
    elif postProcPrepMethod == "PIPEDOWN_REPOP":
        postProcPreppedFiles, _, _, _ = setup_postprocprep_pipedown_workflow(
            workflow, triggerFiles, output_dir, tags=tags, do_repop=True,
            **kwargs)
    elif postProcPrepMethod == "GSTLAL_POSTPROCPREP":
        postProcPreppedFiles = setup_postprocprep_gstlal_workflow(
            workflow, triggerFiles, output_dir, tags=tags, **kwargs)
    else:
        errMsg = "Post-processing preparation method not recognized. Must be "
        errMsg += "one of PIPEDOWN_WORKFLOW, PIPEDOWN_REPOP or "
        errMsg += "GSTLAL_POSTPROCPREP."
        raise ValueError(errMsg)

    logging.info("Leaving post-processing preparation module.")

    return postProcPreppedFiles
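# Illustrative configuration sketch (not part of the original module): the
# function above chooses its behaviour from the [workflow-postprocprep]
# section of the workflow ini file. The section, option and value names below
# are exactly those parsed by setup_postprocessing_preparation; the value
# shown is only one of the recognized choices.
#
#   [workflow-postprocprep]
#   postprocprep-method = PIPEDOWN_WORKFLOW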
def make_average_psd(workflow, psd_files, out_dir, tags=None, gate_files=None, output_fmt='.txt'): make_analysis_dir(out_dir) tags = [] if tags is None else tags node = AvgPSDExecutable(workflow.cp, 'average_psd', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_list_opt('--input-files', psd_files) node.new_output_file_opt(workflow.analysis_time, output_fmt, '--detector-avg-file') # FIXME should Node have a public method for handling # multidetector output options of type --option H1:foo L1:bar? node.add_opt('--time-avg-file') for ifo in workflow.ifos: time_avg_file = File(ifo, node.executable.name, workflow.analysis_time, extension=output_fmt, directory=out_dir, tags=tags) multi_ifo_string = ifo + ':' + time_avg_file.name node.add_opt(multi_ifo_string) node._add_output(time_avg_file) if gate_files is not None: ifo_gate = None for gate_file in gate_files: if gate_file.ifo == ifo: ifo_gate = gate_file if ifo_gate is not None: node.add_input_opt('--gating-file', ifo_gate) workflow += node return node.output_files
def make_average_psd(workflow, psd_files, out_dir, tags=None, output_fmt='.txt'): make_analysis_dir(out_dir) tags = [] if tags is None else tags node = AvgPSDExecutable(workflow.cp, 'average_psd', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_list_opt('--input-files', psd_files) if len(workflow.ifos) > 1: node.new_output_file_opt(workflow.analysis_time, output_fmt, '--detector-avg-file') node.new_multiifo_output_list_opt('--time-avg-file', workflow.ifos, workflow.analysis_time, output_fmt, tags=tags) workflow += node return node.output_files
def setup_combine_statmap(workflow, final_bg_file_list, bg_file_list, out_dir, tags=None): """ Combine the statmap files into one background file """ if tags is None: tags = [] make_analysis_dir(out_dir) logging.info('Setting up combine statmap') cstat_exe_name = os.path.basename(workflow.cp.get("executables", "combine_statmap")) if cstat_exe_name == 'pycbc_combine_statmap': cstat_class = PyCBCCombineStatmap elif cstat_exe_name == 'pycbc_add_statmap': cstat_class = PyCBCAddStatmap else: raise NotImplementedError('executable should be ' 'pycbc_combine_statmap or pycbc_add_statmap') cstat_exe = cstat_class(workflow.cp, 'combine_statmap', ifos=workflow.ifos, tags=tags, out_dir=out_dir) if cstat_exe_name == 'pycbc_combine_statmap': combine_statmap_node = cstat_exe.create_node(final_bg_file_list) elif cstat_exe_name == 'pycbc_add_statmap': combine_statmap_node = cstat_exe.create_node(final_bg_file_list, bg_file_list) workflow.add_node(combine_statmap_node) return combine_statmap_node.output_file
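# Illustrative [executables] entry (the path is a placeholder, not from the
# original source): setup_combine_statmap dispatches on the basename of the
# configured executable, which must be either pycbc_combine_statmap or
# pycbc_add_statmap.
#
#   [executables]
#   combine_statmap = /path/to/pycbc_add_statmap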
def setup_exclude_zerolag(workflow, statmap_file, other_statmap_files, out_dir, ifos, tags=None): """ Exclude single triggers close to zerolag triggers from forming any background events """ if tags is None: tags = [] make_analysis_dir(out_dir) logging.info('Setting up exclude zerolag') exc_zerolag_exe = PyCBCExcludeZerolag(workflow.cp, 'exclude_zerolag', ifos=ifos, tags=tags, out_dir=out_dir) exc_zerolag_node = exc_zerolag_exe.create_node(statmap_file, other_statmap_files, tags=None) workflow.add_node(exc_zerolag_node) return exc_zerolag_node.output_file
def setup_psd_calculate(workflow, frame_files, ifo, segments, segment_name, out_dir, gate_files=None, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags if workflow.cp.has_option_tags('workflow-psd', 'parallelization-factor', tags=tags): num_parts = int(workflow.cp.get_opt_tags('workflow-psd', 'parallelization-factor', tags=tags)) else: num_parts = 1 # get rid of duplicate segments which happen when splitting the bank segments = segmentlist(frozenset(segments)) segment_lists = list(chunks(segments, num_parts)) psd_files = FileList([]) for i, segs in enumerate(segment_lists): seg_file = segments_to_file(segmentlist(segs), out_dir + '/%s-INSPIRAL_DATA-%s.xml' % (ifo, i), 'INSPIRAL_DATA', ifo=ifo) psd_files += [make_psd_file(workflow, frame_files, seg_file, segment_name, out_dir, gate_files=gate_files, tags=tags + ['PART%s' % i])] if num_parts > 1: return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags) else: return psd_files[0]
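# Optional parallelization sketch (the value is an example only): if the
# option below is present, setup_psd_calculate splits the analysis segments
# into that many chunks and creates one calculate_psd job per chunk, merging
# the results afterwards; otherwise a single job is created.
#
#   [workflow-psd]
#   parallelization-factor = 8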
def make_psd_file(workflow, frame_files, segment_file, segment_name, out_dir, gate_files=None, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags exe = CalcPSDExecutable(workflow.cp, 'calculate_psd', ifos=segment_file.ifo, out_dir=out_dir, tags=tags) node = exe.create_node() node.add_input_opt('--analysis-segment-file', segment_file) node.add_opt('--segment-name', segment_name) if gate_files is not None: ifo_gate = None for gate_file in gate_files: if gate_file.ifo == segment_file.ifo: ifo_gate = gate_file if ifo_gate is not None: node.add_input_opt('--gating-file', ifo_gate) if not exe.has_opt('frame-type'): node.add_input_list_opt('--frame-files', frame_files) node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file') workflow += node return node.output_files[0]
def setup_psd_calculate(workflow, frame_files, ifo, segments, segment_name, out_dir, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags if workflow.cp.has_option_tags('workflow-psd', 'parallelization-factor', tags=tags): num_parts = int(workflow.cp.get_opt_tags('workflow-psd', 'parallelization-factor', tags=tags)) else: num_parts = 1 # get rid of duplicate segments which happen when splitting the bank segments = segmentlist(frozenset(segments)) segment_lists = list(chunks(segments, num_parts)) psd_files = FileList([]) for i, segs in enumerate(segment_lists): seg_file = SegFile.from_segment_list('%s_%s' %(segment_name, i), segmentlist(segs), segment_name, ifo, valid_segment=workflow.analysis_time, extension='xml', directory=out_dir) psd_files += [make_psd_file(workflow, frame_files, seg_file, segment_name, out_dir, tags=tags + ['PART%s' % i])] if num_parts > 1: return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags) else: return psd_files[0]
def setup_psd_calculate(workflow, frame_files, ifo, segments, segment_name, out_dir, gate_files=None, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags if workflow.cp.has_option_tags('workflow-psd', 'parallelization-factor', tags=tags): num_parts = int(workflow.cp.get_opt_tags('workflow-psd', 'parallelization-factor', tags=tags)) else: num_parts = 1 segment_lists = list(chunks(segments, num_parts)) psd_files = FileList([]) for i, segs in enumerate(segment_lists): seg_file = segments_to_file(segmentlist(segs), out_dir + '/%s-INSPIRAL_DATA-%s.xml' % (ifo, i), 'INSPIRAL_DATA', ifo=ifo) psd_files += [make_psd_file(workflow, frame_files, seg_file, segment_name, out_dir, gate_files=gate_files, tags=tags + ['PART%s' % i])] if num_parts > 1: return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags) else: return psd_files[0]
def save_veto_definer(cp, out_dir, tags=None): """ Retrieve the veto definer file and save it locally Parameters ----------- cp : ConfigParser instance out_dir : path tags : list of strings Used to retrieve subsections of the ini file for configuration options. """ if tags is None: tags = [] make_analysis_dir(out_dir) veto_def_url = cp.get_opt_tags("workflow-segments", "segments-veto-definer-url", tags) veto_def_base_name = os.path.basename(veto_def_url) veto_def_new_path = os.path.abspath( os.path.join(out_dir, veto_def_base_name)) # Don't need to do this if already done resolve_url(veto_def_url, out_dir) # and update location cp.set("workflow-segments", "segments-veto-definer-file", veto_def_new_path) return veto_def_new_path
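# Illustrative ini snippet (the URL below is a placeholder, not a real
# location): save_veto_definer reads this option, fetches the file into
# out_dir, and then rewrites segments-veto-definer-file to point at the
# local copy.
#
#   [workflow-segments]
#   segments-veto-definer-url = file:///path/to/veto_definer.xml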
def make_inj_table(workflow, inj_file, out_dir, tags=[]): make_analysis_dir(out_dir) node = PlotExecutable(workflow.cp, 'page_injections', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_opt('--injection-file', inj_file) node.new_output_file_opt(inj_file.segment, '.html', '--output-file') workflow += node
def convert_trig_to_hdf(workflow, hdfbank, xml_trigger_files, out_dir,
                        tags=None):
    """Return the list of hdf5 trigger file outputs"""
    if tags is None:
        tags = []

    #FIXME, make me not needed
    logging.info('convert single inspiral trigger files to hdf5')
    make_analysis_dir(out_dir)

    ifos, insp_groups = xml_trigger_files.categorize_by_attr('ifo')
    trig_files = FileList()
    for ifo, insp_group in zip(ifos, insp_groups):
        trig2hdf_exe = PyCBCTrig2HDFExecutable(workflow.cp, 'trig2hdf',
                                               ifos=ifo, out_dir=out_dir,
                                               tags=tags)
        segs, insp_bundles = insp_group.categorize_by_attr('segment')
        for insps in insp_bundles:
            trig2hdf_node = trig2hdf_exe.create_node(insps, hdfbank[0])
            workflow.add_node(trig2hdf_node)
            trig_files += trig2hdf_node.output_files
    return trig_files
def setup_interval_coinc(workflow, hdfbank, trig_files, veto_files,
                         veto_names, out_dir, tags=[]):
    """
    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError(
            'This coincidence method only supports two ifo searches')

    findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc',
                                             ifos=workflow.ifos,
                                             tags=tags, out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(
        workflow.cp.get_opt_tags('workflow-coincidence',
                                 'parallelization-factor', tags))

    stat_files = []
    for veto_file, veto_name in zip(veto_files, veto_names):
        bg_files = FileList()
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
                                                   veto_file, veto_name,
                                                   group_str,
                                                   tags=[veto_name, str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)

        stat_files += [
            setup_statmap(workflow, bg_files, hdfbank, out_dir,
                          tags=tags + [veto_name])
        ]

    logging.info('...leaving coincidence')
    return stat_files
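# Illustrative knob (the value is an example only): the coincidence stage
# above is split into this many jobs, each handling its own slice of the
# background estimation, so the option trades per-job wall time and memory
# against job count.
#
#   [workflow-coincidence]
#   parallelization-factor = 10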
def merge_psds(workflow, files, ifo, out_dir, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags node = MergePSDFiles(workflow.cp, "merge_psds", ifos=ifo, out_dir=out_dir, tags=tags).create_node() node.add_input_list_opt("--psd-files", files) node.new_output_file_opt(workflow.analysis_time, ".hdf", "--output-file") workflow += node return node.output_files[0]
def make_sensitivity_plot(workflow, inj_file, out_dir, tags=[]): make_analysis_dir(out_dir) for tag in workflow.cp.get_subsections('plot_sensitivity'): node = PlotExecutable(workflow.cp, 'plot_sensitivity', ifos=workflow.ifos, out_dir=out_dir, tags=[tag] + tags).create_node() node.add_input_opt('--injection-file', inj_file) node.new_output_file_opt(inj_file.segment, '.png', '--output-file') workflow += node
def setup_coh_PTF_post_processing(workflow, trigger_files, trigger_cache,
                                  output_dir, segment_dir,
                                  injection_trigger_files=None,
                                  injection_files=None,
                                  injection_trigger_caches=None,
                                  injection_caches=None, config_file=None,
                                  run_dir=None, ifos=None, web_dir=None,
                                  inj_tags=[], tags=[], **kwargs):
    """
    This function aims to be the gateway for running postprocessing in CBC
    offline workflows. Post-processing generally consists of calculating the
    significance of triggers and making any statements about trigger rates.
    Dedicated plotting jobs do not belong here.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList of the trigger files that are used as input at this stage.
    summary_xml_files : pycbc.workflow.core.FileList
        A FileList of the output of the analysislogging_utils module.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    post_proc_files : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    logging.info("Entering post-processing stage.")
    make_analysis_dir(output_dir)

    # Parse for options in .ini file
    post_proc_method = workflow.cp.get_opt_tags("workflow-postproc",
                                                "postproc-method", tags)

    # Scope here for adding different options/methods here. For now we only
    # have the single_stage ihope method which consists of converting the
    # ligolw_thinca output xml into one file, clustering, performing injection
    # finding and putting everything into one SQL database.
    if post_proc_method == "COH_PTF_WORKFLOW":
        post_proc_files = setup_postproc_coh_PTF_workflow(workflow,
                trigger_files, trigger_cache, injection_trigger_files,
                injection_files, injection_trigger_caches, injection_caches,
                config_file, output_dir, web_dir, segment_dir, ifos=ifos,
                inj_tags=inj_tags, tags=tags, **kwargs)
    else:
        errMsg = "Post-processing method not recognized. Must be "
        errMsg += "COH_PTF_WORKFLOW."
        raise ValueError(errMsg)

    logging.info("Leaving post-processing module.")

    return post_proc_files
def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files, inj_trig_files, stat_files, background_file, veto_file, veto_name, out_dir, tags=None): """ Set up exact match coincidence and background estimation This function sets up exact match coincidence and background estimation using a folded interval technique. """ if tags is None: tags = [] make_analysis_dir(out_dir) logging.info('Setting up coincidence for injection') if len(hdfbank) > 1: raise ValueError('This coincidence method only supports a ' 'pregenerated template bank') hdfbank = hdfbank[0] if len(workflow.ifos) > 2: err_msg = 'This coincidence method only supports two-ifo searches' raise ValueError(err_msg) # Wall time knob and memory knob factor = int(workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', tags)) ffiles = {} ifiles = {} for ifo, ffi in zip(*full_data_trig_files.categorize_by_attr('ifo')): ffiles[ifo] = ffi[0] ifos, files = inj_trig_files.categorize_by_attr('ifo') # ifos list is used later for ifo, ifi in zip(ifos, files): ifiles[ifo] = ifi[0] ifo0, ifo1 = ifos[0], ifos[1] combo = [(FileList([ifiles[ifo0], ifiles[ifo1]]), "injinj"), (FileList([ifiles[ifo0], ffiles[ifo1]]), "injfull"), (FileList([ifiles[ifo1], ffiles[ifo0]]), "fullinj"), ] bg_files = {'injinj':[], 'injfull':[], 'fullinj':[]} for trig_files, ctag in combo: findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc', ifos=workflow.ifos, tags=tags + [ctag], out_dir=out_dir) for i in range(factor): group_str = '%s/%s' % (i, factor) coinc_node = findcoinc_exe.create_node(trig_files, hdfbank, stat_files, veto_file, veto_name, group_str, tags=["JOB"+str(i)]) bg_files[ctag] += coinc_node.output_files workflow.add_node(coinc_node) return setup_statmap_inj(workflow, bg_files, background_file, hdfbank, out_dir, tags=tags)
def setup_interval_coinc_inj(workflow, hdfbank, full_data_trig_files, inj_trig_files, background_file, veto_file, veto_name, out_dir, tags=[]): """ This function sets up exact match coincidence and background estimation using a folded interval technique. """ make_analysis_dir(out_dir) logging.info('Setting up coincidence for injection') if len(hdfbank) > 1: raise ValueError('This coincidence method only supports a ' 'pregenerated template bank') hdfbank = hdfbank[0] if len(workflow.ifos) > 2: raise ValueError('This coincidence method only supports two ifo searches') combinecoinc_exe = PyCBCStatMapInjExecutable(workflow.cp, 'statmap_inj', ifos=workflow.ifos, tags=tags, out_dir=out_dir) # Wall time knob and memory knob factor = int(workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', tags)) ffiles = {} ifiles = {} ifos, files = full_data_trig_files.categorize_by_attr('ifo') for ifo, file in zip(ifos, files): ffiles[ifo] = file[0] ifos, files = inj_trig_files.categorize_by_attr('ifo') for ifo, file in zip(ifos, files): ifiles[ifo] = file[0] ifo0, ifo1 = ifos[0], ifos[1] combo = [(FileList([ifiles[ifo0], ifiles[ifo1]]), "injinj"), (FileList([ifiles[ifo0], ffiles[ifo1]]), "injfull"), (FileList([ifiles[ifo1], ffiles[ifo0]]), "fullinj"), ] bg_files = {'injinj':[],'injfull':[],'fullinj':[]} for trig_files, ctag in combo: findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc', ifos=workflow.ifos, tags=tags + [ctag], out_dir=out_dir) for i in range(factor): group_str = '%s/%s' % (i, factor) coinc_node = findcoinc_exe.create_node(trig_files, hdfbank, veto_file, veto_name, group_str, tags=([str(i)])) bg_files[ctag] += coinc_node.output_files workflow.add_node(coinc_node) combine_node = combinecoinc_exe.create_node(FileList(bg_files['injinj']), background_file, FileList(bg_files['injfull']), FileList(bg_files['fullinj'])) workflow.add_node(combine_node) logging.info('...leaving coincidence ') return combine_node.output_files[0]
def make_foreground_table(workflow, trig_file, bank_file, ftag, out_dir, tags=[]): make_analysis_dir(out_dir) node = PlotExecutable(workflow.cp, 'page_foreground', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_opt('--bank-file', bank_file) node.add_opt('--foreground-tag', ftag) node.add_input_opt('--trigger-file', trig_file) node.new_output_file_opt(bank_file.segment, '.html', '--output-file') workflow += node
def find_injections_in_hdf_coinc(workflow, inj_coinc_file, inj_xml_file, veto_file, veto_name, out_dir, tags=[]): make_analysis_dir(out_dir) exe = PyCBCHDFInjFindExecutable(workflow.cp, 'hdfinjfind', ifos=workflow.ifos, out_dir=out_dir, tags=tags) node = exe.create_node(inj_coinc_file, inj_xml_file, veto_file, veto_name, tags) workflow += node return node.output_files[0]
def merge_psds(workflow, files, ifo, out_dir, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags node = MergePSDFiles(workflow.cp, 'merge_psds', ifos=ifo, out_dir=out_dir, tags=tags).create_node() node.add_input_list_opt('--psd-files', files) node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file') workflow += node return node.output_files[0]
def setup_multiifo_interval_coinc(workflow, hdfbank, trig_files, stat_files, veto_file, veto_name, out_dir, pivot_ifo, fixed_ifo, tags=None): """ This function sets up exact match multiifo coincidence """ if tags is None: tags = [] make_analysis_dir(out_dir) logging.info('Setting up coincidence') ifos, _ = trig_files.categorize_by_attr('ifo') findcoinc_exe = PyCBCFindMultiifoCoincExecutable(workflow.cp, 'multiifo_coinc', ifos=ifos, tags=tags, out_dir=out_dir) # Wall time knob and memory knob factor = int( workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', [findcoinc_exe.ifo_string] + tags)) statmap_files = [] bg_files = FileList() for i in range(factor): group_str = '%s/%s' % (i, factor) coinc_node = findcoinc_exe.create_node(trig_files, hdfbank, stat_files, veto_file, veto_name, group_str, pivot_ifo, fixed_ifo, tags=['JOB' + str(i)]) bg_files += coinc_node.output_files workflow.add_node(coinc_node) statmap_files = setup_multiifo_statmap(workflow, ifos, bg_files, out_dir, tags=tags) logging.info('...leaving coincidence ') return statmap_files
def find_injections_in_hdf_coinc(workflow, inj_coinc_file, inj_xml_file, veto_file, veto_name, out_dir, tags=None): if tags is None: tags = [] make_analysis_dir(out_dir) exe = PyCBCHDFInjFindExecutable(workflow.cp, 'hdfinjfind', ifos=workflow.ifos, out_dir=out_dir, tags=tags) node = exe.create_node(inj_coinc_file, inj_xml_file, veto_file, veto_name, tags) workflow += node return node.output_files[0]
def make_psd_file(workflow, frame_files, segment_file, segment_name, out_dir, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags node = MergeExecutable(workflow.cp, 'calculate_psd', ifos=segment_file.ifo, out_dir=out_dir, tags=tags).create_node() node.add_input_opt('--analysis-segment-file', segment_file) node.add_opt('--segment-name', segment_name) node.add_input_list_opt('--frame-files', frame_files) node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file') workflow += node return node.output_files[0]
def setup_multiifo_interval_coinc(workflow, hdfbank, trig_files, stat_files, veto_files, veto_names, out_dir, pivot_ifo, fixed_ifo, tags=None): """ This function sets up exact match multiifo coincidence """ if tags is None: tags = [] make_analysis_dir(out_dir) logging.info('Setting up coincidence') if len(hdfbank) != 1: raise ValueError('Must use exactly 1 bank file for this coincidence ' 'method, I got %i !' % len(hdfbank)) hdfbank = hdfbank[0] ifos, _ = trig_files.categorize_by_attr('ifo') findcoinc_exe = PyCBCFindMultiifoCoincExecutable(workflow.cp, 'multiifo_coinc', ifos=ifos, tags=tags, out_dir=out_dir) # Wall time knob and memory knob factor = int( workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', tags)) bg_files = [] for veto_file, veto_name in zip(veto_files, veto_names): for i in range(factor): group_str = '%s/%s' % (i, factor) coinc_node = findcoinc_exe.create_node(trig_files, hdfbank, stat_files, veto_file, veto_name, group_str, pivot_ifo, fixed_ifo, tags=[veto_name, str(i)]) bg_files += coinc_node.output_files workflow.add_node(coinc_node) logging.info('...leaving coincidence ') return bg_files
def make_coinc_snrchi_plot(workflow, inj_file, inj_trig, stat_file, trig_file, out_dir, tags=[]): make_analysis_dir(out_dir) for tag in workflow.cp.get_subsections('plot_coinc_snrchi'): node = PlotExecutable(workflow.cp, 'plot_coinc_snrchi', ifos=inj_trig.ifo, out_dir=out_dir, tags=[tag] + tags).create_node() node.add_input_opt('--found-injection-file', inj_file) node.add_input_opt('--single-injection-file', inj_trig) node.add_input_opt('--coinc-statistic-file', stat_file) node.add_input_opt('--single-trigger-file', trig_file) node.new_output_file_opt(inj_file.segment, '.png', '--output-file') workflow += node
def merge_single_detector_hdf_files(workflow, bank_file, trigger_files, out_dir, tags=[]): make_analysis_dir(out_dir) out = FileList() for ifo in workflow.ifos: node = MergeExecutable(workflow.cp, 'hdf_trigger_merge', ifos=ifo, out_dir=out_dir, tags=tags).create_node() node.add_input_opt('--bank-file', bank_file) node.add_input_list_opt('--trigger-files', trigger_files.find_output_with_ifo(ifo)) node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file') workflow += node out += node.output_files return out
def setup_gating_workflow(workflow, science_segs, datafind_outs, output_dir=None, tags=None): ''' Setup gating section of CBC workflow. At present this only supports pregenerated gating files, in the future these could be created within the workflow. Parameters ---------- workflow: pycbc.workflow.core.Workflow An instanced class that manages the constructed workflow. science_segs : Keyed dictionary of glue.segmentlist objects scienceSegs[ifo] holds the science segments to be analysed for each ifo. datafind_outs : pycbc.workflow.core.FileList The file list containing the datafind files. output_dir : path string The directory where data products will be placed. tags : list of strings If given these tags are used to uniquely name and identify output files that would be produced in multiple calls to this function. Returns -------- gate_files : pycbc.workflow.core.FileList The FileList holding the gate files, 0 or 1 per ifo ''' if tags is None: tags = [] logging.info("Entering gating module.") make_analysis_dir(output_dir) cp = workflow.cp # Parse for options in ini file. try: gateMethod = cp.get_opt_tags("workflow-gating", "gating-method", tags) except: # Gating is optional, just return an empty list if not # provided. return FileList([]) if gateMethod == "PREGENERATED_FILE": logging.info("Setting gating from pre-generated file(s).") gate_files = setup_gate_pregenerated(workflow, tags=tags) else: errMsg = "Gating method not recognized. Only " errMsg += "PREGENERATED_FILE is currently supported." raise ValueError(errMsg) logging.info("Leaving gating module.") return gate_files
def veto_injections(workflow, inj_file, veto_file, veto_name, out_dir, tags=None): tags = [] if tags is None else tags make_analysis_dir(out_dir) node = Executable(workflow.cp, 'strip_injections', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_opt('--segment-name', veto_name) node.add_input_opt('--veto-file', veto_file) node.add_input_opt('--injection-file', inj_file) node.add_opt('--ifos', ' '.join(workflow.ifos)) node.new_output_file_opt(workflow.analysis_time, '.xml', '--output-file') workflow += node return node.output_files[0]
def make_psd_file(workflow, frame_files, segment_file, segment_name, out_dir, tags=None): make_analysis_dir(out_dir) tags = [] if not tags else tags exe = CalcPSDExecutable(workflow.cp, "calculate_psd", ifos=segment_file.ifo, out_dir=out_dir, tags=tags) node = exe.create_node() node.add_input_opt("--analysis-segment-file", segment_file) node.add_opt("--segment-name", segment_name) if not exe.has_opt("frame-type"): node.add_input_list_opt("--frame-files", frame_files) node.new_output_file_opt(workflow.analysis_time, ".hdf", "--output-file") workflow += node return node.output_files[0]
def make_average_psd(workflow, psd_files, out_dir, tags=None, output_fmt=".txt"): make_analysis_dir(out_dir) tags = [] if tags is None else tags node = AvgPSDExecutable(workflow.cp, "average_psd", ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_list_opt("--input-files", psd_files) if len(workflow.ifos) > 1: node.new_output_file_opt(workflow.analysis_time, output_fmt, "--detector-avg-file") node.new_multiifo_output_list_opt("--time-avg-file", workflow.ifos, workflow.analysis_time, output_fmt, tags=tags) workflow += node return node.output_files
def setup_psd_workflow(workflow, science_segs, datafind_outs,
                       output_dir=None, tags=None):
    '''
    Setup static psd section of CBC workflow. At present this only supports
    pregenerated psd files, in the future these could be created within the
    workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of glue.segmentlist objects
        scienceSegs[ifo] holds the science segments to be analysed for each
        ifo.
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed.
    tags : list of strings
        If given these tags are used to uniquely name and identify output
        files that would be produced in multiple calls to this function.

    Returns
    --------
    psd_files : pycbc.workflow.core.FileList
        The FileList holding the psd files, 0 or 1 per ifo
    '''
    if tags is None:
        tags = []
    logging.info("Entering static psd module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp
    # Parse for options in ini file.
    try:
        psdMethod = cp.get_opt_tags("workflow-psd", "psd-method", tags)
    except:
        # Predefined PSDs are optional, just return an empty list if not
        # provided.
        return FileList([])
    if psdMethod == "PREGENERATED_FILE":
        logging.info("Setting psd from pre-generated file(s).")
        psd_files = setup_psd_pregenerated(workflow, tags=tags)
    else:
        errMsg = "PSD method not recognized. Only "
        errMsg += "PREGENERATED_FILE is currently supported."
        raise ValueError(errMsg)

    logging.info("Leaving psd module.")
    return psd_files
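# Illustrative ini snippet: setup_psd_workflow only acts if a psd-method is
# given (otherwise it returns an empty FileList); PREGENERATED_FILE is
# currently the only recognized value.
#
#   [workflow-psd]
#   psd-method = PREGENERATED_FILE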
def merge_single_detector_hdf_files(workflow, bank_file, trigger_files, out_dir, tags=None): if tags is None: tags = [] make_analysis_dir(out_dir) out = FileList() for ifo in workflow.ifos: node = MergeExecutable(workflow.cp, 'hdf_trigger_merge', ifos=ifo, out_dir=out_dir, tags=tags).create_node() node.add_input_opt('--bank-file', bank_file) node.add_input_list_opt('--trigger-files', trigger_files.find_output_with_ifo(ifo)) node.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file') workflow += node out += node.output_files return out
def make_snrchi_plot(workflow, trig_files, veto_file, out_dir, tags=[]): make_analysis_dir(out_dir) for tag in workflow.cp.get_subsections('plot_snrchi'): for trig_file in trig_files: node = PlotExecutable(workflow.cp, 'plot_snrchi', ifos=trig_file.ifo, out_dir=out_dir, tags=[tag] + tags).create_node() node.set_memory(15000) node.add_input_opt('--trigger-file', trig_file) node.add_input_opt('--veto-file', veto_file) node.new_output_file_opt(trig_file.segment, '.png', '--output-file') workflow += node
def convert_bank_to_hdf(workflow, xmlbank, out_dir, tags=[]): """Return the template bank in hdf format """ #FIXME, make me not needed if len(xmlbank) > 1: raise ValueError('Can only convert a single template bank') logging.info('convert template bank to HDF') make_analysis_dir(out_dir) bank2hdf_exe = PyCBCBank2HDFExecutable(workflow.cp, 'bank2hdf', ifos=workflow.ifos, out_dir=out_dir, tags=tags) bank2hdf_node = bank2hdf_exe.create_node(xmlbank[0]) workflow.add_node(bank2hdf_node) return bank2hdf_node.output_files
def make_average_psd(workflow, psd_files, out_dir, tags=None, output_fmt='.txt'): make_analysis_dir(out_dir) tags = [] if tags is None else tags node = AvgPSDExecutable(workflow.cp, 'average_psd', ifos=workflow.ifos, out_dir=out_dir, tags=tags).create_node() node.add_input_list_opt('--input-files', psd_files) node.new_output_file_opt(workflow.analysis_time, output_fmt, '--detector-avg-file') node.new_multiifo_output_list_opt('--time-avg-file', workflow.ifos, workflow.analysis_time, output_fmt, tags=tags) workflow += node return node.output_files
def convert_bank_to_hdf(workflow, xmlbank, out_dir, tags=None): """Return the template bank in hdf format""" if tags is None: tags = [] #FIXME, make me not needed if len(xmlbank) > 1: raise ValueError('Can only convert a single template bank') logging.info('convert template bank to HDF') make_analysis_dir(out_dir) bank2hdf_exe = PyCBCBank2HDFExecutable(workflow.cp, 'bank2hdf', ifos=workflow.ifos, out_dir=out_dir, tags=tags) bank2hdf_node = bank2hdf_exe.create_node(xmlbank[0]) workflow.add_node(bank2hdf_node) return bank2hdf_node.output_files
def setup_interval_coinc(workflow, hdfbank, trig_files, veto_files,
                         veto_names, out_dir, tags=[]):
    """
    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError('This coincidence method only supports two ifo '
                         'searches')

    findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc',
                                             ifos=workflow.ifos,
                                             tags=tags, out_dir=out_dir)
    combinecoinc_exe = PyCBCStatMapExecutable(workflow.cp, 'statmap',
                                              ifos=workflow.ifos,
                                              tags=tags, out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(workflow.cp.get_opt_tags('workflow-coincidence',
                                          'parallelization-factor', tags))

    stat_files = FileList()
    for veto_file, veto_name in zip(veto_files, veto_names):
        bg_files = FileList()
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files, hdfbank,
                                                   veto_file, veto_name,
                                                   group_str,
                                                   tags=[veto_name, str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)

        combine_node = combinecoinc_exe.create_node(bg_files,
                                                    tags=[veto_name])
        workflow.add_node(combine_node)
        stat_files += combine_node.output_files

    logging.info('...leaving coincidence')
    return stat_files
def setup_splittable_workflow(workflow, input_tables, out_dir=None, tags=None): ''' This function aims to be the gateway for code that is responsible for taking some input file containing some table, and splitting into multiple files containing different parts of that table. For now the only supported operation is using lalapps_splitbank to split a template bank xml file into multiple template bank xml files. Parameters ----------- workflow : pycbc.workflow.core.Workflow The Workflow instance that the jobs will be added to. input_tables : pycbc.workflow.core.FileList The input files to be split up. out_dir : path The directory in which output will be written. Returns -------- split_table_outs : pycbc.workflow.core.FileList The list of split up files as output from this job. ''' if tags is None: tags = [] logging.info("Entering split output files module.") make_analysis_dir(out_dir) # Parse for options in .ini file splitMethod = workflow.cp.get_opt_tags("workflow-splittable", "splittable-method", tags) if splitMethod == "IN_WORKFLOW": # Scope here for choosing different options logging.info("Adding split output file jobs to workflow.") split_table_outs = setup_splittable_dax_generated( workflow, input_tables, out_dir, tags) elif splitMethod == "NOOP": # Probably better not to call the module at all, but this option will # return the input file list. split_table_outs = input_tables else: errMsg = "Splittable method not recognized. Must be one of " errMsg += "IN_WORKFLOW or NOOP." raise ValueError(errMsg) logging.info("Leaving split output files module.") return split_table_outs
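# Illustrative ini snippet: the splittable module is driven by the option
# below. IN_WORKFLOW adds split-table jobs to the workflow, while NOOP simply
# returns the input file list unchanged.
#
#   [workflow-splittable]
#   splittable-method = IN_WORKFLOW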
def setup_gating_workflow(workflow, output_dir=None, tags=None): ''' Setup gating section of CBC workflow. At present this only supports pregenerated gating files, in the future these could be created within the workflow. Parameters ---------- workflow: pycbc.workflow.core.Workflow An instanced class that manages the constructed workflow. output_dir : path string The directory where data products will be placed. tags : list of strings If given these tags are used to uniquely name and identify output files that would be produced in multiple calls to this function. Returns -------- gate_files : pycbc.workflow.core.FileList The FileList holding the gate files, 0 or 1 per ifo ''' if tags is None: tags = [] logging.info("Entering gating module.") make_analysis_dir(output_dir) cp = workflow.cp # Parse for options in ini file. try: gateMethod = cp.get_opt_tags("workflow-gating", "gating-method", tags) except ConfigParser.Error: # Gating is optional, just return an empty list if not # provided. return FileList([]) if gateMethod == "PREGENERATED_FILE": logging.info("Setting gating from pre-generated file(s).") gate_files = setup_gate_pregenerated(workflow, output_dir=output_dir, tags=tags) else: errMsg = "Gating method not recognized. Only " errMsg += "PREGENERATED_FILE is currently supported." raise ValueError(errMsg) logging.info("Leaving gating module.") return gate_files
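# Illustrative ini snippet: gating is optional, so the whole section may be
# omitted (the function then returns an empty FileList); if present,
# PREGENERATED_FILE is the only recognized method.
#
#   [workflow-gating]
#   gating-method = PREGENERATED_FILE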
def convert_trig_to_hdf(workflow, hdfbank, xml_trigger_files, out_dir,
                        tags=[]):
    """Return the list of hdf5 trigger file outputs"""
    #FIXME, make me not needed
    logging.info('convert single inspiral trigger files to hdf5')
    make_analysis_dir(out_dir)
    ifos, insp_groups = xml_trigger_files.categorize_by_attr('ifo')
    trig_files = FileList()
    for ifo, insp_group in zip(ifos, insp_groups):
        trig2hdf_exe = PyCBCTrig2HDFExecutable(workflow.cp, 'trig2hdf',
                                               ifos=ifo, out_dir=out_dir,
                                               tags=tags)
        segs, insp_bundles = insp_group.categorize_by_attr('segment')
        for insps in insp_bundles:
            trig2hdf_node = trig2hdf_exe.create_node(insps, hdfbank[0])
            workflow.add_node(trig2hdf_node)
            trig_files += trig2hdf_node.output_files
    return trig_files
def setup_interval_coinc(workflow, hdfbank, trig_files, stat_files, veto_files, veto_names, out_dir, tags=None): """ This function sets up exact match coincidence and background estimation using a folded interval technique. """ if tags is None: tags = [] make_analysis_dir(out_dir) logging.info('Setting up coincidence') if len(hdfbank) != 1: raise ValueError('Must use exactly 1 bank file for this coincidence ' 'method, I got %i !' % len(hdfbank)) hdfbank = hdfbank[0] if len(workflow.ifos) > 2: raise ValueError('This coincidence method only supports two ifo searches') findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp, 'coinc', ifos=workflow.ifos, tags=tags, out_dir=out_dir) # Wall time knob and memory knob factor = int(workflow.cp.get_opt_tags('workflow-coincidence', 'parallelization-factor', tags)) statmap_files = [] for veto_file, veto_name in zip(veto_files, veto_names): bg_files = FileList() for i in range(factor): group_str = '%s/%s' % (i, factor) coinc_node = findcoinc_exe.create_node(trig_files, hdfbank, stat_files, veto_file, veto_name, group_str, tags=[veto_name, str(i)]) bg_files += coinc_node.output_files workflow.add_node(coinc_node) statmap_files += [setup_statmap(workflow, bg_files, hdfbank, out_dir, tags=tags + [veto_name])] logging.info('...leaving coincidence ') return statmap_files
def save_veto_definer(cp, out_dir, tags=[]): """ Retrieve the veto definer file and save it locally Parameters ----------- cp : ConfigParser instance out_dir : path tags : list of strings Used to retrieve subsections of the ini file for configuration options. """ make_analysis_dir(out_dir) vetoDefUrl = cp.get_opt_tags("workflow-segments", "segments-veto-definer-url", tags) vetoDefBaseName = os.path.basename(vetoDefUrl) vetoDefNewPath = os.path.abspath(os.path.join(out_dir, vetoDefBaseName)) urllib.urlretrieve(vetoDefUrl, vetoDefNewPath) # and update location cp.set("workflow-segments", "segments-veto-definer-file", vetoDefNewPath)
def setup_multiifo_combine_statmap(workflow, final_bg_file_list, out_dir,
                                   tags=None):
    """
    Combine the multiifo statmap files into one background file
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up multiifo combine statmap')
    cstat_exe = PyCBCMultiifoCombineStatmap(workflow.cp, 'combine_statmap',
                                            ifos=workflow.ifos, tags=tags,
                                            out_dir=out_dir)
    ifolist = ' '.join(workflow.ifos)
    cluster_window = float(
        workflow.cp.get_opt_tags('combine_statmap', 'cluster-window', tags))
    combine_statmap_node = cstat_exe.create_node(final_bg_file_list, ifolist,
                                                 cluster_window, tags)
    workflow.add_node(combine_statmap_node)
    return combine_statmap_node.output_file
def setup_coh_PTF_post_processing(workflow, trigger_files, trigger_cache, output_dir, segment_dir, injection_trigger_files=None, injection_files=None, injection_trigger_caches=None, injection_caches=None, config_file=None, run_dir=None, ifos=None, web_dir=None, inj_tags=[], tags=[], **kwargs): """ This function aims to be the gateway for running postprocessing in CBC offline workflows. Post-processing generally consists of calculating the significance of triggers and making any statements about trigger rates. Dedicated plotting jobs do not belong here. Parameters ----------- workflow : pycbc.workflow.core.Workflow The Workflow instance that the coincidence jobs will be added to. trigger_files : pycbc.workflow.core.FileList An FileList of the trigger files that are used as input at this stage. summary_xml_files : pycbc.workflow.core.FileList An FileList of the output of the analysislogging_utils module. output_dir : path The directory in which output files will be stored. tags : list of strings (optional, default = []) A list of the tagging strings that will be used for all jobs created by this call to the workflow. An example might be ['POSTPROC1'] or ['DENTYSNEWPOSTPROC']. This will be used in output names. Returns -------- post_proc_files : pycbc.workflow.core.FileList A list of the output from this stage. """ logging.info("Entering post-processing stage.") make_analysis_dir(output_dir) # Parse for options in .ini file post_proc_method = workflow.cp.get_opt_tags("workflow-postproc", "postproc-method", tags) # Scope here for adding different options/methods here. For now we only # have the single_stage ihope method which consists of converting the # ligolw_thinca output xml into one file, clustering, performing injection # finding and putting everything into one SQL database. if post_proc_method == "COH_PTF_WORKFLOW": post_proc_files = setup_postproc_coh_PTF_workflow( workflow, trigger_files, trigger_cache, injection_trigger_files, injection_files, injection_trigger_caches, injection_caches, config_file, output_dir, web_dir, segment_dir, ifos=ifos, inj_tags=inj_tags, tags=tags, **kwargs) else: errMsg = "Post-processing method not recognized. Must be " errMsg += "COH_PTF_WORKFLOW." raise ValueError(errMsg) logging.info("Leaving post-processing module.") return post_proc_files
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None, tags=None): """ Setup datafind section of the workflow. This section is responsible for generating, or setting up the workflow to generate, a list of files that record the location of the frame files needed to perform the analysis. There could be multiple options here, the datafind jobs could be done at run time or could be put into a dag. The subsequent jobs will know what was done here from the OutFileList containing the datafind jobs (and the Dagman nodes if appropriate. For now the only implemented option is to generate the datafind files at runtime. This module can also check if the frameFiles actually exist, check whether the obtained segments line up with the original ones and update the science segments to reflect missing data files. Parameters ---------- workflow: pycbc.workflow.core.Workflow The workflow class that stores the jobs that will be run. scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances This contains the times that the workflow is expected to analyse. outputDir : path All output files written by datafind processes will be written to this directory. seg_file : SegFile, optional (default=None) The file returned by get_science_segments containing the science segments and the associated segment_summary. This will be used for the segment_summary test and is required if, and only if, performing that test. tags : list of string, optional (default=None) Use this to specify tags. This can be used if this module is being called more than once to give call specific configuration (by setting options in [workflow-datafind-${TAG}] rather than [workflow-datafind]). This is also used to tag the Files returned by the class to uniqueify the Files and uniqueify the actual filename. FIXME: Filenames may not be unique with current codes! Returns -------- datafindOuts : OutGroupList List of all the datafind output files for use later in the pipeline. sci_avlble_file : SegFile SegFile containing the analysable time after checks in the datafind module are applied to the input segment list. For production runs this is expected to be equal to the input segment list. scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances This contains the times that the workflow is expected to analyse. If the updateSegmentTimes kwarg is given this will be updated to reflect any instances of missing data. sci_avlble_name : string The name with which the analysable time is stored in the sci_avlble_file. 
""" if tags is None: tags = [] logging.info("Entering datafind module") make_analysis_dir(outputDir) cp = workflow.cp # Parse for options in ini file datafindMethod = cp.get_opt_tags("workflow-datafind", "datafind-method", tags) if cp.has_option_tags("workflow-datafind", "datafind-check-segment-gaps", tags): checkSegmentGaps = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-gaps", tags) else: checkSegmentGaps = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-frames-exist", tags): checkFramesExist = cp.get_opt_tags("workflow-datafind", "datafind-check-frames-exist", tags) else: checkFramesExist = "no_test" if cp.has_option_tags("workflow-datafind", "datafind-check-segment-summary", tags): checkSegmentSummary = cp.get_opt_tags("workflow-datafind", "datafind-check-segment-summary", tags) else: checkSegmentSummary = "no_test" logging.info("Starting datafind with setup_datafind_runtime_generated") if datafindMethod == "AT_RUNTIME_MULTIPLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "AT_RUNTIME_SINGLE_CACHES": datafindcaches, datafindouts = \ setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "AT_RUNTIME_SINGLE_FRAMES": datafindcaches, datafindouts = \ setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs, outputDir, tags=tags) elif datafindMethod == "FROM_PREGENERATED_LCF_FILES": ifos = scienceSegs.keys() datafindcaches, datafindouts = \ setup_datafind_from_pregenerated_lcf_files(cp, ifos, outputDir, tags=tags) else: msg = "Entry datafind-method in [workflow-datafind] does not have " msg += "expected value. Valid values are " msg += "AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES " msg += "AT_RUNTIME_MULTIPLE_CACHES or AT_RUNTIME_SINGLE_CACHES. " msg += "Consult the documentation for more info." raise ValueError(msg) using_backup_server = False if datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES" or \ datafindMethod == "AT_RUNTIME_SINGLE_FRAMES": if cp.has_option_tags("workflow-datafind", "datafind-backup-datafind-server", tags): using_backup_server = True backup_server = cp.get_opt_tags("workflow-datafind", "datafind-backup-datafind-server", tags) cp_new = copy.deepcopy(cp) cp_new.set("workflow-datafind", "datafind-ligo-datafind-server", backup_server) cp_new.set('datafind', 'urltype', 'gsiftp') backup_datafindcaches, backup_datafindouts =\ setup_datafind_runtime_frames_single_call_perifo(cp_new, scienceSegs, outputDir, tags=tags) backup_datafindouts = datafind_keep_unique_backups(\ backup_datafindouts, datafindouts) datafindcaches.extend(backup_datafindcaches) datafindouts.extend(backup_datafindouts) logging.info("setup_datafind_runtime_generated completed") # If we don't have frame files covering all times we can update the science # segments. 
if checkSegmentGaps in ['warn','update_times','raise_error']: logging.info("Checking science segments against datafind output....") newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) logging.info("New segments calculated from data find output.....") missingData = False for ifo in scienceSegs.keys(): # If no science segments in input then do nothing if not scienceSegs[ifo]: msg = "No science segments are present for ifo %s, " %(ifo) msg += "the segment metadata indicates there is no analyzable" msg += " strain data between the selected GPS start and end " msg += "times." logging.warning(msg) continue if not newScienceSegs.has_key(ifo): msg = "No data frames were found corresponding to the science " msg += "segments for ifo %s" %(ifo) logging.error(msg) missingData = True if checkSegmentGaps == 'update_times': scienceSegs[ifo] = segments.segmentlist() continue missing = scienceSegs[ifo] - newScienceSegs[ifo] if abs(missing): msg = "From ifo %s we are missing frames covering:" %(ifo) msg += "\n%s" % "\n".join(map(str, missing)) missingData = True logging.error(msg) if checkSegmentGaps == 'update_times': # Remove missing time, so that we can carry on if desired logging.info("Updating science segments for ifo %s." %(ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missing if checkSegmentGaps == 'raise_error' and missingData: raise ValueError("Workflow cannot find needed data, exiting.") logging.info("Done checking, any discrepancies are reported above.") elif checkSegmentGaps == 'no_test': pass else: errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." raise ValueError(errMsg) # Do all of the frame files that were returned actually exist? if checkFramesExist in ['warn','update_times','raise_error']: logging.info("Verifying that all frames exist on disk.") missingFrSegs, missingFrames = \ get_missing_segs_from_frame_file_cache(datafindcaches) missingFlag = False for ifo in missingFrames.keys(): # If no data in the input then do nothing if not scienceSegs[ifo]: continue # If using a backup server, does the frame exist remotely? if using_backup_server: # WARNING: This will be slow, but hopefully it will not occur # for too many frames. This could be optimized if # it becomes necessary. new_list = [] for frame in missingFrames[ifo]: for dfout in datafindouts: dfout_pfns = list(dfout.pfns) dfout_urls = [a.url for a in dfout_pfns] if frame.url in dfout_urls: pfn = dfout_pfns[dfout_urls.index(frame.url)] dfout.removePFN(pfn) if len(dfout.pfns) == 0: new_list.append(frame) else: msg = "Frame %s not found locally. "\ %(frame.url,) msg += "Replacing with remote url(s) %s." \ %(str([a.url for a in dfout.pfns]),) logging.info(msg) break else: new_list.append(frame) missingFrames[ifo] = new_list if missingFrames[ifo]: msg = "From ifo %s we are missing the following frames:" %(ifo) msg +='\n'.join([a.url for a in missingFrames[ifo]]) missingFlag = True logging.error(msg) if checkFramesExist == 'update_times': # Remove missing times, so that we can carry on if desired logging.info("Updating science times for ifo %s." %(ifo)) scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo] if checkFramesExist == 'raise_error' and missingFlag: raise ValueError("Workflow cannot find all frames, exiting.") logging.info("Finished checking frames.") elif checkFramesExist == 'no_test': pass else: errMsg = "checkFramesExist kwarg must take a value from 'no_test', " errMsg += "'warn', 'update_times' or 'raise_error'." 
raise ValueError(errMsg) # Check if there are cases where frames exist, but no entry in the segment # summary table are present. if checkSegmentSummary in ['warn', 'raise_error']: logging.info("Checking the segment summary table against frames.") dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches) missingFlag = False # NOTE: Should this be overrideable in the config file? sci_seg_name = "SCIENCE" if seg_file is None: err_msg = "You must provide the science segments SegFile object " err_msg += "if using the datafind-check-segment-summary option." raise ValueError(err_msg) if seg_file.seg_summ_dict is None: err_msg = "The provided science segments SegFile object must " err_msg += "contain a valid segment_summary table if using the " err_msg += "datafind-check-segment-summary option." raise ValueError(err_msg) seg_summary_times = seg_file.seg_summ_dict for ifo in dfScienceSegs.keys(): curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name] missing = (dfScienceSegs[ifo] & seg_file.valid_segments) missing.coalesce() missing = missing - curr_seg_summ_times missing.coalesce() scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo] scienceButNotFrame.coalesce() missing2 = scienceSegs[ifo] - scienceButNotFrame missing2.coalesce() missing2 = missing2 - curr_seg_summ_times missing2.coalesce() if abs(missing): msg = "From ifo %s the following times have frames, " %(ifo) msg += "but are not covered in the segment summary table." msg += "\n%s" % "\n".join(map(str, missing)) logging.error(msg) missingFlag = True if abs(missing2): msg = "From ifo %s the following times have frames, " %(ifo) msg += "are science, and are not covered in the segment " msg += "summary table." msg += "\n%s" % "\n".join(map(str, missing2)) logging.error(msg) missingFlag = True if checkSegmentSummary == 'raise_error' and missingFlag: errMsg = "Segment_summary discrepancy detected, exiting." raise ValueError(errMsg) elif checkSegmentSummary == 'no_test': pass else: errMsg = "checkSegmentSummary kwarg must take a value from 'no_test', " errMsg += "'warn', or 'raise_error'." raise ValueError(errMsg) # Now need to create the file for SCIENCE_AVAILABLE sci_avlble_dict = segments.segmentlistdict() # NOTE: Should this be overrideable in the config file? sci_avlble_name = "SCIENCE_AVAILABLE" for ifo in scienceSegs.keys(): sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo] sci_avlble_file = SegFile.from_segment_list_dict('SCIENCE_AVAILABLE', sci_avlble_dict, ifo_list = scienceSegs.keys(), valid_segment=workflow.analysis_time, extension='.xml', tags=tags, directory=outputDir) logging.info("Leaving datafind module") return FileList(datafindouts), sci_avlble_file, scienceSegs, sci_avlble_name
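# Illustrative [workflow-datafind] sketch (the values are examples): the
# method selects how frame locations are obtained, and the check options are
# the ones read by setup_datafind_workflow above. The first two checks accept
# no_test, warn, update_times or raise_error; the segment-summary check
# accepts no_test, warn or raise_error.
#
#   [workflow-datafind]
#   datafind-method = AT_RUNTIME_SINGLE_FRAMES
#   datafind-check-segment-gaps = update_times
#   datafind-check-frames-exist = update_times
#   datafind-check-segment-summary = warn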
def setup_timeslides_workflow(workflow, output_dir=None, tags=[],
                              timeSlideSectionName='ligolw_tisi'):
    '''
    Setup generation of time_slide input files in the workflow.
    Currently used only with ligolw_tisi to generate files containing the
    list of slides to be performed in each time slide job.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.
    timeSlideSectionName : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given
        to the code at run time.

    Returns
    --------
    timeSlideOuts : pycbc.workflow.core.FileList
        The list of time slide files created by this call.
    '''
    logging.info("Entering time slides setup module.")
    make_analysis_dir(output_dir)
    # Get ifo list and full analysis segment for output file naming
    ifoList = workflow.ifos
    ifo_string = workflow.ifo_string
    fullSegment = workflow.analysis_time

    # Identify which time-slides to do by presence of sub-sections in the
    # configuration file
    all_sec = workflow.cp.sections()
    timeSlideSections = [sec for sec in all_sec if sec.startswith('tisi-')]
    timeSlideTags = [(sec.split('-')[-1]).upper()
                     for sec in timeSlideSections]

    timeSlideOuts = FileList([])

    # FIXME: Add ability to specify different exes

    # Make the timeSlideFiles
    for timeSlideTag in timeSlideTags:
        currTags = tags + [timeSlideTag]

        timeSlideMethod = workflow.cp.get_opt_tags("workflow-timeslides",
                                                   "timeslides-method",
                                                   currTags)

        if timeSlideMethod in ["IN_WORKFLOW", "AT_RUNTIME"]:
            timeSlideExeTag = workflow.cp.get_opt_tags("workflow-timeslides",
                                                       "timeslides-exe",
                                                       currTags)
            timeSlideExe = select_generic_executable(workflow,
                                                     timeSlideExeTag)
            timeSlideJob = timeSlideExe(workflow.cp, timeSlideExeTag,
                                        ifos=ifo_string, tags=currTags,
                                        out_dir=output_dir)
            timeSlideNode = timeSlideJob.create_node(fullSegment)
            if timeSlideMethod == "AT_RUNTIME":
                workflow.execute_node(timeSlideNode)
            else:
                workflow.add_node(timeSlideNode)
            tisiOutFile = timeSlideNode.output_files[0]
        elif timeSlideMethod == "PREGENERATED":
            timeSlideFilePath = workflow.cp.get_opt_tags(
                "workflow-timeslides", "timeslides-pregenerated-file",
                currTags)
            file_url = urlparse.urljoin('file:',
                                        urllib.pathname2url(timeSlideFilePath))
            tisiOutFile = File(ifo_string, 'PREGEN_TIMESLIDES', fullSegment,
                               file_url, tags=currTags)

        timeSlideOuts.append(tisiOutFile)

    return timeSlideOuts
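# Illustrative configuration sketch (the subsection name and the exe tag are
# placeholders): one time-slide file is produced per [tisi-*] subsection
# present in the ini file, and the options below are the ones read by
# setup_timeslides_workflow for each tag.
#
#   [tisi-zerolag]
#   ; options passed to the time-slide executable for this tag
#
#   [workflow-timeslides]
#   timeslides-method = IN_WORKFLOW
#   timeslides-exe = tisi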
def get_segments_file(workflow, name, option_name, out_dir):
    """Get cumulative segments from option name syntax for each ifo.

    The config-parser string defines the resulting segment_file, e.g.
    option_name = +up_flag1,+up_flag2,+up_flag3,-down_flag1,-down_flag2
    Each ifo may have a different string and is stored separately in the file.
    Flags which add time must precede flags which subtract time.

    Parameters
    ----------
    workflow : pycbc.workflow.Workflow
    name : string
        Name of the segment list being created
    option_name : str
        Name of option in the associated config parser to get the flag list

    Returns
    -------
    seg_file : pycbc.workflow.SegFile
        SegFile instance that points to the segment xml file on disk.
    """
    from pycbc.dq import query_str
    make_analysis_dir(out_dir)
    cp = workflow.cp
    start = workflow.analysis_time[0]
    end = workflow.analysis_time[1]

    # Check for veto definer file
    veto_definer = None
    if cp.has_option("workflow-segments", "segments-veto-definer-url"):
        veto_definer = save_veto_definer(workflow.cp, out_dir, [])

    # Check for provided server
    server = "https://segments.ligo.org"
    if cp.has_option("workflow-segments", "segments-database-url"):
        server = cp.get("workflow-segments", "segments-database-url")

    source = "any"
    if cp.has_option("workflow-segments", "segments-source"):
        source = cp.get("workflow-segments", "segments-source")

    if source == "file":
        local_file_path = \
            resolve_url(cp.get("workflow-segments", option_name + "-file"))
        pfn = os.path.join(out_dir, os.path.basename(local_file_path))
        shutil.move(local_file_path, pfn)
        return SegFile.from_segment_xml(pfn)

    segs = {}
    for ifo in workflow.ifos:
        flag_str = cp.get_opt_tags("workflow-segments", option_name, [ifo])
        key = ifo + ':' + name
        segs[key] = query_str(ifo, flag_str, start, end,
                              source=source, server=server,
                              veto_definer=veto_definer)
        logging.info("%s: got %s flags", ifo, option_name)

    return SegFile.from_segment_list_dict(name, segs,
                                          extension='.xml',
                                          valid_segment=workflow.analysis_time,
                                          directory=out_dir)
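# A minimal usage sketch for get_segments_file. The flag names and section
# contents below are hypothetical; the points encoded above are only that the
# option is looked up per ifo in [workflow-segments] and that '+' flags must
# come before '-' flags:
#
#     [workflow-segments]
#     segments-science = +DMT-ANALYSIS_READY:1,-DMT-INJECTION_TRANSIENT:1
#
#     science_file = get_segments_file(workflow, 'SCIENCE',
#                                      'segments-science', 'segments')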
def setup_tmpltbank_workflow(workflow, science_segs, datafind_outs,
                             output_dir=None, psd_files=None, tags=None,
                             return_format=None):
    '''
    Setup template bank section of CBC workflow. This function is responsible
    for deciding which of the various template bank workflow generation
    utilities should be used.

    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of glue.segmentlist objects
        scienceSegs[ifo] holds the science segments to be analysed for each
        ifo.
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed.
    psd_files : pycbc.workflow.core.FileList
        The file list containing predefined PSDs, if provided.
    tags : list of strings
        If given these tags are used to uniquely name and identify output
        files that would be produced in multiple calls to this function.

    Returns
    --------
    tmplt_banks : pycbc.workflow.core.FileList
        The FileList holding the details of all the template bank jobs.
    '''
    if tags is None:
        tags = []
    logging.info("Entering template bank generation module.")
    make_analysis_dir(output_dir)

    cp = workflow.cp

    # Parse for options in ini file
    tmpltbankMethod = cp.get_opt_tags("workflow-tmpltbank",
                                      "tmpltbank-method", tags)

    # There can be a large number of different options here, for e.g. to set
    # up a fixed bank, or maybe something else
    if tmpltbankMethod == "PREGENERATED_BANK":
        logging.info("Setting template bank from pre-generated bank(s).")
        tmplt_banks = setup_tmpltbank_pregenerated(workflow, tags=tags)
    # Else we assume template banks will be generated in the workflow
    elif tmpltbankMethod == "WORKFLOW_INDEPENDENT_IFOS":
        logging.info("Adding template bank jobs to workflow.")
        if cp.has_option_tags("workflow-tmpltbank",
                              "tmpltbank-link-to-matchedfilter", tags):
            if not cp.has_option_tags("workflow-matchedfilter",
                                      "matchedfilter-link-to-tmpltbank", tags):
                errMsg = "If using tmpltbank-link-to-matchedfilter, you "
                errMsg += "should also use matchedfilter-link-to-tmpltbank."
                logging.warn(errMsg)
            linkToMatchedfltr = True
        else:
            linkToMatchedfltr = False
        if cp.has_option_tags("workflow-tmpltbank",
                              "tmpltbank-compatibility-mode", tags):
            if not linkToMatchedfltr:
                errMsg = "Compatibility mode requires that the "
                errMsg += "tmpltbank-link-to-matchedfilter option is also set."
                raise ValueError(errMsg)
            if not cp.has_option_tags("workflow-matchedfilter",
                                      "matchedfilter-compatibility-mode",
                                      tags):
                errMsg = "If using compatibility mode it must be set both in "
                errMsg += "the template bank and matched-filtering stages."
                raise ValueError(errMsg)
            compatibility_mode = True
        else:
            compatibility_mode = False
        tmplt_banks = setup_tmpltbank_dax_generated(
            workflow, science_segs, datafind_outs, output_dir, tags=tags,
            link_to_matchedfltr=linkToMatchedfltr,
            compatibility_mode=compatibility_mode, psd_files=psd_files)
    elif tmpltbankMethod == "WORKFLOW_INDEPENDENT_IFOS_NODATA":
        logging.info("Adding template bank jobs to workflow.")
        tmplt_banks = setup_tmpltbank_without_frames(
            workflow, output_dir, tags=tags, independent_ifos=True,
            psd_files=psd_files)
    elif tmpltbankMethod == "WORKFLOW_NO_IFO_VARIATION_NODATA":
        logging.info("Adding template bank jobs to workflow.")
        tmplt_banks = setup_tmpltbank_without_frames(
            workflow, output_dir, tags=tags, independent_ifos=False,
            psd_files=psd_files)
    else:
        errMsg = "Template bank method not recognized. Must be one of "
        errMsg += "PREGENERATED_BANK, WORKFLOW_INDEPENDENT_IFOS, "
        errMsg += "WORKFLOW_INDEPENDENT_IFOS_NODATA or "
        errMsg += "WORKFLOW_NO_IFO_VARIATION_NODATA."
        raise ValueError(errMsg)

    # Check the format of the input template bank file and return it in
    # the format requested as per return_format, provided a conversion
    # between the two specific formats has been implemented. Currently,
    # a conversion from xml.gz or xml to hdf is supported, but not vice
    # versa. If a return_format is not specified the function returns
    # the bank in the format as it was inputted.
    tmplt_bank_filename = tmplt_banks[0].name
    ext = tmplt_bank_filename.split('.', 1)[1]
    logging.info("Input bank is a %s file", ext)
    if return_format is None:
        tmplt_banks_return = tmplt_banks
    elif return_format in ('hdf', 'h5', 'hdf5'):
        if ext in ('hdf', 'h5', 'hdf5') or ext in ('xml.gz', 'xml'):
            tmplt_banks_return = pycbc.workflow.convert_bank_to_hdf(
                workflow, tmplt_banks, "bank")
    else:
        if ext == return_format:
            tmplt_banks_return = tmplt_banks
        else:
            raise NotImplementedError("{0} to {1} conversion is not "
                                      "supported.".format(ext, return_format))
    logging.info("Leaving template bank generation module.")
    return tmplt_banks_return
def setup_coincidence_workflow(workflow, segsList, timeSlideFiles,
                               inspiral_outs, output_dir,
                               veto_cats=[2, 3, 4], tags=[],
                               timeSlideTags=None):
    '''
    This function aims to be the gateway for setting up a set of coincidence
    jobs in a workflow. The goal is that this function can support a number
    of different ways/codes that could be used for doing this. For now it
    only supports ligolw_sstinca.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    segsList : pycbc.workflow.core.FileList
        The list of files returned by workflow's segment module that contains
        pointers to all the segment files generated in the workflow. If the
        coincidence code will be applying the data quality vetoes, then this
        will be used to ensure that the codes get the necessary input to do
        this.
    timeSlideFiles : pycbc.workflow.core.FileList
        A FileList of the timeSlide input files that are needed to determine
        what time sliding needs to be done if the coincidence code will be
        running time slides to facilitate background computations later in
        the workflow.
    inspiral_outs : pycbc.workflow.core.FileList
        A FileList of the matched-filter module output that is used as input
        to the coincidence codes running at this stage.
    output_dir : path
        The directory in which coincidence output will be stored.
    veto_cats : list of ints (optional, default = [2, 3, 4])
        Veto categories that will be applied in the coincidence jobs. If this
        takes the default value the code will run data quality at cumulative
        categories 2, 3 and 4. Note that if we change the flag definitions to
        be non-cumulative then this option will need to be revisited.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    timeSlideTags : list of strings (optional, default = [])
        A list of the tags corresponding to the timeSlideFiles that are to be
        used in this call to the module. This can be used to ensure that the
        injection runs do no time sliding, but the no-injection runs do
        perform time slides (or vice-versa if you prefer!)

    Returns
    --------
    coinc_outs : pycbc.workflow.core.FileList
        A list of the *final* outputs of the coincident stage. This *does not*
        include any intermediate products produced within the workflow. If you
        require access to intermediate products call the various sub-functions
        in this module directly.
    '''
    logging.info('Entering coincidence setup module.')
    make_analysis_dir(output_dir)

    # Parse for options in .ini file
    coincidenceMethod = workflow.cp.get_opt_tags("workflow-coincidence",
                                                 "coincidence-method", tags)

    # Scope here for adding different options/methods here. For now we only
    # have the single_stage ihope method which consists of using ligolw_add
    # to create a large job for coincidence and then running ligolw_thinca
    # on that output.
    if coincidenceMethod == "WORKFLOW_DISCRETE_SLIDES":
        # If I am doing exact match I can parallelize these jobs and reduce
        # memory footprint. This will require all input inspiral jobs to have
        # a JOB%d tag to distinguish between them.
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "coincidence-exact-match-parallelize",
                                       tags):
            parallelize_split_input = True
        else:
            parallelize_split_input = False

        # If you want the ligolw_add outputs, call this function directly
        coinc_outs, other_outs = setup_coincidence_workflow_ligolw_thinca(
            workflow, segsList, timeSlideFiles, inspiral_outs, output_dir,
            veto_cats=veto_cats, tags=tags, timeSlideTags=timeSlideTags,
            parallelize_split_input=parallelize_split_input)
    else:
        errMsg = "Coincidence method not recognized. Must be one of "
        errMsg += "WORKFLOW_DISCRETE_SLIDES (currently the only option)."
        raise ValueError(errMsg)

    logging.info('Leaving coincidence setup module.')

    return coinc_outs, other_outs
def setup_analysislogging(workflow, segs_list, insps, args, output_dir,
                          program_name="workflow", tags=[]):
    """ This module sets up the analysis logging xml file that contains the
    following information:

    * Command line arguments that the code was run with
    * Segment list of times marked as SCIENCE
    * Segment list of times marked as SCIENCE and "OK", i.e. not CAT_1 vetoed
    * Segment list of times marked as SCIENCE_OK and present on the cluster
    * The times that will be analysed by the matched-filter jobs

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance.
    segs_list : pycbc.workflow.core.FileList
        A list of Files containing the information needed to generate the
        segments above. For segments generated at run time the associated
        segmentlist is a property of this object.
    insps : pycbc.workflow.core.FileList
        The output files from the matched-filtering module. Used to identify
        what times have been analysed in this workflow.
    output_dir : path
        Directory to output any files to.
    program_name : string (optional, default = "workflow")
        The program name to stick in the process/process_params tables.
    tags : list (optional, default = [])
        If given restrict to considering inspiral and segment files that are
        tagged with all tags in this list.
    """
    logging.info("Entering analysis logging module.")
    make_analysis_dir(output_dir)

    # Construct the summary XML file
    outdoc = ligolw.Document()
    outdoc.appendChild(ligolw.LIGO_LW())

    # Add process and process_params tables
    proc_id = process.register_to_xmldoc(outdoc, program_name,
                                         vars(args)).process_id

    # Now add the various segment lists to this file
    summ_segs = segmentlist([workflow.analysis_time])

    # If tags is given filter by tags
    if tags:
        for tag in tags:
            segs_list = segs_list.find_output_with_tag(tag)
            insps = insps.find_output_with_tag(tag)

    for ifo in workflow.ifos:
        # Lets get the segment lists we need
        seg_ifo_files = segs_list.find_output_with_ifo(ifo)
        # SCIENCE
        sci_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE')
        if len(sci_seg_file) == 1:
            sci_seg_file = sci_seg_file[0]
            sci_segs = sci_seg_file.segmentList
            sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                    proc_id, ifo, "CBC_WORKFLOW_SCIENCE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id,
                                           sci_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                                   sci_def_id, summ_segs,
                                                   comment='')
        elif sci_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_seg_file), ifo, 'SCIENCE')
            #raise ValueError(err_msg)

        # SCIENCE_OK
        sci_ok_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE_OK')
        if len(sci_ok_seg_file) == 1:
            sci_ok_seg_file = sci_ok_seg_file[0]
            sci_ok_segs = sci_ok_seg_file.segmentList
            sci_ok_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                 proc_id, ifo, "CBC_WORKFLOW_SCIENCE_OK", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_ok_def_id,
                                           sci_ok_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                                   sci_ok_def_id, summ_segs,
                                                   comment='')
        elif sci_ok_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_ok_seg_file), ifo, 'SCIENCE_OK')
            #raise ValueError(err_msg)

        # SCIENCE_AVAILABLE
        sci_available_seg_file = seg_ifo_files.find_output_with_tag(
                                                        'SCIENCE_AVAILABLE')
        if len(sci_available_seg_file) == 1:
            sci_available_seg_file = sci_available_seg_file[0]
            sci_available_segs = sci_available_seg_file.segmentList
            sci_available_def_id = segmentdb_utils.add_to_segment_definer(
                    outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE_AVAILABLE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id,
                                           sci_available_def_id,
                                           sci_available_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                 sci_available_def_id, summ_segs, comment='')
        elif sci_available_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            # may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_available_seg_file), ifo, 'SCIENCE_AVAILABLE')
            #raise ValueError(err_msg)

        # ANALYSABLE - This one needs to come from inspiral outs
        ifo_insps = insps.find_output_with_ifo(ifo)
        analysable_segs = ifo_insps.get_times_covered_by_files()

        analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                 proc_id, ifo, "CBC_WORKFLOW_ANALYSABLE", 0)
        segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                                       analysable_segs)
        segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                analysable_def_id, summ_segs, comment='')

    summ_file = File(workflow.ifos, "WORKFLOW_SUMMARY",
                     workflow.analysis_time, extension=".xml",
                     directory=output_dir)
    summ_file.PFN(summ_file.storage_path, site='local')
    utils.write_filename(outdoc, summ_file.storage_path)

    return FileList([summ_file])
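# Typical use of the logging summary above (a sketch; the argument objects
# come from the earlier workflow stages, `args` is the argparse.Namespace the
# top-level script parsed, and the program name is a placeholder):
#
#     summ_files = setup_analysislogging(workflow, segs_list, inspiral_files,
#                                        args, 'analysis_logging',
#                                        program_name='pycbc_make_workflow')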
def setup_matchedfltr_workflow(workflow, science_segs, datafind_outs,
                               tmplt_banks, output_dir=None,
                               injection_file=None, tags=None):
    '''
    This function aims to be the gateway for setting up a set of
    matched-filter jobs in a workflow. This function is intended to support
    multiple different ways/codes that could be used for doing this. For now
    the only supported sub-module is one that runs the matched-filtering by
    setting up a series of matched-filtering jobs, from one executable, to
    create matched-filter triggers covering the full range of science times
    for which there is data and a template bank file.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The workflow instance that the coincidence jobs will be added to.
    science_segs : ifo-keyed dictionary of glue.segments.segmentlist instances
        The list of times that are being analysed in this workflow.
    datafind_outs : pycbc.workflow.core.FileList
        A FileList of the datafind files that are needed to obtain the data
        used in the analysis.
    tmplt_banks : pycbc.workflow.core.FileList
        A FileList of the template bank files that will serve as input in
        this stage.
    output_dir : path
        The directory in which output will be stored.
    injection_file : pycbc.workflow.core.File, optional (default=None)
        If given the file containing the simulation file to be sent to these
        jobs on the command line. If not given no file will be sent.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.

    Returns
    -------
    inspiral_outs : pycbc.workflow.core.FileList
        A list of output files written by this stage. This *will not* contain
        any intermediate products produced within this stage of the workflow.
        If you require access to any intermediate products produced at this
        stage you can call the various sub-functions directly.
    '''
    if tags is None:
        tags = []
    logging.info("Entering matched-filtering setup module.")
    make_analysis_dir(output_dir)
    cp = workflow.cp

    # Parse for options in .ini file
    mfltrMethod = cp.get_opt_tags("workflow-matchedfilter",
                                  "matchedfilter-method", tags)

    # Could have a number of choices here
    if mfltrMethod == "WORKFLOW_INDEPENDENT_IFOS":
        logging.info("Adding matched-filter jobs to workflow.")
        if cp.has_option_tags("workflow-matchedfilter",
                              "matchedfilter-link-to-tmpltbank", tags):
            if not cp.has_option_tags("workflow-tmpltbank",
                                      "tmpltbank-link-to-matchedfilter",
                                      tags):
                errMsg = "If using matchedfilter-link-to-tmpltbank, you "
                errMsg += "should also use tmpltbank-link-to-matchedfilter."
                logging.warn(errMsg)
            linkToTmpltbank = True
        else:
            linkToTmpltbank = False
        if cp.has_option_tags("workflow-matchedfilter",
                              "matchedfilter-compatibility-mode", tags):
            if not linkToTmpltbank:
                errMsg = "Compatibility mode requires that the "
                errMsg += "matchedfilter-link-to-tmpltbank option is also set."
                raise ValueError(errMsg)
            if not cp.has_option_tags("workflow-tmpltbank",
                                      "tmpltbank-compatibility-mode", tags):
                errMsg = "If using compatibility mode it must be set both in "
                errMsg += "the template bank and matched-filtering stages."
                raise ValueError(errMsg)
            compatibility_mode = True
        else:
            compatibility_mode = False
        inspiral_outs = setup_matchedfltr_dax_generated(
            workflow, science_segs, datafind_outs, tmplt_banks, output_dir,
            injection_file=injection_file, tags=tags,
            link_to_tmpltbank=linkToTmpltbank,
            compatibility_mode=compatibility_mode)
    elif mfltrMethod == "WORKFLOW_MULTIPLE_IFOS":
        logging.info("Adding matched-filter jobs to workflow.")
        inspiral_outs = setup_matchedfltr_dax_generated_multi(
            workflow, science_segs, datafind_outs, tmplt_banks, output_dir,
            injection_file=injection_file, tags=tags)
    else:
        errMsg = "Matched filter method not recognized. Must be one of "
        errMsg += "WORKFLOW_INDEPENDENT_IFOS or WORKFLOW_MULTIPLE_IFOS."
        raise ValueError(errMsg)

    logging.info("Leaving matched-filtering setup module.")
    return inspiral_outs
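# A hedged configuration sketch for the matched-filter method selection above.
# Only the options queried in this function are shown; per-job options live in
# the matched-filter executable's own sections:
#
#     [workflow-matchedfilter]
#     matchedfilter-method = WORKFLOW_INDEPENDENT_IFOS
#     ; mirror of the template-bank linking options, if used:
#     ; matchedfilter-link-to-tmpltbank =
#     ; matchedfilter-compatibility-mode =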
def setup_injection_workflow(workflow, output_dir=None,
                             inj_section_name='injections',
                             exttrig_file=None, tags=None):
    """
    This function is the gateway for setting up injection-generation jobs in a
    workflow. It should be possible for this function to support a number of
    different ways/codes that could be used for doing this, however as this
    will presumably stay as a single call to a single code (which need not be
    inspinj) there are currently no subfunctions in this module.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which injection files will be stored.
    inj_section_name : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given
        to the code at run time.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.

    Returns
    --------
    inj_files : pycbc.workflow.core.FileList
        The list of injection files created by this call.
    inj_tags : list of strings
        The tag corresponding to each injection file and used to uniquely
        identify them. The FileList class contains functions to search based
        on tags.
    """
    if tags is None:
        tags = []
    logging.info("Entering injection module.")
    make_analysis_dir(output_dir)

    # Get full analysis segment for output file naming
    full_segment = workflow.analysis_time
    ifos = workflow.ifos

    # Identify which injections to do by presence of sub-sections in
    # the configuration file
    inj_tags = []
    inj_files = FileList([])

    for section in workflow.cp.get_subsections(inj_section_name):
        inj_tag = section.upper()
        curr_tags = tags + [inj_tag]

        # Parse for options in ini file
        injection_method = workflow.cp.get_opt_tags("workflow-injections",
                                                    "injections-method",
                                                    curr_tags)

        if injection_method in ["IN_WORKFLOW", "AT_RUNTIME"]:
            # FIXME: Add ability to specify different exes
            inj_job = LalappsInspinjExecutable(workflow.cp, inj_section_name,
                                               out_dir=output_dir, ifos='HL',
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment)
            if injection_method == "AT_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]
            inj_files.append(inj_file)
        elif injection_method == "PREGENERATED":
            file_attrs = {
                'ifos': ['HL'],
                'segs': full_segment,
                'tags': curr_tags
            }
            injection_path = workflow.cp.get_opt_tags(
                "workflow-injections", "injections-pregenerated-file",
                curr_tags)
            curr_file = resolve_url_to_file(injection_path, attrs=file_attrs)
            inj_files.append(curr_file)
        elif injection_method in ["IN_COH_PTF_WORKFLOW", "AT_COH_PTF_RUNTIME"]:
            inj_job = LalappsInspinjExecutable(workflow.cp, inj_section_name,
                                               out_dir=output_dir, ifos=ifos,
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment, exttrig_file)
            if injection_method == "AT_COH_PTF_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "em-bright-only"):
                em_filter_job = PycbcDarkVsBrightInjectionsExecutable(
                    workflow.cp, 'em_bright_filter', tags=curr_tags,
                    out_dir=output_dir, ifos=ifos)
                node = em_filter_job.create_node(inj_file, full_segment,
                                                 curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-jitter-skyloc"):
                jitter_job = LigolwCBCJitterSkylocExecutable(
                    workflow.cp, 'jitter_skyloc', tags=curr_tags,
                    out_dir=output_dir, ifos=ifos)
                node = jitter_job.create_node(inj_file, full_segment,
                                              curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-align-total-spin"):
                align_job = LigolwCBCAlignTotalSpinExecutable(
                    workflow.cp, 'align_total_spin', tags=curr_tags,
                    out_dir=output_dir, ifos=ifos)
                node = align_job.create_node(inj_file, full_segment,
                                             curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            inj_files.append(inj_file)
        else:
            err = "Injection method must be one of IN_WORKFLOW, AT_RUNTIME, "
            err += "PREGENERATED, IN_COH_PTF_WORKFLOW or AT_COH_PTF_RUNTIME. "
            err += "Got %s." % (injection_method)
            raise ValueError(err)

        inj_tags.append(inj_tag)

    logging.info("Leaving injection module.")
    return inj_files, inj_tags
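# A hedged configuration sketch for the injection sub-sections scanned above.
# The sub-section name ("bnslininj") is hypothetical; its upper-cased form
# becomes the injection tag, and the method is then read from
# [workflow-injections] with that tag applied:
#
#     [injections-bnslininj]
#     ; options passed to the injection executable for this injection set
#
#     [workflow-injections]
#     injections-method = IN_WORKFLOW
#     ; or, to reuse an existing injection file:
#     ; injections-method = PREGENERATED
#     ; injections-pregenerated-file = /path/to/injections.xml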