Example #1
def make_singles_plot(workflow, trig_files, bank_file, veto_file, veto_name,
                     out_dir, exclude=None, require=None, tags=None):
    tags = [] if tags is None else tags
    makedir(out_dir)
    secs = requirestr(workflow.cp.get_subsections('plot_singles'), require)
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        for trig_file in trig_files:
            node = PlotExecutable(workflow.cp, 'plot_singles',
                        ifos=trig_file.ifo,
                        out_dir=out_dir,
                        tags=[tag] + tags).create_node()

            node.add_input_opt('--bank-file', bank_file)
            if veto_file is not None:
                node.add_input_opt('--veto-file', veto_file)
                node.add_opt('--segment-name', veto_name)
            node.add_opt('--detector', trig_file.ifo)
            node.add_input_opt('--single-trig-file', trig_file)
            node.new_output_file_opt(trig_file.segment, '.png', '--output-file')
            workflow += node
            files += node.output_files
    return files
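
A minimal usage sketch, not taken from the source: the workflow object, trigger files, bank file and veto file are assumed to come from earlier stages of a PyCBC workflow, and the veto segment name, output directory and tags are illustrative.

# Hypothetical call; all inputs come from earlier workflow stages.
insp_plots = make_singles_plot(workflow, insp_files, bank_file, veto_file,
                               'CUMULATIVE_CAT_4', 'plots/singles',
                               tags=['FULL_DATA'])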
Example #2
def setup_postprocprep_pipedown_workflow(workflow, coincFiles, output_dir,
                                      tags=[], do_repop=False, 
                                      injectionFiles=None,
                                      vetoFiles=None, injLessTag=None,
                                      injectionTags=[], veto_cats=[]):
    """
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    coincFiles : pycbc.workflow.core.FileList
        A FileList of the coincident trigger files that are used as
        input at this stage.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    do_repop : Boolean
        If False, use the 'coinc_inspiral.snr' column from the coincident 
        trigger files as clustering and ranking statistic; if True, use
        a repop_coinc job before clustering to calculate a different ranking
        statistic and store in the coinc_inspiral table for later use.
    injectionFiles : pycbc.workflow.core.FileList (optional, default=None)
        The injection files to be used in this stage. An empty list (or any
        other input that evaluates as false) is valid and will imply that no
        injections are being done.
    vetoFiles : pycbc.workflow.core.FileList (required)
        The data quality files to be used in this stage. This is required and
        will be used to determine the analysed times when doing post-processing.
    injLessTag : string (required)
        The tag that identifies files that do not have simulations in them.
        I.e. the primary search results.
    injectionTags : list of strings (optional, default = [])
        Each injection file has a unique tag. If used in the method, this
        tells the post-processing preparation code which injection tags it
        should include when creating the combined output.
    veto_cats : list of integers (optional, default = [])
        Decide which set of veto files should be used in the post-processing
        preparation. For example tell the workflow to only generate results
        at cumulative categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    finalFiles : pycbc.workflow.core.FileList
        A list of the single SQL database storing the clustered,
        injection-found triggers for all injection, time-slid and zero-lag
        analyses.
    initialSqlFiles : pycbc.workflow.core.FileList
        The SQL files before clustering is applied and injection finding is
        performed.
    clusteredSqlFiles : pycbc.workflow.core.FileList
        The clustered SQL files, before injection finding is performed.
    combinedSqlFiles : pycbc.workflow.core.FileList
        A combined file containing all triggers after clustering, including
        the injection and veto tables, but before injection finding is
        performed. There is probably no need to ever keep this file; it will
        be a temporary file in most cases.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")

    # Setup needed exe classes
    sqliteCombine1ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner1-exe", tags)
    sqliteCombine1Exe = select_generic_executable(workflow, 
                                                  sqliteCombine1ExeTag)
    sqliteCombine2ExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-combiner2-exe", tags)
    sqliteCombine2Exe = select_generic_executable(workflow, 
                                                  sqliteCombine2ExeTag)
    clusterCoincsExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-cluster-exe", tags)
    clusterCoincsExe = select_generic_executable(workflow, clusterCoincsExeTag)
    injFindExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                   "postprocprep-injfind-exe", tags)
    injFindExe = select_generic_executable(workflow, injFindExeTag)

    sqliteCombine1Outs = FileList([])
    clusterCoincsOuts = FileList([])
    injFindOuts = FileList([])
    sqliteCombine2Outs = FileList([])

    if do_repop:
        repopCoincExeTag = workflow.cp.get_opt_tags("workflow-postprocprep",
                                                "postprocprep-repop-exe", tags)
        repopCoincExe = select_generic_executable(workflow, repopCoincExeTag)
        repopCoincOuts = FileList([])

    for cat in veto_cats:
        # FIXME: Some hacking is still needed while we support pipedown
        # FIXME: There are currently 3 names to say cumulative cat_3
        vetoTag = 'CUMULATIVE_CAT_%d' %(cat)
        dqSegFile = vetoFiles.find_output_with_tag(vetoTag)
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            raise ValueError(errMsg)
        # Don't think this is used here, this is the tag *in* the file
        dqVetoName = 'VETO_CAT%d_CUMULATIVE' %(cat)
        # FIXME: Here we set the dqVetoName to be compatible with pipedown
        pipedownDQVetoName = 'CAT_%d_VETO' %(cat)

        sqliteCombine2Inputs = FileList([])
        # Do injection-less jobs first.

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Combine trig files first
        currTags = tags + [injLessTag, vetoTag]
        trigVetoInpFiles = coincFiles.find_output_with_tag(pipedownDQVetoName)
        trigInpFiles = trigVetoInpFiles.find_output_with_tag(injLessTag)
        if len(trigInpFiles) == 0:
            err_msg = "No input files found. Workflow would fail."
            raise ValueError(err_msg)
        trigInpFiles.append(dqSegFile[0])
        sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                              sqliteCombine1ExeTag,
                                              ifo=workflow.ifo_string,
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles, 
                                          workflow=workflow)
        sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine1Node)
        # Node has only one output file
        sqliteCombine1Out = sqliteCombine1Node.output_files[0]
        sqliteCombine1Outs.append(sqliteCombine1Out)

        if do_repop:
            repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
            repopCoincNode = repopCoincJob.create_node(workflow.analysis_time,
                                                       sqliteCombine1Out)
            repopCoincNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(repopCoincNode)
            # Node has only one output file
            repopCoincOut = repopCoincNode.output_files[0]
            repopCoincOuts.append(repopCoincOut)

        # Input file plumbing allowing for possible repop_coinc job
        clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
        # Cluster coincidences
        clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                            clusterCoincsExeTag,
                                            ifo=workflow.ifo_string, 
                                            out_dir=output_dir, 
                                            tags=currTags)
        clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
        clusterCoincsNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(clusterCoincsNode)
        # Node has only one output file
        clusterCoincsOut = clusterCoincsNode.output_files[0]
        clusterCoincsOuts.append(clusterCoincsOut)
        sqliteCombine2Inputs.append(clusterCoincsOut)

        # Do injection jobs
        for injTag in injectionTags:
            # Choose a label for clustering the jobs
            job_label = get_random_label()
            # Combine trig files first
            currTags = tags + [injTag, vetoTag]
            trigInpFiles = trigVetoInpFiles.find_output_with_tag(injTag)
            trigInpFiles.append(dqSegFile[0])
            injFile = injectionFiles.find_output_with_tag(injTag)
            assert (len(injFile) == 1)
            sqliteCombine1Job = sqliteCombine1Exe(workflow.cp,
                                                  sqliteCombine1ExeTag,
                                                  ifo=workflow.ifo_string,
                                                  out_dir=output_dir,
                                                  tags=currTags)
            sqliteCombine1Node = sqliteCombine1Job.create_node(
                                          workflow.analysis_time, trigInpFiles,
                                          injFile=injFile[0], injString=injTag,
                                          workflow=workflow)
            sqliteCombine1Node.add_profile('pegasus', 'label', job_label)
            workflow.add_node(sqliteCombine1Node)
            # Node has only one output file
            sqliteCombine1Out = sqliteCombine1Node.output_files[0]
            sqliteCombine1Outs.append(sqliteCombine1Out)

            if do_repop:
                repopCoincJob = repopCoincExe(workflow.cp,
                                          repopCoincExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,
                                          tags=currTags)
                repopCoincNode = repopCoincJob.create_node(
                                     workflow.analysis_time, sqliteCombine1Out)
                repopCoincNode.add_profile('pegasus', 'label', job_label)
                workflow.add_node(repopCoincNode)
                # Node has only one output file
                repopCoincOut = repopCoincNode.output_files[0]
                repopCoincOuts.append(repopCoincOut)

            # Input file plumbing allowing for possible repop_coinc job
            clusterCoincsIn = repopCoincOut if do_repop else sqliteCombine1Out
            # Cluster coincidences
            clusterCoincsJob = clusterCoincsExe(workflow.cp,
                                                clusterCoincsExeTag,
                                                ifo=workflow.ifo_string,
                                                out_dir=output_dir,
                                                tags=currTags)
            clusterCoincsNode = clusterCoincsJob.create_node(
                                       workflow.analysis_time, clusterCoincsIn)
            clusterCoincsNode.add_profile('pegasus', 'label', job_label)
            workflow.add_node(clusterCoincsNode)
            # Node has only one output file
            clusterCoincsOut = clusterCoincsNode.output_files[0]
            clusterCoincsOuts.append(clusterCoincsOut)
            sqliteCombine2Inputs.append(clusterCoincsOut)

        # Choose a new label for pegasus-clustering the jobs
        job_label = get_random_label()

        # Combine everything together and add veto file
        currTags = tags + [vetoTag]
        sqliteCombine2Job = sqliteCombine2Exe(workflow.cp, 
                                              sqliteCombine2ExeTag,
                                              ifo=workflow.ifo_string, 
                                              out_dir=output_dir,
                                              tags=currTags)
        sqliteCombine2Node = sqliteCombine2Job.create_node(
                                  workflow.analysis_time, sqliteCombine2Inputs)
        sqliteCombine2Node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(sqliteCombine2Node)
        sqliteCombine2Out = sqliteCombine2Node.output_files[0]
        sqliteCombine2Outs.append(sqliteCombine2Out)

        # Inj finding
        injFindJob = injFindExe(workflow.cp, injFindExeTag,
                                          ifo=workflow.ifo_string,
                                          out_dir=output_dir,tags=currTags)
        injFindNode = injFindJob.create_node(workflow.analysis_time,
                                                         sqliteCombine2Out)
        injFindNode.add_profile('pegasus', 'label', job_label)
        workflow.add_node(injFindNode)
        injFindOut = injFindNode.output_files[0]
        injFindOuts.append(injFindOut)


    return injFindOuts, sqliteCombine1Outs, clusterCoincsOuts,\
           sqliteCombine2Outs
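
A hedged sketch of consuming the return value. The inputs are assumed to come from earlier coincidence, injection and segment stages; the unpacking order follows the docstring (final, initial SQL, clustered SQL, combined SQL files).

# Hypothetical call; input FileLists come from earlier workflow stages.
final_files, initial_sql, clustered_sql, combined_sql = \
    setup_postprocprep_pipedown_workflow(workflow, coinc_files,
                                         'postprocprep',
                                         vetoFiles=veto_files,
                                         injLessTag='FULL_DATA',
                                         injectionTags=['BNSINJ001'],
                                         veto_cats=[2, 3, 4])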
Example #3
def get_coh_PTF_files(cp, ifos, run_dir, bank_veto=False, summary_files=False):
    """
    Retrieve files needed to run coh_PTF jobs within a PyGRB workflow

    Parameters
    ----------
    cp : pycbc.workflow.configuration.WorkflowConfigParser object
        The parsed configuration options of a pycbc.workflow.core.Workflow.
    ifos : str
        String containing the analysis interferometer IDs.
    run_dir : str
        The run directory, destination for retrieved files.
    bank_veto : Boolean
        If true, will retrieve the bank_veto_bank.xml file.
    summary_files : Boolean
        If true, will retrieve the summary page style files.

    Returns
    -------
    file_list : pycbc.workflow.FileList object
        A FileList containing the retrieved files.
    """
    if os.getenv("LAL_SRC") is None:
        raise ValueError("The environment variable LAL_SRC must be set to a "
                         "location containing the file lalsuite.git")
    else:
        lalDir = os.getenv("LAL_SRC")
        sci_seg = segments.segment(int(cp.get("workflow", "start-time")),
                                   int(cp.get("workflow", "end-time")))
        file_list = FileList([])

        # Bank veto
        if bank_veto:
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "bank_veto_bank.xml" % lalDir, "%s" % run_dir)
            bank_veto_url = "file://localhost%s/bank_veto_bank.xml" % run_dir
            bank_veto = File(ifos,
                             "bank_veto_bank",
                             sci_seg,
                             file_url=bank_veto_url)
            bank_veto.PFN(bank_veto.cache_entry.path, site="local")
            file_list.extend(FileList([bank_veto]))

        if summary_files:
            # summary.js file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.js" % lalDir, "%s" % run_dir)
            summary_js_url = "file://localhost%s/coh_PTF_html_summary.js" \
                             % run_dir
            summary_js = File(ifos,
                              "coh_PTF_html_summary_js",
                              sci_seg,
                              file_url=summary_js_url)
            summary_js.PFN(summary_js.cache_entry.path, site="local")
            file_list.extend(FileList([summary_js]))

            # summary.css file
            shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/" \
                        "coh_PTF_html_summary.css" % lalDir, "%s" % run_dir)
            summary_css_url = "file://localhost%s/coh_PTF_html_summary.css" \
                              % run_dir
            summary_css = File(ifos,
                               "coh_PTF_html_summary_css",
                               sci_seg,
                               file_url=summary_css_url)
            summary_css.PFN(summary_css.cache_entry.path, site="local")
            file_list.extend(FileList([summary_css]))

        return file_list
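
A hypothetical invocation with illustrative paths. As the body above requires, LAL_SRC must point at a lalsuite checkout and cp must define start-time and end-time in its [workflow] section.

import os

os.environ["LAL_SRC"] = "/path/to/lalsuite"   # illustrative location
style_files = get_coh_PTF_files(cp, 'H1L1', '/path/to/GRB_run_dir',
                                bank_veto=True, summary_files=True)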
Example #4
def setup_interval_coinc(workflow,
                         hdfbank,
                         trig_files,
                         veto_files,
                         veto_names,
                         out_dir,
                         tags=None):
    """
    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError(
            'This coincidence method only supports two ifo searches')

    findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp,
                                             'coinc',
                                             ifos=workflow.ifos,
                                             tags=tags,
                                             out_dir=out_dir)

    # Wall time knob and memory knob
    factor = int(
        workflow.cp.get_opt_tags('workflow-coincidence',
                                 'parallelization-factor', tags))

    stat_files = []
    for veto_file, veto_name in zip(veto_files, veto_names):
        bg_files = FileList()
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files,
                                                   hdfbank,
                                                   veto_file,
                                                   veto_name,
                                                   group_str,
                                                   tags=[veto_name,
                                                         str(i)])
            bg_files += coinc_node.output_files
            workflow.add_node(coinc_node)

        stat_files += [
            setup_statmap(workflow,
                          bg_files,
                          hdfbank,
                          out_dir,
                          tags=tags + [veto_name])
        ]

    logging.info('...leaving coincidence')
    return stat_files
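
The splitting works by handing each findtrigs job a 'slice/total' string; a small sketch with an illustrative parallelization-factor of 4 (the real value is read from the [workflow-coincidence] section above):

factor = 4   # illustrative value of 'parallelization-factor'
for i in range(factor):
    group_str = '%s/%s' % (i, factor)
    print group_str   # -> '0/4', '1/4', '2/4', '3/4'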
Example #5
def make_single_template_plots(workflow,
                               segs,
                               seg_name,
                               params,
                               out_dir,
                               inj_file=None,
                               exclude=None,
                               require=None,
                               tags=None,
                               params_str=None,
                               use_exact_inj_params=False):
    tags = [] if tags is None else tags
    makedir(out_dir)
    name = 'single_template_plot'
    secs = requirestr(workflow.cp.get_subsections(name), require)
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        for ifo in workflow.ifos:
            # Reanalyze the time around the trigger in each detector
            node = PlotExecutable(workflow.cp,
                                  'single_template',
                                  ifos=[ifo],
                                  out_dir=out_dir,
                                  tags=[tag] + tags).create_node()
            if use_exact_inj_params:
                node.add_opt('--use-params-of-closest-injection')
            else:
                node.add_opt('--mass1', "%.6f" % params['mass1'])
                node.add_opt('--mass2', "%.6f" % params['mass2'])
                node.add_opt('--spin1z', "%.6f" % params['spin1z'])
                node.add_opt('--spin2z', "%.6f" % params['spin2z'])
            # str(numpy.float64) restricts precision (e.g. to 2 d.p.), so
            # format the trigger time explicitly. BE CAREFUL WITH THIS!!!
            str_trig_time = '%.6f' % (params[ifo + '_end_time'])
            node.add_opt('--trigger-time', str_trig_time)
            node.add_input_opt('--inspiral-segments', segs[ifo])
            if inj_file is not None:
                node.add_input_opt('--injection-file', inj_file)
            node.add_opt('--segment-name', seg_name)
            node.new_output_file_opt(workflow.analysis_time,
                                     '.hdf',
                                     '--output-file',
                                     store_file=False)
            data = node.output_files[0]
            workflow += node
            # Make the plot for this trigger and detector
            node = PlotExecutable(workflow.cp,
                                  name,
                                  ifos=[ifo],
                                  out_dir=out_dir,
                                  tags=[tag] + tags).create_node()
            node.add_input_opt('--single-template-file', data)
            node.new_output_file_opt(workflow.analysis_time, '.png',
                                     '--output-file')
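            # Note: the literal quotes built into the title and caption below
            # keep each one a single command-line argument for the plot job.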
            title = "'%s SNR and chi^2 timeseries" % (ifo)
            if params_str is not None:
                title += " using %s" % (params_str)
            title += "'"
            node.add_opt('--plot-title', title)
            caption = "'The SNR and chi^2 timeseries around the injection"
            if params_str is not None:
                caption += " using %s" % (params_str)
            if use_exact_inj_params:
                caption += ". The injection itself was used as the template.'"
            else:
                caption += ". The template used has the following parameters: "
                caption += "mass1=%s, mass2=%s, spin1z=%s, spin2z=%s'"\
                       %(params['mass1'], params['mass2'], params['spin1z'],
                         params['spin2z'])
            node.add_opt('--plot-caption', caption)
            workflow += node
            files += node.output_files
    return files
Example #6
def setup_matchedfltr_dax_generated_multi(workflow,
                                          science_segs,
                                          datafind_outs,
                                          tmplt_banks,
                                          output_dir,
                                          injection_file=None,
                                          tags=None,
                                          link_to_tmpltbank=False,
                                          compatibility_mode=False):
    '''
    Setup matched-filter jobs that are generated as part of the workflow in
    which a single job reads in and generates triggers over multiple ifos.
    This module can support any matched-filter code that is similar in
    principle to pycbc_multi_inspiral or lalapps_coh_PTF_inspiral, but for
    new codes some additions are needed to define Executable and Job
    sub-classes (see jobutils.py).

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    science_segs : ifo-keyed dictionary of glue.segments.segmentlist instances
        The list of times that are being analysed in this workflow.
    datafind_outs : pycbc.workflow.core.FileList
        A FileList of the datafind files that are needed to obtain the
        data used in the analysis.
    tmplt_banks : pycbc.workflow.core.FileList
        A FileList of the template bank files that will serve as input
        in this stage.
    output_dir : path
        The directory in which output will be stored.
    injection_file : pycbc.workflow.core.File, optional (default=None)
        If given, the file containing the simulations to be sent to these
        jobs on the command line. If not given, no file will be sent.
    tags : list of strings (optional, default = None)
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.

    Returns
    -------
    inspiral_outs : pycbc.workflow.core.FileList
        A list of output files written by this stage. This *will not* contain
        any intermediate products produced within this stage of the workflow.
        If you require access to any intermediate products produced at this
        stage you can call the various sub-functions directly.
    '''
    if tags is None:
        tags = []
    # Need to get the exe to figure out what sections are analysed, what is
    # discarded etc. This should *not* be hardcoded, so using a new executable
    # will require a bit of effort here ....

    cp = workflow.cp
    ifos = science_segs.keys()
    match_fltr_exe = os.path.basename(cp.get('executables', 'inspiral'))

    # List for holding the output
    inspiral_outs = FileList([])

    logging.info("Setting up matched-filtering for %s." % (' '.join(ifos), ))

    if match_fltr_exe == 'lalapps_coh_PTF_inspiral':
        from pylal.legacy_ihope import select_legacy_matchedfilter_class
        exe_class = select_legacy_matchedfilter_class(match_fltr_exe)
        cp.set('inspiral', 'right-ascension', cp.get('workflow', 'ra'))
        cp.set('inspiral', 'declination', cp.get('workflow', 'dec'))
        cp.set('inspiral', 'sky-error', cp.get('workflow', 'sky-error'))
        cp.set('inspiral', 'trigger-time', cp.get('workflow', 'trigger-time'))
        cp.set('inspiral', 'block-duration',
               str(abs(science_segs[ifos[0]][0]) - \
                       2 * int(cp.get('inspiral', 'pad-data'))))

        job_instance = exe_class(workflow.cp,
                                 'inspiral',
                                 ifo=ifos,
                                 out_dir=output_dir,
                                 injection_file=injection_file,
                                 tags=tags)
        multi_ifo_coherent_job_setup(workflow,
                                     inspiral_outs,
                                     job_instance,
                                     science_segs,
                                     datafind_outs,
                                     output_dir,
                                     parents=tmplt_banks)
    else:
        # Select the appropriate class
        raise ValueError("Not currently supported.")
    return inspiral_outs
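
The function dispatches on the basename of the executable configured in the [executables] section; a minimal sketch with an illustrative path:

import os

exe_path = '/path/to/bin/lalapps_coh_PTF_inspiral'  # illustrative ini value
assert os.path.basename(exe_path) == 'lalapps_coh_PTF_inspiral'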
Example #7
def setup_tmpltbank_pregenerated(workflow, tags=None):
    '''
    Setup CBC workflow to use a pregenerated template bank.
    The bank given by the [workflow-tmpltbank] option
    tmpltbank-pregenerated-bank will be used as the input file for all
    matched-filtering jobs. If this option is
    present, workflow will assume that it should be used and not generate
    template banks within the workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    tmplt_banks : pycbc.workflow.core.FileList
        The FileList holding the details of the template bank.
    '''
    if tags is None:
        tags = []
    # Currently this uses the *same* fixed bank for all ifos.
    # Maybe we want to add capability to analyse separate banks in all ifos?

    # Set up class for holding the banks
    tmplt_banks = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_TMPLTBANK"
    try:
        # First check if we have a bank for all ifos
        pre_gen_bank = cp.get_opt_tags('workflow-tmpltbank',
                                       'tmpltbank-pregenerated-bank', tags)
        pre_gen_bank = resolve_url(pre_gen_bank)
        file_url = urlparse.urljoin('file:', urllib.pathname2url(pre_gen_bank))
        curr_file = File(workflow.ifos,
                         user_tag,
                         global_seg,
                         file_url,
                         tags=tags)
        curr_file.PFN(file_url, site='local')
        tmplt_banks.append(curr_file)
    except ConfigParser.Error:
        # Okay then I must have banks for each ifo
        for ifo in workflow.ifos:
            try:
                pre_gen_bank = cp.get_opt_tags(
                    'workflow-tmpltbank',
                    'tmpltbank-pregenerated-bank-%s' % ifo.lower(), tags)
                pre_gen_bank = resolve_url(pre_gen_bank)
                file_url = urlparse.urljoin('file:',
                                            urllib.pathname2url(pre_gen_bank))
                curr_file = File(ifo,
                                 user_tag,
                                 global_seg,
                                 file_url,
                                 tags=tags)
                curr_file.PFN(file_url, site='local')
                tmplt_banks.append(curr_file)

            except ConfigParser.Error:
                err_msg = "Cannot find pregerated template bank in section "
                err_msg += "[workflow-tmpltbank] or any tagged sections. "
                if tags:
                    tagged_secs = " ".join("[workflow-tmpltbank-%s]" \
                                           %(ifo,) for ifo in workflow.ifos)
                    err_msg += "Tagged sections are %s. " % (tagged_secs, )
                err_msg += "I looked for 'tmpltbank-pregenerated-bank' option "
                err_msg += "and 'tmpltbank-pregenerated-bank-%s'." % (ifo, )
                raise ConfigParser.Error(err_msg)

    return tmplt_banks
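
A minimal sketch of the two configuration layouts distinguished by the try/except above, using illustrative paths and the Python 2 ConfigParser that the example itself relies on:

import io
import ConfigParser

per_ifo_ini = u"""
[workflow-tmpltbank]
tmpltbank-pregenerated-bank-h1 = file:///path/to/H1-BANK.xml.gz
tmpltbank-pregenerated-bank-l1 = file:///path/to/L1-BANK.xml.gz
"""
# The single-bank layout would instead define just one option:
#     tmpltbank-pregenerated-bank = file:///path/to/H1L1-BANK.xml.gz
cp = ConfigParser.ConfigParser()
cp.readfp(io.StringIO(per_ifo_ini))
print cp.get('workflow-tmpltbank', 'tmpltbank-pregenerated-bank-h1')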
Example #8
def setup_tmpltbank_dax_generated(workflow,
                                  science_segs,
                                  datafind_outs,
                                  output_dir,
                                  tags=None,
                                  link_to_matchedfltr=True,
                                  compatibility_mode=False,
                                  psd_files=None):
    '''
    Setup template bank jobs that are generated as part of the CBC workflow.
    This function will add numerous jobs to the CBC workflow using
    configuration options from the .ini file. The following executables are
    currently supported:

    * lalapps_tmpltbank
    * pycbc_geom_nonspin_bank

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of glue.segmentlist objects
        scienceSegs[ifo] holds the science segments to be analysed for each
        ifo.
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.
    link_to_matchedfltr : boolean, optional (default=True)
        If this option is given, the job valid_times will be altered so that
        there will be one inspiral file for every template bank and they will
        cover the same time span. Note that this option must also be given
        during matched-filter generation to be meaningful.
    psd_files : pycbc.workflow.core.FileList
        The file list containing predefined PSDs, if provided.

    Returns
    --------
    tmplt_banks : pycbc.workflow.core.FileList
        The FileList holding the details of all the template bank jobs.
    '''
    if tags is None:
        tags = []
    cp = workflow.cp
    # Need to get the exe to figure out what sections are analysed, what is
    # discarded etc. This should *not* be hardcoded, so using a new executable
    # will require a bit of effort here ....

    ifos = science_segs.keys()
    tmplt_bank_exe = os.path.basename(cp.get('executables', 'tmpltbank'))
    # Select the appropriate class
    exe_class = select_tmpltbank_class(tmplt_bank_exe)

    if link_to_matchedfltr:
        # Use this to ensure that inspiral and tmpltbank jobs overlap. This
        # means that there will be 1 inspiral job for every 1 tmpltbank and
        # the data read in by both will overlap as much as possible. (If you
        # ask the template bank jobs to use 2000s of data for PSD estimation
        # and the matched-filter jobs to use 4000s, you will end up with
        # twice as many matched-filter jobs that still use 4000s to estimate a
        # PSD but then only generate triggers in the 2000s of data that the
        # template bank jobs ran on.)
        match_fltr_exe = os.path.basename(cp.get('executables', 'inspiral'))
        link_exe_instance = select_matchedfilter_class(match_fltr_exe)
    else:
        link_exe_instance = None

    # Set up class for holding the banks
    tmplt_banks = FileList([])

    # Template banks are currently independent between ifos, but might not
    # always be! Begin with the independent case; extend later (FIXME).
    for ifo in ifos:
        job_instance = exe_class(workflow.cp,
                                 'tmpltbank',
                                 ifo=ifo,
                                 out_dir=output_dir,
                                 tags=tags)
        # Check for the write_psd flag
        if cp.has_option_tags("workflow-tmpltbank", "tmpltbank-write-psd-file",
                              tags):
            job_instance.write_psd = True
        else:
            job_instance.write_psd = False

        if link_exe_instance:
            link_job_instance = link_exe_instance(cp,
                                                  'inspiral',
                                                  ifo=ifo,
                                                  out_dir=output_dir,
                                                  tags=tags)
        else:
            link_job_instance = None
        sngl_ifo_job_setup(workflow,
                           ifo,
                           tmplt_banks,
                           job_instance,
                           science_segs[ifo],
                           datafind_outs,
                           link_job_instance=link_job_instance,
                           allow_overlap=True,
                           compatibility_mode=compatibility_mode)
    return tmplt_banks
Example #9
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    datafind_filelist = FileList([])
    prev_file = None
    for cache in datafindcache_list:
        curr_ifo = cache.ifo
        for frame in cache:
            # Don't add a new workflow file entry for this frame if it is a
            # duplicate. These are assumed to be returned in time order.
            if prev_file:
                prev_name = prev_file.cache_entry.url.split('/')[-1]
                this_name = frame.url.split('/')[-1]
                if prev_name == this_name:
                    continue

            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost', 'file://')

            currFile = File(curr_ifo,
                            frame.description,
                            frame.segment,
                            file_url=frame.url,
                            use_tmp_subdirs=True)
            if frame.url.startswith('file://'):
                currFile.PFN(frame.url, site='local')
                if frame.url.startswith(
                        'file:///cvmfs/oasis.opensciencegrid.org/'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    currFile.PFN(frame.url, site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'root://xrootd-local.unl.edu/user/'),
                                 site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://red-gridftp.unl.edu/user/'),
                                 site='osg')
                    currFile.PFN(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://ldas-grid.ligo.caltech.edu/hdfs/'),
                                 site='osg')
            else:
                currFile.PFN(frame.url, site='notlocal')
            datafind_filelist.append(currFile)
            prev_file = currFile
    return datafind_filelist
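
A hedged usage sketch with an illustrative path: read a LAL cache file with glue and convert it. The ifo attribute is set by hand here; in a real workflow the datafind module attaches it to each returned Cache.

from glue import lal

with open('/path/to/H1_frames.lcf') as cache_fp:   # hypothetical cache file
    cache = lal.Cache.fromfile(cache_fp)
cache.ifo = 'H1'   # convert_cachelist_to_filelist reads cache.ifo
frame_files = convert_cachelist_to_filelist([cache])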
Example #10
def make_inference_single_parameter_plots(workflow,
                                          inference_file,
                                          output_dir,
                                          variable_args=None,
                                          samples_name="inference_samples",
                                          acf_name="inference_acf",
                                          acl_name="inference_acl",
                                          analysis_seg=None,
                                          tags=None):
    """ Sets up single-parameter plots for inference workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    inference_file: pycbc.workflow.File
        The file with posterior samples.
    output_dir: str
        The directory to store result plots and files.
    variable_args : list
        A list of parameters to use instead of those in [variable_args].
    samples_name: str
        The name in the [executables] section of the configuration file
        to use for the plot that shows all samples.
    acf_name: str
        The name in the [executables] section of the configuration file
        to use for the autocorrelation function plot.
    acl_name: str
        The name in the [executables] section of the configuration file
        to use for the autocorrelation length histogram.
    analysis_segs: {None, glue.segments.Segment}
       The segment this job encompasses. If None then use the total analysis
       time from the workflow.
    tags: {None, optional}
        Tags to add to the inference executables.

    Returns
    -------
    files: pycbc.workflow.FileList
        A list of result and output files. 
    """

    # default values
    tags = [] if tags is None else tags
    analysis_seg = workflow.analysis_time \
                       if analysis_seg is None else analysis_seg

    # make the directory that will contain the output files
    makedir(output_dir)

    # list of all output files
    files = FileList()

    # make a set of plots for each parameter
    for arg in variable_args:

        # plot posterior distribution
        corner_files = make_inference_posterior_plot(workflow,
                                                     inference_file,
                                                     output_dir,
                                                     variable_args=[arg],
                                                     analysis_seg=analysis_seg,
                                                     tags=tags + [arg])

        # make a node for plotting all the samples for each walker
        samples_node = PlotExecutable(workflow.cp,
                                      samples_name,
                                      ifos=workflow.ifos,
                                      out_dir=output_dir,
                                      tags=tags + [arg]).create_node()
        samples_node.add_input_opt("--input-file", inference_file)
        samples_node.new_output_file_opt(analysis_seg, ".png", "--output-file")
        samples_node.add_opt("--parameters", arg)

        # make node for plotting the autocorrelation function for each walker
        acf_node = PlotExecutable(workflow.cp,
                                  acf_name,
                                  ifos=workflow.ifos,
                                  out_dir=output_dir,
                                  tags=tags + [arg]).create_node()
        acf_node.add_input_opt("--input-file", inference_file)
        acf_node.new_output_file_opt(analysis_seg, ".png", "--output-file")
        acf_node.add_opt("--parameters", arg)

        # make node for plotting the autocorrelation length histogram
        acl_node = PlotExecutable(workflow.cp,
                                  acl_name,
                                  ifos=workflow.ifos,
                                  out_dir=output_dir,
                                  tags=tags + [arg]).create_node()
        acl_node.add_input_opt("--input-file", inference_file)
        acl_node.new_output_file_opt(analysis_seg, ".png", "--output-file")
        acl_node.add_opt("--parameters", arg)

        # add nodes to workflow
        workflow += samples_node
        workflow += acf_node
        workflow += acl_node

        # add files to output files list
        files += corner_files
        files += samples_node.output_files
        files += acf_node.output_files
        files += acl_node.output_files

    return files
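
A hypothetical call from an inference results setup: inference_file would be the samples file produced by the sampler stage, and the parameter list is illustrative.

plot_files = make_inference_single_parameter_plots(
    workflow, inference_file, 'plots/inference',
    variable_args=['mass1', 'mass2', 'distance'])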
Example #11
def setup_coincidence_workflow_ligolw_thinca(workflow,
                                             segsList,
                                             timeSlideFiles,
                                             inspiral_outs,
                                             output_dir,
                                             veto_cats=[2, 3, 4],
                                             tags=[],
                                             timeSlideTags=None,
                                             parallelize_split_input=False):
    """
    This function is used to set up a single-stage ihope-style coincidence
    stage of the workflow using ligolw_sstinca (or compatible code!).

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The workflow instance that the coincidence jobs will be added to.
    segsList : pycbc.workflow.core.FileList
        The list of files returned by workflow's segment module that contains
        pointers to all the segment files generated in the workflow. If the
        coincidence code will be applying the data quality vetoes, then this
        will be used to ensure that the codes get the necessary input to do
        this.
    timeSlideFiles : pycbc.workflow.core.FileList
        A FileList of the timeSlide input files that are needed to
        determine what time sliding needs to be done. One of the timeSlideFiles
        will normally be "zero-lag only", the others containing time slides
        used to facilitate background computations later in the workflow.
    inspiral_outs : pycbc.workflow.core.FileList
        A FileList of the matched-filter module output that is used as
        input to the coincidence codes running at this stage.
    output_dir : path
        The directory in which coincidence output will be stored.
    veto_cats : list of ints (optional, default = [2,3,4])
        Veto categories that will be applied in the coincidence jobs. If this
        takes the default value the code will run data quality at cumulative 
        categories 2, 3 and 4. Note that if we change the flag definitions to
        be non-cumulative then this option will need to be revisited.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    timeSlideTags : list of strings (optional, default = None)
        A list of the tags corresponding to the timeSlideFiles that are to be
        used in this call to the module. This can be used to ensure that the
        injection runs do no time sliding, but the no-injection runs do perform
        time slides (or vice-versa if you prefer!).

    Returns
    --------
    ligolwThincaOuts : pycbc.workflow.core.FileList
        A list of the output files generated from ligolw_sstinca.
    ligolwAddOuts : pycbc.workflow.core.FileList
        A list of the output files generated from ligolw_add.
    """
    from pylal import ligolw_cafe

    logging.debug("Entering coincidence module.")
    cp = workflow.cp
    ifoString = workflow.ifo_string

    # setup code for each veto_category

    coinc_outs = FileList([])
    other_outs = {}

    if not timeSlideTags:
        # Get all sections by looking in ini file, use all time slide files.
        timeSlideTags = [(sec.split('-')[-1]).upper()
                         for sec in workflow.cp.sections()
                         if sec.startswith('tisi-')]

    if parallelize_split_input:
        # Want to split all input jobs according to their JOB%d tag.
        # This matches any string that is the letters JOB followed by some
        # numbers and nothing else.
        inspiral_outs_dict = {}
        regex_match = re.compile(r'JOB([0-9]+)\Z')
        for file in inspiral_outs:
            matches = [regex_match.match(tag) for tag in file.tags]
            # Remove non matching entries
            matches = [i for i in matches if i is not None]
            # Must have one entry
            if len(matches) == 0:
                warn_msg = "I was asked to parallelize over split inspiral "
                warn_msg += "files at the coincidence stage, but at least one "
                warn_msg += "input file does not have a JOB\%d tag indicating "
                warn_msg += "that it was split. Assuming that I do not have "
                warn_msg += "split input files and turning "
                warn_msg += "parallelize_split_input off."
                logging.warn(warn_msg)
                parallelize_split_input = False
                break
            if len(matches) > 1:
                err_msg = "One of my input files has two tags fitting JOB\%d "
                err_msg += "this means I cannot tell which split job this "
                err_msg += "file is from."
                raise ValueError(err_msg)
            # Extract the job ID
            job_id = int(matches[0].group(1))
            if not inspiral_outs_dict.has_key(job_id):
                inspiral_outs_dict[job_id] = FileList([])
            inspiral_outs_dict[job_id].append(file)
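        # Note: the 'else' below belongs to the for loop above -- it only
        # runs if the loop completed without hitting the 'break'.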
        else:
            # If I got through all the files I want to sort the dictionaries so
            # that file with key a and index 3 is the same file as key b and
            # index 3 other than the tag is JOBA -> JOBB ... ie. it has used
            # a different part of the template bank.
            sort_lambda = lambda x: (x.ifo_string, x.segment,
                                     x.tagged_description)
            for key in inspiral_outs_dict.keys():
                inspiral_outs_dict[key].sort(key=sort_lambda)
            # These should be in ascending order, so I can assume the existence
            # of a JOB0 tag
            inspiral_outs = inspiral_outs_dict[0]
            for index, file in enumerate(inspiral_outs):
                # Store the index in the file for quicker mapping later
                file.thinca_index = index
    else:
        inspiral_outs_dict = None

    for timeSlideTag in timeSlideTags:
        # Get the time slide file from the inputs
        tisiOutFile = timeSlideFiles.find_output_with_tag(timeSlideTag)
        if not len(tisiOutFile) == 1:
            errMsg = "If you are seeing this, something batshit is going on!"
            if len(tisiOutFile) == 0:
                errMsg = "No time slide files found matching %s." \
                                                                %(timeSlideTag)
            if len(tisiOutFile) > 1:
                errMsg = "More than one time slide files match %s." \
                                                                %(timeSlideTag)
            raise ValueError(errMsg)
        tisiOutFile = tisiOutFile[0]

        # Next we run ligolw_cafe. This is responsible for
        # identifying what times will be used for the ligolw_thinca jobs and
        # what files are needed for each. If doing time sliding there
        # will be some triggers read into multiple jobs
        cacheInspOuts = inspiral_outs.convert_to_lal_cache()
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "maximum-extent", tags):
            max_extent = float(
                workflow.cp.get_opt_tags("workflow-coincidence",
                                         "maximum-extent", tags))
        else:
            # hard-coded default value for extent of time in a single job
            max_extent = 3600
        logging.debug("Calling into cafe.")
        time_slide_table = lsctables.TimeSlideTable.get_table(\
                ligolw_utils.load_filename(tisiOutFile.storage_path,
                                 gz=tisiOutFile.storage_path.endswith(".gz"),
                                 contenthandler=ContentHandler,
                                 verbose=False))
        time_slide_table.sync_next_id()
        time_slide_dict = time_slide_table.as_dict()

        cafe_seglists, cafe_caches = ligolw_cafe.ligolw_cafe(
            cacheInspOuts,
            time_slide_dict.values(),
            extentlimit=max_extent,
            verbose=False)
        logging.debug("Done with cafe.")

        # Take the combined seglist file
        dqSegFile = segsList.find_output_with_tag(
            'COMBINED_CUMULATIVE_SEGMENTS')
        if not len(dqSegFile) == 1:
            errMsg = "Did not find exactly 1 data quality file."
            print len(dqSegFile), dqSegFile
            raise ValueError(errMsg)
        dqSegFile = dqSegFile[0]

        # Set up llwadd job
        llwadd_tags = [timeSlideTag] + tags
        ligolwadd_job = LigolwAddExecutable(cp,
                                            'llwadd',
                                            ifo=ifoString,
                                            out_dir=output_dir,
                                            tags=llwadd_tags)
        ligolwAddOuts = FileList([])

        # This flag will add a clustering job after ligolw_thinca
        if workflow.cp.has_option_tags("workflow-coincidence",
                                       "coincidence-post-cluster",
                                       llwadd_tags):
            coinc_post_cluster = True
        else:
            coinc_post_cluster = False

        # Do the global job setup for each veto category
        ligolwthinca_job = {}
        cluster_job = {}
        thinca_tags = {}
        for category in veto_cats:
            logging.debug("Preparing %s %s" % (timeSlideTag, category))
            dqVetoName = 'VETO_CAT%d_CUMULATIVE' % (category)
            # FIXME: Should we resolve this now?
            # FIXME: Here we set the dqVetoName to be compatible with pipedown
            #        For pipedown must put the slide identifier first and
            #        dqVetoName last.
            pipedownDQVetoName = 'CAT_%d_VETO' % (category)
            curr_thinca_job_tags = [timeSlideTag] + tags + [pipedownDQVetoName]
            thinca_tags[category] = curr_thinca_job_tags
            # Set up jobs for ligolw_thinca
            ligolwthinca_job[category] = LigolwSSthincaExecutable(
                cp,
                'thinca',
                ifo=ifoString,
                out_dir=output_dir,
                dqVetoName=dqVetoName,
                tags=curr_thinca_job_tags)
            if coinc_post_cluster:
                cluster_job[category] = SQLInOutExecutable(
                    cp,
                    'pycbccluster',
                    ifo=ifoString,
                    out_dir=output_dir,
                    tags=curr_thinca_job_tags)

        for idx, cafe_cache in enumerate(cafe_caches):
            ligolwAddOuts = FileList([])
            ligolwThincaOuts = FileList([])
            ligolwThincaLikelihoodOuts = FileList([])
            ligolwClusterOuts = FileList([])

            if not len(cafe_cache.objects):
                raise ValueError("One of the cache objects contains no files!")

            # Determine segments to accept coincidences.
            # If the cache is not the first or last in the time series, look
            # at the neighbouring caches and check whether their extents
            # touch. If they do, the caches are adjacent, and the time where
            # they meet is used as a bound for accepting coincidences. If
            # they are not adjacent, there is no bound for accepting
            # coincidences.
            coincStart, coincEnd = None, None
            if idx and (cafe_cache.extent[0]
                        == cafe_caches[idx - 1].extent[1]):
                coincStart = cafe_cache.extent[0]
            if idx + 1 < len(cafe_caches) and \
                        (cafe_cache.extent[1] == cafe_caches[idx+1].extent[0]):
                coincEnd = cafe_cache.extent[1]
            coincSegment = (coincStart, coincEnd)

            # Need to create a list of the File(s) contained in the cache.
            # Assume that if we have partitioned input then if *one* job in the
            # partitioned input is an input then *all* jobs will be.
            if not parallelize_split_input:
                inputTrigFiles = FileList([])
                for cache_entry in cafe_cache.objects:
                    inputTrigFiles.append(cache_entry.workflow_file)

                llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                # Now we can create the nodes
                node = ligolwadd_job.create_node(cafe_cache.extent, llw_files)
                ligolwAddFile = node.output_files[0]
                ligolwAddOuts.append(ligolwAddFile)
                workflow.add_node(node)
                for category in veto_cats:
                    node = ligolwthinca_job[category].create_node(\
                                cafe_cache.extent, coincSegment, ligolwAddFile)
                    ligolwThincaOuts += \
                        node.output_files.find_output_without_tag('DIST_STATS')
                    ligolwThincaLikelihoodOuts += \
                           node.output_files.find_output_with_tag('DIST_STATS')
                    workflow.add_node(node)
                    if coinc_post_cluster:
                        node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                        ligolwClusterOuts += node.output_files
                        workflow.add_node(node)
            else:
                for key in inspiral_outs_dict.keys():
                    curr_tags = ["JOB%d" % (key)]
                    curr_list = inspiral_outs_dict[key]
                    inputTrigFiles = FileList([])
                    for cache_entry in cafe_cache.objects:
                        inputTrigFiles.append(
                            curr_list[cache_entry.workflow_file.thinca_index])

                    llw_files = inputTrigFiles + [dqSegFile] + [tisiOutFile]

                    # Now we can create the nodes
                    node = ligolwadd_job.create_node(cafe_cache.extent,
                                                     llw_files,
                                                     tags=curr_tags)
                    ligolwAddFile = node.output_files[0]
                    ligolwAddOuts.append(ligolwAddFile)
                    workflow.add_node(node)
                    write_likelihood = workflow.cp.has_option_tags(
                        "workflow-coincidence",
                        "coincidence-write-likelihood",
                        curr_thinca_job_tags)
                    for category in veto_cats:
                        node = ligolwthinca_job[category].create_node(\
                             cafe_cache.extent, coincSegment, ligolwAddFile,
                             tags=curr_tags, write_likelihood=write_likelihood)
                        ligolwThincaOuts += \
                               node.output_files.find_output_without_tag(\
                                                                  'DIST_STATS')
                        ligolwThincaLikelihoodOuts += \
                              node.output_files.find_output_with_tag(\
                                                                  'DIST_STATS')
                        workflow.add_node(node)
                        if coinc_post_cluster:
                            node = cluster_job[category].create_node(\
                                       cafe_cache.extent, ligolwThincaOuts[-1])
                            ligolwClusterOuts += node.output_files
                            workflow.add_node(node)

            other_returns = {}
            other_returns['LIGOLW_ADD'] = ligolwAddOuts
            other_returns['DIST_STATS'] = ligolwThincaLikelihoodOuts

            if coinc_post_cluster:
                main_return = ligolwClusterOuts
                other_returns['THINCA'] = ligolwThincaOuts
            else:
                main_return = ligolwThincaOuts

            logging.debug("Done")
            coinc_outs.extend(main_return)
            for key, file_list in other_returns.items():
                if key in other_outs:
                    other_outs[key].extend(file_list)
                else:
                    other_outs[key] = file_list
    return coinc_outs, other_outs
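
The coincidence-boundary test in the loop above is worth seeing in isolation: a cache only gets a bound on the side where it exactly touches its neighbour in the timeseries. Below is a minimal, standalone sketch of the same adjacency logic, using hypothetical (start, end) extent tuples in place of real cafe_caches objects.

# Standalone sketch of the adjacency test used above. The extents list is
# hypothetical stand-in data, not real cafe_caches output.
extents = [(0, 100), (100, 200), (250, 300)]

for idx, (start, end) in enumerate(extents):
    coinc_start, coinc_end = None, None
    # Bounded below only if the previous cache ends exactly where we start
    if idx > 0 and extents[idx - 1][1] == start:
        coinc_start = start
    # Bounded above only if the next cache starts exactly where we end
    if idx + 1 < len(extents) and extents[idx + 1][0] == end:
        coinc_end = end
    print(idx, (coinc_start, coinc_end))
    # prints: 0 (None, 100); 1 (100, None); 2 (None, None)
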
Example #12
def setup_postproc_pipedown_workflow(workflow,
                                     trigger_files,
                                     summary_xml_files,
                                     output_dir,
                                     tags=[],
                                     veto_cats=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    pipedown-style setup. This consists of running compute_durations to
    determine and store the analysis time (foreground and background). It then
    runs cfar jobs to determine the false alarm rate for all triggers
    (simulations or otherwise) in the input database.

    Pipedown expects to take as input (at this stage) a single database
    containing all triggers. This sub-module follows that same idea, so
    len(trigger_files) must equal 1 (for every DQ category that we will run).

    Parameters
    ----------  
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trigger_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases at CAT_1,2,3... that
        will be used to calculate FARs.
    summary_xml_files : pycbc.workflow.core.FileList (required)
        A FileList of the output of the analysislogging_utils module.
        For pipedown-style post-processing this should be one file containing
        a segment table holding the single detector analysed times.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.
    veto_cats : list of integers (default = [], non-empty list required)
        Decide which veto category levels should be used in post-processing.
        For example tell the workflow to only generate results at cumulative
        categories 2, 3 and 4 by supplying [2,3,4] here.

    Returns
    --------
    final_files : pycbc.workflow.core.FileList
        A list of the final SQL databases containing computed FARs.
    """
    if not veto_cats:
        raise ValueError("A non-empty list of veto categories is required.")
    if not len(summary_xml_files) == 1:
        errMsg = "I need exactly one summaryXML file, got %d." \
                                                     %(len(summary_xml_files),)
        raise ValueError(errMsg)

    # Setup needed exe classes
    compute_durations_exe_tag = workflow.cp.get_opt_tags(
        "workflow-postproc", "postproc-computedurations-exe", tags)
    compute_durations_exe = select_generic_executable(
        workflow, compute_durations_exe_tag)
    cfar_exe_tag = workflow.cp.get_opt_tags("workflow-postproc",
                                            "postproc-cfar-exe", tags)
    cfar_exe = select_generic_executable(workflow, cfar_exe_tag)

    comp_durations_outs = FileList([])
    cfar_outs = FileList([])

    for cat in veto_cats:

        veto_tag = 'CUMULATIVE_CAT_%d' % (cat)
        trig_input_files = trigger_files.find_output_with_tag(veto_tag)
        if not len(trig_input_files) == 1:
            err_msg = "Did not find exactly 1 database input file."
            raise ValueError(err_msg)

        curr_tags = tags + [veto_tag]

        # Choose a label for clustering the jobs
        job_label = get_random_label()

        # Start with compute durations
        computeDurationsJob = compute_durations_exe(workflow.cp,
                                                    compute_durations_exe_tag,
                                                    ifo=workflow.ifo_string,
                                                    out_dir=output_dir,
                                                    tags=curr_tags)
        compute_durations_node = computeDurationsJob.create_node(
            workflow.analysis_time, trig_input_files[0], summary_xml_files[0])
        compute_durations_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(compute_durations_node)

        # Node has only one output file
        compute_durations_out = compute_durations_node.output_files[0]
        comp_durations_outs.append(compute_durations_out)

        # Add the calculate FAR (cfar) job
        cfar_job = cfar_exe(workflow.cp,
                            cfar_exe_tag,
                            ifo=workflow.ifo_string,
                            out_dir=output_dir,
                            tags=curr_tags)
        cfar_node = cfar_job.create_node(workflow.analysis_time,
                                         compute_durations_out)
        cfar_node.add_profile('pegasus', 'label', job_label)
        workflow.add_node(cfar_node)

        # Node has only one output file
        cfar_out = cfar_node.output_files[0]
        cfar_outs.append(cfar_out)

    return cfar_outs
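
The per-category input selection in this function follows a simple pattern: build a CUMULATIVE_CAT_N tag for each veto category and demand exactly one matching file. A minimal standalone sketch of that pattern, with plain strings standing in for workflow File objects:

# Hypothetical tagged filenames standing in for a pycbc FileList
tagged_files = ['H1L1-LLWADD_CUMULATIVE_CAT_2.sqlite',
                'H1L1-LLWADD_CUMULATIVE_CAT_3.sqlite']

for cat in [2, 3]:
    veto_tag = 'CUMULATIVE_CAT_%d' % (cat,)
    matches = [name for name in tagged_files if veto_tag in name]
    if len(matches) != 1:
        raise ValueError("Did not find exactly 1 database input file.")
    print(veto_tag, '->', matches[0])
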
Example #13
    def create_node(self,
                    trig_files=None,
                    segment_dir=None,
                    out_tags=[],
                    tags=[]):
        node = Node(self)

        if not trig_files:
            raise ValueError("%s must be supplied with trigger files" %
                             self.name)

        # Data options
        pad_data = self.cp.get('inspiral', 'pad-data')
        if pad_data is None:
            raise ValueError("The option pad-data is a required option of "
                             "%s. Please check the ini file." % self.name)

        num_trials = int(self.cp.get("trig_combiner", "num-trials"))
        trig_name = self.cp.get('workflow', 'trigger-name')
        if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
                self.cp.has_option_tag('inspiral', 'do-short-slides',
                                       'coherent_no_injections'):
            node.add_opt('--short-slides')

        node.add_opt('--grb-name', trig_name)

        node.add_opt('--pad-data', pad_data)
        node.add_opt('--segment-length',
                     self.cp.get('inspiral', 'segment-duration'))
        node.add_opt('--ifo-tag', self.ifos)
        node.add_opt('--user-tag', 'INSPIRAL')

        # Set input / output options
        node.add_input_list_opt('--input-files', trig_files)

        node.add_opt('--segment-dir', segment_dir)
        node.add_opt('--output-dir', self.out_dir)

        out_files = FileList([])
        for out_tag in out_tags:
            out_file = File(self.ifos,
                            'INSPIRAL',
                            trig_files[0].segment,
                            directory=self.out_dir,
                            extension='xml.gz',
                            tags=["GRB%s" % trig_name, out_tag],
                            store_file=self.retain_files)
            out_files.append(out_file)

        for trial in range(1, num_trials + 1):
            out_file = File(self.ifos,
                            'INSPIRAL',
                            trig_files[0].segment,
                            directory=self.out_dir,
                            extension='xml.gz',
                            tags=["GRB%s" % trig_name,
                                  "OFFTRIAL_%d" % trial],
                            store_file=self.retain_files)
            out_files.append(out_file)

        node.add_profile('condor', 'request_cpus', self.num_threads)

        return node, out_files
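
The output files produced by this node follow a fixed tagging scheme: one file per requested out_tag, plus one per off-source trial. A standalone sketch of just the tag construction, with hypothetical values in place of the ini-file options:

# Hypothetical values; the real ones come from the [trig_combiner] and
# [workflow] sections of the ini file.
trig_name = "170817A"
out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
num_trials = 2

all_tag_sets = [["GRB%s" % trig_name, tag] for tag in out_tags]
all_tag_sets += [["GRB%s" % trig_name, "OFFTRIAL_%d" % trial]
                 for trial in range(1, num_trials + 1)]
print(all_tag_sets)   # five tag pairs, one per output file
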
Example #14
def convert_cachelist_to_filelist(datafindcache_list):
    """
    Take as input a list of glue.lal.Cache objects and return a pycbc FileList
    containing all frames within those caches.

    Parameters
    -----------
    datafindcache_list : list of glue.lal.Cache objects
        The list of cache files to convert.

    Returns
    --------
    datafind_filelist : FileList of frame File objects
        The list of frame files.
    """
    prev_file = None
    prev_name = None
    this_name = None

    datafind_filelist = FileList([])

    for cache in datafindcache_list:
        # sort the cache into time sequential order
        cache.sort()
        curr_ifo = cache.ifo
        for frame in cache:
            # Pegasus doesn't like "localhost" in URLs.
            frame.url = frame.url.replace('file://localhost', 'file://')

            # Create one File() object for each unique frame file that we
            # get back in the cache.
            if prev_file:
                prev_name = os.path.basename(prev_file.cache_entry.url)
                this_name = os.path.basename(frame.url)

            if (prev_file is None) or (prev_name != this_name):
                currFile = File(curr_ifo, frame.description,
                    frame.segment, file_url=frame.url, use_tmp_subdirs=True)
                datafind_filelist.append(currFile)
                prev_file = currFile

            # Populate the PFNs for the File() we just created
            if frame.url.startswith('file://'):
                currFile.add_pfn(frame.url, site='local')
                if frame.url.startswith(
                    'file:///cvmfs/oasis.opensciencegrid.org/ligo/frames'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    currFile.add_pfn(frame.url, site='osg')
                    currFile.add_pfn(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'root://xrootd-local.unl.edu/user/'), site='osg')
                    currFile.add_pfn(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://red-gridftp.unl.edu/user/'), site='osg')
                    currFile.add_pfn(frame.url.replace(
                        'file:///cvmfs/oasis.opensciencegrid.org/',
                        'gsiftp://ldas-grid.ligo.caltech.edu/hdfs/'), site='osg')
                elif frame.url.startswith(
                    'file:///cvmfs/gwosc.osgstorage.org/'):
                    # Datafind returned a URL valid on the osg as well
                    # so add the additional PFNs to allow OSG access.
                    for s in ['osg', 'orangegrid', 'osgconnect']:
                        currFile.add_pfn(frame.url, site=s)
                        currFile.add_pfn(frame.url, site="{}-scratch".format(s))
            else:
                currFile.add_pfn(frame.url, site='notlocal')

    return datafind_filelist
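
The loop above only creates a new File when the frame's basename differs from the previous entry, which removes consecutive duplicates from the time-sorted cache. The same filter in a standalone sketch, with hypothetical frame URLs:

import os

# Hypothetical frame URLs, already in time-sequential order
urls = ['file:///data/H-FRAME-1000-32.gwf',
        'file:///data/H-FRAME-1000-32.gwf',   # duplicate cache entry
        'file:///data/H-FRAME-1032-32.gwf']

unique = []
prev_name = None
for url in urls:
    this_name = os.path.basename(url)
    # Only keep the frame if it differs from the previous entry
    if prev_name is None or prev_name != this_name:
        unique.append(url)
    prev_name = this_name

print(unique)   # the duplicate middle entry is dropped
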
Example #15
def setup_analysislogging(workflow, segs_list, insps, args, output_dir,
                          program_name="workflow", tags=[]):
    """
    This module sets up the analysis logging xml file that contains the
    following information:

    * Command line arguments that the code was run with
    * Segment list of times marked as SCIENCE
    * Segment list of times marked as SCIENCE and "OK", i.e. not CAT_1 vetoed
    * Segment list of times marked as SCIENCE_OK and present on the cluster
    * The times that will be analysed by the matched-filter jobs

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance.
    segs_list : pycbc.workflow.core.FileList
        A list of Files containing the information needed to generate the
        segments above. For segments generated at run time the associated
        segmentlist is a property of this object.
    insps : pycbc.workflow.core.FileList
        The output files from the matched-filtering module. Used to identify
        what times have been analysed in this workflow.
    args : argparse.Namespace
        The parsed command-line arguments; these are recorded in the
        process_params table of the output file.
    output_dir : path
        Directory to output any files to.
    program_name : string (optional, default = "workflow")
        The program name to stick in the process/process_params tables.
    tags : list (optional, default = [])
        If given restrict to considering inspiral and segment files that
        are tagged with all tags in this list.
    """
    logging.info("Entering analysis logging module.")
    make_analysis_dir(output_dir)

    # Construct the summary XML file
    outdoc = ligolw.Document()
    outdoc.appendChild(ligolw.LIGO_LW())

    # Add process and process_params tables
    proc_id = process.register_to_xmldoc(outdoc, program_name,
                                            vars(args) ).process_id

    # Now add the various segment lists to this file
    summ_segs = segmentlist([workflow.analysis_time])
    
    # If tags is given filter by tags
    if tags:
        for tag in tags:
            segs_list = segs_list.find_output_with_tag(tag)
            insps = insps.find_output_with_tag(tag)

    for ifo in workflow.ifos:
        # Let's get the segment lists we need
        seg_ifo_files = segs_list.find_output_with_ifo(ifo)
        # SCIENCE
        sci_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE')
        if len(sci_seg_file) == 1:
            sci_seg_file = sci_seg_file[0]
            sci_segs = sci_seg_file.segmentList
            sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id,
                                                   ifo, "CBC_WORKFLOW_SCIENCE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id,
                                                                      sci_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id, sci_def_id,
                                                         summ_segs, comment='')
        elif sci_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            #        may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_seg_file), ifo, 'SCIENCE')
            #raise ValueError(err_msg)

        # SCIENCE_OK
        sci_ok_seg_file = seg_ifo_files.find_output_with_tag('SCIENCE_OK')
        if len(sci_ok_seg_file) == 1:
            sci_ok_seg_file = sci_ok_seg_file[0]
            sci_ok_segs = sci_ok_seg_file.segmentList
            sci_ok_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                       proc_id, ifo, "CBC_WORKFLOW_SCIENCE_OK", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id, sci_ok_def_id,
                                                                   sci_ok_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                          sci_ok_def_id, summ_segs, comment='')
        elif sci_ok_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            #        may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_ok_seg_file), ifo, 'SCIENCE_OK')
            #raise ValueError(err_msg)


        # SCIENCE_AVAILABLE
        sci_available_seg_file = seg_ifo_files.find_output_with_tag(\
                                                           'SCIENCE_AVAILABLE')
        if len(sci_available_seg_file) == 1:
            sci_available_seg_file = sci_available_seg_file[0]
            sci_available_segs = sci_available_seg_file.segmentList
            sci_available_def_id = segmentdb_utils.add_to_segment_definer(\
                        outdoc, proc_id, ifo, "CBC_WORKFLOW_SCIENCE_AVAILABLE", 0)
            segmentdb_utils.add_to_segment(outdoc, proc_id,
                                      sci_available_def_id, sci_available_segs)
            segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                   sci_available_def_id, summ_segs, comment='')
        elif sci_available_seg_file:
            # FIXME: While the segment module is still fractured (#127) this
            #        may not work. Please update when #127 is resolved
            pass
            #err_msg = "Got %d files matching %s and %s. Expected 1 or 0." \
            #          %(len(sci_available_seg_file), ifo, 'SCIENCE_AVAILABLE')
            #raise ValueError(err_msg)

        # ANALYSABLE - This one needs to come from inspiral outs
        ifo_insps = insps.find_output_with_ifo(ifo)
        analysable_segs = ifo_insps.get_times_covered_by_files()

        analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                                     proc_id, ifo, "CBC_WORKFLOW_ANALYSABLE", 0)
        segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                                                               analysable_segs)
        segmentdb_utils.add_to_segment_summary(outdoc, proc_id,
                                      analysable_def_id, summ_segs, comment='')

    summ_file = File(workflow.ifos, "WORKFLOW_SUMMARY",
                     workflow.analysis_time, extension=".xml",
                     directory=output_dir)
    summ_file.PFN(summ_file.storage_path, site='local')
    utils.write_filename(outdoc, summ_file.storage_path)

    return FileList([summ_file])
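
The tag filtering near the top of this function narrows the input lists once per tag, so only files carrying all of the requested tags survive. A standalone sketch of the same idea, with hypothetical (name, tags) tuples standing in for workflow File objects:

# Hypothetical (filename, tags) pairs standing in for a pycbc FileList
files = [('H1-INSPIRAL_FULL_DATA.xml', ['FULL_DATA']),
         ('H1-INSPIRAL_BNSINJ.xml', ['BNSINJ'])]

tags = ['FULL_DATA']
for tag in tags:
    # Each pass keeps only the files carrying the current tag
    files = [(name, ftags) for (name, ftags) in files if tag in ftags]

print([name for name, _ in files])   # ['H1-INSPIRAL_FULL_DATA.xml']
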
Example #16
def setup_postproc_coh_PTF_workflow(workflow,
                                    trig_files,
                                    trig_cache,
                                    inj_trig_files,
                                    inj_files,
                                    inj_trig_caches,
                                    inj_caches,
                                    config_file,
                                    output_dir,
                                    html_dir,
                                    segment_dir,
                                    ifos,
                                    inj_tags=[],
                                    tags=[]):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.
   
    Returns
    --------
    
    """
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")

    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp,
                                             "trig_combiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp,
                                           "trig_cluster",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp,
                                         "injfinder",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp,
                                             "injcombiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos,
                        "foundmissed",
                        full_segment,
                        extension="lcf",
                        directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp,
                                               "inj_efficiency",
                                               ifo=ifos,
                                               out_dir=output_dir,
                                               tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp,
                                         "sbv_plotter",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp,
                                       "efficiency",
                                       ifo=ifos,
                                       out_dir=output_dir,
                                       tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(
                        clust_file,
                        segment_dir,
                        inj_file=curr_injs,
                        tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(
                unclust_file, segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                                                          offsource_clustered,
                                                          segment_dir,
                                                          tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if \
                                       str(tag).replace("_FILTERED_", "") \
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)

                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag,
                                                           inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(
                            parent=injcombiner_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(
                            parent=injfinder_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp,
                                           "html_summary",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file,
            tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags,
            html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = html_summary_node.executable.get_pfn() + " "
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
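
The closing step writes a small shell wrapper that re-runs the html_summary command with --open-box appended. The same pattern in a standalone sketch; the command string here is a hypothetical placeholder rather than a real reconstructed command line:

import os

# Hypothetical reconstructed command line
open_box_cmd = "/path/to/html_summary --config test.ini --open-box"

open_box_path = "open_the_box.sh"
with open(open_box_path, "w") as f:
    f.write("#!/bin/sh\n%s" % open_box_cmd)
# 0o500: owner may read and execute, nobody may write
os.chmod(open_box_path, 0o500)
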
Example #17
def setup_timeslides_workflow(workflow, output_dir=None, tags=[],
                              timeSlideSectionName='ligolw_tisi'):
    '''
    Setup generation of time_slide input files in the workflow. Currently
    used only with ligolw_tisi to generate files containing the list of
    slides to be performed in each time slide job.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.
    timeSlideSectionName : string (optional, default='ligolw_tisi')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given to
        the code at run time.
    Returns
    --------
    timeSlideOuts : pycbc.workflow.core.FileList
        The list of time slide files created by this call.
    '''
    logging.info("Entering time slides setup module.")
    make_analysis_dir(output_dir)
    # Get ifo list and full analysis segment for output file naming
    ifoList = workflow.ifos
    ifo_string = workflow.ifo_string
    fullSegment = workflow.analysis_time

    # Identify which time-slides to do by presence of sub-sections in the
    # configuration file
    all_sec = workflow.cp.sections()
    timeSlideSections = [sec for sec in all_sec if sec.startswith('tisi-')]
    timeSlideTags = [(sec.split('-')[-1]).upper() for sec in timeSlideSections]

    timeSlideOuts = FileList([])

    # FIXME: Add ability to specify different exes

    # Make the timeSlideFiles
    for timeSlideTag in timeSlideTags:
        currTags = tags + [timeSlideTag]

        timeSlideMethod = workflow.cp.get_opt_tags("workflow-timeslides",
                                                 "timeslides-method", currTags)

        if timeSlideMethod in ["IN_WORKFLOW", "AT_RUNTIME"]:
            timeSlideExeTag = workflow.cp.get_opt_tags("workflow-timeslides",
                                                    "timeslides-exe", currTags)
            timeSlideExe = select_generic_executable(workflow, timeSlideExeTag)
            timeSlideJob = timeSlideExe(workflow.cp, timeSlideExeTag, ifos=ifo_string,
                                             tags=currTags, out_dir=output_dir)
            timeSlideNode = timeSlideJob.create_node(fullSegment)
            if timeSlideMethod == "AT_RUNTIME":
                workflow.execute_node(timeSlideNode)
            else:
                workflow.add_node(timeSlideNode)
            tisiOutFile = timeSlideNode.output_files[0]
        elif timeSlideMethod == "PREGENERATED":
            timeSlideFilePath = workflow.cp.get_opt_tags("workflow-timeslides",
                                      "timeslides-pregenerated-file", currTags)
            file_url = urlparse.urljoin('file:', urllib.pathname2url(\
                                                  timeSlideFilePath))
            tisiOutFile = File(ifo_string, 'PREGEN_TIMESLIDES',
                               fullSegment, file_url, tags=currTags)

        timeSlideOuts.append(tisiOutFile)

    return timeSlideOuts
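
The time slides to generate are discovered purely from [tisi-...] sections in the configuration file. A standalone sketch of that discovery step, using the Python 3 configparser module and a hypothetical ini snippet:

from configparser import ConfigParser

cp = ConfigParser()
cp.read_string("[tisi-zerolag]\n[tisi-slides]\n[workflow-timeslides]\n")

# Any section starting with 'tisi-' defines one time-slide file to build
time_slide_sections = [sec for sec in cp.sections() if sec.startswith('tisi-')]
time_slide_tags = [sec.split('-')[-1].upper() for sec in time_slide_sections]
print(time_slide_tags)   # ['ZEROLAG', 'SLIDES']
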
Example #18
def setup_tmpltbank_dax_generated(workflow,
                                  science_segs,
                                  datafind_outs,
                                  output_dir,
                                  tags=None,
                                  psd_files=None):
    '''
    Setup template bank jobs that are generated as part of the CBC workflow.
    This function will add numerous jobs to the CBC workflow using
    configuration options from the .ini file. The following executables are
    currently supported:

    * lalapps_tmpltbank
    * pycbc_geom_nonspin_bank

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    science_segs : Keyed dictionary of ligo.segments.segmentlist objects
        scienceSegs[ifo] holds the science segments to be analysed for each
        ifo.
    datafind_outs : pycbc.workflow.core.FileList
        The file list containing the datafind files.
    output_dir : path string
        The directory where data products will be placed.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.
    psd_files : pycbc.workflow.core.FileList
        The file list containing predefined PSDs, if provided.

    Returns
    --------
    tmplt_banks : pycbc.workflow.core.FileList
        The FileList holding the details of all the template bank jobs.
    '''
    if tags is None:
        tags = []
    cp = workflow.cp
    # Need to get the exe to figure out what sections are analysed, what is
    # discarded etc. This should *not* be hardcoded, so using a new executable
    # will require a bit of effort here ....

    ifos = science_segs.keys()
    tmplt_bank_exe = os.path.basename(cp.get('executables', 'tmpltbank'))
    # Select the appropriate class
    exe_class = select_tmpltbank_class(tmplt_bank_exe)

    # Set up class for holding the banks
    tmplt_banks = FileList([])

    for ifo in ifos:
        job_instance = exe_class(workflow.cp,
                                 'tmpltbank',
                                 ifo=ifo,
                                 out_dir=output_dir,
                                 tags=tags)
        # Check for the write_psd flag
        if cp.has_option_tags("workflow-tmpltbank", "tmpltbank-write-psd-file",
                              tags):
            job_instance.write_psd = True
        else:
            job_instance.write_psd = False

        sngl_ifo_job_setup(workflow,
                           ifo,
                           tmplt_banks,
                           job_instance,
                           science_segs[ifo],
                           datafind_outs,
                           allow_overlap=True)
    return tmplt_banks
Example #19
def setup_tmpltbank_without_frames(workflow,
                                   output_dir,
                                   tags=None,
                                   independent_ifos=False,
                                   psd_files=None):
    '''
    Setup CBC workflow to use a template bank (or banks) that are generated in
    the workflow, but do not use the data to estimate a PSD, and therefore do
    not vary over the duration of the workflow. This can either generate one
    bank that is valid for all ifos at all times, or multiple banks that are
    valid only for a single ifo at all times (one bank per ifo).

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    output_dir : path string
        The directory where the template bank outputs will be placed.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.
    independent_ifos : Boolean, optional (default=False)
        If given this will produce one template bank per ifo. If not given
        there will be one template bank to cover all ifos.
    psd_files : pycbc.workflow.core.FileList
        The file list containing predefined PSDs, if provided.

    Returns
    --------
    tmplt_banks : pycbc.workflow.core.FileList
        The FileList holding the details of the template bank(s).
    '''
    if tags is None:
        tags = []
    cp = workflow.cp
    # Need to get the exe to figure out what sections are analysed, what is
    # discarded etc. This should *not* be hardcoded, so using a new executable
    # will require a bit of effort here ....

    ifos = workflow.ifos
    fullSegment = workflow.analysis_time

    tmplt_bank_exe = os.path.basename(cp.get('executables', 'tmpltbank'))
    # lalapps_tmpltbank cannot be used with this
    if tmplt_bank_exe == 'lalapps_tmpltbank':
        errMsg = "Lalapps_tmpltbank cannot be used to generate template banks "
        errMsg += "without using frames. Try another code."
        raise ValueError(errMsg)

    # Select the appropriate class
    exe_instance = select_tmpltbank_class(tmplt_bank_exe)

    tmplt_banks = FileList([])

    # Make the distinction between one bank for all ifos and one bank per ifo
    if independent_ifos:
        ifoList = [ifo for ifo in ifos]
    else:
        ifoList = [[ifo for ifo in ifos]]

    # Check for the write_psd flag
    if cp.has_option_tags("workflow-tmpltbank", "tmpltbank-write-psd-file",
                          tags):
        exe_instance.write_psd = True
    else:
        exe_instance.write_psd = False

    for ifo in ifoList:
        job_instance = exe_instance(workflow.cp,
                                    'tmpltbank',
                                    ifo=ifo,
                                    out_dir=output_dir,
                                    tags=tags,
                                    psd_files=psd_files)
        node = job_instance.create_nodata_node(fullSegment)
        workflow.add_node(node)
        tmplt_banks += node.output_files

    return tmplt_banks
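
The independent_ifos switch only changes the shape of the list being looped over: individual ifo strings for one bank per ifo, or a single list of all ifos for one shared bank. A standalone sketch of that distinction:

ifos = ['H1', 'L1', 'V1']

for independent_ifos in (True, False):
    if independent_ifos:
        ifo_list = list(ifos)        # one bank per ifo: 'H1', 'L1', 'V1'
    else:
        ifo_list = [list(ifos)]      # one shared bank: ['H1', 'L1', 'V1']
    for ifo in ifo_list:
        print(independent_ifos, ifo)
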
Example #20
def setup_plotthinca(workflow,
                     input_files,
                     cache_filename,
                     coinc_cachepattern,
                     slide_cachepattern,
                     output_dir,
                     tags=[],
                     **kwargs):
    """
    This function sets up the nodes that will generate summary from a list of
    thinca files.

    Parameters
    -----------
    workflow : ahope.Workflow
        The ahope workflow instance that the coincidence jobs will be added to.
    input_files : ahope.FileList
        A FileList of files that are used as input at this stage.
    cache_filename : str
        Filename of the ihope cache.
    coinc_cachepattern : str
        The pattern that will be used to find zero-lag coincidence filenames in the cache.
    slide_cachepattern : str
        The pattern that will be used to find time slide filenames in the cache.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['full_data'].
        This will be used in output names and directories.

    Returns
    --------
    plot_files : ahope.FileList
        A list of the output files from this stage.
    """

    plot_files = FileList([])

    # create executable
    plotthinca_job = Executable(workflow.cp, 'plotthinca', 'vanilla',
                                workflow.ifos, output_dir, tags)

    # get all ifo combinations of at least 2 coincident ifos
    ifo_combos = []
    for n in xrange(len(plotthinca_job.ifo_list) + 1):
        for ifo_list in itertools.combinations(plotthinca_job.ifo_list, n + 2):
            ifo_combos.append(ifo_list)

    for tag in tags:
        for ifo_list in ifo_combos:
            ifo_string = ''.join(ifo_list)

            # create node
            node = Node(plotthinca_job)
            node.add_opt('--gps-start-time', workflow.analysis_time[0])
            node.add_opt('--gps-end-time', workflow.analysis_time[1])
            node.add_opt('--cache-file', cache_filename)
            node.add_opt('--ifo-times', ifo_string)
            node.add_opt('--ifo-tag', 'SECOND_' + ifo_string)
            for ifo in ifo_list:
                node.add_opt('--%s-triggers' % ifo.lower(), '')
            node.add_opt('--user-tag', tag.upper() + '_SUMMARY_PLOTS')
            node.add_opt('--output-path', output_dir)
            node.add_opt('--coinc-pattern', coinc_cachepattern)
            node.add_opt('--slide-pattern', slide_cachepattern)
            node.add_opt('--enable-output')

            # add node to workflow
            workflow.add_node(node)

            # make all input_files parents
            #for f in input_files:
            #    dep = dax.Dependency(parent=f.node._dax_node, child=node._dax_node)
            #    workflow._adag.addDependency(dep)

    return plot_files
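
The nested loop above enumerates every subset of at least two coincident ifos. The same enumeration as a standalone sketch, for a hypothetical three-detector network:

import itertools

ifo_list = ['H1', 'L1', 'V1']

# Combinations of size 2, 3, ... up to the full network; oversized
# requests simply yield nothing.
ifo_combos = []
for n in range(len(ifo_list) + 1):
    for combo in itertools.combinations(ifo_list, n + 2):
        ifo_combos.append(combo)

print(ifo_combos)
# [('H1', 'L1'), ('H1', 'V1'), ('L1', 'V1'), ('H1', 'L1', 'V1')]
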
Example #21
def setup_matchedfltr_dax_generated(workflow,
                                    science_segs,
                                    datafind_outs,
                                    tmplt_banks,
                                    output_dir,
                                    injection_file=None,
                                    tags=None,
                                    link_to_tmpltbank=False,
                                    compatibility_mode=False):
    '''
    Setup matched-filter jobs that are generated as part of the workflow.
    This module can support any matched-filter code that is similar in
    principle to lalapps_inspiral, but for new codes some additions are
    needed to define Executable and Job sub-classes (see jobutils.py).

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    science_segs : ifo-keyed dictionary of glue.segments.segmentlist instances
        The list of times that are being analysed in this workflow. 
    datafind_outs : pycbc.workflow.core.FileList
        A FileList of the datafind files that are needed to obtain the
        data used in the analysis.
    tmplt_banks : pycbc.workflow.core.FileList
        A FileList of the template bank files that will serve as input
        in this stage.
    output_dir : path
        The directory in which output will be stored.
    injection_file : pycbc.workflow.core.File, optional (default=None)
        If given the file containing the simulation file to be sent to these
        jobs on the command line. If not given no file will be sent.
    tags : list of strings (optional, default = None)
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['BNSINJECTIONS'] or
        ['NOINJECTIONANALYSIS']. This will be used in output names.
    link_to_tmpltbank : boolean, optional (default=False)
        If this option is given, the job valid_times will be altered so that
        there will be one inspiral file for every template bank and they will
        cover the same time span. Note that this option must also be given
        during template bank generation to be meaningful.
        
    Returns
    -------
    inspiral_outs : pycbc.workflow.core.FileList
        A list of output files written by this stage. This *will not* contain
        any intermediate products produced within this stage of the workflow.
        If you require access to any intermediate products produced at this
        stage you can call the various sub-functions directly.
    '''
    if tags is None:
        tags = []
    # Need to get the exe to figure out what sections are analysed, what is
    # discarded etc. This should *not* be hardcoded, so using a new executable
    # will require a bit of effort here ....

    cp = workflow.cp
    ifos = science_segs.keys()
    match_fltr_exe = os.path.basename(cp.get('executables', 'inspiral'))
    # Select the appropriate class
    exe_class = select_matchedfilter_class(match_fltr_exe)

    if link_to_tmpltbank:
        # Use this to ensure that inspiral and tmpltbank jobs overlap. This
        # means that there will be 1 inspiral job for every 1 tmpltbank and
        # the data read in by both will overlap as much as possible. (If you
        # ask the template bank jobs to use 2000s of data for PSD estimation
        # and the matched-filter jobs to use 4000s, you will end up with
        # twice as many matched-filter jobs that still use 4000s to estimate a
        # PSD but then only generate triggers in the 2000s of data that the
        # template bank jobs ran on.)
        tmpltbank_exe = os.path.basename(cp.get('executables', 'tmpltbank'))
        link_exe_instance = select_tmpltbank_class(tmpltbank_exe)
    else:
        link_exe_instance = None

    # Set up class for holding the banks
    inspiral_outs = FileList([])

    # Matched-filtering is currently done independently for each ifo.
    # Supporting multi-detector matched-filtering, or anything similar,
    # would probably require a new module.
    for ifo in ifos:
        logging.info("Setting up matched-filtering for %s." % (ifo))
        job_instance = exe_class(workflow.cp,
                                 'inspiral',
                                 ifo=ifo,
                                 out_dir=output_dir,
                                 injection_file=injection_file,
                                 tags=tags)
        if link_exe_instance:
            link_job_instance = link_exe_instance(cp,
                                                  'tmpltbank',
                                                  ifo=ifo,
                                                  out_dir=output_dir,
                                                  tags=tags)
        else:
            link_job_instance = None

        sngl_ifo_job_setup(workflow,
                           ifo,
                           inspiral_outs,
                           job_instance,
                           science_segs[ifo],
                           datafind_outs,
                           parents=tmplt_banks,
                           allow_overlap=False,
                           link_job_instance=link_job_instance,
                           compatibility_mode=compatibility_mode)
    return inspiral_outs
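A minimal usage sketch for the function above, assuming a configured Workflow
plus the segment, datafind and template-bank products from the earlier stages;
every variable name here is illustrative rather than defined in this excerpt:

    # Hypothetical driver code; inputs come from prior workflow stages.
    inspiral_outs = setup_matchedfltr_dax_generated(
        workflow,
        science_segs,       # ifo-keyed dict of segmentlists to analyse
        datafind_files,     # FileList locating the frame data
        bank_files,         # FileList of template bank files
        'matchedfilter',    # output directory
        tags=['NOINJECTIONANALYSIS'])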
Example #22
0
def setup_summary_plots(
        workflow,
        input_files,
        cache_filename,
        tmpltbank_cachepattern,
        inspiral_cachepattern,
        #coinc_cachepattern, slide_cachepattern,
        output_dir,
        tags=[],
        **kwargs):
    """
    This function sets up the summary plots jobs.

    Parameters
    -----------
    workflow : ahope.Workflow
        The ahope workflow instance that the summary plot jobs will be added
        to.
    input_files : ahope.FileList
        A FileList of files that are used as input at this stage.
    cache_filename : str
        Filename of the ihope cache.
    tmpltbank_cachepattern : str
        The pattern that will be used to find template_bank filenames in the cache.
    inspiral_cachepattern : str
        The pattern that will be used to find inspiral filenames in the cache.
    coinc_cachepattern : str (currently not implemented)
        The pattern that will be used to find zero-lag coincidence filenames in the cache.
    slide_cachepattern : str (currently not implemented)
        The pattern that will be used to find time slide filenames in the cache.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['full_data'].
        This will be used in output names and directories.

    Returns
    --------
    plot_files : ahope.FileList
        A list of the output files from this stage.
    """

    plot_files = FileList([])

    # make summary plot dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # setup summary plot jobs
    plotinspiral_files = setup_plotinspiral(workflow, input_files,
                                            cache_filename,
                                            inspiral_cachepattern, output_dir,
                                            tags)
    plotinspiralrange_files = setup_plotinspiralrange(workflow, input_files,
                                                      cache_filename,
                                                      tmpltbank_cachepattern,
                                                      inspiral_cachepattern,
                                                      output_dir, tags)
    plotnumtemplates_files = setup_plotnumtemplates(workflow, input_files,
                                                    cache_filename,
                                                    tmpltbank_cachepattern,
                                                    output_dir, tags)
    #plotthinca_files = setup_plotthinca(workflow, input_files, cache_filename,
    #                                        coinc_cachepattern, slide_cachepattern,
    #                                        output_dir, tags)

    # concatenate plot files
    plot_files += plotinspiral_files
    plot_files += plotinspiralrange_files
    plot_files += plotnumtemplates_files
    #plot_files += plotthinca_files

    return plot_files
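A hedged usage sketch; the cache filename and patterns below are illustrative
values, not defaults defined by the function:

    # Hypothetical call; `workflow` and `input_files` come from earlier stages.
    plot_files = setup_summary_plots(workflow,
                                     input_files,
                                     cache_filename='ihope.cache',
                                     tmpltbank_cachepattern='TMPLTBANK',
                                     inspiral_cachepattern='INSPIRAL',
                                     output_dir='summary_plots',
                                     tags=['full_data'])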
Example #23
0
def setup_injection_workflow(workflow,
                             output_dir=None,
                             inj_section_name='injections',
                             exttrig_file=None,
                             tags=None):
    """
    This function is the gateway for setting up injection-generation jobs in a
    workflow. It should be possible for this function to support a number
    of different ways/codes that could be used for doing this. However, as
    this will presumably stay as a single call to a single code (which need
    not be inspinj), there are currently no subfunctions in this module.

    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the injection jobs will be added to.
    output_dir : path
        The directory in which injection files will be stored.
    inj_section_name : string (optional, default='injections')
        The string that corresponds to the option describing the exe location
        in the [executables] section of the .ini file and that corresponds to
        the section (and sub-sections) giving the options that will be given
        to the code at run time.
    exttrig_file : pycbc.workflow.core.File (optional, default=None)
        Only used by the IN_COH_PTF_WORKFLOW and AT_COH_PTF_RUNTIME injection
        methods, where it is passed to the injection job's create_node call.
    tags : list of strings (optional, default = None)
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. This will be used in output names.

    Returns
    --------
    inj_files : pycbc.workflow.core.FileList
        The list of injection files created by this call.
    inj_tags : list of strings
        The tag corresponding to each injection file and used to uniquely
        identify them. The FileList class contains functions to search
        based on tags.
    """
    if tags is None:
        tags = []
    logging.info("Entering injection module.")
    make_analysis_dir(output_dir)

    # Get full analysis segment for output file naming
    full_segment = workflow.analysis_time
    ifos = workflow.ifos

    # Identify which injections to do by presence of sub-sections in
    # the configuration file
    inj_tags = []
    inj_files = FileList([])

    for section in workflow.cp.get_subsections(inj_section_name):
        inj_tag = section.upper()
        curr_tags = tags + [inj_tag]

        # Parse for options in ini file
        injection_method = workflow.cp.get_opt_tags("workflow-injections",
                                                    "injections-method",
                                                    curr_tags)

        if injection_method in ["IN_WORKFLOW", "AT_RUNTIME"]:
            # FIXME: Add ability to specify different exes
            inj_job = LalappsInspinjExecutable(workflow.cp,
                                               inj_section_name,
                                               out_dir=output_dir,
                                               ifos='HL',
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment)
            if injection_method == "AT_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]
            inj_files.append(inj_file)
        elif injection_method == "PREGENERATED":
            file_attrs = {
                'ifos': ['HL'],
                'segs': full_segment,
                'tags': curr_tags
            }
            injection_path = workflow.cp.get_opt_tags(
                "workflow-injections", "injections-pregenerated-file",
                curr_tags)
            curr_file = resolve_url_to_file(injection_path, attrs=file_attrs)
            inj_files.append(curr_file)
        elif injection_method in ["IN_COH_PTF_WORKFLOW", "AT_COH_PTF_RUNTIME"]:
            inj_job = LalappsInspinjExecutable(workflow.cp,
                                               inj_section_name,
                                               out_dir=output_dir,
                                               ifos=ifos,
                                               tags=curr_tags)
            node = inj_job.create_node(full_segment, exttrig_file)
            if injection_method == "AT_COH_PTF_RUNTIME":
                workflow.execute_node(node)
            else:
                workflow.add_node(node)
            inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections", "em-bright-only"):
                em_filter_job = PycbcDarkVsBrightInjectionsExecutable(
                    workflow.cp,
                    'em_bright_filter',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = em_filter_job.create_node(inj_file, full_segment,
                                                 curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-jitter-skyloc"):
                jitter_job = LigolwCBCJitterSkylocExecutable(
                    workflow.cp,
                    'jitter_skyloc',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = jitter_job.create_node(inj_file, full_segment,
                                              curr_tags)
                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            if workflow.cp.has_option("workflow-injections",
                                      "do-align-total-spin"):
                align_job = LigolwCBCAlignTotalSpinExecutable(
                    workflow.cp,
                    'align_total_spin',
                    tags=curr_tags,
                    out_dir=output_dir,
                    ifos=ifos)
                node = align_job.create_node(inj_file, full_segment, curr_tags)

                if injection_method == "AT_COH_PTF_RUNTIME":
                    workflow.execute_node(node)
                else:
                    workflow.add_node(node)
                inj_file = node.output_files[0]

            inj_files.append(inj_file)
        else:
            err = "Injection method must be one of IN_WORKFLOW, "
            err += "AT_RUNTIME or PREGENERATED. Got %s." % (injection_method)
            raise ValueError(err)

        inj_tags.append(inj_tag)

    logging.info("Leaving injection module.")
    return inj_files, inj_tags
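The sub-sections and options that drive the loop above come from the
configuration file; a sketch of what such a fragment might look like,
expressed here as comments (only the section and option names are taken from
the code, the tag `bnsinj` and the values are illustrative):

    # Hypothetical ini fragment; discovered via
    # workflow.cp.get_subsections('injections') and read with
    # cp.get_opt_tags("workflow-injections", "injections-method", ...).
    #
    #   [injections-bnsinj]
    #   ; options handed to the injection executable for this tag
    #
    #   [workflow-injections]
    #   injections-method = IN_WORKFLOW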
Пример #24
0
def setup_hardware_injection_page(workflow,
                                  input_files,
                                  cache_filename,
                                  inspiral_cachepattern,
                                  output_dir,
                                  tags=[],
                                  **kwargs):
    """
    This function sets up the nodes that will create the hardware injection page.

    Parameters
    -----------
    workflow : ahope.Workflow
        The ahope workflow instance that the hardware injection page jobs will
        be added to.
    input_files : ahope.FileList
        A FileList of files that are used as input at this stage.
    cache_filename : str
        Filename of the ihope cache.
    inspiral_cachepattern : str
        The pattern that will be used to find inspiral filenames in the cache.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['full_data'].
        This will be used to search the cache.

    Returns
    --------
    plot_files : ahope.FileList
        A list of the output files from this stage.
    """

    logging.info("Entering hardware injection page setup.")

    out_files = FileList([])

    # check if hardware injection section exists
    # if not, then do not add the hardware injection job to the workflow
    if not workflow.cp.has_section('workflow-hardware-injections'):
        msg = "There is no workflow-hardware-injections section. "
        msg += "The hardware injection page will not be added to the workflow."
        logging.info(msg)
        logging.info("Leaving hardware injection page setup.")
        return out_files

    # make the output dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # create executable
    hwinjpage_job = Executable(workflow.cp, 'hardware_injection_page',
                               'vanilla', workflow.ifos, output_dir, tags)

    # retrieve hardware injection file
    hwinjDefUrl = workflow.cp.get_opt_tags('workflow-hardware-injections',
                                           'hwinj-definer-url', tags)
    hwinjDefBaseName = os.path.basename(hwinjDefUrl)
    hwinjDefNewPath = os.path.join(output_dir, hwinjDefBaseName)
    urllib.urlretrieve(hwinjDefUrl, hwinjDefNewPath)

    # update hwinj definer file location
    workflow.cp.set("workflow-hardware-injections", "hwinj-definer-file",
                    hwinjDefNewPath)

    # query for the hardware injection segments
    get_hardware_injection_segment_files(workflow, output_dir, hwinjDefNewPath)

    # create node
    node = Node(hwinjpage_job)
    node.add_opt('--gps-start-time', workflow.analysis_time[0])
    node.add_opt('--gps-end-time', workflow.analysis_time[1])
    node.add_opt('--source-xml', hwinjDefNewPath)
    node.add_opt('--segment-dir', output_dir)
    node.add_opt('--cache-file', cache_filename)
    node.add_opt('--cache-pattern', inspiral_cachepattern)
    node.add_opt('--analyze-injections', '')
    for ifo in workflow.ifos:
        node.add_opt('--%s-injections' % ifo.lower(), '')
    outfile = File(node.executable.ifo_string,
                   'HWINJ_SUMMARY',
                   workflow.analysis_time,
                   extension='html',
                   directory=output_dir)
    node.add_opt('--outfile', outfile.storage_path)

    # add node to workflow
    workflow.add_node(node)

    # make all input_files parents
    #for f in input_files:
    #    dep = dax.Dependency(parent=f.node._dax_node, child=node._dax_node)
    #    workflow._adag.addDependency(dep)

    out_files += node.output_files

    logging.info("Leaving hardware injection page setup.")

    return out_files
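The section and option names below are taken from the code above; the URL is
a placeholder rather than a real definer file:

    # Hypothetical configuration for the hardware injection page job.
    #
    #   [workflow-hardware-injections]
    #   hwinj-definer-url = https://example.org/hwinj/H1L1-HWINJ_DEFINER.xml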
Example #25
0
def setup_interval_coinc_inj(workflow,
                             hdfbank,
                             full_data_trig_files,
                             inj_trig_files,
                             background_file,
                             veto_file,
                             veto_name,
                             out_dir,
                             tags=None):
    """
    This function sets up exact match coincidence and background estimation
    using a folded interval technique.
    """
    if tags is None:
        tags = []
    make_analysis_dir(out_dir)
    logging.info('Setting up coincidence for injection')

    if len(hdfbank) > 1:
        raise ValueError('This coincidence method only supports a '
                         'pregenerated template bank')
    hdfbank = hdfbank[0]

    if len(workflow.ifos) > 2:
        raise ValueError(
            'This coincidence method only supports two ifo searches')

    # Parallelization factor: trades the number of coincidence jobs against
    # the wall time and memory needed by each one
    factor = int(
        workflow.cp.get_opt_tags('workflow-coincidence',
                                 'parallelization-factor', tags))

    ffiles = {}
    ifiles = {}
    ifos, files = full_data_trig_files.categorize_by_attr('ifo')
    for ifo, file in zip(ifos, files):
        ffiles[ifo] = file[0]
    ifos, files = inj_trig_files.categorize_by_attr('ifo')
    for ifo, file in zip(ifos, files):
        ifiles[ifo] = file[0]
    ifo0, ifo1 = ifos[0], ifos[1]
    combo = [
        (FileList([ifiles[ifo0], ifiles[ifo1]]), "injinj"),
        (FileList([ifiles[ifo0], ffiles[ifo1]]), "injfull"),
        (FileList([ifiles[ifo1], ffiles[ifo0]]), "fullinj"),
    ]
    bg_files = {'injinj': [], 'injfull': [], 'fullinj': []}

    for trig_files, ctag in combo:
        findcoinc_exe = PyCBCFindCoincExecutable(workflow.cp,
                                                 'coinc',
                                                 ifos=workflow.ifos,
                                                 tags=tags + [ctag],
                                                 out_dir=out_dir)
        for i in range(factor):
            group_str = '%s/%s' % (i, factor)
            coinc_node = findcoinc_exe.create_node(trig_files,
                                                   hdfbank,
                                                   veto_file,
                                                   veto_name,
                                                   group_str,
                                                   tags=([str(i)]))
            bg_files[ctag] += coinc_node.output_files
            workflow.add_node(coinc_node)

    return setup_statmap_inj(workflow,
                             bg_files,
                             background_file,
                             hdfbank,
                             out_dir,
                             tags=tags)
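The parallelization factor is read from the configuration; a hedged fragment
with an illustrative value:

    # Hypothetical [workflow-coincidence] fragment; a factor of 10 creates
    # 10 coinc jobs per trigger-file combination, each handling template
    # group 'i/10'.
    #
    #   [workflow-coincidence]
    #   parallelization-factor = 10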
Example #26
0
def setup_plotnumtemplates(workflow,
                           input_files,
                           cache_filename,
                           tmpltbank_cachepattern,
                           output_dir,
                           tags=[],
                           **kwargs):
    """
    This function sets up the nodes that will generate a plot of the number
    of templates against time.

    Parameters
    -----------
    workflow : ahope.Workflow
        The ahope workflow instance that the plotting jobs will be added to.
    input_files : ahope.FileList
        A FileList of files that are used as input at this stage.
    cache_filename : str
        Filename of the ihope cache.
    tmpltbank_cachepattern : str
        The pattern that will be used to find template_bank filenames in the cache.
    output_dir : path
        The directory in which output files will be stored.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['full_data'].
        This will be used in output names and directories.

    Returns
    --------
    plot_files : ahope.FileList
        A list of the output files from this stage.
    """

    # NOTE: the nodes created below do not register output files, so this
    # list is returned empty.
    plot_files = FileList([])

    # create executable
    plotnumtemplates_job = Executable(workflow.cp, 'plotnumtemplates',
                                      'vanilla', workflow.ifos, output_dir,
                                      tags)

    for tag in tags:
        # create node
        node = Node(plotnumtemplates_job)
        node.add_opt('--gps-start-time', workflow.analysis_time[0])
        node.add_opt('--gps-end-time', workflow.analysis_time[1])
        node.add_opt('--cache-file', cache_filename)
        node.add_opt('--ifo-times', node.executable.ifo_string)
        node.add_opt('--user-tag', tag.upper() + '_SUMMARY_PLOTS')
        node.add_opt('--output-path', output_dir)
        node.add_opt('--bank-pattern', tmpltbank_cachepattern)
        node.add_opt('--enable-output')

        # add node to workflow
        workflow.add_node(node)

        # make all input_files parents
        #for f in input_files:
        #    dep = dax.Dependency(parent=f.node._dax_node, child=node._dax_node)
        #    workflow._adag.addDependency(dep)

    return plot_files
Example #27
0
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None,
                            tags=None):
    """
    Setup datafind section of the workflow. This section is responsible for
    generating, or setting up the workflow to generate, a list of files that
    record the location of the frame files needed to perform the analysis.
    There could be multiple options here, the datafind jobs could be done at
    run time or could be put into a dag. The subsequent jobs will know
    what was done here from the OutFileList containing the datafind jobs
    (and the Dagman nodes if appropriate).
    For now the only implemented option is to generate the datafind files at
    runtime. This module can also check if the frameFiles actually exist, check
    whether the obtained segments line up with the original ones and update the
    science segments to reflect missing data files.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        The workflow class that stores the jobs that will be run.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse.
    outputDir : path
        All output files written by datafind processes will be written to this
        directory.
    seg_file : SegFile, optional (default=None)
        The file returned by get_science_segments containing the science
        segments and the associated segment_summary. This will
        be used for the segment_summary test and is required if, and only if,
        performing that test.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]).
        This is also used to tag the Files returned by the class to uniqueify
        the Files and uniqueify the actual filename.
        FIXME: Filenames may not be unique with current codes!

    Returns
    --------
    datafindOuts : FileList
        List of all the datafind output files for use later in the pipeline.
    sci_avlble_file : SegFile
        SegFile containing the analysable time after checks in the datafind
        module are applied to the input segment list. For production runs this
        is expected to be equal to the input segment list.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse. If
        the datafind-check-segment-gaps or datafind-check-frames-exist options
        are set to update_times this will be updated to reflect any instances
        of missing data.
    sci_avlble_name : string
        The name with which the analysable time is stored in the
        sci_avlble_file.
    """
    if tags is None:
        tags = []
    logging.info("Entering datafind module")
    make_analysis_dir(outputDir)
    cp = workflow.cp

    # Parse for options in ini file
    datafindMethod = cp.get_opt_tags("workflow-datafind",
                                     "datafind-method", tags)

    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-gaps", tags):
        checkSegmentGaps = cp.get_opt_tags("workflow-datafind",
                                          "datafind-check-segment-gaps", tags)
    else:
        checkSegmentGaps = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-frames-exist", tags):
        checkFramesExist = cp.get_opt_tags("workflow-datafind",
                                          "datafind-check-frames-exist", tags)
    else:
        checkFramesExist = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-summary", tags):
        checkSegmentSummary = cp.get_opt_tags("workflow-datafind",
                                       "datafind-check-segment-summary", tags)
    else:
        checkSegmentSummary = "no_test"

    logging.info("Starting datafind with setup_datafind_runtime_generated")
    if datafindMethod == "AT_RUNTIME_MULTIPLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)

    elif datafindMethod == "FROM_PREGENERATED_LCF_FILES":
        ifos = scienceSegs.keys()
        datafindcaches, datafindouts = \
            setup_datafind_from_pregenerated_lcf_files(cp, ifos,
                                                       outputDir, tags=tags)
    else:
        msg = "Entry datafind-method in [workflow-datafind] does not have "
        msg += "expected value. Valid values are "
        msg += "AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES "
        msg += "AT_RUNTIME_MULTIPLE_CACHES or AT_RUNTIME_SINGLE_CACHES. "
        msg += "Consult the documentation for more info."
        raise ValueError(msg)

    using_backup_server = False
    if datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES" or \
                                  datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        if cp.has_option_tags("workflow-datafind",
                          "datafind-backup-datafind-server", tags):
            using_backup_server = True
            backup_server = cp.get_opt_tags("workflow-datafind",
                                      "datafind-backup-datafind-server", tags)
            cp_new = copy.deepcopy(cp)
            cp_new.set("workflow-datafind",
                                "datafind-ligo-datafind-server", backup_server)
            cp_new.set('datafind', 'urltype', 'gsiftp')
            backup_datafindcaches, backup_datafindouts =\
                setup_datafind_runtime_frames_single_call_perifo(cp_new,
                                             scienceSegs, outputDir, tags=tags)
            backup_datafindouts = datafind_keep_unique_backups(\
                                             backup_datafindouts, datafindouts)
            datafindcaches.extend(backup_datafindcaches)
            datafindouts.extend(backup_datafindouts)

    logging.info("setup_datafind_runtime_generated completed")
    # If we don't have frame files covering all times we can update the science
    # segments.
    if checkSegmentGaps in ['warn','update_times','raise_error']:
        logging.info("Checking science segments against datafind output....")
        newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        logging.info("New segments calculated from data find output.....")
        missingData = False
        for ifo in scienceSegs.keys():
            # If no science segments in input then do nothing
            if not scienceSegs[ifo]:
                msg = "No science segments are present for ifo %s, " %(ifo)
                msg += "the segment metadata indicates there is no analyzable"
                msg += " strain data between the selected GPS start and end "
                msg += "times."
                logging.warning(msg)
                continue
            if ifo not in newScienceSegs:
                msg = "No data frames were found corresponding to the science "
                msg += "segments for ifo %s" %(ifo)
                logging.error(msg)
                missingData = True
                if checkSegmentGaps == 'update_times':
                    scienceSegs[ifo] = segments.segmentlist()
                continue
            missing = scienceSegs[ifo] - newScienceSegs[ifo]
            if abs(missing):
                msg = "From ifo %s we are missing frames covering:" %(ifo)
                msg += "\n%s" % "\n".join(map(str, missing))
                missingData = True
                logging.error(msg)
                if checkSegmentGaps == 'update_times':
                    # Remove missing time, so that we can carry on if desired
                    logging.info("Updating science segments for ifo %s."
                                 %(ifo))
                    scienceSegs[ifo] = scienceSegs[ifo] - missing

        if checkSegmentGaps == 'raise_error' and missingData:
            raise ValueError("Workflow cannot find needed data, exiting.")
        logging.info("Done checking, any discrepancies are reported above.")
    elif checkSegmentGaps == 'no_test':
        pass
    else:
        errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Do all of the frame files that were returned actually exist?
    if checkFramesExist in ['warn','update_times','raise_error']:
        logging.info("Verifying that all frames exist on disk.")
        missingFrSegs, missingFrames = \
                          get_missing_segs_from_frame_file_cache(datafindcaches)
        missingFlag = False
        for ifo in missingFrames.keys():
            # If no data in the input then do nothing
            if not scienceSegs[ifo]:
                continue
            # If using a backup server, does the frame exist remotely?
            if using_backup_server:
                # WARNING: This will be slow, but hopefully it will not occur
                #          for too many frames. This could be optimized if
                #          it becomes necessary.
                new_list = []
                for frame in missingFrames[ifo]:
                    for dfout in datafindouts:
                        dfout_pfns = list(dfout.pfns)
                        dfout_urls = [a.url for a in dfout_pfns]
                        if frame.url in dfout_urls:
                            pfn = dfout_pfns[dfout_urls.index(frame.url)]
                            dfout.removePFN(pfn)
                            if len(dfout.pfns) == 0:
                                new_list.append(frame)
                            else:
                                msg = "Frame %s not found locally. "\
                                                                  %(frame.url,)
                                msg += "Replacing with remote url(s) %s." \
                                           %(str([a.url for a in dfout.pfns]),)
                                logging.info(msg)
                            break
                    else:
                        new_list.append(frame)
                missingFrames[ifo] = new_list
            if missingFrames[ifo]:
                msg = "From ifo %s we are missing the following frames:" %(ifo)
                msg += '\n' + '\n'.join([a.url for a in missingFrames[ifo]])
                missingFlag = True
                logging.error(msg)
            if checkFramesExist == 'update_times':
                # Remove missing times, so that we can carry on if desired
                logging.info("Updating science times for ifo %s." %(ifo))
                scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo]

        if checkFramesExist == 'raise_error' and missingFlag:
            raise ValueError("Workflow cannot find all frames, exiting.")
        logging.info("Finished checking frames.")
    elif checkFramesExist == 'no_test':
        pass
    else:
        errMsg = "checkFramesExist kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Check if there are cases where frames exist, but no entry in the segment
    # summary table are present.
    if checkSegmentSummary in ['warn', 'raise_error']:
        logging.info("Checking the segment summary table against frames.")
        dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        missingFlag = False
        # NOTE: Should this be overrideable in the config file?
        sci_seg_name = "SCIENCE"
        if seg_file is None:
            err_msg = "You must provide the science segments SegFile object "
            err_msg += "if using the datafind-check-segment-summary option."
            raise ValueError(err_msg)
        if seg_file.seg_summ_dict is None:
            err_msg = "The provided science segments SegFile object must "
            err_msg += "contain a valid segment_summary table if using the "
            err_msg += "datafind-check-segment-summary option."
            raise ValueError(err_msg)
        seg_summary_times = seg_file.seg_summ_dict
        for ifo in dfScienceSegs.keys():
            curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name]
            missing = (dfScienceSegs[ifo] & seg_file.valid_segments)
            missing.coalesce()
            missing = missing - curr_seg_summ_times
            missing.coalesce()
            scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo]
            scienceButNotFrame.coalesce()
            missing2 = scienceSegs[ifo] - scienceButNotFrame
            missing2.coalesce()
            missing2 = missing2 - curr_seg_summ_times
            missing2.coalesce()
            if abs(missing):
                msg = "From ifo %s the following times have frames, " %(ifo)
                msg += "but are not covered in the segment summary table."
                msg += "\n%s" % "\n".join(map(str, missing))
                logging.error(msg)
                missingFlag = True
            if abs(missing2):
                msg = "From ifo %s the following times have frames, " %(ifo)
                msg += "are science, and are not covered in the segment "
                msg += "summary table."
                msg += "\n%s" % "\n".join(map(str, missing2))
                logging.error(msg)
                missingFlag = True
        if checkSegmentSummary == 'raise_error' and missingFlag:
            errMsg = "Segment_summary discrepancy detected, exiting."
            raise ValueError(errMsg)
    elif checkSegmentSummary == 'no_test':
        pass
    else:
        errMsg = "checkSegmentSummary kwarg must take a value from 'no_test', "
        errMsg += "'warn', or 'raise_error'."
        raise ValueError(errMsg)

    # Now need to create the file for SCIENCE_AVAILABLE
    sci_avlble_dict = segments.segmentlistdict()
    # NOTE: Should this be overrideable in the config file?
    sci_avlble_name = "SCIENCE_AVAILABLE"
    for ifo in scienceSegs.keys():
        sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo]

    sci_avlble_file = SegFile.from_segment_list_dict('SCIENCE_AVAILABLE',
                            sci_avlble_dict, ifo_list = scienceSegs.keys(),
                            valid_segment=workflow.analysis_time,
                            extension='.xml', tags=tags, directory=outputDir)

    logging.info("Leaving datafind module")
    return FileList(datafindouts), sci_avlble_file, scienceSegs, sci_avlble_name
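A hedged [workflow-datafind] fragment exercising the options parsed above;
the option names follow the code, the chosen values are illustrative:

    # Hypothetical configuration for the datafind module.
    #
    #   [workflow-datafind]
    #   datafind-method = AT_RUNTIME_SINGLE_FRAMES
    #   datafind-check-segment-gaps = update_times
    #   datafind-check-frames-exist = warn
    #   datafind-check-segment-summary = no_test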
Example #28
0
    def create_node(self, trig_files=None, segment_dir=None, analysis_seg=None,
                    slide_tag=None, out_tags=None, tags=None):
        # Pegasus.DAX3 is only needed by the commented-out output-registration
        # code further down.
        import Pegasus.DAX3 as dax
        if out_tags is None:
            out_tags = []
        if tags is None:
            tags = []
        node = Node(self)

        if not trig_files:
            raise ValueError("%s must be supplied with trigger files"
                              % self.name)

        # Data options
        num_trials = int(self.cp.get("trig_combiner", "num-trials"))
        trig_name = self.cp.get('workflow', 'trigger-name')
        if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
                self.cp.has_option_tag('inspiral', 'do-short-slides',
                                       'coherent_no_injections'):
            node.add_opt('--short-slides')
        
        node.add_opt('--grb-name', trig_name)
        
        node.add_opt('--trig-start-time', analysis_seg[0])
        node.add_opt('--ifo-tag', self.ifos)
        node.add_opt('--user-tag', 'INSPIRAL')
        if tags:
            node.add_opt('--job-tag', '_'.join(tags))

        if slide_tag is not None:
            node.add_opt('--slide-tag', slide_tag)
            node.add_opt('--long-slides')
            tag_start = ["TIMESLIDES_GRB%s_%s" % (trig_name, slide_tag)] + tags
        else:
            tag_start = ["GRB%s" % trig_name] + tags

        # Set input / output options
        if all(hasattr(t.node, "executable") for t in trig_files):
            if all(t.node.executable.name == "trig_cluster"
                   for t in trig_files):
                node.add_opt('--input-files',
                             " ".join([t.storage_path for t in trig_files]))
                if self.cp.has_option_tag('inspiral', 'do-short-slides',
                                          'coherent_no_injections'):
                    node.add_opt('--short-slides')
            else:
                node.add_input_list_opt('--input-files', trig_files)
        else:
            node.add_opt('--input-files',
                         " ".join([t.storage_path for t in trig_files]))

        node.add_opt('--segment-dir', segment_dir)
        node.add_opt('--output-dir', self.out_dir)

        out_files = FileList([])
        for out_tag in out_tags:
            out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                            directory=self.out_dir, extension='xml.gz',
                            tags=tag_start+[out_tag],
                            store_file=self.retain_files)
            out_files.append(out_file)
            #node._dax_node.uses(out_file, link=dax.Link.OUTPUT, register=False,
            #                    transfer=False)
            #node._outputs += [out_file]
            #out_file.node = node
            #node._add_output(out_file)

        for trial in range(1, num_trials + 1):
            out_file = File(self.ifos, 'INSPIRAL', trig_files[0].segment,
                            directory=self.out_dir, extension='xml.gz',
                            tags=tag_start+["OFFTRIAL_%d" % trial],
                            store_file=self.retain_files)
            out_files.append(out_file)
            #node._dax_node.uses(out_file, link=dax.Link.OUTPUT, register=False,
            #                    transfer=False)
            #node._outputs += [out_file]
            #out_file.node = node
            #node._add_output(out_file)

        node.add_profile('condor', 'request_cpus', self.num_threads)

        return node, out_files
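The owning Executable sub-class is not shown in this excerpt; a hedged usage
sketch, assuming `trig_combiner_job` is an instance of it and that the other
names come from the surrounding workflow:

    # Hypothetical call to the method above.
    node, out_files = trig_combiner_job.create_node(
        trig_files=clustered_trig_files,
        segment_dir=seg_dir,
        analysis_seg=analysis_segment,
        out_tags=['ONSOURCE', 'OFFSOURCE'])
    workflow.add_node(node)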
Example #29
0
def make_single_template_plots(workflow, segs, data_read_name, analyzed_name,
                                  params, out_dir, inj_file=None, exclude=None,
                                  require=None, tags=None, params_str=None,
                                  use_exact_inj_params=False):
    """Function for creating jobs to run the pycbc_single_template code and
    to run the associated plotting code pycbc_single_template_plots and add
    these jobs to the workflow.

    Parameters
    -----------
    workflow : workflow.Workflow instance
        The pycbc.workflow.Workflow instance to add these jobs to.
    segs : workflow.File instance
        The pycbc.workflow.File instance that points to the XML file containing
        the segment lists of data read in and data analyzed.
    data_read_name : str
        The name of the segmentlist containing the data read in by each
        inspiral job in the segs file.
    analyzed_name : str
        The name of the segmentlist containing the data analyzed by each
        inspiral job in the segs file.
    params : dictionary
        A dictionary containing the parameters of the template to be used.
        params[ifo+'_end_time'] is required for all ifos in workflow.ifos.
        If use_exact_inj_params is False then one must also supply values for
        [mass1, mass2, spin1z, spin2z] and f_lower. For precessing templates
        one also needs to supply [spin1x, spin1y, spin2x, spin2y, inclination].
        Additionally, for precession one must supply u_vals, or u_vals_<ifo>
        for all ifos. u_vals is the ratio between h_+ and h_x to use when
        constructing h(t): h(t) = (h_+ * u_vals) + h_x.
    out_dir : str
        Directory in which to store the output files.
    inj_file : workflow.File (optional, default=None)
        If given send this injection file to the job so that injections are
        made into the data.
    exclude : list (optional, default=None)
        If given, then when considering which subsections in the ini file to
        parse for options to add to single_template_plot, only use subsections
        that *do not* match strings in this list.
    require : list (optional, default=None)
        If given, then when considering which subsections in the ini file to
        parse for options to add to single_template_plot, only use subsections
        matching strings in this list.
    tags : list (optional, default=None)
        Add this list of tags to all jobs.
    params_str : str (optional, default=None)
        If given add this string to plot title and caption to describe the
        template that was used.
    use_exact_inj_params : boolean (optional, default=False)
        If True do not use masses and spins listed in the params dictionary
        but instead use the injection closest to the filter time as a template.

    Returns
    --------
    output_files : workflow.FileList
        The list of workflow.Files created in this function.
    """
    tags = [] if tags is None else tags
    makedir(out_dir)
    name = 'single_template_plot'
    secs = requirestr(workflow.cp.get_subsections(name), require)
    secs = excludestr(secs, exclude)
    files = FileList([])
    for tag in secs:
        for ifo in workflow.ifos:
            if params['%s_end_time' % ifo] == -1.0:
                continue
            # Reanalyze the time around the trigger in each detector
            node = SingleTemplateExecutable(workflow.cp, 'single_template',
                                            ifos=[ifo], out_dir=out_dir,
                                            tags=[tag] + tags).create_node()
            if use_exact_inj_params:
                node.add_opt('--use-params-of-closest-injection')
            else:
                node.add_opt('--mass1', "%.6f" % params['mass1'])
                node.add_opt('--mass2', "%.6f" % params['mass2'])
                node.add_opt('--spin1z',"%.6f" % params['spin1z'])
                node.add_opt('--spin2z',"%.6f" % params['spin2z'])
                node.add_opt('--template-start-frequency',
                             "%.6f" % params['f_lower'])
                # Is this precessing?
                if 'u_vals' in params or 'u_vals_%s' % ifo in params:
                    node.add_opt('--spin1x',"%.6f" % params['spin1x'])
                    node.add_opt('--spin1y',"%.6f" % params['spin1y'])
                    node.add_opt('--spin2x',"%.6f" % params['spin2x'])
                    node.add_opt('--spin2y',"%.6f" % params['spin2y'])
                    node.add_opt('--inclination',"%.6f" % params['inclination'])
                    try:
                        node.add_opt('--u-val', "%.6f" % params['u_vals'])
                    except KeyError:
                        node.add_opt('--u-val',
                                     "%.6f" % params['u_vals_%s' % ifo])

            # str(numpy.float64) restricts to 2 decimal places, so format
            # explicitly. BE CAREFUL WITH THIS!!!
            str_trig_time = '%.6f' %(params[ifo + '_end_time'])
            node.add_opt('--trigger-time', str_trig_time)
            node.add_input_opt('--inspiral-segments', segs)
            if inj_file is not None:
                node.add_input_opt('--injection-file', inj_file)
            node.add_opt('--data-read-name', data_read_name)
            node.add_opt('--data-analyzed-name', analyzed_name)
            node.new_output_file_opt(workflow.analysis_time, '.hdf',
                                     '--output-file', store_file=False)
            data = node.output_files[0]
            workflow += node
            # Make the plot for this trigger and detector
            node = PlotExecutable(workflow.cp, name, ifos=[ifo],
                              out_dir=out_dir, tags=[tag] + tags).create_node()
            node.add_input_opt('--single-template-file', data)
            node.new_output_file_opt(workflow.analysis_time, '.png',
                                     '--output-file')
            title="'%s SNR and chi^2 timeseries" %(ifo)
            if params_str is not None:
                title+= " using %s" %(params_str)
            title+="'"
            node.add_opt('--plot-title', title)
            caption = "'The SNR and chi^2 timeseries around the injection"
            if params_str is not None:
                caption += " using %s" %(params_str)
            if use_exact_inj_params:
                caption += ". The injection itself was used as the template.'"
            else:
                caption += ". The template used has the following parameters: "
                caption += "mass1=%s, mass2=%s, spin1z=%s, spin2z=%s'"\
                       %(params['mass1'], params['mass2'], params['spin1z'],
                         params['spin2z'])
            node.add_opt('--plot-caption', caption)
            workflow += node
            files += node.output_files
    return files
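An illustrative params dictionary for an aligned-spin template; the keys
follow the docstring and the code above, the numbers are made up:

    # Hypothetical input for make_single_template_plots.
    params = {'mass1': 1.4, 'mass2': 1.4,
              'spin1z': 0.0, 'spin2z': 0.0,
              'f_lower': 30.0,
              'H1_end_time': 1126259462.42,
              'L1_end_time': 1126259462.41}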
Example #30
0
def setup_psd_pregenerated(workflow, tags=None):
    '''
    Setup CBC workflow to use pregenerated psd files.
    The file given in cp.get_opt_tags('workflow-psd',
    'psd-pregenerated-file', tags), or the per-ifo variant
    'psd-pregenerated-file-<ifo>', will be used as the --psd-file argument
    to geom_nonspinbank, geom_aligned_bank and pycbc_plot_psd_file.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        An instanced class that manages the constructed workflow.
    tags : list of strings
        If given these tags are used to uniquely name and identify output files
        that would be produced in multiple calls to this function.

    Returns
    --------
    psd_files : pycbc.workflow.core.FileList
        The FileList holding the PSD files
    '''
    if tags is None:
        tags = []
    psd_files = FileList([])

    cp = workflow.cp
    global_seg = workflow.analysis_time
    user_tag = "PREGEN_PSD"

    # Check for one psd for all ifos
    try:
        pre_gen_file = cp.get_opt_tags('workflow-psd',
                        'psd-pregenerated-file', tags)
        pre_gen_file = resolve_url(pre_gen_file)
        file_url = urlparse.urljoin('file:',
                                     urllib.pathname2url(pre_gen_file))
        curr_file = File(workflow.ifos, user_tag, global_seg, file_url,
                                                    tags=tags)
        curr_file.PFN(file_url, site='local')
        psd_files.append(curr_file)
    except ConfigParser.Error:
        # Check for one psd per ifo
        for ifo in workflow.ifos:
            try:
                pre_gen_file = cp.get_opt_tags('workflow-psd',
                                'psd-pregenerated-file-%s' % ifo.lower(),
                                tags)
                pre_gen_file = resolve_url(pre_gen_file)
                file_url = urlparse.urljoin('file:',
                                             urllib.pathname2url(pre_gen_file))
                curr_file = File(ifo, user_tag, global_seg, file_url,
                                                            tags=tags)
                curr_file.PFN(file_url, site='local')
                psd_files.append(curr_file)

            except ConfigParser.Error:
                # It's unlikely, but not impossible, that only some ifos
                # will have pregenerated PSDs
                logging.warn("No psd file specified for IFO %s." % (ifo,))
                pass

    return psd_files
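A hedged configuration fragment matching the lookups above; the paths are
placeholders:

    # Hypothetical [workflow-psd] fragment; either one PSD for all ifos,
    # or one option per (lower-cased) ifo name.
    #
    #   [workflow-psd]
    #   psd-pregenerated-file = /path/to/ALL-PSD.txt
    #   ; or, per detector:
    #   ; psd-pregenerated-file-h1 = /path/to/H1-PSD.txt
    #   ; psd-pregenerated-file-l1 = /path/to/L1-PSD.txt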