Example #1
    def add_node(self, node):
        """ Add a node to this workflow

        This function adds nodes to the workflow. It also determines
        parent/child relations from the DataStorage inputs to this job.

        Parameters
        ----------
        node : pycbc.workflow.pegasus_workflow.Node
            A node that should be executed as part of this workflow.
        """
        node._finalize()
        node.in_workflow = self

        # Record the executable that this node uses
        if not node.executable.in_workflow:
            for exe in self._executables:
                if node.executable.is_same_as(exe):
                    node.executable.in_workflow = True
                    node._dax_node.name = exe.logical_name
                    node.executable.logical_name = exe.logical_name
                    break
            else:
                node.executable.in_workflow = True
                self._executables += [node.executable]

        # Add the node itself
        self._adag.addJob(node._dax_node)

        # Determine the parent child relationships based on the inputs that
        # this node requires.
        added_nodes = []
        for inp in node._inputs:
            if inp.node is not None and inp.node.in_workflow == self:
                if inp.node not in added_nodes:
                    parent = inp.node._dax_node
                    child = node._dax_node
                    dep = dax.Dependency(parent=parent, child=child)
                    self._adag.addDependency(dep)
                    added_nodes.append(inp.node)

            elif inp.node is not None and not inp.node.in_workflow:
                raise ValueError('Parents of this node must be added to the '
                                 'workflow first.')

            elif inp.node is None and not inp.workflow_input:
                self._inputs += [inp]
                inp.workflow_input = True

            elif (inp.node is not None and inp.node.in_workflow != self
                  and inp not in self._inputs):
                self._inputs += [inp]
                self._external_workflow_inputs += [inp]

        # Record the outputs that this node generates
        self._outputs += node._outputs

        return self
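
A minimal usage sketch for the add_node above (hedged: the Workflow and Executable setup is assumed to exist elsewhere; only calls that appear in these examples are used, and the '.hdf' extension is illustrative):

    # Assumed: `workflow` is a configured pycbc.workflow Workflow and
    # `exe` is an Executable whose jobs each produce one output file.
    parent = exe.create_node()
    parent.new_output_file_opt(workflow.analysis_time, '.hdf', '--output-file')
    workflow.add_node(parent)

    child = exe.create_node()
    # Using the parent's output as an input is what creates the DAX edge:
    child.add_input_opt('--input-file', parent.output_files[0])
    workflow.add_node(child)  # add_node wires the parent/child dependency

Adding the child before its parent would instead raise the ValueError above, since parents must enter the workflow first.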
Example #2
def make_results_web_page(workflow, results_dir, explicit_dependencies=None):
    template_path = 'templates/orange.html'

    out_dir = workflow.cp.get('results_page', 'output-path')
    makedir(out_dir)
    node = PlotExecutable(workflow.cp, 'results_page', ifos=workflow.ifos,
                out_dir=out_dir).create_node()
    node.add_opt('--plots-dir', results_dir)
    node.add_opt('--template-file', template_path)
    workflow += node
    if explicit_dependencies is not None:
        import Pegasus.DAX3 as dax  # deferred: only needed for explicit edges
        for dep in explicit_dependencies:
            dax_dep = dax.Dependency(parent=dep._dax_node,
                                     child=node._dax_node)
            workflow._adag.addDependency(dax_dep)
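
A hedged usage sketch (assumes workflow.cp has a [results_page] section and that plot_node_a and plot_node_b were already added to the workflow; names are illustrative):

    # The explicit dependencies force the results page job to wait on these
    # nodes even though it takes no File input from them.
    make_results_web_page(workflow, 'plots/',
                          explicit_dependencies=[plot_node_a, plot_node_b])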
Example #3
    def _make_root_dependency(self, inp):
        def root_path(v):
            path = [v]
            while v.in_workflow:
                path += [v.in_workflow]
                v = v.in_workflow
            return path

        workflow_root = root_path(self)
        input_root = root_path(inp)
        common = None
        for step in workflow_root:
            if step in input_root:
                common = step
                break
        if common is None:
            raise ValueError('No common parent workflow found')
        dep = dax.Dependency(
            parent=input_root[input_root.index(common) - 1].as_job,
            child=workflow_root[workflow_root.index(common) - 1].as_job)
        common._adag.addDependency(dep)

    def add_node(self, node):
        """ Add a node to this workflow
        
        This function adds nodes to the workflow. It also determines
        parent/child relations from the DataStorage inputs to this job. 
        
        Parameters
        ----------
        node : Node
            A node that should be executed as part of this workflow.
        """
        node._finalize()
        node.in_workflow = True
        self._adag.addJob(node._dax_node)

        # Determine the parent child relationships based on the inputs that
        # this node requires.
        for inp in node._inputs:
            if inp.node is not None and inp.node.in_workflow:
                parent = inp.node._dax_node
                child = node._dax_node
                dep = dax.Dependency(parent=parent, child=child)
                self._adag.addDependency(dep)

            elif inp.node is not None and not inp.node.in_workflow:
                raise ValueError('Parents of this node must be added to the '
                                 'workflow first.')

            elif inp.node is None and inp.workflow_input is False:
                self._inputs += [inp]
                inp.workflow_input = True

        # Record the outputs that this node generates
        self._outputs += node._outputs

        # Record the executable that this node uses
        if not node.executable.in_workflow:
            node.executable.in_workflow = True
            self._executables += [node.executable]

        return self
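
The common-ancestor walk in _make_root_dependency can be exercised without Pegasus. This self-contained sketch mirrors root_path and the index arithmetic; the Item class is a hypothetical stand-in for nodes and sub-workflows:

    class Item(object):
        """Stand-in with the same in_workflow back-pointer as above."""
        def __init__(self, name, in_workflow=None):
            self.name = name
            self.in_workflow = in_workflow

    def root_path(v):
        # Walk the in_workflow chain up to the top-level workflow.
        path = [v]
        while v.in_workflow:
            path += [v.in_workflow]
            v = v.in_workflow
        return path

    top = Item('top')
    sub_a = Item('sub_a', top)
    sub_b = Item('sub_b', top)
    node = Item('node', sub_a)
    inp = Item('inp', sub_b)

    wf_root = root_path(node)  # [node, sub_a, top]
    in_root = root_path(inp)   # [inp, sub_b, top]
    common = next(s for s in wf_root if s in in_root)  # top
    # The edge is drawn one level below the common ancestor on each side:
    parent = in_root[in_root.index(common) - 1]  # sub_b
    child = wf_root[wf_root.index(common) - 1]   # sub_a
    print('%s -> %s' % (parent.name, child.name))  # sub_b -> sub_a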
Example #5
def setup_foreground_minifollowups(workflow, coinc_file, single_triggers,
                       tmpltbank_file, insp_segs, insp_data_name,
                       insp_anal_name, dax_output, out_dir, tags=None):
    """ Create plots that followup the Nth loudest coincident injection
    from a statmap produced HDF file.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    coinc_file: pycbc.workflow.File
        The file associated with coincident triggers.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
       The segment file containing the data read and analyzed by each inspiral
       job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    dax_output: path
        The directory that will contain the output dax file
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-minifollowups'):
        logging.info('There is no [workflow-minifollowups] section in '
                     'configuration file')
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + '/' + '_'.join(tags) +
                                  'foreground_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(urljoin('file:', pathname2url(config_path)), site='local')

    exe = Executable(workflow.cp, 'foreground_minifollowup',
                     ifos=workflow.ifos, out_dir=dax_output, tags=tags)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--statmap-file', coinc_file)
    node.add_multiifo_input_list_opt('--single-detector-triggers',
                                     single_triggers)
    node.add_input_opt('--inspiral-segments', insp_segs)
    node.add_opt('--inspiral-data-read-name', insp_data_name)
    node.add_opt('--inspiral-data-analyzed-name', insp_anal_name)
    if tags:
        node.add_list_opt('--tags', tags)
    node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file')
    node.new_output_file_opt(workflow.analysis_time, '.dax.map', '--output-map')
    node.new_output_file_opt(workflow.analysis_time, '.tc.txt',
                             '--transformation-catalog')

    name = node.output_files[0].name
    map_file = node.output_files[1]
    tc_file = node.output_files[2]

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    # determine if a staging site has been specified
    try:
        staging_site = workflow.cp.get('workflow-foreground_minifollowups',
                                       'staging-site')
    except Exception:
        staging_site = None

    job = dax.DAX(fil)
    job.addArguments('--basename %s'
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file,
                                staging_site=staging_site)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
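
A call sketch for the foreground minifollowups (all file objects are assumed to come from earlier workflow stages; the segment-list names are illustrative):

    setup_foreground_minifollowups(workflow, statmap_file, sngl_trig_files,
                                   bank_file, insp_segs_file,
                                   insp_data_name='DATA_READ',
                                   insp_anal_name='DATA_ANALYZED',
                                   dax_output='daxes/',
                                   out_dir='plots/foreground_followup/')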
Example #6
def setup_single_det_minifollowups(workflow, single_trig_file, tmpltbank_file,
                                   insp_segs, insp_data_name, insp_anal_name,
                                   dax_output, out_dir, veto_file=None,
                                   veto_segment_name=None, statfiles=None,
                                   tags=None):
    """ Create plots that followup the Nth loudest clustered single detector
    triggers from a merged single detector trigger HDF file.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    single_trig_file: pycbc.workflow.File
        The File class holding the single detector triggers.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
       The segment file containing the data read by each inspiral job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    dax_output: path
        The directory that will contain the output dax file
    out_dir: path
        The directory to store minifollowups result plots and files
    statfiles: FileList (optional, default=None)
        Supplementary files necessary for computing the single-detector
        statistic.
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-sngl_minifollowups'):
        msg = 'There is no [workflow-sngl_minifollowups] section in '
        msg += 'configuration file'
        logging.info(msg)
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    curr_ifo = single_trig_file.ifo
    config_path = os.path.abspath(dax_output + '/' + curr_ifo + \
                                   '_'.join(tags) + 'singles_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(urljoin('file:', pathname2url(config_path)), site='local')

    exe = Executable(workflow.cp, 'singles_minifollowup',
                     ifos=curr_ifo, out_dir=dax_output, tags=tags)

    wikifile = curr_ifo + '_'.join(tags) + 'loudest_table.txt'

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--single-detector-file', single_trig_file)
    node.add_input_opt('--inspiral-segments', insp_segs)
    node.add_opt('--inspiral-data-read-name', insp_data_name)
    node.add_opt('--inspiral-data-analyzed-name', insp_anal_name)
    node.add_opt('--instrument', curr_ifo)
    node.add_opt('--wiki-file', wikifile)
    if veto_file is not None:
        assert veto_segment_name is not None
        node.add_input_opt('--veto-file', veto_file)
        node.add_opt('--veto-segment-name', veto_segment_name)
    if statfiles:
        statfiles = statfiles.find_output_with_ifo(curr_ifo)
        node.add_input_list_opt('--statistic-files', statfiles)
    node.new_output_file_opt(workflow.analysis_time, '.dax', '--output-file')
    node.new_output_file_opt(workflow.analysis_time, '.dax.map',
                             '--output-map')
    node.new_output_file_opt(workflow.analysis_time, '.tc.txt',
                             '--transformation-catalog')

    name = node.output_files[0].name
    map_file = node.output_files[1]
    tc_file = node.output_files[2]

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    # determine if a staging site has been specified
    try:
        staging_site = workflow.cp.get('workflow-sngl_minifollowups',
                                       'staging-site')
    except Exception:
        staging_site = None

    job = dax.DAX(fil)
    job.addArguments('--basename %s' \
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file,
                                staging_site=staging_site)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
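
A call sketch for the single-detector variant (illustrative names; the veto options and statfiles are optional):

    setup_single_det_minifollowups(workflow, h1_trig_file, bank_file,
                                   insp_segs_file, 'DATA_READ',
                                   'DATA_ANALYZED', 'daxes/',
                                   'plots/h1_followup/',
                                   veto_file=veto_file,
                                   veto_segment_name='CAT1_VETO',
                                   tags=['H1'])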
Example #7
def setup_injection_minifollowups(workflow,
                                  injection_file,
                                  inj_xml_file,
                                  single_triggers,
                                  tmpltbank_file,
                                  insp_segs,
                                  insp_seg_name,
                                  dax_output,
                                  out_dir,
                                  tags=None):
    """ Create plots that followup the closest missed injections
    
    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    injection_file: pycbc.workflow.File
        The file containing the injection results.
    inj_xml_file: pycbc.workflow.File
        The XML file describing the injection set.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: dict
        A dictionary, keyed by ifo name, of the data read by each inspiral job.
    insp_seg_name: str
        The name of the segmentlist to read from the inspiral segment file
    dax_output: path
        The directory that will contain the output dax file
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables
    
    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the 
        minifollowups plots.
    """
    logging.info('Entering injection minifollowups module')

    if not workflow.cp.has_section('workflow-injection_minifollowups'):
        logging.info(
            'There is no [workflow-injection_minifollowups] section in configuration file'
        )
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + '/' + '_'.join(tags) +
                                  'injection_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, 'local')

    exe = Executable(workflow.cp,
                     'injection_minifollowup',
                     ifos=workflow.ifos,
                     out_dir=dax_output)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--injection-file', injection_file)
    node.add_input_opt('--injection-xml-file', inj_xml_file)
    node.add_multiifo_input_list_opt('--single-detector-triggers',
                                     single_triggers)
    node.add_multiifo_input_list_opt('--inspiral-segments', insp_segs.values())
    node.add_opt('--inspiral-segment-name', insp_seg_name)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax',
                             '--output-file',
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax.map',
                             '--output-map',
                             tags=tags)

    name = node.output_files[0].name
    map_loc = node.output_files[1].name

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    job = dax.DAX(fil)
    job.addArguments('--basename %s' %
                     os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_loc)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving injection minifollowups module')
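
A call sketch for the injection variant; note that insp_segs here is a dict keyed by ifo name, unlike the SegFile taken by the variants above (names are illustrative):

    setup_injection_minifollowups(workflow, inj_results_file, inj_xml_file,
                                  sngl_trig_files, bank_file,
                                  {'H1': h1_segs, 'L1': l1_segs},
                                  'DATA_ANALYZED', 'daxes/',
                                  'plots/injection_followup/')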
Example #8
def setup_foreground_inference(workflow,
                               coinc_file,
                               single_triggers,
                               tmpltbank_file,
                               insp_segs,
                               insp_data_name,
                               insp_anal_name,
                               dax_output,
                               out_dir,
                               tags=None):
    """ Creates workflow node that will run the inference workflow.

    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    coinc_file: pycbc.workflow.File
        The file associated with coincident triggers.
    single_triggers: list of pycbc.workflow.File
        A list containing the file objects associated with the merged
        single detector trigger files for each ifo.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: SegFile
       The segment file containing the data read and analyzed by each inspiral
       job.
    insp_data_name: str
        The name of the segmentlist storing data read.
    insp_anal_name: str
        The name of the segmentlist storing data analyzed.
    dax_output : path
        The directory that will contain the output dax file
    out_dir: path
        The directory to store inference result plots and files
    tags: {None, optional}
        Tags to add to the inference executables
    """

    logging.info("Entering inference module")

    # check if configuration file has inference section
    if not workflow.cp.has_section("workflow-inference"):
        logging.info(
            "There is no [workflow-inference] section in configuration file")
        logging.info("Leaving inference module")
        return

    # default tags is a list
    tags = [] if tags is None else tags

    # make the directory that will contain the dax file
    makedir(dax_output)

    # turn the config file into a File class
    config_path = os.path.abspath(dax_output + "/" + "_".join(tags) \
                                        + "foreground_inference.ini")
    workflow.cp.write(open(config_path, "w"))
    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, "local")

    # create an Executable for the inference workflow generator
    exe = Executable(workflow.cp,
                     "foreground_inference",
                     ifos=workflow.ifos,
                     out_dir=dax_output)

    # create the node that will run in the workflow
    node = exe.create_node()
    node.add_input_opt("--config-files", config_file)
    node.add_input_opt("--bank-file", tmpltbank_file)
    node.add_input_opt("--statmap-file", coinc_file)
    node.add_multiifo_input_list_opt("--single-detector-triggers",
                                     single_triggers)
    node.new_output_file_opt(workflow.analysis_time,
                             ".dax",
                             "--output-file",
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             ".dax.map",
                             "--output-map",
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             ".tc.txt",
                             "--transformation-catalog",
                             tags=tags)

    # get dax name and use it for the workflow name
    name = node.output_files[0].name
    node.add_opt("--workflow-name", name)

    # get output map name and use it for the output dir name
    map_file = node.output_files[1]
    node.add_opt("--output-dir", out_dir)

    # get the transformation catalog name
    tc_file = node.output_files[2]

    # add this node to the workflow
    workflow += node

    # create job for dax that will run a sub-workflow
    # and add it to the workflow
    fil = node.output_files[0]
    job = dax.DAX(fil)
    job.addArguments("--basename %s" %
                     os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_file, tc_file)
    workflow._adag.addJob(job)

    # make dax a child of the inference workflow generator node
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)

    logging.info("Leaving inference module")
Example #9
def setup_single_det_minifollowups(workflow,
                                   single_trig_file,
                                   tmpltbank_file,
                                   insp_segs,
                                   insp_seg_name,
                                   dax_output,
                                   out_dir,
                                   veto_file=None,
                                   veto_segment_name=None,
                                   tags=None):
    """ Create plots that followup the Nth loudest clustered single detector
    triggers from a merged single detector trigger HDF file.
    
    Parameters
    ----------
    workflow: pycbc.workflow.Workflow
        The core workflow instance we are populating
    single_trig_file: pycbc.workflow.File
        The File class holding the single detector triggers.
    tmpltbank_file: pycbc.workflow.File
        The file object pointing to the HDF format template bank
    insp_segs: dict
        A dictionary, keyed by ifo name, of the data read by each inspiral job.
    insp_seg_name: str
        The name of the segmentlist to read from the inspiral segment file
    dax_output: path
        The directory that will contain the output dax file
    out_dir: path
        The directory to store minifollowups result plots and files
    tags: {None, optional}
        Tags to add to the minifollowups executables

    Returns
    -------
    layout: list
        A list of tuples which specify the displayed file layout for the 
        minifollowups plots.
    """
    logging.info('Entering minifollowups module')

    if not workflow.cp.has_section('workflow-minifollowups'):
        msg = 'There is no [workflow-minifollowups] section in '
        msg += 'configuration file'
        logging.info(msg)
        logging.info('Leaving minifollowups')
        return

    tags = [] if tags is None else tags
    makedir(dax_output)

    # turn the config file into a File class
    curr_ifo = single_trig_file.ifo
    config_path = os.path.abspath(dax_output + '/' + curr_ifo + \
                                   '_'.join(tags) + 'singles_minifollowup.ini')
    workflow.cp.write(open(config_path, 'w'))

    config_file = wdax.File(os.path.basename(config_path))
    config_file.PFN(config_path, 'local')

    exe = Executable(workflow.cp,
                     'singles_minifollowup',
                     ifos=curr_ifo,
                     out_dir=dax_output)

    node = exe.create_node()
    node.add_input_opt('--config-files', config_file)
    node.add_input_opt('--bank-file', tmpltbank_file)
    node.add_input_opt('--single-detector-file', single_trig_file)
    node.add_input_opt('--inspiral-segments', insp_segs[curr_ifo])
    node.add_opt('--inspiral-segment-name', insp_seg_name)
    node.add_opt('--instrument', curr_ifo)
    if veto_file is not None:
        assert veto_segment_name is not None
        node.add_input_opt('--veto-file', veto_file)
        node.add_opt('--veto-segment-name', veto_segment_name)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax',
                             '--output-file',
                             tags=tags)
    node.new_output_file_opt(workflow.analysis_time,
                             '.dax.map',
                             '--output-map',
                             tags=tags)

    name = node.output_files[0].name
    map_loc = node.output_files[1].name

    node.add_opt('--workflow-name', name)
    node.add_opt('--output-dir', out_dir)

    workflow += node

    # execute this in a sub-workflow
    fil = node.output_files[0]

    job = dax.DAX(fil)
    job.addArguments('--basename %s' \
                     % os.path.splitext(os.path.basename(name))[0])
    Workflow.set_job_properties(job, map_loc)
    workflow._adag.addJob(job)
    dep = dax.Dependency(parent=node._dax_node, child=job)
    workflow._adag.addDependency(dep)
    logging.info('Leaving minifollowups module')
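
A call sketch for this older revision, which takes a per-ifo dict of segments and a single segment-list name (illustrative names):

    setup_single_det_minifollowups(workflow, l1_trig_file, bank_file,
                                   {'L1': l1_segs}, 'DATA_ANALYZED',
                                   'daxes/', 'plots/l1_followup/')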
Example #10
def setup_postproc_coh_PTF_workflow(workflow,
                                    trig_files,
                                    trig_cache,
                                    inj_trig_files,
                                    inj_files,
                                    inj_trig_caches,
                                    inj_caches,
                                    config_file,
                                    output_dir,
                                    html_dir,
                                    segment_dir,
                                    ifos,
                                    inj_tags=None,
                                    tags=None):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    Parameters
    ----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList containing the combined databases.

    Returns
    -------
    pp_outs : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    if inj_tags is None:
        inj_tags = []
    if tags is None:
        tags = []
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_exe = os.path.basename(cp.get("executables",
                                                "trig_combiner"))
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_exe = os.path.basename(cp.get("executables", "trig_cluster"))
    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_exe = os.path.basename(cp.get("executables", "sbv_plotter"))
    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")

    efficiency_exe = os.path.basename(cp.get("executables", "efficiency"))
    efficiency_class = select_generic_executable(workflow, "efficiency")
    """
    horizon_dist_exe = os.path.basename(cp.get("executables",
                                               "horizon_dist"))
    horizon_dist_class = select_generic_executable(workflow,
                                                   "horizon_dist")
    """
    html_summary_exe = os.path.basename(cp.get("executables", "html_summary"))
    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    trig_combiner_jobs = trig_combiner_class(cp,
                                             "trig_combiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)
    trig_combiner_node, trig_combiner_outs = trig_combiner_jobs.create_node(\
            trig_files, segment_dir, out_tags=trig_combiner_out_tags,
            tags=tags)
    pp_nodes.append(trig_combiner_node)
    workflow.add_node(trig_combiner_node)
    pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp,
                                           "trig_cluster",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp,
                                         "injfinder",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp,
                                             "injcombiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos,
                        "foundmissed",
                        full_segment,
                        extension="lcf",
                        directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([file for file in injfinder_outs \
                                     if "DETECTION" in file.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [injcombiner_outs[0].tag_str.rsplit('_', 1)[0]]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag[:4]
            inputs = FileList([file for file in injfinder_outs \
                               if injcombiner_tag in file.tagged_description])
            #                   if any(tag in file.tagged_description \
            #                          for tag in injcombiner_tags)])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s" % (inj_str, max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp,
                                               "inj_efficiency",
                                               ifo=ifos,
                                               out_dir=output_dir,
                                               tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp,
                                         "sbv_plotter",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp,
                                       "efficiency",
                                       ifo=ifos,
                                       out_dir=output_dir,
                                       tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [file for file in trig_combiner_outs \
                        if out_tag in file.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(
                        clust_file,
                        segment_dir,
                        inj_file=curr_injs,
                        tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(
                                parent=parent_node._dax_node,
                                child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

            # Also add sbv_plotter job for unclustered triggers
            sbv_plotter_node = sbv_plotter_jobs.create_node(
                unclust_file, segment_dir, tags=[out_tag, "_unclustered"])
            sbv_plotter_node.set_memory(1300)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)
        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                                                          offsource_clustered,
                                                          segment_dir,
                                                          tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if \
                                       str(tag).replace("_FILTERED_", "") \
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)

                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag,
                                                           inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(
                            parent=injcombiner_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(
                            parent=injfinder_node._dax_node,
                            child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for trials
    trial = 1

    while trial <= num_trials:
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [file for file in trig_combiner_outs \
                        if trial_tag in file.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

        trial += 1

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp,
                                           "html_summary",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file,
            tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags,
            html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    open_box_cmd = html_summary_node.executable.get_pfn() + " "
    open_box_cmd += ' '.join(html_summary_node._args + \
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    f = open(open_box_path, "w")
    f.write("#!/bin/sh\n%s" % open_box_cmd)
    f.close()
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
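
A call sketch for the coh_PTF post-processing stage (every argument is assumed to be produced by the preceding matched-filter and injection stages):

    pp_files = setup_postproc_coh_PTF_workflow(
        workflow, trig_files, trig_cache, inj_trig_files, inj_files,
        inj_trig_caches, inj_caches, config_file, output_dir, html_dir,
        segment_dir, ifos, inj_tags=inj_tags, tags=['POSTPROC'])
    # pp_files is a FileList of everything the stage produced.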
Example #11
def setup_postproc_coh_PTF_online_workflow(workflow, trig_files, trig_cache,
        inj_trig_files, inj_files, inj_trig_caches, inj_caches, config_file,
        output_dir, html_dir, segment_dir, segs_plot, ifos, inj_tags=None,
        tags=None):
    """
    This module sets up a stripped down post-processing stage for the online
    workflow, using a coh_PTF style set up. This consists of running
    trig_combiner to find coherent triggers, and trig_cluster to cluster them.
    This process may be done in two stages to reduce memory requirements. It
    also runs injfinder to look for injections, and injcombiner to calculate
    injection statistics. Finally, efficiency and sbv_plotter jobs calculate
    efficiency and signal based veto statistics and make plots.
    
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the on/off source analysis jobs.
    trig_cache : pycbc.workflow.core.File
        A cache file pointing to the trigger files.
    inj_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files produced by injection jobs.
    inj_files : pycbc.workflow.core.FileList
        A FileList of the injection set files.
    inj_trig_caches : pycbc.workflow.core.FileList
        A FileList containing the cache files that point to the injection
        trigger files.
    inj_caches : pycbc.workflow.core.FileList
        A FileList containing cache files that point to the injection files.
    config_file : pycbc.workflow.core.File
        The parsed configuration file.
    output_dir : path
        The directory in which output files will be stored.
    html_dir : path
        The directory where the result webpage will be placed.
    segment_dir : path
        The directory in which data segment information is stored.
    segs_plot : pycbc.workflow.core.File
        The plot showing the analysis segments for each IFO around the GRB time.
        This is produced at the time of workflow generation.
    ifos : list
        A list containing the analysis interferometers.
    inj_tags : list
        List containing the strings used to uniquely identify the injection
        sets included in the analysis.
    tags : list of strings (optional, default = [])
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    --------
    pp_outs : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    if inj_tags is None:
        inj_tags = []
    if tags is None:
        tags = []
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")
    
    efficiency_class = select_generic_executable(workflow, "efficiency")

    #horizon_dist_class = select_generic_executable(workflow, "horizon_dist")

    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up trig_combiner job
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    if all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
            cp.has_option_tag("inspiral", "do-short-slides",
                              "coherent_no_injections"):
        trig_combiner_out_tags.extend(["ZEROLAG_OFF", "ZEROLAG_ALL"])

    trig_combiner_jobs = trig_combiner_class(cp, "trig_combiner", ifo=ifos, 
                                             out_dir=output_dir, tags=tags)

    # Do first stage of trig_combiner and trig_cluster jobs if desired
    if workflow.cp.has_option("workflow-postproc", "do-two-stage-clustering"):
        logging.info("Doing two-stage clustering.")
        trig_combiner_s1_jobs = trig_combiner_class(cp, "trig_combiner",
                ifo=ifos, out_dir=output_dir, tags=tags+["INTERMEDIATE"])

        num_stage_one_jobs = int(workflow.cp.get("workflow-postproc",
            "num-stage-one-cluster-jobs"))
        # Ceiling division: spread the input files evenly over the jobs
        num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)
        split_trig_files = (trig_files[p:p + num_inputs_per_job] for p in
                            range(0, len(trig_files), num_inputs_per_job))
        trig_cluster_s1_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                out_dir=output_dir, tags=tags+["INTERMEDIATE"])
        trig_cluster_s1_nodes = []
        trig_cluster_s1_outs = FileList([])
        for j, s1_inputs in zip(range(num_stage_one_jobs), split_trig_files):
            trig_combiner_s1_node, trig_combiner_s1_outs = \
                    trig_combiner_s1_jobs.create_node(s1_inputs,
                            segment_dir, workflow.analysis_time,
                            out_tags=trig_combiner_out_tags, tags=tags+[str(j)])
            pp_nodes.append(trig_combiner_s1_node)
            workflow.add_node(trig_combiner_s1_node)

            unclust_file = [f for f in trig_combiner_s1_outs \
                            if "ALL_TIMES" in f.tag_str][0]
            trig_cluster_s1_node, curr_outs = trig_cluster_s1_jobs.create_node(\
                    unclust_file)
            trig_cluster_s1_outs.extend(curr_outs)
            clust_file = curr_outs[0]
            trig_cluster_s1_node.set_memory(1300)
            trig_cluster_s1_nodes.append(trig_cluster_s1_node)
            pp_nodes.append(trig_cluster_s1_node)
            workflow.add_node(trig_cluster_s1_node)
            dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                 child=trig_cluster_s1_node._dax_node)
            workflow._adag.addDependency(dep)

        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                        segment_dir, workflow.analysis_time,
                        out_tags=trig_combiner_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)
        for trig_cluster_s1_node in trig_cluster_s1_nodes:
            dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                 child=trig_combiner_node._dax_node)
            workflow._adag.addDependency(dep)

    else:
        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_files, segment_dir,
                        workflow.analysis_time, out_tags=trig_combiner_out_tags,
                        tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp, "trig_cluster", ifo=ifos,
                                           out_dir=output_dir, tags=tags)

    # Set up injfinder jobs
    if cp.has_section("workflow-injections"):
        injfinder_nodes = []
        injcombiner_parent_nodes = []
        inj_sbv_plotter_parent_nodes = []

        injfinder_exe = os.path.basename(cp.get("executables", "injfinder"))
        injfinder_class = select_generic_executable(workflow, "injfinder")
        injfinder_jobs = injfinder_class(cp, "injfinder", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

        injcombiner_exe = os.path.basename(cp.get("executables",
                                                  "injcombiner"))
        injcombiner_class = select_generic_executable(workflow, "injcombiner")
        injcombiner_jobs = injcombiner_class(cp, "injcombiner", ifo=ifos,
                                             out_dir=output_dir, tags=tags)

        injfinder_outs = FileList([])
        for inj_tag in inj_tags:
            triggers = FileList([file for file in inj_trig_files \
                                 if inj_tag in file.tag_str])
            injections = FileList([file for file in inj_files \
                                   if inj_tag in file.tag_str])
            trig_cache = [file for file in inj_trig_caches \
                          if inj_tag in file.tag_str][0]
            inj_cache = [file for file in inj_caches \
                         if inj_tag in file.tag_str][0]
            injfinder_node, curr_outs = injfinder_jobs.create_node(\
                    triggers, injections, segment_dir, tags=[inj_tag])
            injfinder_nodes.append(injfinder_node)
            pp_nodes.append(injfinder_node)
            workflow.add_node(injfinder_node)
            injfinder_outs.extend(curr_outs)
            if "DETECTION" not in curr_outs[0].tagged_description:
                injcombiner_parent_nodes.append(injfinder_node)
            else:
                inj_sbv_plotter_parent_nodes.append(injfinder_node)

        pp_outs.extend(injfinder_outs)

        # Make injfinder output cache
        fm_cache = File(ifos, "foundmissed", full_segment,
                        extension="lcf", directory=output_dir)
        fm_cache.PFN(fm_cache.cache_entry.path, site="local")
        injfinder_outs.convert_to_lal_cache().tofile(\
                open(fm_cache.storage_path, "w"))
        pp_outs.extend(FileList([fm_cache]))

        # Set up injcombiner jobs
        injcombiner_outs = FileList([f for f in injfinder_outs \
                                     if "DETECTION" in f.tag_str])
        injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                            if "DETECTION" not in inj_tag]
        injcombiner_out_tags = [i.tag_str.rsplit('_', 1)[0] for i in \
                                injcombiner_outs if "FOUND" in i.tag_str]
        injcombiner_nodes = []

        for injcombiner_tag in injcombiner_tags:
            max_inc = cp.get_opt_tags("injections", "max-inc",
                                      [injcombiner_tag])
            inj_str = injcombiner_tag.replace("INJ", "")
            inputs = FileList([f for f in injfinder_outs \
                               if injcombiner_tag in f.tagged_description])
            injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                    fm_cache, inputs, inj_str, max_inc, workflow.analysis_time)
            injcombiner_nodes.append(injcombiner_node)
            injcombiner_out_tags.append("%s_FILTERED_%s"
                                        % (inj_str.split(max_inc)[0], max_inc))
            injcombiner_outs.extend(curr_outs)
            pp_outs.extend(curr_outs)
            pp_nodes.append(injcombiner_node)
            workflow.add_node(injcombiner_node)
            for parent_node in injcombiner_parent_nodes:
                dep = dax.Dependency(parent=parent_node._dax_node,
                                     child=injcombiner_node._dax_node)
                workflow._adag.addDependency(dep)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp, "inj_efficiency", ifo=ifos,
                                               out_dir=output_dir, tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp, "sbv_plotter", ifo=ifos,
                                         out_dir=output_dir, tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp, "efficiency", ifo=ifos,
                                       out_dir=output_dir, tags=tags)

    # Add trig_cluster jobs and their corresponding plotting jobs
    for out_tag in trig_combiner_out_tags:
        unclust_file = [f for f in trig_combiner_outs \
                        if out_tag in f.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        if out_tag != "ONSOURCE":
            # Add memory requirement for jobs with potentially large files
            trig_cluster_node.set_memory(1300)
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add sbv_plotter job
            sbv_out_tags = [out_tag, "_clustered"]
            sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                            segment_dir,
                                                            tags=sbv_out_tags)
            pp_nodes.append(sbv_plotter_node)
            workflow.add_node(sbv_plotter_node)
            dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add injection sbv_plotter nodes if appropriate
            if out_tag == "OFFSOURCE":
                offsource_clustered = clust_file
                off_node = sbv_plotter_node

            if out_tag == "OFFSOURCE" and \
                    cp.has_section("workflow-injections"):
                found_inj_files = FileList([file for file in injcombiner_outs \
                                            if "FOUND" in file.tag_str])
                for curr_injs in found_inj_files:
                    curr_tags = [tag for tag in injcombiner_out_tags \
                                 if tag in curr_injs.name]
                    curr_tags.append("_clustered")
                    sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                            segment_dir, inj_file=curr_injs, tags=curr_tags)
                    pp_nodes.append(sbv_plotter_node)
                    workflow.add_node(sbv_plotter_node)
                    dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                                         child=sbv_plotter_node._dax_node)
                    workflow._adag.addDependency(dep)
                    if "DETECTION" in curr_injs.tagged_description:
                        for parent_node in inj_sbv_plotter_parent_nodes:
                            dep = dax.Dependency(parent=parent_node._dax_node,
                                    child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)
                    else:
                        for parent_node in injcombiner_nodes:
                            dep = dax.Dependency(parent=parent_node._dax_node,
                                    child=sbv_plotter_node._dax_node)
                            workflow._adag.addDependency(dep)

        else:
            pp_nodes.append(trig_cluster_node)
            workflow.add_node(trig_cluster_node)
            dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                                 child=trig_cluster_node._dax_node)
            workflow._adag.addDependency(dep)

            # Add efficiency job for on/off
            efficiency_node = efficiency_jobs.create_node(clust_file,
                    offsource_clustered, segment_dir, tags=[out_tag])
            pp_nodes.append(efficiency_node)
            workflow.add_node(efficiency_node)
            dep = dax.Dependency(parent=off_node._dax_node,
                                 child=efficiency_node._dax_node)
            workflow._adag.addDependency(dep)

            if cp.has_section("workflow-injections"):
                for tag in injcombiner_out_tags:
                    if "_FILTERED_" in tag:
                        inj_set_tag = [t for t in inj_tags if \
                                       str(tag).replace("_FILTERED_", "") \
                                       in t][0]
                    else:
                        inj_set_tag = str(tag)
                    
                    found_file = [file for file in injcombiner_outs \
                                  if tag + "_FOUND" in file.tag_str][0]
                    missed_file = [file for file in injcombiner_outs \
                                   if tag + "_MISSED" in file.tag_str][0]
                    inj_efficiency_node = inj_efficiency_jobs.create_node(\
                            clust_file, offsource_clustered, segment_dir,
                            found_file, missed_file, tags=[out_tag, tag,
                                                           inj_set_tag])
                    pp_nodes.append(inj_efficiency_node)
                    workflow.add_node(inj_efficiency_node)
                    dep = dax.Dependency(parent=off_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                    for injcombiner_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)
                    for injfinder_node in injfinder_nodes:
                        dep = dax.Dependency(parent=injfinder_node._dax_node,
                                child=inj_efficiency_node._dax_node)
                        workflow._adag.addDependency(dep)

    # Add further trig_cluster jobs for the off-source trials
    for trial in range(1, num_trials + 1):
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [f for f in trig_combiner_outs \
                        if trial_tag in f.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                offsource_clustered, segment_dir, tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if cp.has_section("workflow-injections"):
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp, "html_summary", ifo=ifos,
                                           out_dir=output_dir, tags=tags)
    if cp.has_section("workflow-injections"):
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                tuning_tags=tuning_tags, exclusion_tags=exclusion_tags,
                seg_plot=segs_plot, html_dir=html_dir)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                seg_plot=segs_plot, html_dir=html_dir)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    try:
        open_box_cmd = html_summary_node.executable.get_pfn() + " "
    except Exception:
        # No local PFN is available: fetch the executable over http instead
        exe_path = html_summary_node.executable.get_pfn('nonlocal').replace(
                "https", "http")
        exe_name = exe_path.rsplit('/', 1)[-1]
        open_box_cmd = "wget %s\n" % exe_path
        open_box_cmd += "chmod 500 ./%s\n./%s " % (exe_name, exe_name)
    open_box_cmd += ' '.join(html_summary_node._args +
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
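A note on the wiring pattern above: each parent/child edge is added with the
same three statements around dax.Dependency. A small helper along the
following lines would collapse every repetition to one call. This is only a
sketch; "connect" is a hypothetical name, not part of PyCBC or Pegasus.

import Pegasus.DAX3 as dax

def connect(workflow, parent_node, child_node):
    """Add a parent -> child edge between two workflow nodes (sketch)."""
    dep = dax.Dependency(parent=parent_node._dax_node,
                         child=child_node._dax_node)
    workflow._adag.addDependency(dep)

# Usage, replacing each explicit three-line dependency block:
#     connect(workflow, off_node, efficiency_node)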
Example #12
def setup_postproc_coh_PTF_offline_workflow(workflow,
                                            trig_files,
                                            trig_cache,
                                            ts_trig_files,
                                            inj_trig_files,
                                            inj_files,
                                            inj_trig_caches,
                                            inj_caches,
                                            config_file,
                                            output_dir,
                                            html_dir,
                                            segment_dir,
                                            segs_plot,
                                            ifos,
                                            inj_tags=None,
                                            tags=None):
    """
    This module sets up the post-processing stage in the workflow, using a
    coh_PTF style set up. This consists of running trig_combiner to find
    coherent triggers, and injfinder to look for injections. It then runs
    a horizon_dist job, trig_cluster to cluster triggers, and injcombiner to
    calculate injection statistics. Finally, efficiency and sbv_plotter jobs
    calculate efficiency and signal based veto statistics and make plots.
    
    Parameters
    -----------
    workflow : pycbc.workflow.core.Workflow
        The Workflow instance that the coincidence jobs will be added to.
    trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the on/off source analysis jobs.
    trig_cache : pycbc.workflow.core.File
        A cache file pointing to the trigger files.
    ts_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files from the timeslide analysis jobs.
    inj_trig_files : pycbc.workflow.core.FileList
        A FileList of the trigger files produced by injection jobs.
    inj_files : pycbc.workflow.core.FileList
        A FileList of the injection set files.
    inj_trig_caches : pycbc.workflow.core.FileList
        A FileList containing the cache files that point to the injection
        trigger files.
    inj_caches : pycbc.workflow.core.FileList
        A FileList containing cache files that point to the injection files.
    config_file : pycbc.workflow.core.File
        The parsed configuration file.
    output_dir : path
        The directory in which output files will be stored.
    html_dir : path
        The directory where the result webpage will be placed.
    segment_dir : path
        The directory in which data segment information is stored.
    segs_plot : pycbc.workflow.core.File
        The plot showing the analysis segments for each IFO around the GRB time.
        This is produced at the time of workflow generation.
    ifos : list
        A list containing the analysis interferometers.
    inj_tags : list
        List containing the strings used to uniquely identify the injection
        sets included in the analysis.
    tags : list of strings (optional, default None)
        A list of the tagging strings that will be used for all jobs created
        by this call to the workflow. An example might be ['POSTPROC1'] or
        ['DENTYSNEWPOSTPROC']. This will be used in output names.

    Returns
    -------
    pp_outs : pycbc.workflow.core.FileList
        A list of the output from this stage.
    """
    if inj_tags is None:
        inj_tags = []
    if tags is None:
        tags = []
    cp = workflow.cp
    full_segment = trig_files[0].segment
    trig_name = cp.get("workflow", "trigger-name")
    grb_string = "GRB" + trig_name
    num_trials = int(cp.get("trig_combiner", "num-trials"))
    do_injections = cp.has_section("workflow-injections")

    pp_outs = FileList([])
    pp_nodes = []

    # Set up needed exe classes
    trig_combiner_class = select_generic_executable(workflow, "trig_combiner")

    trig_cluster_class = select_generic_executable(workflow, "trig_cluster")

    sbv_plotter_class = select_generic_executable(workflow, "sbv_plotter")

    efficiency_class = select_generic_executable(workflow, "efficiency")

    #horizon_dist_class = select_generic_executable(workflow, "horizon_dist")

    html_summary_class = select_generic_executable(workflow, "html_summary")

    # Set up injection jobs if desired
    if do_injections:
        workflow, injfinder_nodes, injfinder_outs, fm_cache, \
                injcombiner_nodes, injcombiner_outs, injcombiner_out_tags, \
                inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs = \
                setup_coh_PTF_injections_pp(workflow, inj_trig_files,
                        inj_files, inj_trig_caches, inj_caches, pp_nodes,
                        pp_outs, inj_tags, output_dir, segment_dir, ifos,
                        tags=tags)

        # Initialise injection_efficiency class
        inj_efficiency_jobs = efficiency_class(cp,
                                               "inj_efficiency",
                                               ifo=ifos,
                                               out_dir=output_dir,
                                               tags=tags)
    else:
        # Empty defaults so the plotting-job calls below do not raise a
        # NameError when no injection sets are being analysed
        injfinder_nodes = []
        injcombiner_nodes = []
        injcombiner_outs = FileList([])
        injcombiner_out_tags = []
        inj_sbv_plotter_parent_nodes = []
        inj_efficiency_jobs = None

    # Set up main trig_combiner class and tags
    trig_combiner_out_tags = ["OFFSOURCE", "ONSOURCE", "ALL_TIMES"]
    slides = all("COHERENT_NO_INJECTIONS" in t.name for t in trig_files) and \
            cp.has_option_tag("inspiral", "do-short-slides",
                              "coherent_no_injections")
    if slides:
        trig_combiner_out_tags.extend(["ZEROLAG_OFF", "ZEROLAG_ALL"])

    trig_combiner_jobs = trig_combiner_class(cp,
                                             "trig_combiner",
                                             ifo=ifos,
                                             out_dir=output_dir,
                                             tags=tags)

    # Do first stage of trig_combiner and trig_cluster jobs if desired
    if workflow.cp.has_option("workflow-postproc", "do-two-stage-clustering"):
        logging.info("Doing two-stage clustering.")
        trig_combiner_s1_jobs = trig_combiner_class(cp,
                                                    "trig_combiner",
                                                    ifo=ifos,
                                                    out_dir=output_dir,
                                                    tags=tags +
                                                    ["INTERMEDIATE"])

        num_stage_one_jobs = int(
            workflow.cp.get("workflow-postproc", "num-stage-one-cluster-jobs"))
        num_inputs_per_job = -(-len(trig_files) // num_stage_one_jobs)
        split_trig_files = (trig_files[p:p + num_inputs_per_job] for p in
                            range(0, len(trig_files), num_inputs_per_job))
        trig_cluster_s1_jobs = trig_cluster_class(cp,
                                                  "trig_cluster",
                                                  ifo=ifos,
                                                  out_dir=output_dir,
                                                  tags=tags + ["INTERMEDIATE"])
        trig_cluster_s1_nodes = []
        trig_cluster_s1_outs = FileList([])
        for j, s1_inputs in zip(range(num_stage_one_jobs), split_trig_files):
            trig_combiner_s1_node, trig_combiner_s1_outs = \
                    trig_combiner_s1_jobs.create_node(s1_inputs,
                            segment_dir, workflow.analysis_time,
                            out_tags=trig_combiner_out_tags, tags=tags+[str(j)])
            pp_nodes.append(trig_combiner_s1_node)
            workflow.add_node(trig_combiner_s1_node)

            unclust_file = [f for f in trig_combiner_s1_outs \
                            if "ALL_TIMES" in f.tag_str][0]
            trig_cluster_s1_node, curr_outs = trig_cluster_s1_jobs.create_node(\
                    unclust_file)
            trig_cluster_s1_outs.extend(curr_outs)
            clust_file = curr_outs[0]
            trig_cluster_s1_nodes.append(trig_cluster_s1_node)
            pp_nodes.append(trig_cluster_s1_node)
            workflow.add_node(trig_cluster_s1_node)
            dep = dax.Dependency(parent=trig_combiner_s1_node._dax_node,
                                 child=trig_cluster_s1_node._dax_node)
            workflow._adag.addDependency(dep)

        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                        segment_dir, workflow.analysis_time,
                        out_tags=trig_combiner_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)
        for trig_cluster_s1_node in trig_cluster_s1_nodes:
            dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                 child=trig_combiner_node._dax_node)
            workflow._adag.addDependency(dep)

    else:
        trig_combiner_node, trig_combiner_outs = \
                trig_combiner_jobs.create_node(trig_files, segment_dir,
                        workflow.analysis_time, out_tags=trig_combiner_out_tags,
                        tags=tags)
        pp_nodes.append(trig_combiner_node)
        workflow.add_node(trig_combiner_node)
        pp_outs.extend(trig_combiner_outs)

    # Initialise trig_cluster class
    trig_cluster_outs = FileList([])
    trig_cluster_jobs = trig_cluster_class(cp,
                                           "trig_cluster",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)

    # Initialise sbv_plotter class
    sbv_plotter_outs = FileList([])
    sbv_plotter_jobs = sbv_plotter_class(cp,
                                         "sbv_plotter",
                                         ifo=ifos,
                                         out_dir=output_dir,
                                         tags=tags)

    # Initialise efficiency class
    efficiency_outs = FileList([])
    efficiency_jobs = efficiency_class(cp,
                                       "efficiency",
                                       ifo=ifos,
                                       out_dir=output_dir,
                                       tags=tags)

    # Set up trig_cluster jobs
    trig_cluster_nodes = []
    for out_tag in trig_combiner_out_tags:
        unclust_file = [f for f in trig_combiner_outs \
                        if out_tag in f.tag_str][0]
        trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        trig_cluster_outs.extend(curr_outs)
        clust_file = curr_outs[0]
        trig_cluster_nodes.append(trig_cluster_node)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)
        # If we are not doing time slides, set up the plotting jobs now
        if ts_trig_files is None:
            if out_tag == "OFFSOURCE":
                off_node = trig_cluster_node
                offsource_clustered = clust_file

            # Add sbv_plotter and efficiency jobs
            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(
                workflow,
                unclust_file,
                clust_file,
                sbv_plotter_jobs,
                efficiency_jobs,
                inj_efficiency_jobs,
                off_node,
                trig_cluster_node,
                offsource_clustered,
                injfinder_nodes,
                injcombiner_nodes,
                injcombiner_outs,
                inj_sbv_plotter_parent_nodes,
                inj_tags,
                injcombiner_out_tags,
                pp_nodes,
                output_dir,
                segment_dir,
                ifos,
                out_tag,
                do_injs=do_injections,
                tags=tags)

    # If doing time slides
    if ts_trig_files is not None:
        trig_combiner_ts_nodes = []
        trig_cluster_ts_nodes = []
        trig_cluster_all_times_nodes = []
        ts_all_times_outs = FileList(
            [out for out in trig_cluster_outs if "ALL_TIMES" in out.tag_str])
        trig_combiner_ts_out_tags = ["ALL_TIMES", "OFFSOURCE"]
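        # Collect the set of unique SLIDE tags, one per timeslide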
        ts_tags = list(
            set([[ts_tag for ts_tag in ts_trig_file.tags
                  if "SLIDE" in ts_tag][0] for ts_trig_file in ts_trig_files]))
        for ts_tag in ts_tags:
            # Do one slide at a time
            ts_trigs = FileList([
                ts_trig_file for ts_trig_file in ts_trig_files
                if ts_tag in ts_trig_file.tags
            ])

            # And do two-stage clustering if desired
            if workflow.cp.has_option("workflow-postproc",
                                      "do-two-stage-clustering"):

                split_trig_files = (
                    ts_trigs[p:p + num_inputs_per_job]
                    for p in range(0, len(ts_trigs), num_inputs_per_job))
                trig_cluster_s1_nodes = []
                trig_cluster_s1_outs = FileList([])
                for j, s1_inputs in zip(range(num_stage_one_jobs),
                                        split_trig_files):
                    trig_combiner_s1_node, trig_combiner_s1_outs = \
                            trig_combiner_s1_jobs.create_node(s1_inputs,
                                     segment_dir, workflow.analysis_time,
                                     out_tags=trig_combiner_ts_out_tags,
                                     slide_tag=ts_tag, tags=tags+[str(j)])
                    pp_nodes.append(trig_combiner_s1_node)
                    workflow.add_node(trig_combiner_s1_node)

                    unclust_file = [f for f in trig_combiner_s1_outs \
                                    if "ALL_TIMES" in f.tag_str][0]
                    trig_cluster_s1_node, curr_outs = \
                            trig_cluster_s1_jobs.create_node(unclust_file)
                    trig_cluster_s1_outs.extend(curr_outs)
                    clust_file = curr_outs[0]
                    trig_cluster_s1_nodes.append(trig_cluster_s1_node)
                    pp_nodes.append(trig_cluster_s1_node)
                    workflow.add_node(trig_cluster_s1_node)
                    dep = dax.Dependency(
                        parent=trig_combiner_s1_node._dax_node,
                        child=trig_cluster_s1_node._dax_node)
                    workflow._adag.addDependency(dep)

                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(trig_cluster_s1_outs,
                                segment_dir, workflow.analysis_time,
                                slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)
                for trig_cluster_s1_node in trig_cluster_s1_nodes:
                    dep = dax.Dependency(parent=trig_cluster_s1_node._dax_node,
                                         child=trig_combiner_ts_node._dax_node)
                    workflow._adag.addDependency(dep)
            else:
                trig_combiner_ts_node, trig_combiner_ts_outs = \
                        trig_combiner_jobs.create_node(ts_trigs, segment_dir,
                                workflow.analysis_time, slide_tag=ts_tag,
                                out_tags=trig_combiner_ts_out_tags, tags=tags)
                trig_combiner_ts_nodes.append(trig_combiner_ts_node)
                pp_nodes.append(trig_combiner_ts_node)
                workflow.add_node(trig_combiner_ts_node)
                pp_outs.extend(trig_combiner_ts_outs)

            # Set up trig cluster jobs for each timeslide
            for ts_out_tag in trig_combiner_ts_out_tags:
                unclust_file = [f for f in trig_combiner_ts_outs \
                                if ts_out_tag in f.tag_str][0]
                trig_cluster_node, curr_outs = trig_cluster_jobs.create_node(\
                        unclust_file)
                trig_cluster_outs.extend(curr_outs)
                clust_file = curr_outs[0]
                trig_cluster_ts_nodes.append(trig_cluster_node)
                pp_nodes.append(trig_cluster_node)
                workflow.add_node(trig_cluster_node)
                dep = dax.Dependency(parent=trig_combiner_ts_node._dax_node,
                                     child=trig_cluster_node._dax_node)
                workflow._adag.addDependency(dep)
                if ts_out_tag == "ALL_TIMES":
                    trig_cluster_all_times_nodes.append(trig_cluster_node)
                    ts_all_times_outs.extend(FileList([clust_file]))

        # Combine all timeslides
        trig_combiner_all_node, trig_combiner_all_outs = \
                trig_combiner_jobs.create_node(ts_all_times_outs, segment_dir,
                            workflow.analysis_time, slide_tag="ALL_SLIDES",
                            out_tags=trig_combiner_ts_out_tags, tags=tags)
        pp_nodes.append(trig_combiner_all_node)
        workflow.add_node(trig_combiner_all_node)
        for trig_cluster_ts_node in trig_cluster_all_times_nodes:
            dep = dax.Dependency(parent=trig_cluster_ts_node._dax_node,
                                 child=trig_combiner_all_node._dax_node)
            workflow._adag.addDependency(dep)

        for out_tag in trig_combiner_ts_out_tags:
            trig_cluster_outs = FileList(
                [f for f in trig_cluster_outs if out_tag not in f.tag_str])
        trig_cluster_outs.extend(trig_combiner_all_outs)
        off_node = trig_combiner_all_node
        offsource_clustered = [
            f for f in trig_cluster_outs
            if "OFFSOURCE" in f.tag_str and "ZERO_LAG" not in f.tag_str
        ][0]

        # Add sbv_plotter and efficiency jobs
        for out_tag in trig_combiner_out_tags:
            clust_file = [f for f in trig_cluster_outs \
                          if out_tag in f.tag_str][0]

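            # NB: unclust_file still refers to the last timeslide file from
            # the loop above; the plotting jobs below only use clust_file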
            workflow, pp_nodes = setup_coh_PTF_plotting_jobs(
                workflow,
                unclust_file,
                clust_file,
                sbv_plotter_jobs,
                efficiency_jobs,
                inj_efficiency_jobs,
                off_node,
                off_node,
                offsource_clustered,
                injfinder_nodes,
                injcombiner_nodes,
                injcombiner_outs,
                inj_sbv_plotter_parent_nodes,
                inj_tags,
                injcombiner_out_tags,
                pp_nodes,
                output_dir,
                segment_dir,
                ifos,
                out_tag,
                do_injs=do_injections,
                tags=tags)

    for trial in range(1, num_trials + 1):
        trial_tag = "OFFTRIAL_%d" % trial
        unclust_file = [f for f in trig_combiner_outs \
                        if trial_tag in f.tag_str][0]
        trig_cluster_node, clust_outs = trig_cluster_jobs.create_node(\
                unclust_file)
        clust_file = clust_outs[0]
        trig_cluster_outs.extend(clust_outs)
        pp_nodes.append(trig_cluster_node)
        workflow.add_node(trig_cluster_node)
        dep = dax.Dependency(parent=trig_combiner_node._dax_node,
                             child=trig_cluster_node._dax_node)
        workflow._adag.addDependency(dep)

        # Add efficiency job
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[trial_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=off_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)
        dep = dax.Dependency(parent=trig_cluster_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        # Adding inj_efficiency job
        if do_injections:
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[trial_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=off_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

    # Initialise html_summary class and set up job
    #FIXME: We may want this job to run even if some jobs fail
    html_summary_jobs = html_summary_class(cp,
                                           "html_summary",
                                           ifo=ifos,
                                           out_dir=output_dir,
                                           tags=tags)
    if do_injections:
        tuning_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                       if "DETECTION" in inj_tag]
        exclusion_tags = [inj_tag for inj_tag in injcombiner_out_tags \
                          if "DETECTION" not in inj_tag]
        html_summary_node = html_summary_jobs.create_node(
            c_file=config_file,
            tuning_tags=tuning_tags,
            exclusion_tags=exclusion_tags,
            seg_plot=segs_plot,
            html_dir=html_dir,
            time_slides=slides)
    else:
        html_summary_node = html_summary_jobs.create_node(c_file=config_file,
                                                          seg_plot=segs_plot,
                                                          html_dir=html_dir,
                                                          time_slides=slides)
    workflow.add_node(html_summary_node)
    for pp_node in pp_nodes:
        dep = dax.Dependency(parent=pp_node._dax_node,
                             child=html_summary_node._dax_node)
        workflow._adag.addDependency(dep)

    # Make the open box shell script
    try:
        open_box_cmd = html_summary_node.executable.get_pfn() + " "
    except Exception:
        # No local PFN is available: fetch the executable over http instead
        exe_path = html_summary_node.executable.get_pfn('nonlocal').replace(
                "https", "http")
        exe_name = exe_path.rsplit('/', 1)[-1]
        open_box_cmd = "wget %s\n" % exe_path
        open_box_cmd += "chmod 500 ./%s\n./%s " % (exe_name, exe_name)
    open_box_cmd += ' '.join(html_summary_node._args +
                             html_summary_node._options)
    open_box_cmd += " --open-box"
    open_box_path = "%s/open_the_box.sh" % output_dir
    with open(open_box_path, "w") as f:
        f.write("#!/bin/sh\n%s" % open_box_cmd)
    os.chmod(open_box_path, 0o500)

    pp_outs.extend(trig_cluster_outs)

    return pp_outs
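The two-stage clustering above divides the trigger files into
num_stage_one_jobs chunks using the ceiling-division idiom -(-a // b).
A self-contained sketch of that splitting follows; "split_into_jobs" is a
hypothetical helper name, not part of PyCBC:

def split_into_jobs(files, num_jobs):
    """Yield successive chunks of `files`, one chunk per stage-one job."""
    # Ceiling division: -(-a // b) == ceil(a / b) for positive integers
    per_job = -(-len(files) // num_jobs)
    for p in range(0, len(files), per_job):
        yield files[p:p + per_job]

# Example: list(split_into_jobs([1, 2, 3, 4, 5, 6, 7], 3))
#          -> [[1, 2, 3], [4, 5, 6], [7]]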
Example #13
def setup_coh_PTF_plotting_jobs(workflow,
                                unclust_file,
                                clust_file,
                                sbv_plotter_jobs,
                                efficiency_jobs,
                                inj_efficiency_jobs,
                                off_node,
                                dep_node,
                                offsource_clustered,
                                injfinder_nodes,
                                injcombiner_nodes,
                                injcombiner_outs,
                                inj_sbv_plotter_parent_nodes,
                                inj_tags,
                                injcombiner_out_tags,
                                pp_nodes,
                                output_dir,
                                segment_dir,
                                ifos,
                                out_tag,
                                do_injs=False,
                                tags=None):
    """
    Creates signal-based veto and efficiency jobs
    """
    if out_tag != "ONSOURCE":
        # Add sbv_plotter job
        sbv_out_tags = [out_tag, "_clustered"]
        sbv_plotter_node = sbv_plotter_jobs.create_node(clust_file,
                                                        segment_dir,
                                                        tags=sbv_out_tags)
        pp_nodes.append(sbv_plotter_node)
        workflow.add_node(sbv_plotter_node)
        for n in set((off_node, dep_node)):
            dep = dax.Dependency(parent=n._dax_node,
                                 child=sbv_plotter_node._dax_node)
            workflow._adag.addDependency(dep)

        # Add injection sbv_plotter nodes if appropriate
        if out_tag == "OFFSOURCE" and do_injs:
            found_inj_files = FileList([file for file in injcombiner_outs \
                                        if "FOUND" in file.tag_str])
            for curr_injs in found_inj_files:
                curr_tags = [tag for tag in injcombiner_out_tags \
                             if tag in curr_injs.name]
                curr_tags.append("_clustered")
                sbv_plotter_node = sbv_plotter_jobs.create_node(
                    clust_file,
                    segment_dir,
                    inj_file=curr_injs,
                    tags=curr_tags)
                pp_nodes.append(sbv_plotter_node)
                workflow.add_node(sbv_plotter_node)
                dep = dax.Dependency(parent=dep_node._dax_node,
                                     child=sbv_plotter_node._dax_node)
                workflow._adag.addDependency(dep)
                if "DETECTION" in curr_injs.tagged_description:
                    for parent_node in inj_sbv_plotter_parent_nodes:
                        dep = dax.Dependency(parent=parent_node._dax_node,
                                             child=sbv_plotter_node._dax_node)
                        workflow._adag.addDependency(dep)
                else:
                    for parent_node in injcombiner_nodes:
                        dep = dax.Dependency(parent=parent_node._dax_node,
                                             child=sbv_plotter_node._dax_node)
                        workflow._adag.addDependency(dep)

        # Also add sbv_plotter job for unclustered triggers
        #sbv_plotter_node = sbv_plotter_jobs.create_node(unclust_file,
        #        segment_dir, tags=[out_tag, "_unclustered"])
        #pp_nodes.append(sbv_plotter_node)
        #workflow.add_node(sbv_plotter_node)
        #dep = dax.Dependency(parent=trig_combiner_node._dax_node,
        #                     child=sbv_plotter_node._dax_node)
        #workflow._adag.addDependency(dep)
    else:
        # Add efficiency job for on/off
        efficiency_node = efficiency_jobs.create_node(clust_file,
                                                      offsource_clustered,
                                                      segment_dir,
                                                      tags=[out_tag])
        pp_nodes.append(efficiency_node)
        workflow.add_node(efficiency_node)
        dep = dax.Dependency(parent=dep_node._dax_node,
                             child=efficiency_node._dax_node)
        workflow._adag.addDependency(dep)

        if do_injs:
            for tag in injcombiner_out_tags:
                if "_FILTERED_" in tag:
                    inj_set_tag = [t for t in inj_tags if \
                                   str(tag).replace("_FILTERED_", "") \
                                   in t][0]
                else:
                    inj_set_tag = str(tag)

                found_file = [file for file in injcombiner_outs \
                              if tag + "_FOUND" in file.tag_str][0]
                missed_file = [file for file in injcombiner_outs \
                               if tag + "_MISSED" in file.tag_str][0]
                inj_efficiency_node = inj_efficiency_jobs.create_node(\
                        clust_file, offsource_clustered, segment_dir,
                        found_file, missed_file, tags=[out_tag, tag,
                                                       inj_set_tag])
                pp_nodes.append(inj_efficiency_node)
                workflow.add_node(inj_efficiency_node)
                dep = dax.Dependency(parent=dep_node._dax_node,
                                     child=inj_efficiency_node._dax_node)
                workflow._adag.addDependency(dep)
                for injcombiner_node in injcombiner_nodes:
                    dep = dax.Dependency(parent=injcombiner_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)
                for injfinder_node in injfinder_nodes:
                    dep = dax.Dependency(parent=injfinder_node._dax_node,
                                         child=inj_efficiency_node._dax_node)
                    workflow._adag.addDependency(dep)

    return workflow, pp_nodes
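Throughout these functions the first file matching a tag is selected with
[f for f in files if tag in f.tag_str][0], which raises a bare IndexError
when nothing matches. A defensive variant is sketched below; "first_with_tag"
is a hypothetical name, not an existing PyCBC utility:

def first_with_tag(files, tag):
    """Return the first file whose tag_str contains `tag` (sketch)."""
    for f in files:
        if tag in f.tag_str:
            return f
    raise ValueError("No file found with tag %r" % tag)

# Example: found_file = first_with_tag(injcombiner_outs, tag + "_FOUND")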
Example #14
def setup_coh_PTF_injections_pp(wf,
                                inj_trigger_files,
                                inj_files,
                                inj_trigger_caches,
                                inj_caches,
                                pp_nodes,
                                pp_outs,
                                inj_tags,
                                out_dir,
                                seg_dir,
                                ifos,
                                tags=None):
    """
    Set up post processing for injections
    """
    injfinder_nodes = []
    injcombiner_parent_nodes = []
    inj_sbv_plotter_parent_nodes = []
    full_segment = inj_trigger_files[0].segment

    injfinder_exe = os.path.basename(wf.cp.get("executables", "injfinder"))
    injfinder_class = select_generic_executable(wf, "injfinder")
    injfinder_jobs = injfinder_class(wf.cp,
                                     "injfinder",
                                     ifo=ifos,
                                     out_dir=out_dir,
                                     tags=tags)

    injcombiner_exe = os.path.basename(wf.cp.get("executables", "injcombiner"))
    injcombiner_class = select_generic_executable(wf, "injcombiner")
    injcombiner_jobs = injcombiner_class(wf.cp,
                                         "injcombiner",
                                         ifo=ifos,
                                         out_dir=out_dir,
                                         tags=tags)

    injfinder_outs = FileList([])
    for inj_tag in inj_tags:
        triggers = FileList([file for file in inj_trigger_files \
                             if inj_tag in file.tag_str])
        injections = FileList([file for file in inj_files \
                               if inj_tag in file.tag_str])
        trig_cache = [file for file in inj_trigger_caches \
                      if inj_tag in file.tag_str][0]
        inj_cache = [file for file in inj_caches \
                     if inj_tag in file.tag_str][0]
        injfinder_node, curr_outs = injfinder_jobs.create_node(\
                triggers, injections, seg_dir, tags=[inj_tag])
        injfinder_nodes.append(injfinder_node)
        pp_nodes.append(injfinder_node)
        wf.add_node(injfinder_node)
        injfinder_outs.extend(curr_outs)
        if "DETECTION" not in curr_outs[0].tagged_description:
            injcombiner_parent_nodes.append(injfinder_node)
        else:
            inj_sbv_plotter_parent_nodes.append(injfinder_node)

    pp_outs.extend(injfinder_outs)

    # Make injfinder output cache
    fm_cache = File(ifos,
                    "foundmissed",
                    full_segment,
                    extension="lcf",
                    directory=out_dir)
    fm_cache.PFN(fm_cache.cache_entry.path, site="local")
    with open(fm_cache.storage_path, "w") as fm_file:
        injfinder_outs.convert_to_lal_cache().tofile(fm_file)
    pp_outs.extend(FileList([fm_cache]))

    # Set up injcombiner jobs
    injcombiner_outs = FileList([f for f in injfinder_outs \
                                 if "DETECTION" in f.tag_str])
    injcombiner_tags = [inj_tag for inj_tag in inj_tags \
                        if "DETECTION" not in inj_tag]
    injcombiner_out_tags = [i.tag_str.rsplit('_', 1)[0] for i in \
                            injcombiner_outs if "FOUND" in i.tag_str]
    injcombiner_nodes = []

    for injcombiner_tag in injcombiner_tags:
        max_inc = wf.cp.get_opt_tags("injections", "max-inc",
                                     [injcombiner_tag])
        inj_str = injcombiner_tag.replace("INJ", "")
        inputs = FileList([f for f in injfinder_outs \
                           if injcombiner_tag in f.tagged_description])
        injcombiner_node, curr_outs = injcombiner_jobs.create_node(\
                fm_cache, inputs, inj_str, max_inc, wf.analysis_time)
        injcombiner_nodes.append(injcombiner_node)
        injcombiner_out_tags.append("%s_FILTERED_%s" %
                                    (inj_str.split(max_inc)[0], max_inc))
        injcombiner_outs.extend(curr_outs)
        pp_outs.extend(curr_outs)
        pp_nodes.append(injcombiner_node)
        wf.add_node(injcombiner_node)
        for parent_node in injcombiner_parent_nodes:
            dep = dax.Dependency(parent=parent_node._dax_node,
                                 child=injcombiner_node._dax_node)
            wf._adag.addDependency(dep)

    return (wf, injfinder_nodes, injfinder_outs, fm_cache, injcombiner_nodes,
            injcombiner_outs, injcombiner_out_tags,
            inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs)
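The ten-element tuple returned above is easy to mis-unpack at the call site
in setup_postproc_coh_PTF_offline_workflow. One option, using only the
standard library, is a namedtuple; "InjectionsPP" is a hypothetical name,
shown purely as a readability sketch:

from collections import namedtuple

InjectionsPP = namedtuple("InjectionsPP",
                          ["wf", "injfinder_nodes", "injfinder_outs",
                           "fm_cache", "injcombiner_nodes",
                           "injcombiner_outs", "injcombiner_out_tags",
                           "inj_sbv_plotter_parent_nodes",
                           "pp_nodes", "pp_outs"])

# The final return statement could then read:
#     return InjectionsPP(wf, injfinder_nodes, injfinder_outs, fm_cache,
#                         injcombiner_nodes, injcombiner_outs,
#                         injcombiner_out_tags,
#                         inj_sbv_plotter_parent_nodes, pp_nodes, pp_outs)
# letting callers unpack by attribute rather than by position.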