Пример #1
0
    def from_config(cls, cp, data_section='data', **kwargs):
        r"""Initializes an instance of this class from the given config file.

        In addition to ``[model]``, a ``data_section`` (default ``[data]``)
        must be in the configuration file. The data section specifies settings
        for loading data and estimating PSDs. See the `online documentation
        <http://pycbc.org/pycbc/latest/html/inference.html#setting-data>`_ for
        more details.

        The following options are read from the ``[model]`` section, in
        addition to ``name`` (which must be set):

        * ``{{DET}}-low-frequency-cutoff = FLOAT`` :
          The low frequency cutoff to use for each detector {{DET}}. A cutoff
          must be provided for every detector that may be analyzed (any
          additional detectors are ignored).
        * ``{{DET}}-high-frequency-cutoff = FLOAT`` :
          (Optional) A high frequency cutoff for each detector. If not
          provided, the Nyquist frequency is used.
        * ``check-for-valid-times =`` :
          (Optional) If provided, will check that there are no data quality
          flags on during the analysis segment and the segment used for PSD
          estimation in each detector. To check for flags,
          :py:func:`pycbc.dq.query_flag` is used, with settings pulled from the
          ``dq-*`` options in the ``[data]`` section. If a detector has bad
          data quality during either the analysis segment or PSD segment, it
          will be removed from the analysis.
        * ``shift-psd-times-to-valid =`` :
          (Optional) If provided, the segment used for PSD estimation will
          automatically be shifted left or right until a continuous block of
          data with no data quality issues can be found. If no block can be
          found with a maximum shift of +/- the requested psd segment length,
          the detector will not be analyzed.
        * ``err-on-missing-detectors =`` :
          Raises an error if any detector is removed from the analysis because
          a valid time could not be found. Otherwise, a warning is printed
          to screen and the detector is removed from the analysis.
        * ``normalize =``:
          (Optional) Turn on the normalization factor.

        Parameters
        ----------
        cp : WorkflowConfigParser
            Config file parser to read.
        data_section : str, optional
            The name of the section to load data options from.
        \**kwargs :
            All additional keyword arguments are passed to the class. Any
            provided keyword will override what is in the config file.

        Returns
        -------
        object
            The result of calling ``cls`` with the arguments assembled from
            the config file, updated with ``kwargs``.

        Raises
        ------
        ValueError
            If no ``low-frequency-cutoff`` option is found in the ``[model]``
            section.
        """
        args = cls._init_args_from_config(cp)
        # check if normalize is set
        if cp.has_option('model', 'normalize'):
            args['normalize'] = True
        # options handled explicitly here; they are excluded from the
        # generic "extra args" parsing further down
        ignore_args = ['name', 'normalize']
        for option in cp.options("model"):
            if option in ("low-frequency-cutoff", "high-frequency-cutoff"):
                ignore_args.append(option)
                name = option.replace('-', '_')
                args[name] = cp.get_cli_option('model', name,
                                               nargs='+', type=float,
                                               action=MultiDetOptionAction)
        if 'low_frequency_cutoff' not in args:
            raise ValueError("low-frequency-cutoff must be provided in the"
                             " model section, but is not found!")

        # data args: flag-style options that are switched on just by being
        # present in the model section
        bool_args = ['check-for-valid-times', 'shift-psd-times-to-valid',
                     'err-on-missing-detectors']
        data_args = {arg.replace('-', '_'): True for arg in bool_args
                     if cp.has_option('model', arg)}
        ignore_args += bool_args
        # load the data
        opts = data_opts_from_config(cp, data_section,
                                     args['low_frequency_cutoff'])
        strain_dict, psd_strain_dict = data_from_cli(opts, **data_args)
        # convert to frequency domain and get psds
        stilde_dict, psds = fd_data_from_strain_dict(opts, strain_dict,
                                                     psd_strain_dict)
        # save the psd data segments if the psd was estimated from data
        if opts.psd_estimation is not None:
            # fall back to the analysis strain when no separate PSD strain
            # was loaded
            _tdict = psd_strain_dict or strain_dict
            for det in psds:
                psds[det].psd_segment = (_tdict[det].start_time,
                                         _tdict[det].end_time)
        # gate overwhitened if desired
        if opts.gate_overwhitened and opts.gate is not None:
            stilde_dict = gate_overwhitened_data(stilde_dict, psds, opts.gate)
        args.update({'data': stilde_dict, 'psds': psds})
        # any extra args
        args.update(cls.extra_args_from_config(cp, "model",
                                               skip_args=ignore_args))
        # get the injection file
        # Note: PyCBC's multi-ifo parser uses key:ifo for
        # the injection file, even though we will use the same
        # injection file for all detectors. This
        # should be fixed in a future version of PyCBC. Once it is,
        # update this. Until then, just use the first file.
        if opts.injection_file:
            injection_file = next(iter(opts.injection_file.values()))
        else:
            # None if not set
            injection_file = None
        args['injection_file'] = injection_file
        # update any static params that are set to FROM_INJECTION
        replace_params = get_static_params_from_injection(
            args['static_params'], injection_file)
        args['static_params'].update(replace_params)
        # get ifo-specific instances of calibration model
        if cp.has_section('calibration'):
            logging.info("Initializing calibration model")
            recalib = {
                ifo: Recalibrate.from_config(cp, ifo, section='calibration')
                for ifo in opts.instruments}
            args['recalibration'] = recalib
        # get gates for templates
        gates = gates_from_cli(opts)
        if gates:
            args['gates'] = gates
        # explicitly provided keywords take precedence over anything read
        # from the config file, as documented above
        args.update(kwargs)
        return cls(**args)
Пример #2
0
def data_from_cli(opts):
    """Loads the data needed for a model from the given command-line options.

    Gates specified on the command line are also applied.

    Parameters
    ----------
    opts : ArgumentParser parsed args
        Argument options parsed from a command line string (the sort of thing
        returned by `parser.parse_args`). ``opts.gps_start_time`` and
        ``opts.gps_end_time`` are temporarily overridden while loading PSD
        data, but are restored before returning.

    Returns
    -------
    strain_dict : dict
        Dictionary of instruments -> `TimeSeries` strain.
    stilde_dict : dict
        Dictionary of instruments -> `FrequencySeries` strain.
    psd_dict : dict
        Dictionary of instruments -> `FrequencySeries` psds.

    Raises
    ------
    ValueError
        If only one of ``opts.psd_start_time`` and ``opts.psd_end_time`` is
        set.
    """
    # get gates to apply
    gates = gates_from_cli(opts)
    psd_gates = psd_gates_from_cli(opts)

    # get strain time series
    instruments = opts.instruments if opts.instruments is not None else []
    strain_dict = strain_from_cli_multi_ifos(opts, instruments,
                                             precision="double")
    # apply gates if not waiting to overwhiten
    if not opts.gate_overwhitened:
        strain_dict = apply_gates_to_td(strain_dict, gates)

    # get strain time series to use for PSD estimation
    # if user has not given the PSD time options then use same data as analysis
    if opts.psd_start_time and opts.psd_end_time:
        logging.info("Will generate a different time series for PSD "
                     "estimation")
        # Point the strain loader at the PSD segment by overriding the gps
        # times, then put the analysis times back so that the caller's opts
        # are not left clobbered with the PSD times.
        analysis_times = (opts.gps_start_time, opts.gps_end_time)
        opts.gps_start_time = opts.psd_start_time
        opts.gps_end_time = opts.psd_end_time
        try:
            psd_strain_dict = strain_from_cli_multi_ifos(opts,
                                                         instruments,
                                                         precision="double")
        finally:
            opts.gps_start_time, opts.gps_end_time = analysis_times
        # apply any gates
        logging.info("Applying gates to PSD data")
        psd_strain_dict = apply_gates_to_td(psd_strain_dict, psd_gates)

    elif opts.psd_start_time or opts.psd_end_time:
        raise ValueError("Must give --psd-start-time and --psd-end-time")
    else:
        psd_strain_dict = strain_dict

    # FFT strain and save each of the length of the FFT, delta_f, and
    # low frequency cutoff to a dict
    stilde_dict = {}
    length_dict = {}
    delta_f_dict = {}
    low_frequency_cutoff_dict = low_frequency_cutoff_from_cli(opts)
    for ifo in instruments:
        stilde_dict[ifo] = strain_dict[ifo].to_frequencyseries()
        length_dict[ifo] = len(stilde_dict[ifo])
        delta_f_dict[ifo] = stilde_dict[ifo].delta_f

    # get PSD as frequency series
    psd_dict = psd_from_cli_multi_ifos(
        opts, length_dict, delta_f_dict, low_frequency_cutoff_dict,
        instruments, strain_dict=psd_strain_dict, precision="double")

    # apply any gates to overwhitened data, if desired
    if opts.gate_overwhitened and opts.gate is not None:
        logging.info("Applying gates to overwhitened data")
        # overwhiten the data
        for ifo in gates:
            stilde_dict[ifo] /= psd_dict[ifo]
        stilde_dict = apply_gates_to_fd(stilde_dict, gates)
        # unwhiten the data for the model
        for ifo in gates:
            stilde_dict[ifo] *= psd_dict[ifo]

    return strain_dict, stilde_dict, psd_dict
Пример #3
0
def data_from_cli(opts):
    """Loads the data needed for a model from the given command-line options.

    Gates specified on the command line are also applied.

    Parameters
    ----------
    opts : ArgumentParser parsed args
        Argument options parsed from a command line string (the sort of thing
        returned by `parser.parse_args`). ``opts.gps_start_time`` and
        ``opts.gps_end_time`` are temporarily overridden while loading PSD
        data, but are restored before returning.

    Returns
    -------
    strain_dict : dict
        Dictionary of instruments -> `TimeSeries` strain.
    stilde_dict : dict
        Dictionary of instruments -> `FrequencySeries` strain.
    psd_dict : dict
        Dictionary of instruments -> `FrequencySeries` psds.

    Raises
    ------
    ValueError
        If only one of ``opts.psd_start_time`` and ``opts.psd_end_time`` is
        set.
    """
    # get gates to apply
    gates = gates_from_cli(opts)
    psd_gates = psd_gates_from_cli(opts)

    # get strain time series
    instruments = opts.instruments if opts.instruments is not None else []
    strain_dict = strain_from_cli_multi_ifos(opts,
                                             instruments,
                                             precision="double")
    # apply gates if not waiting to overwhiten
    if not opts.gate_overwhitened:
        strain_dict = apply_gates_to_td(strain_dict, gates)

    # get strain time series to use for PSD estimation
    # if user has not given the PSD time options then use same data as analysis
    if opts.psd_start_time and opts.psd_end_time:
        logging.info("Will generate a different time series for PSD "
                     "estimation")
        # The gps times are overridden only for the duration of the PSD
        # strain load; the analysis times are restored afterwards (even on
        # error) so the caller's opts object is left as it was passed in.
        saved_start = opts.gps_start_time
        saved_end = opts.gps_end_time
        opts.gps_start_time = opts.psd_start_time
        opts.gps_end_time = opts.psd_end_time
        try:
            psd_strain_dict = strain_from_cli_multi_ifos(opts,
                                                         instruments,
                                                         precision="double")
        finally:
            opts.gps_start_time = saved_start
            opts.gps_end_time = saved_end
        # apply any gates
        logging.info("Applying gates to PSD data")
        psd_strain_dict = apply_gates_to_td(psd_strain_dict, psd_gates)

    elif opts.psd_start_time or opts.psd_end_time:
        raise ValueError("Must give --psd-start-time and --psd-end-time")
    else:
        psd_strain_dict = strain_dict

    # FFT strain and save each of the length of the FFT, delta_f, and
    # low frequency cutoff to a dict
    stilde_dict = {}
    length_dict = {}
    delta_f_dict = {}
    low_frequency_cutoff_dict = low_frequency_cutoff_from_cli(opts)
    for ifo in instruments:
        stilde_dict[ifo] = strain_dict[ifo].to_frequencyseries()
        length_dict[ifo] = len(stilde_dict[ifo])
        delta_f_dict[ifo] = stilde_dict[ifo].delta_f

    # get PSD as frequency series
    psd_dict = psd_from_cli_multi_ifos(opts,
                                       length_dict,
                                       delta_f_dict,
                                       low_frequency_cutoff_dict,
                                       instruments,
                                       strain_dict=psd_strain_dict,
                                       precision="double")

    # apply any gates to overwhitened data, if desired
    if opts.gate_overwhitened and opts.gate is not None:
        logging.info("Applying gates to overwhitened data")
        # overwhiten the data
        for ifo in gates:
            stilde_dict[ifo] /= psd_dict[ifo]
        stilde_dict = apply_gates_to_fd(stilde_dict, gates)
        # unwhiten the data for the model
        for ifo in gates:
            stilde_dict[ifo] *= psd_dict[ifo]

    return strain_dict, stilde_dict, psd_dict
Пример #4
0
def data_from_cli(opts,
                  check_for_valid_times=False,
                  shift_psd_times_to_valid=False,
                  err_on_missing_detectors=False):
    """Loads the data needed for a model from the given command-line options.

    Gates specified on the command line are also applied.

    Parameters
    ----------
    opts : ArgumentParser parsed args
        Argument options parsed from a command line string (the sort of thing
        returned by `parser.parse_args`). ``opts.gps_start_time`` and
        ``opts.gps_end_time`` are temporarily overridden while loading PSD
        data, but are restored before returning.
    check_for_valid_times : bool, optional
        Check that valid data exists in the requested gps times. Default is
        False.
    shift_psd_times_to_valid : bool, optional
        If estimating the PSD from data, shift the PSD times to a valid
        segment if needed. Default is False.
    err_on_missing_detectors : bool, optional
        Raise a NoValidDataError if any detector does not have valid data.
        Otherwise, a warning is printed, and that detector is skipped.

    Returns
    -------
    strain_dict : dict
        Dictionary of detectors -> time series strain.
    psd_strain_dict : dict or None
        If ``opts.psd_(start|end)_time`` were set, a dictionary of
        detectors -> time series data to use for PSD estimation. Otherwise,
        ``None``.

    Raises
    ------
    ValueError
        If only one of ``opts.psd_start_time`` and ``opts.psd_end_time`` is
        set.
    NoValidDataError
        If no instruments are left to analyze.
    """
    # get gates to apply
    gates = gates_from_cli(opts)
    psd_gates = psd_gates_from_cli(opts)

    # get strain time series
    instruments = opts.instruments

    # validate times
    if check_for_valid_times:
        dets_with_data = detectors_with_valid_data(
            instruments,
            opts.gps_start_time,
            opts.gps_end_time,
            pad_data=opts.pad_data,
            err_on_missing_detectors=err_on_missing_detectors,
            shift_to_valid=False,
            segment_name=opts.dq_segment_name,
            source=opts.dq_source,
            server=opts.dq_server,
            veto_definer=opts.veto_definer)
        # reset instruments to only be those with valid data
        instruments = list(dets_with_data.keys())

    strain_dict = strain_from_cli_multi_ifos(opts,
                                             instruments,
                                             precision="double")
    # apply gates if not waiting to overwhiten
    if not opts.gate_overwhitened:
        strain_dict = apply_gates_to_td(strain_dict, gates)

    # check that there aren't nans in the data
    check_for_nans(strain_dict)

    # get strain time series to use for PSD estimation
    # if user has not given the PSD time options then use same data as analysis
    if opts.psd_start_time and opts.psd_end_time:
        logging.info("Will generate a different time series for PSD "
                     "estimation")
        if check_for_valid_times:
            psd_times = detectors_with_valid_data(
                instruments,
                opts.psd_start_time,
                opts.psd_end_time,
                pad_data=opts.pad_data,
                err_on_missing_detectors=err_on_missing_detectors,
                shift_to_valid=shift_psd_times_to_valid,
                segment_name=opts.dq_segment_name,
                source=opts.dq_source,
                server=opts.dq_server,
                veto_definer=opts.veto_definer)
            # remove detectors from the strain dict that did not have valid
            # times for PSD estimation
            for det in set(strain_dict.keys()) - set(psd_times.keys()):
                del strain_dict[det]
            # reset instruments to only be those with valid data
            instruments = list(psd_times.keys())
        else:
            psd_times = {
                det: (opts.psd_start_time[det], opts.psd_end_time[det])
                for det in instruments
            }
        # Each detector's PSD strain is loaded by overriding the gps times
        # on opts with that detector's PSD segment. Restore the analysis
        # times afterwards (even on error) so the caller's opts are not left
        # holding the last detector's PSD times.
        analysis_times = (opts.gps_start_time, opts.gps_end_time)
        psd_strain_dict = {}
        try:
            for det, (psd_start, psd_end) in psd_times.items():
                opts.gps_start_time = psd_start
                opts.gps_end_time = psd_end
                psd_strain_dict.update(
                    strain_from_cli_multi_ifos(opts, [det],
                                               precision="double"))
        finally:
            opts.gps_start_time, opts.gps_end_time = analysis_times
        # apply any gates
        logging.info("Applying gates to PSD data")
        psd_strain_dict = apply_gates_to_td(psd_strain_dict, psd_gates)
        # check that there aren't nans in the psd data
        check_for_nans(psd_strain_dict)
    elif opts.psd_start_time or opts.psd_end_time:
        raise ValueError("Must give psd-start-time and psd-end-time")
    else:
        psd_strain_dict = None

    # check that we have data left to analyze
    if not instruments:
        raise NoValidDataError("No valid data could be found in any of the "
                               "requested instruments.")

    return strain_dict, psd_strain_dict