def from_config(cls, cp, data_section='data', **kwargs):
    r"""Initializes an instance of this class from the given config file.

    In addition to ``[model]``, a ``data_section`` (default ``[data]``) must
    be in the configuration file. The data section specifies settings for
    loading data and estimating PSDs. See the `online documentation
    <http://pycbc.org/pycbc/latest/html/inference.html#setting-data>`_ for
    more details.

    The following options are read from the ``[model]`` section, in addition
    to ``name`` (which must be set):

    * ``{{DET}}-low-frequency-cutoff = FLOAT`` :
      The low frequency cutoff to use for each detector {{DET}}. A cutoff
      must be provided for every detector that may be analyzed (any
      additional detectors are ignored).
    * ``{{DET}}-high-frequency-cutoff = FLOAT`` :
      (Optional) A high frequency cutoff for each detector. If not
      provided, the Nyquist frequency is used.
    * ``check-for-valid-times =`` :
      (Optional) If provided, will check that there are no data quality
      flags on during the analysis segment and the segment used for PSD
      estimation in each detector. To check for flags,
      :py:func:`pycbc.dq.query_flag` is used, with settings pulled from the
      ``dq-*`` options in the ``[data]`` section. If a detector has bad
      data quality during either the analysis segment or PSD segment, it
      will be removed from the analysis.
    * ``shift-psd-times-to-valid =`` :
      (Optional) If provided, the segment used for PSD estimation will
      automatically be shifted left or right until a continuous block of
      data with no data quality issues can be found. If no block can be
      found with a maximum shift of +/- the requested psd segment length,
      the detector will not be analyzed.
    * ``err-on-missing-detectors =`` :
      Raises an error if any detector is removed from the analysis because
      a valid time could not be found. Otherwise, a warning is printed
      to screen and the detector is removed from the analysis.
    * ``normalize =``:
      (Optional) Turn on the normalization factor.

    Parameters
    ----------
    cp : WorkflowConfigParser
        Config file parser to read.
    data_section : str, optional
        The name of the section to load data options from.
    \**kwargs :
        All additional keyword arguments are passed to the class. Any
        provided keyword will override what is in the config file.

    Returns
    -------
    instance of ``cls``
        The result of calling ``cls`` with the arguments collected from the
        config file, updated with ``kwargs``.

    Raises
    ------
    ValueError
        If no low-frequency-cutoff is provided in the ``[model]`` section.
    """
    args = cls._init_args_from_config(cp)
    # check if normalize is set
    if cp.has_option('model', 'normalize'):
        args['normalize'] = True
    # Options that are handled explicitly in this method; they are passed
    # as ``skip_args`` to ``extra_args_from_config`` further down.
    ignore_args = ['name', 'normalize']
    for option in cp.options("model"):
        if option in ("low-frequency-cutoff", "high-frequency-cutoff"):
            ignore_args.append(option)
            name = option.replace('-', '_')
            args[name] = cp.get_cli_option('model', name,
                                           nargs='+', type=float,
                                           action=MultiDetOptionAction)
    if 'low_frequency_cutoff' not in args:
        raise ValueError("low-frequency-cutoff must be provided in the"
                         " model section, but is not found!")
    # data args: flag-style options that only need to be present
    bool_args = ['check-for-valid-times', 'shift-psd-times-to-valid',
                 'err-on-missing-detectors']
    data_args = {arg.replace('-', '_'): True for arg in bool_args
                 if cp.has_option('model', arg)}
    ignore_args += bool_args
    # load the data
    opts = data_opts_from_config(cp, data_section,
                                 args['low_frequency_cutoff'])
    strain_dict, psd_strain_dict = data_from_cli(opts, **data_args)
    # convert to frequency domain and get psds
    stilde_dict, psds = fd_data_from_strain_dict(opts, strain_dict,
                                                 psd_strain_dict)
    # save the psd data segments if the psd was estimated from data
    if opts.psd_estimation is not None:
        # fall back on the analysis strain when no separate PSD strain
        # was loaded
        _tdict = psd_strain_dict or strain_dict
        for det in psds:
            psds[det].psd_segment = (_tdict[det].start_time,
                                     _tdict[det].end_time)
    # gate overwhitened if desired
    if opts.gate_overwhitened and opts.gate is not None:
        stilde_dict = gate_overwhitened_data(stilde_dict, psds, opts.gate)
    args.update({'data': stilde_dict, 'psds': psds})
    # any extra args
    args.update(cls.extra_args_from_config(cp, "model",
                                           skip_args=ignore_args))
    # get the injection file
    # Note: PyCBC's multi-ifo parser uses key:ifo for
    # the injection file, even though we will use the same
    # injection file for all detectors. This
    # should be fixed in a future version of PyCBC. Once it is,
    # update this. Until then, just use the first file.
    if opts.injection_file:
        injection_file = tuple(opts.injection_file.values())[0]
    else:
        # None if not set
        injection_file = None
    args['injection_file'] = injection_file
    # update any static params that are set to FROM_INJECTION
    replace_params = get_static_params_from_injection(
        args['static_params'], injection_file)
    args['static_params'].update(replace_params)
    # get ifo-specific instances of calibration model
    if cp.has_section('calibration'):
        logging.info("Initializing calibration model")
        recalib = {
            ifo: Recalibrate.from_config(cp, ifo, section='calibration')
            for ifo in opts.instruments}
        args['recalibration'] = recalib
    # get gates for templates
    gates = gates_from_cli(opts)
    if gates:
        args['gates'] = gates
    # Explicit keyword arguments take precedence over the config file, as
    # documented; apply them last so nothing above can overwrite them.
    args.update(kwargs)
    return cls(**args)
def data_from_cli(opts):
    """Loads the data needed for a model from the given command-line options.

    Gates specified on the command line are also applied.

    Parameters
    ----------
    opts : ArgumentParser parsed args
        Argument options parsed from a command line string (the sort of thing
        returned by `parser.parse_args`). The object is not modified.

    Returns
    -------
    strain_dict : dict
        Dictionary of instruments -> `TimeSeries` strain.
    stilde_dict : dict
        Dictionary of instruments -> `FrequencySeries` strain.
    psd_dict : dict
        Dictionary of instruments -> `FrequencySeries` psds.

    Raises
    ------
    ValueError
        If only one of ``opts.psd_start_time`` and ``opts.psd_end_time`` is
        set.
    """
    import copy

    # get gates to apply
    gates = gates_from_cli(opts)
    psd_gates = psd_gates_from_cli(opts)

    # get strain time series
    instruments = opts.instruments if opts.instruments is not None else []
    strain_dict = strain_from_cli_multi_ifos(opts, instruments,
                                             precision="double")

    # apply gates if not waiting to overwhiten
    if not opts.gate_overwhitened:
        strain_dict = apply_gates_to_td(strain_dict, gates)

    # get strain time series to use for PSD estimation
    # if user has not given the PSD time options then use same data as analysis
    if opts.psd_start_time and opts.psd_end_time:
        logging.info("Will generate a different time series for PSD "
                     "estimation")
        # Work on a shallow copy: assigning the PSD times on ``opts`` itself
        # would overwrite the caller's analysis start/end times.
        psd_opts = copy.copy(opts)
        psd_opts.gps_start_time = psd_opts.psd_start_time
        psd_opts.gps_end_time = psd_opts.psd_end_time
        psd_strain_dict = strain_from_cli_multi_ifos(psd_opts,
                                                     instruments,
                                                     precision="double")
        # apply any gates
        logging.info("Applying gates to PSD data")
        psd_strain_dict = apply_gates_to_td(psd_strain_dict, psd_gates)
    elif opts.psd_start_time or opts.psd_end_time:
        raise ValueError("Must give --psd-start-time and --psd-end-time")
    else:
        psd_strain_dict = strain_dict

    # FFT strain and save each of the length of the FFT, delta_f, and
    # low frequency cutoff to a dict
    stilde_dict = {}
    length_dict = {}
    delta_f_dict = {}
    low_frequency_cutoff_dict = low_frequency_cutoff_from_cli(opts)
    for ifo in instruments:
        stilde_dict[ifo] = strain_dict[ifo].to_frequencyseries()
        length_dict[ifo] = len(stilde_dict[ifo])
        delta_f_dict[ifo] = stilde_dict[ifo].delta_f

    # get PSD as frequency series
    psd_dict = psd_from_cli_multi_ifos(
        opts, length_dict, delta_f_dict, low_frequency_cutoff_dict,
        instruments, strain_dict=psd_strain_dict, precision="double")

    # apply any gates to overwhitened data, if desired
    if opts.gate_overwhitened and opts.gate is not None:
        logging.info("Applying gates to overwhitened data")
        # overwhiten the data
        for ifo in gates:
            stilde_dict[ifo] /= psd_dict[ifo]
        stilde_dict = apply_gates_to_fd(stilde_dict, gates)
        # unwhiten the data for the model
        for ifo in gates:
            stilde_dict[ifo] *= psd_dict[ifo]

    return strain_dict, stilde_dict, psd_dict
def data_from_cli(opts):
    """Loads the data needed for a model from the given command-line options.

    Gates specified on the command line are also applied.

    Parameters
    ----------
    opts : ArgumentParser parsed args
        Argument options parsed from a command line string (the sort of thing
        returned by `parser.parse_args`). The object is not modified.

    Returns
    -------
    strain_dict : dict
        Dictionary of instruments -> `TimeSeries` strain.
    stilde_dict : dict
        Dictionary of instruments -> `FrequencySeries` strain.
    psd_dict : dict
        Dictionary of instruments -> `FrequencySeries` psds.

    Raises
    ------
    ValueError
        If only one of ``opts.psd_start_time`` and ``opts.psd_end_time`` is
        set.
    """
    import copy

    # get gates to apply
    gates = gates_from_cli(opts)
    psd_gates = psd_gates_from_cli(opts)

    # get strain time series
    instruments = opts.instruments if opts.instruments is not None else []
    strain_dict = strain_from_cli_multi_ifos(opts, instruments,
                                             precision="double")

    # apply gates if not waiting to overwhiten
    if not opts.gate_overwhitened:
        strain_dict = apply_gates_to_td(strain_dict, gates)

    # get strain time series to use for PSD estimation
    # if user has not given the PSD time options then use same data as analysis
    if opts.psd_start_time and opts.psd_end_time:
        logging.info("Will generate a different time series for PSD "
                     "estimation")
        # ``psd_opts`` must be a separate object: aliasing ``opts`` would
        # replace the caller's analysis times with the PSD times.
        psd_opts = copy.copy(opts)
        psd_opts.gps_start_time = psd_opts.psd_start_time
        psd_opts.gps_end_time = psd_opts.psd_end_time
        psd_strain_dict = strain_from_cli_multi_ifos(psd_opts,
                                                     instruments,
                                                     precision="double")
        # apply any gates
        logging.info("Applying gates to PSD data")
        psd_strain_dict = apply_gates_to_td(psd_strain_dict, psd_gates)
    elif opts.psd_start_time or opts.psd_end_time:
        raise ValueError("Must give --psd-start-time and --psd-end-time")
    else:
        psd_strain_dict = strain_dict

    # FFT strain and save each of the length of the FFT, delta_f, and
    # low frequency cutoff to a dict
    stilde_dict = {}
    length_dict = {}
    delta_f_dict = {}
    low_frequency_cutoff_dict = low_frequency_cutoff_from_cli(opts)
    for ifo in instruments:
        stilde_dict[ifo] = strain_dict[ifo].to_frequencyseries()
        length_dict[ifo] = len(stilde_dict[ifo])
        delta_f_dict[ifo] = stilde_dict[ifo].delta_f

    # get PSD as frequency series
    psd_dict = psd_from_cli_multi_ifos(opts, length_dict, delta_f_dict,
                                       low_frequency_cutoff_dict,
                                       instruments,
                                       strain_dict=psd_strain_dict,
                                       precision="double")

    # apply any gates to overwhitened data, if desired
    if opts.gate_overwhitened and opts.gate is not None:
        logging.info("Applying gates to overwhitened data")
        # overwhiten the data
        for ifo in gates:
            stilde_dict[ifo] /= psd_dict[ifo]
        stilde_dict = apply_gates_to_fd(stilde_dict, gates)
        # unwhiten the data for the model
        for ifo in gates:
            stilde_dict[ifo] *= psd_dict[ifo]

    return strain_dict, stilde_dict, psd_dict
def data_from_cli(opts, check_for_valid_times=False,
                  shift_psd_times_to_valid=False,
                  err_on_missing_detectors=False):
    """Loads the data needed for a model from the given command-line options.

    Gates specified on the command line are also applied.

    Parameters
    ----------
    opts : ArgumentParser parsed args
        Argument options parsed from a command line string (the sort of thing
        returned by `parser.parse_args`). The object is not modified.
    check_for_valid_times : bool, optional
        Check that valid data exists in the requested gps times. Default is
        False.
    shift_psd_times_to_valid : bool, optional
        If estimating the PSD from data, shift the PSD times to a valid
        segment if needed. Default is False.
    err_on_missing_detectors : bool, optional
        Raise a NoValidDataError if any detector does not have valid data.
        Otherwise, a warning is printed, and that detector is skipped.

    Returns
    -------
    strain_dict : dict
        Dictionary of detectors -> time series strain.
    psd_strain_dict : dict or None
        If ``opts.psd_(start|end)_time`` were set, a dictionary of
        detectors -> time series data to use for PSD estimation. Otherwise,
        ``None``.

    Raises
    ------
    ValueError
        If only one of ``opts.psd_start_time`` and ``opts.psd_end_time`` is
        set.
    NoValidDataError
        If no instruments are left to analyze.
    """
    import copy

    # get gates to apply
    gates = gates_from_cli(opts)
    psd_gates = psd_gates_from_cli(opts)
    # get strain time series
    instruments = opts.instruments
    # validate times
    if check_for_valid_times:
        dets_with_data = detectors_with_valid_data(
            instruments, opts.gps_start_time, opts.gps_end_time,
            pad_data=opts.pad_data,
            err_on_missing_detectors=err_on_missing_detectors,
            shift_to_valid=False,
            segment_name=opts.dq_segment_name, source=opts.dq_source,
            server=opts.dq_server, veto_definer=opts.veto_definer)
        # reset instruments to only be those with valid data
        instruments = list(dets_with_data.keys())
    strain_dict = strain_from_cli_multi_ifos(opts, instruments,
                                             precision="double")
    # apply gates if not waiting to overwhiten
    if not opts.gate_overwhitened:
        strain_dict = apply_gates_to_td(strain_dict, gates)
    # check that there aren't nans in the data
    check_for_nans(strain_dict)
    # get strain time series to use for PSD estimation
    # if user has not given the PSD time options then use same data as analysis
    if opts.psd_start_time and opts.psd_end_time:
        logging.info("Will generate a different time series for PSD "
                     "estimation")
        if check_for_valid_times:
            psd_times = detectors_with_valid_data(
                instruments, opts.psd_start_time, opts.psd_end_time,
                pad_data=opts.pad_data,
                err_on_missing_detectors=err_on_missing_detectors,
                shift_to_valid=shift_psd_times_to_valid,
                segment_name=opts.dq_segment_name, source=opts.dq_source,
                server=opts.dq_server, veto_definer=opts.veto_definer)
            # remove detectors from the strain dict that did not have valid
            # times for PSD estimation
            for det in set(strain_dict.keys()) - set(psd_times.keys()):
                strain_dict.pop(det)
            # reset instruments to only be those with valid data
            instruments = list(psd_times.keys())
        else:
            psd_times = {
                det: (opts.psd_start_time[det], opts.psd_end_time[det])
                for det in instruments
            }
        # Load the PSD data through a shallow copy of the options so that
        # the caller's analysis start/end times are not overwritten by the
        # per-detector PSD times.
        psd_opts = copy.copy(opts)
        psd_strain_dict = {}
        for det, (psd_start, psd_end) in psd_times.items():
            psd_opts.gps_start_time = psd_start
            psd_opts.gps_end_time = psd_end
            psd_strain_dict.update(
                strain_from_cli_multi_ifos(psd_opts, [det],
                                           precision="double"))
        # apply any gates
        logging.info("Applying gates to PSD data")
        psd_strain_dict = apply_gates_to_td(psd_strain_dict, psd_gates)
        # check that there aren't nans in the psd data
        check_for_nans(psd_strain_dict)
    elif opts.psd_start_time or opts.psd_end_time:
        raise ValueError("Must give psd-start-time and psd-end-time")
    else:
        psd_strain_dict = None
    # check that we have data left to analyze; truthiness also catches empty
    # sequences that are not lists
    if not instruments:
        raise NoValidDataError("No valid data could be found in any of the "
                               "requested instruments.")
    return strain_dict, psd_strain_dict