def is_valid(self, gps_time, delta_t=16, dq_bits=(0, 1, 2, 3),
             inj_bits=(0, 1, 2, 4)):
    """
    Check whether `gps_time` is a valid time to sample noise from.

    A time is rejected if any of the following holds:

    * it lies within `delta_t` seconds of a confirmed merger from the
      event catalog,
    * it lies within `delta_t` seconds of the edge of the HDF file that
      contains it,
    * any entry of the H1 / L1 timeline masks in the index range
      `[gps2idx(gps_time) - delta_t, gps2idx(gps_time) + delta_t)` is
      missing one of the requested `dq_bits` or `inj_bits`,
    * that index range is not fully contained in the timeline.

    .. seealso:: For more information about the `dq_bits` and
        `inj_bits`, check out the website of the GW Open Science
        Center, which explains these for the case of O1:

            https://www.gw-openscience.org/archive/dataset/O1

    Args:
        gps_time (int): The GPS time whose validity we are checking.
        delta_t (int): The number of seconds around `gps_time` which
            we also want to be valid (because the sample will be an
            interval).
        dq_bits (tuple): The Data Quality Bits which one would like to
            require (see note above).
            *For example:* `dq_bits=(0, 1, 2, 3)` means that the data
            quality needs to pass all tests up to `CAT3`.
        inj_bits (tuple): The Injection Bits which one would like to
            require (see note above).
            *For example:* `inj_bits=(0, 1, 2, 4)` means that only
            continuous wave (CW) injections are permitted; all
            recordings containing any of other type of injection will
            be invalid for sampling.

    Returns:
        `True` if `gps_time` is valid, otherwise `False`.

    Raises:
        AssertionError: If `gps_time` is not an `int`, `delta_t` is
            negative, or a bit specification is out of range.
    """

    # ---------------------------------------------------------------------
    # Perform some basic sanity checks
    # ---------------------------------------------------------------------

    assert isinstance(gps_time, int), \
        'Received GPS time that is not an integer!'
    assert delta_t >= 0, \
        'Received an invalid value for delta_t!'
    assert set(dq_bits).issubset(set(range(7))), \
        'Invalid Data Quality bit specification passed to is_valid()!'
    assert set(inj_bits).issubset(set(range(5))), \
        'Invalid Injection bit specification passed to is_valid()!'

    # ---------------------------------------------------------------------
    # Check if given time is too close to a real event
    # ---------------------------------------------------------------------

    # Get GPS times of all confirmed mergers
    catalog = Catalog()
    real_event_times = [catalog.mergers[name].time
                        for name in catalog.names]

    # Check if gps_time is too close to any of these times
    if any(abs(gps_time - event_time) <= delta_t
           for event_time in real_event_times):
        return False

    # ---------------------------------------------------------------------
    # Check if the given time is too close to the edge within its HDF file
    # ---------------------------------------------------------------------

    # Loop over all HDF files to find the one that contains the given
    # gps_time. Here, we do not distinguish between H1 and L1, because
    # we assume that the files for the detectors are aligned on a grid.
    # NOTE(review): a gps_time that coincides exactly with a file's
    # start time matches no file here and therefore skips the edge
    # check -- confirm whether that is intended.
    for hdf_file in self.hdf_files:

        # Get the start and end time for the current HDF file
        start_time = hdf_file['start_time']
        end_time = start_time + hdf_file['duration']

        # Find the file that contains the given gps_time
        if start_time < gps_time < end_time:

            # Check if it is far away enough from the edges: If not, it
            # is not a valid time; otherwise we can stop searching
            if not start_time + delta_t < gps_time < end_time - delta_t:
                return False
            break

    # ---------------------------------------------------------------------
    # Select the environment around the specified time
    # ---------------------------------------------------------------------

    # Map time to indices (look the central index up only once)
    idx_center = self.gps2idx(gps_time)
    idx_start = idx_center - delta_t
    idx_end = idx_center + delta_t

    # A negative start index would silently wrap around to the end of
    # the timeline when slicing, so such an environment cannot be valid.
    if idx_start < 0:
        return False
    expected_length = idx_end - idx_start

    # ---------------------------------------------------------------------
    # Data Quality and Injection checks
    # ---------------------------------------------------------------------

    # Combine the requested bit positions into one integer mask each.
    # Bitwise OR (instead of a sum of powers of two) keeps the result
    # correct even if a bit position is listed more than once.
    required_dq = 0
    for bit in dq_bits:
        required_dq |= 1 << bit
    required_inj = 0
    for bit in inj_bits:
        required_inj |= 1 << bit

    # Every entry in the environment must have *all* required bits set,
    # i.e. (value & required) == required, for both H1 and L1.
    # NOTE(review): assumes the timeline masks have an integer dtype.
    for suffix, required in (('dq_mask', required_dq),
                             ('inj_mask', required_inj)):
        for detector in ('h1', 'l1'):
            values = self.timeline[f'{detector}_{suffix}'][idx_start:idx_end]

            # A truncated slice means the environment extends beyond the
            # end of the timeline and can thus not be fully checked.
            if len(values) != expected_length:
                return False

            if not np.all(np.bitwise_and(values, required) == required):
                return False

    # If we have not returned False yet, the time must be valid!
    return True
def __init__(self, run_dir, configs=None):
    """
    Stores config files for pycbc_inference runs.

    After delegating to the parent initializer, this makes sure the
    'prior', 'injection' and 'event' sections exist in `self.configs`,
    fills them via the corresponding `add_*` methods (one event config
    per name in the pycbc catalog) and finally refreshes the config
    writers.

    Parameters
    ----------
    run_dir : string
        Forwarded unchanged to the parent initializer.
    configs : dict, optional
        Forwarded to the parent initializer. When omitted, a fresh empty
        dict is used for every instance.

    Usage Notes
    -----------
    [1] Compatible with `ConfigWriter`. This class is easiest used with
        the writer it returns.

    [2] Arguments for `sampler.ini` and `inference.ini` are formatted in
        the initialization of this class. Therefore, when configuring
        for injections, no special notes apply.

    [3] Arguments for `data.ini` are not formatted in this class, but can
        be when writing it through its ConfigWriter. Therefore, when
        configuring for events, the following named variables need to be
        provided to the ConfigWriter's `write` function:

            gpstime : int
            H1_frame_file : str
            H1_channel : str
            L1_frame_file : str
            L1_channel : str
            V1_frame_file : str
            V1_channel : str
            sample_rate : int (power of 2)
    """
    # Avoid a mutable default argument: a shared `{}` default would be
    # reused (and possibly mutated) across all instances.
    if configs is None:
        configs = {}
    super(InferenceConfigs, self).__init__(run_dir, configs)

    # Add prior configs
    self.configs.setdefault('prior', {})
    self.add_default_bbh_prior_config()
    self.add_bilby_prior_files_configs()

    # Add configs for injections
    self.configs.setdefault('injection', {})
    self.add_injection_configs()

    # Add event configs (one per event name known to the pycbc catalog)
    self.configs.setdefault('event', {})
    from pycbc.catalog import Catalog
    self.event_names = Catalog().names
    for event_name in self.event_names:
        self.add_event_configs(event_name)

    # Initialize their config writers
    self.update_config_writers()
def build_timeline(
    self,
    window: int = 32,
    dq_bits: Tuple[int, ...] = (0, 1, 2, 3),
    inj_bits: Tuple[int, ...] = (0, 1, 2, 4),
    chunk_size: int = 100000,
) -> np.ndarray:
    """Build (or fetch from the cache) the validity timeline for `window`.

    The detector masks returned by `self.build_masks(dq_bits, inj_bits)`
    are scanned with a sliding window of `window` entries: a timeline
    entry is `True` only if, for every detector, all mask entries inside
    the window are `True`. The trailing `window - 1` entries, for which no
    full window exists, are set to `False`, as are the entries around the
    confirmed mergers of the event catalog that fall inside the data span.
    The result is stored in the cache, keyed by
    `(window, dq_bits, inj_bits)`, and returned directly on later calls.

    For more information about the `dq_bits` and `inj_bits`, check out
    the website of the GW Open Science Center, which explains these for
    the case of O1:

        https://www.gw-openscience.org/archive/dataset/O1

    Args:
        window : int
            The number of seconds around a GPS time which we also want
            to be valid (because the sample will be an interval).
        dq_bits : Tuple[int, ...]
            The Data Quality Bits which one would like to require (see
            note above).
            *For example:* `dq_bits=(0, 1, 2, 3)` means that the data
            quality needs to pass all tests up to `CAT3`.
        inj_bits : Tuple[int, ...]
            The Injection Bits which one would like to require (see
            note above).
            *For example:* `inj_bits=(0, 1, 2, 4)` means that only
            continuous wave (CW) injections are permitted; all
            recordings containing any of other type of injection will
            be invalid for sampling.
        chunk_size : int
            Number of timeline entries processed per vectorised step.
            Must be positive and smaller than the number of full windows.

    Returns:
        A boolean array - `True` if the data is valid, otherwise `False`.

    Raises:
        AssertionError: If an argument is invalid or the cache contains
            duplicate entries for the requested configuration.
    """
    assert isinstance(
        window, int) and window >= 0, 'Received an invalid int for window!'
    assert set(dq_bits).issubset(set(
        range(7))), 'Invalid Data Quality bit specification!'
    assert set(inj_bits).issubset(set(
        range(5))), 'Invalid Injection bit specification!'

    # Look for an already computed timeline with the same configuration
    match = self._cache[(self._cache['window'] == window)
                        & (self._cache['dq_bits'] == dq_bits)
                        & (self._cache['inj_bits'] == inj_bits)]
    assert len(match) in (0, 1), "Duplicated detected in timeline cache!"

    if len(match) == 1:
        return self._masks[match.index.item()]

    # Local import: only needed to extend the cache table below.
    import pandas as pd

    # build data quality masks for each detector (window independent)
    masks = self.build_masks(dq_bits, inj_bits, as_array=True)

    # number of positions at which a full window fits into the data
    n_windows = self.n_entries - window + 1
    timeline = np.zeros((len(masks), n_windows), dtype=bool)

    assert 0 < chunk_size < n_windows, \
        "chunk_size must be smaller than length of timeline."
    n_chunks = int(np.ceil(n_windows / chunk_size))

    # Row i holds the mask indices covered by the window starting at i,
    # i.e. [i, i + 1, ..., i + window - 1]; built by broadcasting.
    mask_buffer = (np.arange(chunk_size, dtype=np.int32)[:, np.newaxis]
                   + np.arange(window, dtype=np.int32))

    with tqdm(
            total=self.n_entries,
            desc='Processing timeline windows',
            disable=not self.verbose,
    ) as progress:
        # loop through generator that chunks timeline array
        chunker = chunk_counter(self.n_entries, n_chunks, chunk_size,
                                window)
        for start, end in chunker:
            # edit timeline mask for all (:) detectors
            timeline[:, start:end] = masks[:, mask_buffer[:end - start, :]].all(
                axis=2)
            progress.update(end - start)  # update tqdm iters
            progress.refresh()
            mask_buffer += chunk_size  # shift index matrix to next chunk

        # append "dead zone" for completeness: the last `window - 1`
        # entries cannot host a full window and are therefore invalid
        dead_zone = np.zeros((masks.shape[0], max(window - 1, 0)),
                             dtype=bool)
        # np.all down the detector dimension
        timeline = np.concatenate([timeline, dead_zone], axis=1).all(axis=0)
        progress.update(window - 1)

    # Invalidate the entries around every confirmed merger that lies
    # inside the data span. Slice assignment (with clipping) also copes
    # with no merger being in range and with mergers close to the edges.
    catalog = Catalog()
    for merger in catalog.mergers.values():
        event_time = merger.time
        if not self.gps_start_time < event_time < self.gps_end_time:
            continue
        first = max(int(self.gps2idx(event_time - (window / 2))), 0)
        last = min(int(self.gps2idx(event_time + (window / 2))),
                   timeline.shape[0])
        timeline[first:last] = False

    # add timeline to cache
    metadata = [{
        'window': window,
        'dq_bits': dq_bits,
        'inj_bits': inj_bits
    }]
    # DataFrame.append was removed in pandas 2.0; concat is equivalent here
    self._cache = pd.concat([self._cache, pd.DataFrame(metadata)],
                            ignore_index=True)
    self._masks[self._cache.index[-1]] = timeline

    return timeline
# Make sure the output directory exists. `exist_ok=True` avoids the race
# between checking for the directory and creating it.
output_dir = os.path.join('.', 'output')
os.makedirs(output_dir, exist_ok=True)

# Construct path to results file and open it in write mode once, which
# truncates it so that we start from an empty file
results_file = os.path.join(output_dir, 'real_events.hdf')
with h5py.File(results_file, 'w'):
    pass

# -------------------------------------------------------------------------
# Create an event catalog and loop over all events
# -------------------------------------------------------------------------

# Set up a new catalog
catalog = Catalog()

# Loop over the events it contains (in sorted order of their names)
for event in sorted(catalog.names):

    print('Processing', event.upper())
    print(64 * '-')

    # Get the strain for detectors H1 and L1 (if necessary, this will
    # download the strain from GWOSC)
    strain = dict(
        H1=catalog[event].strain('H1'),
        L1=catalog[event].strain('L1'),
    )

    # ---------------------------------------------------------------------