def final_model(self):
    """Return tuple of iteration and step count for final accepted model"""
    try:
        return self.steps.index[-1], self.steps[-1][-1]
    except TypeError:
        logger.warning("Inspector has no 'steps' data, returning None")
        return None, None
def initial_model(self):
    """Return tuple of the iteration and step count corresponding to M00"""
    try:
        return self.steps.index[0], self.steps[0][0]
    except TypeError:
        logger.warning("Inspector has no 'steps' data, returning None")
        return None, None
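# Illustrative usage sketch (not part of the original source): assuming these
# are accessed as properties of a populated `Inspector` object whose 'steps'
# data has been loaded. The import path, constructor signature and tag below
# are assumptions for demonstration only.
from pyatoa import Inspector

insp = Inspector(tag="example_inversion")
iter_initial, step_initial = insp.initial_model   # e.g. ('i01', 's00')
iter_final, step_final = insp.final_model         # e.g. ('i10', 's02')
print(f"inversion ran from {iter_initial}{step_initial} "
      f"to {iter_final}{step_final}")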
def taper_time_offset(st, taper_percentage=0.05, time_offset_sec=0):
    """
    Taper the leading edge of the waveform. If a time offset is given, e.g.,
    20 s before the event origin time (T_0), taper all the way from T=0 up to
    T=T_0, to ensure that there are no impulse-like signals prior to the
    event origin.

    :type st: obspy.core.stream.Stream
    :param st: Stream object to be tapered
    :type taper_percentage: float
    :param taper_percentage: default taper percentage
    :type time_offset_sec: float
    :param time_offset_sec: Any time offset between the start of the stream
        and the event origin time. All time between these two points will be
        tapered to reduce any signals prior to the event origin.
    :rtype: obspy.core.stream.Stream
    :return: tapered Stream object
    """
    taper_amount = st[0].stats.npts * taper_percentage * st[0].stats.delta
    if taper_amount > abs(time_offset_sec):
        logger.warning("taper amount exceeds time offset, taper may affect "
                       "data if source receiver distance is short")
    elif taper_amount < abs(time_offset_sec):
        logger.info(f"adjusting taper to cover time offset {time_offset_sec}")
        # Invert the taper_amount formula above so the taper spans the full
        # time offset; note the parentheses around (npts * delta)
        taper_percentage = (abs(time_offset_sec) /
                            (st[0].stats.npts * st[0].stats.delta))

    # Get rid of extra long-period signals which may adversely affect
    # processing
    st.detrend("simple").taper(taper_percentage, side="left")

    return st
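# Illustrative usage sketch (not from the original source): taper a stream
# whose start time sits 20 s before the event origin. The random data and
# sampling rate below are assumptions purely for demonstration.
import numpy as np
from obspy import Stream, Trace

tr = Trace(data=np.random.randn(3000))
tr.stats.sampling_rate = 10.0  # 300 s of data
st = Stream([tr])
st_tapered = taper_time_offset(st, taper_percentage=0.05, time_offset_sec=20)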
def compare(self, iteration_a=None, step_count_a=None, iteration_b=None,
            step_count_b=None):
    """
    Compare the misfit and number of windows on an event-by-event basis
    between two evaluations. Provides absolute values as well as differences.
    The final dataframe is sorted by the difference in misfit, showing the
    most and least improved events.

    :type iteration_a: str
    :param iteration_a: initial iteration to use in comparison
    :type step_count_a: str
    :param step_count_a: initial step count to use in comparison
    :type iteration_b: str
    :param iteration_b: final iteration to use in comparison
    :type step_count_b: str
    :param step_count_b: final step count to use in comparison
    :rtype: pandas.core.frame.DataFrame
    :return: a sorted data frame containing the difference of misfit and
        number of windows between final and initial
    """
    # If the first argument isn't given, default to the first/last model
    if iteration_a is None:
        iteration_a, step_count_a = self.initial_model
    if iteration_b is None:
        iteration_b, step_count_b = self.final_model

    # If initial or final models not given, nothing to compare
    if None in [iteration_a, step_count_a, iteration_b, step_count_b]:
        logger.warning("Cannot locate model indices to compare model data")
        return None

    misfit = self.misfit(level="event")
    msft_a = misfit.loc[iteration_a, step_count_a]
    msft_b = misfit.loc[iteration_b, step_count_b]

    # Doesn't really make sense to compare unscaled misfit, so drop the column
    msft_a = msft_a.drop(["unscaled_misfit"], axis=1).copy()
    msft_b = msft_b.drop(["unscaled_misfit"], axis=1).copy()

    # For renaming and access to renamed columns
    initial = f"{iteration_a}{step_count_a}"
    final = f"{iteration_b}{step_count_b}"

    msft_a.rename({"nwin": f"nwin_{initial}", "misfit": f"misfit_{initial}"},
                  axis="columns", inplace=True)
    msft_b.rename({"nwin": f"nwin_{final}", "misfit": f"misfit_{final}"},
                  axis="columns", inplace=True)

    df = pd.merge(msft_a, msft_b, left_index=True, right_index=True)
    df["diff_misfit"] = df[f"misfit_{final}"] - df[f"misfit_{initial}"]
    df["diff_nwin"] = df[f"nwin_{final}"] - df[f"nwin_{initial}"]

    return df.sort_values(by="diff_misfit")
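# Illustrative usage sketch (assumption: `insp` is a populated Inspector as in
# the earlier example; the iteration/step labels are illustrative). Compare the
# default first and last accepted models, then compare two specific
# evaluations and print the five most improved events.
df = insp.compare()
if df is not None:
    print(df.head(5))

df = insp.compare(iteration_a="i01", step_count_a="s00",
                  iteration_b="i05", step_count_b="s01")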
def save(self, path="./", fmt="csv", tag=None):
    """
    Save the downloaded attributes into CSV (or HDF5) files for easier
    re-loading.

    .. note::
        fmt == 'hdf' requires 'pytables' to be installed in the environment

    :type tag: str
    :param tag: tag to use to save files, defaults to the class tag but
        allows for the option of overwriting that
    :type path: str
    :param path: optional path to save to, defaults to cwd
    :type fmt: str
    :param fmt: format of the files to write, default csv
    """
    if tag is None:
        tag = self.tag
    if fmt == "hdf":
        try:
            # The PyTables package is imported under the name 'tables'
            import tables
        except ImportError:
            fmt = "csv"
            print("format 'hdf' requires pytables, defaulting to 'csv'")

    if fmt == "csv":
        write_check = 0
        if not self.sources.empty:
            self.sources.to_csv(os.path.join(path, f"{tag}_src.csv"))
            write_check += 1
        if not self.receivers.empty:
            self.receivers.to_csv(os.path.join(path, f"{tag}_rcv.csv"))
            write_check += 1
        if not self.windows.empty:
            self.windows.to_csv(os.path.join(path, f"{tag}.csv"), index=False)
            write_check += 1
        if write_check == 0:
            logger.warning("Inspector empty, will not write to disk")
    elif fmt == "hdf":
        with pd.HDFStore(os.path.join(path, f"{tag}.hdf")) as s:
            s["sources"] = self.sources
            s["receivers"] = self.receivers
            s["windows"] = self.windows
    else:
        raise NotImplementedError
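# Illustrative usage sketch (assumptions as in the earlier Inspector example):
# write the Inspector's DataFrames to CSV in the current directory, and then
# to a single HDF5 store, which requires PyTables to be installed.
insp.save(path="./", fmt="csv", tag="example_inversion")
insp.save(path="./", fmt="hdf", tag="example_inversion")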
def event_get(self, event_id=None):
    """
    Return event information parameters pertaining to a given event id, if an
    event id is given, else by origin time. Catches FDSN exceptions.

    :rtype event: obspy.core.event.Event or None
    :return event: event object if found, else None.
    """
    if not self.Client:
        return None
    if event_id is None:
        event_id = self.config.event_id

    event, origintime = None, None
    if event_id is not None:
        try:
            # Get events via event id, only available from certain clients
            logger.debug(f"event ID: {event_id}, querying "
                         f"client {self.config.client}")
            event = self.Client.get_events(eventid=event_id)[0]
        except FDSNException:
            pass
    if self.origintime and event is None:
        try:
            # If getting by event id doesn't work, try based on origin time
            logger.debug(f"origintime: {self.origintime}, querying "
                         f"client {self.config.client}")
            event = self.Client.get_events(starttime=self.origintime,
                                           endtime=self.origintime)
            if len(event) > 1:
                # Getting by origin time may result in multiple events
                # found in the catalog, this is hard to control and will
                # probably need to be addressed manually.
                logger.warning(f"{len(event)} events found, expected 1. "
                               f"Returning first entry, manual revision "
                               f"may be required.")
            event = event[0]
        except FDSNException:
            pass

    return event
def write(self, write_to="ds"):
    """
    Write the data collected inside Manager to either a Pyasdf Dataset, or to
    individual files (not implemented).

    :type write_to: str
    :param write_to: choice to write data to, if "ds" writes to a
        pyasdf.asdf_data_set.ASDFDataSet

        * write_to == "ds":
            If gather is skipped but data should still be saved into an
            ASDFDataSet for data storage, this function will fill that
            dataset in the same fashion as the Gatherer class
        * write_to == "/path/to/output":
            write out all the internal data of the manager to a path
    """
    if write_to == "ds":
        if self.event:
            try:
                self.ds.add_quakeml(self.event)
            except ValueError:
                logger.warning("Event already present, not added")
        if self.inv:
            try:
                self.ds.add_stationxml(self.inv)
            except TypeError:
                logger.warning("StationXML already present, not added")
        # PyASDF has its own warnings if waveform data already present
        if self.st_obs:
            self.ds.add_waveforms(waveform=self.st_obs,
                                  tag=self.config.observed_tag)
        if self.st_syn:
            self.ds.add_waveforms(waveform=self.st_syn,
                                  tag=self.config.synthetic_tag)
        if self.windows:
            self.save_windows()
        if self.adjsrcs:
            self.save_adjsrcs()
    else:
        raise NotImplementedError
def zero_pad_then_window(ws, pad_by_fraction_of_npts=.2):
    """
    To address Pyflex throwing ValueErrors when source-receiver distances are
    small, zero pad the waveforms before re-running window selection.

    .. note::
        Sept 1, 2020
        Work in progress, may not actually want to do this to avoid any
        near-source effects?

    :type ws: pyflex.WindowSelector
    :param ws: an already-filled window selector object that should be passed
        in from the Manager object
    :rtype: list of pyflex.Window
    :return: a list of Window objects, or an empty list if no windows found
        or the zero padding didn't work
    """
    raise NotImplementedError

    logger.warning("Pyflex has thrown a ValueError, most likely due to a "
                   "small source-receiver distance. Attempting to zero-pad "
                   "waveforms and re-run window selection")

    # We assume that these traces have already been standardized. These values
    # will be used to ensure that we can undo the zero-padding
    original_origintime = ws.observed.stats.starttime
    original_endtime = ws.observed.stats.endtime
    original_npts = ws.observed.stats.npts

    # Pad by a fraction of the trace length, only the front of the data
    pad_width = int(original_npts * pad_by_fraction_of_npts)
    ws.observed.data = np.pad(ws.observed.data, (pad_width,), mode="constant")
    ws.observed.stats.starttime -= pad_width * ws.observed.stats.delta

    # Return the newly selected windows, as described by the docstring
    return ws.select_windows()
def save_windows(self):
    """
    Convenience function to save collected misfit windows into an
    ASDFDataSet with some preliminary checks

    Auxiliary data tag is hardcoded as 'MisfitWindows'
    """
    if self.ds is None:
        logger.warning("Manager has no ASDFDataSet, cannot save windows")
    elif not self.windows:
        logger.warning("Manager has no windows to save")
    elif not self.config.save_to_ds:
        logger.warning("config parameter save_to_ds is set False, "
                       "will not save windows")
    else:
        logger.debug("saving misfit windows to ASDFDataSet")
        add_misfit_windows(self.windows, self.ds, path=self.config.aux_path)
def save_adjsrcs(self):
    """
    Convenience function to save collected adjoint sources into an
    ASDFDataSet with some preliminary checks

    Auxiliary data tag is hardcoded as 'AdjointSources'
    """
    if self.ds is None:
        logger.warning("Manager has no ASDFDataSet, cannot save "
                       "adjoint sources")
    elif not self.adjsrcs:
        logger.warning("Manager has no adjoint sources to save")
    elif not self.config.save_to_ds:
        logger.warning("config parameter save_to_ds is set False, "
                       "will not save adjoint sources")
    else:
        logger.debug("saving adjoint sources to ASDFDataSet")
        add_adjoint_sources(adjsrcs=self.adjsrcs, ds=self.ds,
                            path=self.config.aux_path,
                            time_offset=self.stats.time_offset_sec)
def trim_streams(st_a, st_b, precision=1E-3, force=None):
    """
    Trim two streams to common start and end times and do some basic
    preprocessing before trimming. Allows the user to force one stream to
    conform to the other. Assumes all traces in a stream share the same start
    and end times. Prechecks make sure that the streams actually differ.

    :type st_a: obspy.stream.Stream
    :param st_a: streams to be trimmed
    :type st_b: obspy.stream.Stream
    :param st_b: streams to be trimmed
    :type precision: float
    :param precision: precision to check UTCDateTime differences
    :type force: str
    :param force: "a" or "b"; force trim to the length of "st_a" or "st_b",
        if not given, trims to the common time
    :rtype: tuple of obspy.stream.Stream
    :return: trimmed stream objects in the same order as input
    """
    # Check if the times already match; compare absolute differences so a
    # negative difference does not silently pass the check
    if abs(st_a[0].stats.starttime - st_b[0].stats.starttime) < precision \
            and abs(st_a[0].stats.endtime - st_b[0].stats.endtime) < precision:
        logger.debug(f"start and endtimes already match to {precision}")
        return st_a, st_b

    # Force the trim to the start and end times of one of the streams
    if force:
        if force.lower() == "a":
            start_set = st_a[0].stats.starttime
            end_set = st_a[0].stats.endtime
        elif force.lower() == "b":
            start_set = st_b[0].stats.starttime
            end_set = st_b[0].stats.endtime
    # Get starttime and endtime based on min values
    else:
        st_trimmed = st_a + st_b
        start_set, end_set = 0, 1E10
        for st in st_trimmed:
            start_hold = st.stats.starttime
            end_hold = st.stats.endtime
            if start_hold > start_set:
                start_set = start_hold
            if end_hold < end_set:
                end_set = end_hold

    # Trim to common start and end times
    st_a_out = st_a.copy()
    st_b_out = st_b.copy()
    for st in [st_a_out, st_b_out]:
        st.trim(start_set, end_set)

    # Trimming doesn't always make the starttimes exactly equal if the
    # precision of the UTCDateTime object is set too high.
    # Artificially shift the starttime of the streams only if the amount
    # shifted is less than one sample (delta)
    for st in [st_a_out, st_b_out]:
        for tr in st:
            dt = start_set - tr.stats.starttime
            if 0 < dt < tr.stats.delta:
                logger.debug(f"shifting {tr.id} starttime by {dt}s")
                tr.stats.starttime = start_set
            elif dt >= tr.stats.delta:
                logger.warning(
                    f"{tr.id} starttime is {dt}s greater than delta")

    return st_a_out, st_b_out
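# Illustrative usage sketch (not from the original source): trim observed and
# synthetic streams, read from hypothetical file names, to their common time
# span, or force both onto the time span of the observed data.
from obspy import read

st_obs = read("observed.mseed")     # assumed file name
st_syn = read("synthetic.mseed")    # assumed file name
st_obs, st_syn = trim_streams(st_obs, st_syn, precision=1E-3)
st_obs, st_syn = trim_streams(st_obs, st_syn, force="a")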
def window(self, fix_windows=False, iteration=None, step_count=None,
           force=False, save=True):
    """
    Evaluate misfit windows using Pyflex. Save windows to ASDFDataSet.
    Allows previously defined windows to be retrieved from ASDFDataSet.

    .. note::
        * Windows are stored as dictionaries of pyflex.Window objects.
        * All windows are saved into the ASDFDataSet, even if retrieved.
        * STA/LTA information is collected and stored internally.

    :type fix_windows: bool
    :param fix_windows: do not pick new windows, but load windows from the
        given dataset from 'iteration' and 'step_count'
    :type iteration: int or str
    :param iteration: if 'fix_windows' is True, look for windows in this
        iteration. If None, will check the latest iteration/step_count in
        the given dataset
    :type step_count: int or str
    :param step_count: if 'fix_windows' is True, look for windows in this
        step_count. If None, will check the latest iteration/step_count in
        the given dataset
    :type force: bool
    :param force: ignore flag checks and run function, useful if e.g.
        external preprocessing is used that doesn't meet flag criteria
    :type save: bool
    :param save: save the gathered windows to an ASDF Dataset
    """
    # Pre-check to see if data has already been standardized
    self.check()

    if self.config.pyflex_preset is None:
        logger.info("pyflex preset is set to 'None', will not window")
        return

    if not self.stats.standardized and not force:
        raise ManagerError("cannot window, waveforms not standardized")

    # Determine how to treat fixed windows
    if fix_windows and not self.ds:
        logger.warning("cannot fix window, no dataset")
        fix_windows = False
    elif fix_windows and (iteration is None or step_count is None):
        # If no iteration/step_count values are given, automatically search
        # the previous step_count for windows in relation to the current
        # iteration/step_count
        iteration = self.config.iteration
        step_count = self.config.step_count
        return_previous = True
    else:
        # If fix windows and iteration/step_count are given, search the
        # dataset for windows under the current iteration/step_count
        return_previous = False

    # Synthetic STA/LTA as Pyflex WindowSelector.calculate_preliminaries()
    for comp in self.config.component_list:
        try:
            self.staltas[comp] = pyflex.stalta.sta_lta(
                data=envelope(self.st_syn.select(component=comp)[0].data),
                dt=self.st_syn.select(component=comp)[0].stats.delta,
                min_period=self.config.min_period)
        except IndexError:
            continue

    # Find misfit windows, from a dataset or through window selection
    if fix_windows:
        self.retrieve_windows(iteration, step_count, return_previous)
    else:
        self.select_windows_plus()

    if save:
        self.save_windows()

    logger.info(f"{self.stats.nwin} window(s) total found")

    return self
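# Illustrative usage sketch (assumption: `mgmt` is a Manager whose waveforms
# have already been gathered, standardized and preprocessed; the iteration and
# step count labels are illustrative). Pick new windows, then on a later
# evaluation re-use windows fixed from a previous evaluation.
mgmt.window()
mgmt.window(fix_windows=True, iteration="i01", step_count="s00")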
def preprocess(self, which="both", overwrite=None, **kwargs):
    """
    Preprocess observed and synthetic waveforms in place.
    Default preprocessing tasks: Remove response (observed), rotate, filter,
    convolve with source time function (synthetic).

    .. note::
        Default preprocessing can be overwritten using a user-defined
        function that takes Manager and choice as inputs and outputs an
        ObsPy Stream object.

    .. note::
        Documented kwargs only apply to default preprocessing.

    :type which: str
    :param which: "obs", "syn" or "both" to choose which stream to process,
        defaults to "both"
    :type overwrite: function
    :param overwrite: If a function is provided, it will overwrite the
        standard preprocessing function. All arguments that are given to the
        standard preprocessing function will be passed as kwargs to the new
        function. This allows for customized preprocessing

    Keyword Arguments
    ::
        int water_level:
            water level for response removal
        float taper_percentage:
            amount to taper ends of waveform
        bool remove_response:
            remove instrument response using the Manager's inventory object.
            Defaults to True
        bool apply_filter:
            filter the waveforms using the Config's min_period and
            max_period parameters. Defaults to True
        bool convolve_with_stf:
            Convolve synthetic data with a Gaussian source time function if
            a half duration is provided.
    """
    if not self.inv and not self.config.synthetics_only:
        raise ManagerError("cannot preprocess, no inventory")
    if overwrite:
        assert hasattr(overwrite, '__call__'), "overwrite must be function"
        preproc_fx = overwrite
    else:
        preproc_fx = default_process

    # If required, will rotate based on source receiver lat/lon values
    if self.config.rotate_to_rtz:
        if not self.inv:
            logger.warning("cannot rotate components, no inventory")
        else:
            self.gcd, self.baz = gcd_and_baz(event=self.event,
                                             sta=self.inv[0][0])

    # Preprocess observation waveforms
    if self.st_obs is not None and not self.stats.obs_processed and \
            which.lower() in ["obs", "both"]:
        logger.info("preprocessing observation data")
        self.st_obs = preproc_fx(self, choice="obs", **kwargs)
        self.stats.obs_processed = True

    # Preprocess synthetic waveforms
    if self.st_syn is not None and not self.stats.syn_processed and \
            which.lower() in ["syn", "both"]:
        logger.info("preprocessing synthetic data")
        self.st_syn = preproc_fx(self, choice="syn", **kwargs)
        self.stats.syn_processed = True

    # Set stats
    self.stats.len_obs = len(self.st_obs)
    self.stats.len_syn = len(self.st_syn)

    return self
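# Illustrative sketch of the `overwrite` hook (not from the original source):
# a user-defined function replaces the default preprocessing. It receives the
# Manager and the stream choice, as the call above shows, and must return an
# ObsPy Stream. The filtering choices inside are assumptions for demonstration.
def my_preprocess(mgmt, choice, **kwargs):
    """Minimal custom preprocessing: detrend, taper and bandpass filter."""
    st = {"obs": mgmt.st_obs, "syn": mgmt.st_syn}[choice].copy()
    st.detrend("linear").taper(0.05)
    st.filter("bandpass", freqmin=1 / mgmt.config.max_period,
              freqmax=1 / mgmt.config.min_period)
    return st

mgmt.preprocess(which="both", overwrite=my_preprocess)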
def gather(self, code=None, choice=None, event_id=None, **kwargs):
    """
    Gather station dataless and waveform data using the Gatherer class.
    Gathers, in order: observed waveforms, dataless, and finally synthetics.

    For valid kwargs see methods in :doc:`core.gatherer`

    :type code: str
    :param code: Station code following SEED naming convention.
        This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
        L=location, C=channel). Allows for wildcard naming. By default
        the pyatoa workflow wants three orthogonal components in the
        N/E/Z coordinate system. Example station code: NZ.OPRZ.10.HH?
    :type choice: list
    :param choice: allows user to gather individual bits of data, rather
        than gathering all. Allowed: 'event', 'inv', 'st_obs', 'st_syn'
    :raises ManagerError: if any part of the gathering fails.

    Keyword Arguments
    ::
        bool try_fm:
            Try to retrieve and append focal mechanism information to the
            Event object.
        str prefix:
            Prefix for event id when searching for event information,
            can be used to search ordered files e.g., CMTSOLUTION_001
        str suffix:
            Suffix for event id when searching for event information
        str station_level:
            The level of the station metadata if retrieved using the ObsPy
            Client. Defaults to 'response'
        str resp_dir_template:
            Directory structure template to search for response files.
            By default follows the SEED convention:
            'path/to/RESPONSE/{sta}.{net}/'
        str resp_fid_template:
            Response file naming template to search for station dataless.
            By default, follows the SEED convention:
            'RESP.{net}.{sta}.{loc}.{cha}'
        str obs_dir_template:
            directory structure to search for observation data. Follows the
            SEED convention: 'path/to/obs_data/{year}/{net}/{sta}/{cha}'
        str obs_fid_template:
            File naming template to search for observation data. Follows the
            SEED convention: '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}'
        str syn_cfgpath:
            Config.cfgpaths key to search for synthetic data. Defaults to
            'synthetics', but may need to be set to 'waveforms' in certain
            use-cases, e.g. synthetic-synthetic inversions.
        str syn_unit:
            Optional argument to specify the letter used to identify the
            units of the synthetic data: For Specfem3D:
            ["d", "v", "a", "?"] 'd' for displacement, 'v' for velocity,
            'a' for acceleration. Wildcards okay. Defaults to '?'
        str syn_dir_template:
            Directory structure template to search for synthetic waveforms.
            Defaults to empty string
        str syn_fid_template:
            The naming template of synthetic waveforms, defaults to:
            "{net}.{sta}.*{cmp}.sem{syn_unit}"
    """
    # Default to gathering all data
    if choice is None:
        choice = ["event", "inv", "st_obs", "st_syn"]
    try:
        # Attempt to gather event information before waveforms/metadata
        if "event" in choice and self.event is None:
            if event_id is None:
                event_id = self.config.event_id
            self.event = self.gatherer.gather_event(event_id, **kwargs)
        if code is not None:
            logger.info(f"gathering data for {code}")
            if "st_obs" in choice:
                # Ensure observed waveforms are gathered before synthetics
                # and metadata. If this fails, there is no point in
                # gathering the rest
                self.st_obs = self.gatherer.gather_observed(code, **kwargs)
            if "inv" in choice:
                self.inv = self.gatherer.gather_station(code, **kwargs)
            if "st_syn" in choice:
                self.st_syn = self.gatherer.gather_synthetic(code, **kwargs)

        return self
    except GathererNoDataException as e:
        # Catch the Gatherer exception and redirect as ManagerError
        # so that it can be caught by flow()
        raise ManagerError("Data Gatherer could not find some data") from e
    except Exception as e:
        # Gathering should be robust, but if something slips through, don't
        # let it kill a workflow, display and raise ManagerError
        logger.warning(e, exc_info=True)
        raise ManagerError("Uncontrolled error in data gathering") from e
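# Illustrative usage sketch (not from the original source): the import paths,
# Config keyword arguments and event id below are assumptions. Gather all data
# for one station, or only the observed waveforms and station metadata.
from pyatoa import Config, Manager

cfg = Config(event_id="2018p130600")
mgmt = Manager(config=cfg)
mgmt.gather(code="NZ.OPRZ.10.HH?")
mgmt.gather(code="NZ.OPRZ.10.HH?", choice=["st_obs", "inv"])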
def load(self, code, path=None, ds=None, synthetic_tag=None,
         observed_tag=None, config=True, windows=False, adjsrcs=False):
    """
    Populate the manager using a previously populated ASDFDataSet.
    Useful for re-instantiating an existing workflow that has already
    gathered data and saved it to an ASDFDataSet.

    .. warning::
        Loading any floating point values may result in rounding errors.
        Be careful to round off floating points to the correct place before
        using in future work.

    :type code: str
    :param code: network and station code in the form 'NN.SSS', e.g.
        'NZ.BFZ'. Data in the dataset are keyed by station, so location and
        channel codes are not required here.
    :type path: str
    :param path: if no Config object is given during init, the User can
        specify the config path here to load data from the dataset.
        This skips the need to initiate a separate Config object.
    :type ds: None or pyasdf.asdf_data_set.ASDFDataSet
    :param ds: dataset can be given to load from, will not set the Manager's
        internal `ds` attribute
    :type synthetic_tag: str
    :param synthetic_tag: waveform tag of the synthetic data in the dataset
        e.g. 'synthetic_m00s00'. If None given, will use `config` attribute.
    :type observed_tag: str
    :param observed_tag: waveform tag of the observed data in the dataset
        e.g. 'observed'. If None given, will use `config` attribute.
    :type config: bool
    :param config: load config from the dataset, defaults to True but can
        be set False if Config should be instantiated by the User
    :type windows: bool
    :param windows: load misfit windows from the dataset, defaults to False
    :type adjsrcs: bool
    :param adjsrcs: load adjoint sources from the dataset, defaults to False
    """
    # Allow a dataset to be provided outside the attribute; fall back to the
    # internal dataset only when no external one is given
    if ds is None:
        if self.ds:
            ds = self.ds
        else:
            raise TypeError("load requires a Dataset")

    # If no Config object in Manager, try to load from dataset
    if config:
        if path is None:
            raise TypeError("load requires valid 'path' argument")
        logger.info(f"loading config from dataset {path}")
        try:
            self.config = Config(ds=ds, path=path)
        except AttributeError:
            logger.warning(f"No Config object in dataset for path {path}")

    assert self.config is not None, "Config object required for load"
    assert len(code.split('.')) == 2, "'code' must be in form 'NN.SSS'"
    if windows or adjsrcs:
        assert path is not None, "'path' required to load auxiliary data"
        iter_, step = path.split("/")

    # Reset and populate using the dataset
    self.__init__(config=self.config, ds=ds, event=ds.events[0])
    net, sta = code.split('.')
    sta_tag = f"{net}.{sta}"
    if sta_tag in ds.waveforms.list():
        self.inv = ds.waveforms[sta_tag].StationXML
        self.st_syn = ds.waveforms[sta_tag][synthetic_tag or
                                            self.config.synthetic_tag]
        self.st_obs = ds.waveforms[sta_tag][observed_tag or
                                            self.config.observed_tag]
        if windows:
            self.windows = load_windows(ds, net, sta, iter_, step, False)
        if adjsrcs:
            self.adjsrcs = load_adjsrcs(ds, net, sta, iter_, step)
    else:
        logger.warning(f"no data for {sta_tag} found in dataset")

    self.check()
    return self
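# Illustrative usage sketch (assumptions: 'example.h5' is an ASDFDataSet
# previously filled by a Manager for station NZ.BFZ during the evaluation
# labeled 'i01/s00'; the Manager constructor accepting a `ds` keyword is
# assumed from the __init__ call above).
from pyasdf import ASDFDataSet

ds = ASDFDataSet("example.h5")
mgmt = Manager(ds=ds)
mgmt.load(code="NZ.BFZ", path="i01/s00", windows=True, adjsrcs=True)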
def fetch_obs_by_dir(self, code, **kwargs):
    """
    Fetch observation waveforms via directory structure on disk.

    .. note::
        Default waveform directory structure assumed to follow SEED
        convention. That is:
        path/to/data/{YEAR}/{NETWORK}/{STATION}/{CHANNEL}*/{FID}
        e.g. path/to/data/2017/NZ/OPRZ/HHZ.D/NZ.OPRZ.10.HHZ.D

    :type code: str
    :param code: Station code following SEED naming convention.
        This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
        L=location, C=channel). Allows for wildcard naming. By default
        the pyatoa workflow wants three orthogonal components in the
        N/E/Z coordinate system. Example station code: NZ.OPRZ.10.HH?
    :rtype stream: obspy.core.stream.Stream or None
    :return stream: stream object containing relevant waveforms, else None

    Keyword Arguments
    ::
        str obs_dir_template:
            directory structure to search for observation data. Follows the
            SEED convention: 'path/to/obs_data/{year}/{net}/{sta}/{cha}'
        str obs_fid_template:
            File naming template to search for observation data. Follows the
            SEED convention: '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}'
    """
    obs_dir_template = kwargs.get("obs_dir_template",
                                  "{year}/{net}/{sta}/{cha}*")
    obs_fid_template = kwargs.get(
        "obs_fid_template", "{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}")

    if self.origintime is None:
        raise AttributeError("'origintime' must be specified")

    net, sta, loc, cha = code.split('.')
    # If waveforms contain midnight, multiple files need to be read
    jdays = overlapping_days(origin_time=self.origintime,
                             start_pad=self.config.start_pad,
                             end_pad=self.config.end_pad)

    # Ensure that the paths are a list so that iterating doesn't accidentally
    # try to iterate through a string.
    paths = self.config.paths["waveforms"]
    if not isinstance(paths, list):
        paths = [paths]

    for path_ in paths:
        if not os.path.exists(path_):
            continue
        full_path = os.path.join(path_, obs_dir_template, obs_fid_template)
        pathlist = []
        for jday in jdays:
            pathlist.append(full_path.format(net=net, sta=sta, cha=cha,
                                             loc=loc, jday=jday,
                                             year=self.origintime.year))
        st = Stream()
        for fid in pathlist:
            logger.debug(f"searching for observations: {fid}")
            for filepath in glob.glob(fid):
                st += read(filepath)
                logger.info(f"retrieved observations locally:\n{filepath}")
        if len(st) > 0:
            # Take care of gaps in data by converting to masked data
            st.merge()
            st.trim(starttime=self.origintime - self.config.start_pad,
                    endtime=self.origintime + self.config.end_pad)
            # Check if trimming retains data
            if len(st) > 0:
                return st
            else:
                logger.warning("data does not fit origin time +/- pad time")
                return None
    else:
        return None
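# Illustrative sketch of overriding the directory and file-name templates
# (the values below simply restate the documented defaults). Assumes
# `gatherer` is an instance of the class this method belongs to, with
# 'origintime', 'config.start_pad' and 'config.end_pad' already set.
st = gatherer.fetch_obs_by_dir(
    code="NZ.OPRZ.10.HH?",
    obs_dir_template="{year}/{net}/{sta}/{cha}*",
    obs_fid_template="{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}")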
def check(self):
    """
    (Re)check the stats of the workflow and data within the Manager.

    Rechecks conditions whenever called, in case something has gone awry
    mid-workflow. Stats should only be set by this function.
    """
    # Give dataset filename if available
    if self.stats.dataset_id is None and self.ds is not None:
        self.stats.dataset_id = os.path.basename(self.ds.filename)

    # Determine the resource identifier for the Event object
    if self.stats.event_id is None and self.event is not None:
        self.stats.event_id = self.event.resource_id.id

    # Get the network and station name from the Inventory object
    if self.stats.inv_name is None and self.inv is not None:
        self.stats.inv_name = ".".join([self.inv[0].code,
                                        self.inv[0][0].code])

    # Check if waveforms are Stream objects, and if preprocessed
    if self.st_obs is not None:
        self.stats.len_obs = len(self.st_obs)
        self.stats.obs_processed = is_preprocessed(self.st_obs)
        if self.stats.len_obs > len(self.config.component_list):
            logger.warning("More observed traces than listed components, "
                           "this may need to be reviewed manually")
    if self.st_syn is not None:
        self.stats.len_syn = len(self.st_syn)
        self.stats.syn_processed = is_preprocessed(self.st_syn)
        if self.stats.len_syn > len(self.config.component_list):
            logger.warning("More synthetic traces than listed components, "
                           "this may need to be reviewed manually")

    # Check standardization by comparing waveforms against the first trace.
    # The for/else pattern only marks the data standardized if no mismatched
    # attribute breaks out of the loops early
    if not self.stats.standardized and self.st_obs and self.st_syn:
        for tr in self.st[1:]:
            for atr in ["sampling_rate", "npts", "starttime"]:
                if getattr(tr.stats, atr) != getattr(self.st[0].stats, atr):
                    break
            else:
                continue
            break
        else:
            self.stats.standardized = True

    # Check for half duration used for source-time-function with synthetics
    if not self.stats.half_dur and self.event is not None:
        try:
            mt = self.event.preferred_focal_mechanism().moment_tensor
            self.stats.half_dur = mt.source_time_function.duration / 2
        except AttributeError:
            pass

    # Count how many misfit windows are contained in the dataset
    if self.stats.nwin is None and self.windows is not None:
        self.stats.nwin = sum([len(_) for _ in self.windows.values()])

    # Determine the unscaled misfit
    if not self.stats.misfit and self.adjsrcs is not None:
        self.stats.misfit = sum([_.misfit for _ in self.adjsrcs.values()])
def fetch_event_by_dir(self, event_id, prefix="", suffix="", format_=None,
                       **kwargs):
    """
    Fetch event information via directory structure on disk. Developed to
    parse CMTSOLUTION and QUAKEML files, but theoretically accepts any format
    that the ObsPy read_events() function will accept. Will search through
    all paths given until a matching source file is found.

    .. note::
        This function will search for the following path
        /path/to/event_dir/{prefix}{event_id}{suffix}

        so, if e.g., searching for a CMTSOLUTION file in the current dir:
        ./CMTSOLUTION_{event_id}

        Wildcards are okay but the function will return the first match

    :type event_id: str
    :param event_id: Unique event identifier to search source file by.
        e.g., a New Zealand earthquake ID '2018p130600'. A prefix or suffix
        will be tacked onto this
    :rtype event: obspy.core.event.Event or None
    :return event: event object if found, else None.
    :type prefix: str
    :param prefix: Prefix to prepend to event id for file name searching.
        Wildcards are okay.
    :type suffix: str
    :param suffix: Suffix to append to event id for file name searching.
        Wildcards are okay.
    :type format_: str or NoneType
    :param format_: Expected format of the file to read, e.g., 'QUAKEML',
        passed to ObsPy read_events. NoneType means read_events() will guess
    """
    # Ensure that the paths are a list so that iterating doesn't accidentally
    # try to iterate through a string.
    paths = self.config.paths["events"]
    if not isinstance(paths, list):
        paths = [paths]

    event = None
    for path_ in paths:
        if not os.path.exists(path_):
            continue
        # Search for available event files
        fid = os.path.join(path_, f"{prefix}{event_id}{suffix}")
        for filepath in glob.glob(fid):
            logger.debug(f"searching for event data: {filepath}")
            if os.path.exists(filepath):
                try:
                    # Allow input of various types of source files
                    if "SOURCE" in prefix:
                        logger.info(
                            f"reading SPECFEM2D SOURCE: {filepath}")
                        cat = [read_specfem2d_source(filepath)]
                    elif "FORCESOLUTION" in prefix:
                        logger.info(f"reading FORCESOLUTION: {filepath}")
                        cat = [read_forcesolution(filepath)]
                    else:
                        logger.info(
                            f"reading source using ObsPy: {filepath}")
                        cat = read_events(filepath, format=format_)

                    if len(cat) != 1:
                        logger.warning(
                            f"{filepath} event file contains more than one "
                            "event, returning 1st entry")
                    event = cat[0]
                    break
                except Exception as e:
                    logger.warning(f"{filepath} event file read error {e}")

    if event is not None:
        logger.info(f"retrieved local file:\n{filepath}")
    else:
        logger.info("no local event file found")

    return event
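# Illustrative usage sketch (file layout and variable name `gatherer` are
# assumptions): look for a CMTSOLUTION file named 'CMTSOLUTION_2018p130600'
# in the configured event paths and return it as an ObsPy Event.
event = gatherer.fetch_event_by_dir(event_id="2018p130600",
                                    prefix="CMTSOLUTION_")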