def test_form_utils():
    """
    Test the string formatting utilities which ensure that IO is standardized
    throughout the package. Again, place them all in the same function.
    """
    # Test format_iter
    for check in ["i09", 9, "9"]:
        assert(form.format_iter(check) == "i09")
    assert(form.format_iter(9.) is None)

    # Test format_step
    for check in ["s09", 9, "9"]:
        assert(form.format_step(check) == "s09")
    assert(form.format_step(9.) is None)

    # Test format_event_name with various test cases
    eid = "abc123"  # test event id
    test_cases = [
        "smi:nz.org.geonet/{eid}",  # GEONET
        "smi:service.iris.edu/fdsnws/event/1/query?eventid={eid}",  # IRIS
        "smi:local/ndk/{eid}/event",  # SPUD
        "smi:local/cmtsolution/{eid}/event",  # GCMT
        "quakeml:earthquake.usgs.gov/fdsnws/event/1/query?eventid={eid}"
        "&format=quakeml",  # USGS
        "quakeml:us.anss.org/event/{eid}",  # USGS COMCAT
        "pyatoa:source/{eid}"
    ]
    for test_case in test_cases:
        test_case = test_case.format(eid=eid)
        assert(form.format_event_name(test_case) == eid)

def del_synthetic_waveforms(ds, iteration=None, step_count=None):
    """
    Remove "synthetic_{iter_tag}{step_tag}" tagged waveforms from an ASDF
    dataset. If no iteration is given, wipes all synthetic data from the
    dataset.

    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset to be cleaned
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
    """
    iter_tag = format_iter(iteration)
    step_tag = format_step(step_count)

    for sta in ds.waveforms.list():
        for stream in ds.waveforms[sta].list():
            # 'stream' is a waveform tag, e.g. 'synthetic_i00s00'
            if "synthetic" in stream:
                if (iter_tag is not None) and iter_tag in stream:
                    if (step_tag is not None) and step_tag in stream:
                        del ds.waveforms[sta][stream]
                    elif step_tag is None:
                        del ds.waveforms[sta][stream]
                elif iter_tag is None:
                    del ds.waveforms[sta][stream]

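# Example usage (a minimal sketch; "example.h5" is a hypothetical dataset
# assumed to contain waveforms tagged e.g. 'synthetic_i01s00'):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   # Delete synthetics from iteration 1, step 0 only
#   del_synthetic_waveforms(ds, iteration=1, step_count=0)
#   # Or wipe all synthetic waveforms, regardless of iteration/step
#   del_synthetic_waveforms(ds)
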
def load_windows(ds, net, sta, iteration, step_count, return_previous=False):
    """
    Returns misfit windows from an ASDFDataSet for a given iteration, step,
    network and station, as well as a count of windows returned.

    If the given iteration and step are not present in the dataset (e.g.
    during a line search, at a new step), will try to search the previous
    step, which may or may not be contained in the previous iteration.

    Returns windows as Pyflex Window objects which can be used in Pyadjoint
    or in the Pyatoa workflow.

    .. note::
        Expects that windows are saved into the dataset at each iteration
        and step such that there is a coherent structure within the dataset

    :type ds: pyasdf.ASDFDataSet
    :param ds: ASDF dataset containing MisfitWindows subgroup
    :type net: str
    :param net: network code used to find the name of the misfit window
    :type sta: str
    :param sta: station code used to find the name of the misfit window
    :type iteration: int or str
    :param iteration: current iteration, will be formatted by the function
    :type step_count: int or str
    :param step_count: step count, will be formatted by the function
    :type return_previous: bool
    :param return_previous: search the dataset for available windows
        from the previous iteration/step given the current iteration/step
    :rtype window_dict: dict
    :return window_dict: dictionary containing misfit windows, in a format
        expected by the Pyatoa Manager class
    """
    # Ensure the tags are properly formatted
    iteration = format_iter(iteration)
    step_count = format_step(step_count)
    windows = ds.auxiliary_data.MisfitWindows
    window_dict = {}

    if return_previous:
        # Retrieve windows from the previous iter/step
        prev_windows = previous_windows(windows=windows, iteration=iteration,
                                        step_count=step_count)
        window_dict = dataset_windows_to_pyflex_windows(windows=prev_windows,
                                                        network=net,
                                                        station=sta)
    else:
        if hasattr(windows, iteration) and \
                hasattr(windows[iteration], step_count):
            # Attempt to retrieve windows from the given iter/step
            logger.debug(f"searching for windows in {iteration}{step_count}")
            window_dict = dataset_windows_to_pyflex_windows(
                windows=windows[iteration][step_count], network=net,
                station=sta)

    return window_dict

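# Example usage (a minimal sketch; "example.h5", "NZ" and "BFZ" are
# hypothetical names, and the dataset is assumed to contain a MisfitWindows
# auxiliary data group structured as [iteration][step_count]):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   window_dict = load_windows(ds, net="NZ", sta="BFZ", iteration=1,
#                              step_count=0, return_previous=False)
#   # window_dict is keyed by component, e.g. {'Z': [Window, ...], ...}
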
def previous_windows(windows, iteration, step_count):
    """
    Given an iteration and step count, find windows from the previous step
    count. If none are found for the given iteration, return the most
    recently available windows.

    .. note::
        Assumes that windows are saved at each iteration.

    :type windows: pyasdf.utils.AuxiliaryDataAccessor
    :param windows: ds.auxiliary_data.MisfitWindows[iter][step]
    :type iteration: int or str
    :param iteration: the current iteration
    :type step_count: int or str
    :param step_count: the current step count
    :rtype: pyasdf.utils.AuxiliaryDataAccessor
    :return: ds.auxiliary_data.MisfitWindows
    """
    # Ensure we're working with integer values for indexing, e.g. 's00' -> 0
    if isinstance(iteration, str):
        iteration = int(iteration[1:])
    if isinstance(step_count, str):
        step_count = int(step_count[1:])

    # Get a flattened list of iters and steps as unique tuples of integers
    iters = []
    steps = {i: windows[i].list() for i in windows.list()}
    for i, s in steps.items():
        for s_ in s:
            iters.append((int(i[1:]), int(s_[1:])))

    current = (iteration, step_count)
    if current in iters:
        # If windows have already been added to the auxiliary data
        prev_iter, prev_step = iters[iters.index(current) - 1]
    else:
        # Wind back the step count to see if there are any windows for this
        # iteration
        while step_count >= 0:
            if (iteration, step_count) in iters:
                prev_iter, prev_step = iteration, step_count
                break
            step_count -= 1
        else:
            # If nothing is found, return the most recent windows available
            prev_iter, prev_step = iters[-1]

    # Format back into strings for accessing auxiliary data
    prev_iter = format_iter(prev_iter)
    prev_step = format_step(prev_step)

    logger.debug(f"most recent windows: {prev_iter}{prev_step}")

    return windows[prev_iter][prev_step]

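# Example usage (a minimal sketch; assumes the hypothetical dataset
# "example.h5" holds windows for i01s00 but not yet for i01s01, so the
# lookup winds the step count back to the most recent available windows):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   windows = ds.auxiliary_data.MisfitWindows
#   prev = previous_windows(windows, iteration="i01", step_count="s01")
#   # 'prev' is the accessor for the i01s00 windows in this scenario
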
def write_stations_adjoint(ds, iteration, specfem_station_file,
                           step_count=None, pathout=None):
    """
    Generate the STATIONS_ADJOINT file for Specfem input by reading in the
    STATIONS file and cross-checking which adjoint sources are available in
    the Pyasdf dataset.

    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset containing AdjointSources auxiliary data
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type specfem_station_file: str
    :param specfem_station_file: path/to/specfem/DATA/STATIONS
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
        If NoneType, the final step of the iteration will be chosen
        automatically.
    :type pathout: str
    :param pathout: path to save file 'STATIONS_ADJOINT'
    """
    # Check which stations have adjoint sources
    stas_with_adjsrcs = []
    adj_srcs = ds.auxiliary_data.AdjointSources[format_iter(iteration)]

    # Dynamically determine the final step count in the iteration
    if step_count is None:
        step_count = adj_srcs.list()[-1]
    logger.debug(f"writing stations adjoint for "
                 f"{format_iter(iteration)}{format_step(step_count)}")
    adj_srcs = adj_srcs[format_step(step_count)]

    for code in adj_srcs.list():
        stas_with_adjsrcs.append(code.split('_')[1])
    stas_with_adjsrcs = set(stas_with_adjsrcs)

    # Figure out which stations were simulated
    with open(specfem_station_file, "r") as f:
        lines = f.readlines()

    # If no output path is specified, save into the current working directory
    # with an event_id tag to avoid confusion with other files, else use the
    # normal naming
    if pathout is None:
        write_out = f"./STATIONS_ADJOINT_{format_event_name(ds)}"
    else:
        write_out = os.path.join(pathout, "STATIONS_ADJOINT")

    # Rewrite the STATIONS file, but only with stations that contain
    # adjoint sources
    with open(write_out, "w") as f:
        for line in lines:
            if line.split()[0] in stas_with_adjsrcs:
                f.write(line)

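# Example usage (a minimal sketch; all paths and names are hypothetical):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   write_stations_adjoint(ds, iteration=1,
#                          specfem_station_file="specfem/DATA/STATIONS",
#                          pathout="specfem/DATA")
#   # -> writes 'specfem/DATA/STATIONS_ADJOINT' containing only the
#   #    stations that have adjoint sources in the dataset
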
def write_misfit(ds, iteration, step_count=None, path="./", fidout=None):
    """
    Write a text file containing the event misfit. This misfit value
    corresponds to F_S^T in Eq. 6 of Tape et al. (2010).

    e.g. path/to/misfits/{iteration}/{event_id}

    These files will then need to be read by: seisflows.workflow.write_misfit()

    :type ds: pyasdf.ASDFDataSet
    :param ds: processed dataset, assumed to contain auxiliary_data.Statistics
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
    :type path: str
    :param path: output path to write the misfit; fid will be the event name
    :type fidout: str
    :param fidout: allow user-defined filename, otherwise default to name of ds
        note: if given, 'path' is not used, this must be a full path
    """
    iter_tag = format_iter(iteration)
    step_tag = format_step(step_count)

    # By default, name the file after the event id
    if fidout is None:
        fidout = os.path.join(path, format_event_name(ds))

    # Collect the total misfit calculated by Pyadjoint
    total_misfit = 0
    adjoint_sources = ds.auxiliary_data.AdjointSources[iter_tag]
    if step_tag:
        adjoint_sources = adjoint_sources[step_tag]

    for adjsrc in adjoint_sources.list():
        total_misfit += adjoint_sources[adjsrc].parameters["misfit"]

    # Count up the number of misfit windows
    win = ds.auxiliary_data.MisfitWindows[iter_tag]
    if step_tag:
        win = win[step_tag]
    number_windows = len(win)

    scaled_misfit = 0.5 * total_misfit / number_windows

    # Save in the same format as Seisflows
    np.savetxt(fidout, [scaled_misfit], '%11.6e')

    return scaled_misfit

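# Example usage (a minimal sketch; "example.h5" and the output path are
# hypothetical). The value written is
# 0.5 * (summed adjoint source misfits) / (number of misfit windows),
# i.e. the event misfit F_S^T of Tape et al. (2010):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   misfit = write_misfit(ds, iteration=1, step_count=0,
#                         path="./misfits/i01")
#   print(f"event misfit: {misfit:.6e}")
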
def load_adjsrcs(ds, net, sta, iteration, step_count):
    """
    Load adjoint sources from a pyasdf ASDFDataSet and return in the format
    expected by the Manager class, that is, a dictionary of adjoint sources.

    :type ds: pyasdf.ASDFDataSet
    :param ds: ASDF dataset containing AdjointSources subgroup
    :type net: str
    :param net: network code used to find the name of the adjoint source
    :type sta: str
    :param sta: station code used to find the name of the adjoint source
    :type iteration: int or str
    :param iteration: current iteration, will be formatted by the function
    :type step_count: int or str
    :param step_count: step count, will be formatted by the function
    :rtype: dict
    :return: dictionary containing adjoint sources, in a format expected by
        the Pyatoa Manager class
    """
    # Ensure the tags are properly formatted before using them for access
    iteration = format_iter(iteration)
    step_count = format_step(step_count)

    adjsrcs = ds.auxiliary_data.AdjointSources[iteration][step_count]
    adjsrc_dict = {}

    # Use fnmatch filter to find all adjoint sources that match net/sta code
    for adjsrc_tag in fnf(adjsrcs.list(), f"{net}_{sta}_*"):
        component = adjsrc_tag[-1].upper()  # e.g. 'Z'

        # Build the adjoint source based on the parameters that were parsed in
        parameters = adjsrcs[adjsrc_tag].parameters
        assert (component == parameters["component"][-1]), (
            "AdjointSource tag does not match the component listed in the "
            "parameter dictionary when it should."
        )

        # Adjoint sources are time-reversed when saved into the dataset, so
        # reverse them back when returning to the Manager. Also remove the
        # time axis.
        parameters["adjoint_source"] = adjsrcs[adjsrc_tag].data[()][:, 1][::-1]

        # Convert back from str to UTCDateTime object
        parameters["starttime"] = UTCDateTime(parameters["starttime"])

        # The parameter dictionary will have all the keywords necessary
        adjsrc_dict[component] = AdjointSource(**parameters)

    return adjsrc_dict

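# Example usage (a minimal sketch; "example.h5", "NZ" and "BFZ" are
# hypothetical names):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   adjsrcs = load_adjsrcs(ds, net="NZ", sta="BFZ", iteration=1,
#                          step_count=0)
#   # adjsrcs is keyed by component, e.g. adjsrcs['Z'] is a Pyadjoint
#   # AdjointSource object with its time series un-reversed
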
def del_auxiliary_data(ds, iteration=None, step_count=None, retain=None,
                       only=None):
    """
    Delete all items in auxiliary data for a given iteration. If no iteration
    is given, wipes all auxiliary data.

    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset to be cleaned
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
    :type retain: list of str
    :param retain: list of auxiliary data tags to retain, that is: delete all
        auxiliary data EXCEPT FOR the names given in this variable
    :type only: list of str
    :param only: list of auxiliary data tags to remove, that is: ONLY delete
        auxiliary data that matches the names given in this variable. Lower
        in priority than 'retain'
    """
    iter_tag = format_iter(iteration)
    step_tag = format_step(step_count)

    for aux in ds.auxiliary_data.list():
        # Check if the auxiliary data tag matches the optional lists
        if retain and aux in retain:
            continue
        if only and aux not in only:
            continue

        # If the aux data doesn't contain this iter_tag, there is nothing
        # to clean
        if (iter_tag is not None) and hasattr(ds.auxiliary_data[aux],
                                              iter_tag):
            if (step_tag is not None) and (
                    hasattr(ds.auxiliary_data[aux][iter_tag], step_tag)):
                del ds.auxiliary_data[aux][iter_tag][step_tag]
            # If no 'step_tag' given, simply delete the iter_tag subgroup
            elif step_tag is None:
                del ds.auxiliary_data[aux][iter_tag]
        elif iter_tag is None:
            del ds.auxiliary_data[aux]

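# Example usage (a minimal sketch; "example.h5" and the auxiliary data tag
# names are illustrative):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   # Delete iteration i01 from all auxiliary groups except 'Configs'
#   del_auxiliary_data(ds, iteration=1, retain=["Configs"])
#   # Delete ONLY the MisfitWindows for i01s00, leaving everything else
#   del_auxiliary_data(ds, iteration=1, step_count=0,
#                      only=["MisfitWindows"])
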
def write_adj_src_to_ascii(ds, iteration, step_count=None, pathout=None,
                           comp_list="ZNE"):
    """
    Take AdjointSource auxiliary data from a Pyasdf dataset and write out
    the adjoint sources into ascii files with proper formatting, for input
    into Specfem.

    .. note::
        Specfem dictates that if a station is given as an adjoint source,
        all components must be present, even if some components don't have
        any misfit windows. This function writes blank adjoint sources
        (an array of 0's) to satisfy this requirement.

    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset containing adjoint sources
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i00". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
        If NoneType, the final step of the iteration will be chosen
        automatically.
    :type pathout: str
    :param pathout: path to write the adjoint sources to
    :type comp_list: str
    :param comp_list: component list to check when writing blank adjoint
        sources, defaults to N, E, Z, but can also be e.g. R, T, Z
    """
    def write_to_ascii(f_, array):
        """
        Function used to write the ascii in the correct format. Columns are
        formatted like the ASCII outputs of Specfem: two columns, times
        written as float, amplitudes written in E notation, 6 spaces between.

        :type f_: _io.TextIO
        :param f_: the open file to write to
        :type array: numpy.ndarray
        :param array: array of data from obspy stream
        """
        for dt, amp in array:
            if dt == 0. and amp != 0.:
                dt = 0
                adj_formatter = "{dt:>13d}      {amp:13.6E}\n"
            elif dt != 0. and amp == 0.:
                amp = 0
                adj_formatter = "{dt:13.6f}      {amp:>13d}\n"
            else:
                adj_formatter = "{dt:13.6f}      {amp:13.6E}\n"

            f_.write(adj_formatter.format(dt=dt, amp=amp))

    # Shortcuts
    adjsrcs = ds.auxiliary_data.AdjointSources[format_iter(iteration)]
    if step_count is None:
        step_count = adjsrcs.list()[-1]
    adjsrcs = adjsrcs[format_step(step_count)]
    logger.debug(f"writing adjoint sources to ascii for "
                 f"{format_iter(iteration)}{format_step(step_count)}")

    # Set the path to write the data to. If no path is given, default to the
    # current working directory
    if pathout is None:
        pathout = os.path.join("./", format_event_name(ds))
    if not os.path.exists(pathout):
        os.makedirs(pathout)

    # Loop through adjoint sources and write out ascii files.
    # ASDF datasets use '_' as separators but Specfem wants '.' as separators
    already_written = []
    for adj_src in adjsrcs.list():
        station = adj_src.replace('_', '.')
        fid = os.path.join(pathout, f"{station}.adj")
        with open(fid, "w") as f:
            write_to_ascii(f, adjsrcs[adj_src].data[()])

        # Write blank adjoint sources for components with no misfit windows
        for comp in list(comp_list):
            station_blank = (adj_src[:-1] + comp).replace('_', '.')
            if station_blank.replace('.', '_') not in adjsrcs.list() and \
                    station_blank not in already_written:
                # Use the same adjoint source, but set the data to zeros
                blank_adj_src = adjsrcs[adj_src].data[()]
                blank_adj_src[:, 1] = np.zeros(len(blank_adj_src[:, 1]))

                # Write out the blank adjoint source
                fid_blank = os.path.join(pathout, f"{station_blank}.adj")
                with open(fid_blank, "w") as b:
                    write_to_ascii(b, blank_adj_src)

                # Append to a list to make sure we don't write doubles
                already_written.append(station_blank)

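# Example usage (a minimal sketch; "example.h5", the output path, and the
# station/channel names are hypothetical):
#
#   from pyasdf import ASDFDataSet
#   ds = ASDFDataSet("example.h5")
#   write_adj_src_to_ascii(ds, iteration=1, pathout="specfem/SEM",
#                          comp_list="ZNE")
#   # -> writes e.g. 'specfem/SEM/NZ.BFZ.HXZ.adj', plus zero-amplitude .adj
#   #    files for any of the Z/N/E components lacking misfit windows
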
def step_tag(self):
    """String formatted version of the step count, e.g. 's00'"""
    if self.step_count is not None:
        return format_step(self.step_count)
    else:
        return None

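# Example (a minimal sketch; assumes this method belongs to a class with a
# 'step_count' attribute, here a hypothetical instance 'cfg', and that it
# is exposed as a property):
#
#   cfg.step_count = 0
#   cfg.step_tag      # -> 's00'
#   cfg.step_count = None
#   cfg.step_tag      # -> None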