Example #1
# Assumed import for this excerpt (the formatting utilities live in
# pyatoa.utils.form):
from pyatoa.utils import form
def test_form_utils():
    """
    Test the string formatting utilities which ensures that IO is standardized
    throughout the package. Again, place them all in the same function.
    """
    # Test format_iter
    for check in ["i09", 9, "9"]:
        assert(form.format_iter(check) == "i09")

    assert(form.format_iter(9.) is None)

    # Test format step
    for check in ["s09", 9, "9"]:
        assert(form.format_step(check) == "s09")

    assert(form.format_step(9.) is None)

    # Test format event name with various test cases
    eid = "abc123"  # test event id
    test_cases = [
        "smi:nz.org.geonet/{eid}",  # GEONET
        "smi:service.iris.edu/fdsnws/event/1/query?eventid={eid}",  # IRIS
        "smi:local/ndk/{eid}/event",  # SPUD
        "smi:local/cmtsolution/{eid}/event",  # GCMT
        "quakeml:earthquake.usgs.gov/fdsnws/event/1/query?eventid={eid}"
                                                    "&format=quakeml",  # USGS
        "quakeml:us.anss.org/event/{eid}",  # USGS COMCAT,
        "pyatoa:source/{eid}"
                  ]
    for test_case in test_cases:
        test_case = test_case.format(eid=eid)
        assert(form.format_event_name(test_case) == eid)
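For reference, a minimal sketch of the contract the assertions above imply.
This is NOT Pyatoa's actual implementation; '_format_tag' is a hypothetical
helper that mirrors what format_iter ("i" prefix) and format_step ("s"
prefix) are tested to do:

def _format_tag(value, prefix):
    """Return e.g. 'i09' for 9, '9' or 'i09'; None for unsupported types."""
    if isinstance(value, str) and value.startswith(prefix):
        return value
    if isinstance(value, (int, str)) and not isinstance(value, bool):
        return f"{prefix}{int(value):0>2}"
    return None  # e.g. floats are rejected

assert _format_tag(9, "i") == "i09"
assert _format_tag("9", "s") == "s09"
assert _format_tag(9., "i") is None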
Example #2
# Assumed import for this excerpt:
from pyatoa.utils.form import format_iter, format_step
def del_synthetic_waveforms(ds, iteration=None, step_count=None):
    """
    Remove "synthetic_{iter_tag}{step_tag}" tagged waveforms from an asdf 
    dataset. If no iter_tag number given, wipes all synthetic data from dataset.   
 
    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset to be cleaned
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
    """
    iter_tag = format_iter(iteration)
    step_tag = format_step(step_count)

    for sta in ds.waveforms.list():
        for stream in ds.waveforms[sta].list():
            # stream is e.g. 'synthetic_i00s00'
            if "synthetic" in stream:
                if (iter_tag is not None) and iter_tag in stream:
                    if (step_tag is not None) and step_tag in stream:
                        del ds.waveforms[sta][stream]
                    elif step_tag is None:
                        del ds.waveforms[sta][stream]
                elif iter_tag is None:
                    del ds.waveforms[sta][stream]
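A usage sketch, assuming a local ASDF file (the file name is hypothetical):

from pyasdf import ASDFDataSet

with ASDFDataSet("example_event.h5") as ds:
    # Remove only synthetics tagged 'synthetic_i01s00'
    del_synthetic_waveforms(ds, iteration=1, step_count=0)
    # Remove every remaining synthetic waveform from the dataset
    del_synthetic_waveforms(ds)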
Example #3
# Assumed imports for this excerpt; previous_windows (Example #4) and
# dataset_windows_to_pyflex_windows are defined in the same module
from pyatoa import logger
from pyatoa.utils.form import format_iter, format_step
def load_windows(ds, net, sta, iteration, step_count, return_previous=False):
    """
    Returns misfit windows from an ASDFDataSet for a given iteration, step,
    network and station, as well as a count of windows returned.

    If given iteration and step are not present in dataset (e.g. during line 
    search, new step), will try to search the previous step, which may or 
    may not be contained in the previous iteration. 

    Returns windows as Pyflex Window objects which can be used in Pyadjoint or
    in the Pyatoa workflow.

    .. note::
        Expects that windows are saved into the dataset at each iteration and 
        step such that there is a coherent structure within the dataset

    :type ds: pyasdf.ASDFDataSet
    :param ds: ASDF dataset containing MisfitWindows subgroup
    :type net: str
    :param net: network code used to find the name of the misfit window
    :type sta: str
    :param sta: station code used to find the name of the misfit window
    :type iteration: int or str
    :param iteration: current iteration, will be formatted by the function
    :type step_count: int or str
    :param step_count: step count, will be formatted by the function
    :type return_previous: bool
    :param return_previous: search the dataset for available windows
        from the previous iteration/step given the current iteration/step
    :rtype: dict
    :return: dictionary containing misfit windows, in a format
        expected by Pyatoa Manager class
    """
    # Ensure the tags are properly formatted
    iteration = format_iter(iteration)
    step_count = format_step(step_count)
    windows = ds.auxiliary_data.MisfitWindows

    window_dict = {}
    if return_previous:
        # Retrieve windows from previous iter/step
        prev_windows = previous_windows(windows=windows,
                                        iteration=iteration,
                                        step_count=step_count)
        window_dict = dataset_windows_to_pyflex_windows(windows=prev_windows,
                                                        network=net,
                                                        station=sta)
    else:
        if hasattr(windows, iteration) and \
                            hasattr(windows[iteration], step_count):
            # Attempt to retrieve windows from the given iter/step
            logger.debug(f"searching for windows in {iteration}{step_count}")
            window_dict = dataset_windows_to_pyflex_windows(
                windows=windows[iteration][step_count],
                network=net,
                station=sta)

    return window_dict
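A usage sketch (file name, network and station codes are hypothetical; the
returned dictionary is keyed by component):

from pyasdf import ASDFDataSet

with ASDFDataSet("example_event.h5") as ds:
    window_dict = load_windows(ds, net="NZ", sta="BFZ", iteration=1,
                               step_count=0)
    for comp, windows in window_dict.items():
        print(comp, len(windows))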
Example #4
# Assumed imports for this excerpt:
from pyatoa import logger
from pyatoa.utils.form import format_iter, format_step
def previous_windows(windows, iteration, step_count):
    """
    Given an iteration and step count, find windows from the previous step
    count. If none are found for the given iteration, return the most recently
    available windows.

    .. note:: 
        Assumes that windows are saved at each iteration.

    :type windows: pyasdf.utils.AuxiliaryDataAccessor
    :param windows: ds.auxiliary_data.MisfitWindows[iter][step]
    :type iteration: int or str
    :param iteration: the current iteration
    :type step_count: int or str
    :param step_count: the current step count
    :rtype: pyasdf.utils.AuxiliaryDataAccessor
    :return: ds.auxiliary_data.MisfitWindows
    """
    # Ensure we're working with integer values for indexing, e.g. 's00' -> 0
    if isinstance(iteration, str):
        iteration = int(iteration[1:])
    if isinstance(step_count, str):
        step_count = int(step_count[1:])

    # Get a flattened list of iters and steps as unique tuples of integers
    iters = []
    steps = {i: windows[i].list() for i in windows.list()}
    for i, s in steps.items():
        for s_ in s:
            iters.append((int(i[1:]), int(s_[1:])))

    current = (iteration, step_count)
    if current in iters:
        # If windows have already been added to the auxiliary data
        prev_iter, prev_step = iters[iters.index(current) - 1]
    else:
        # Wind back the step to see if there are any windows for this iteration
        while step_count >= 0:
            if (iteration, step_count) in iters:
                prev_iter, prev_step = iteration, step_count
                break
            step_count -= 1
        else:
            # If nothing is found return the most recent windows available
            prev_iter, prev_step = iters[-1]

    # Format back into strings for accessing auxiliary data
    prev_iter = format_iter(prev_iter)
    prev_step = format_step(prev_step)

    logger.debug(f"most recent windows: {prev_iter}{prev_step}")

    return windows[prev_iter][prev_step]
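A self-contained illustration of the search order, standing in plain dicts
and lists for the auxiliary data accessor (tags are made up):

tags = {"i01": ["s00", "s01"], "i02": ["s00"]}
iters = [(int(i[1:]), int(s[1:])) for i in tags for s in tags[i]]
print(iters)  # [(1, 0), (1, 1), (2, 0)]

# The current iter/step is present in the list, so the entry before it is
# returned, i.e. windows from i01s01, the last step of the prior iteration
current = (2, 0)
print(iters[iters.index(current) - 1])  # (1, 1)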
Example #5
# Assumed imports for this excerpt:
import os
from pyatoa import logger
from pyatoa.utils.form import format_iter, format_step, format_event_name
def write_stations_adjoint(ds,
                           iteration,
                           specfem_station_file,
                           step_count=None,
                           pathout=None):
    """
    Generate the STATIONS_ADJOINT file for Specfem input by reading in the
    STATIONS file and cross-checking which adjoint sources are available in the
    Pyasdf dataset.
    
    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset containing AdjointSources auxiliary data
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type specfem_station_file: str
    :param specfem_station_file: path/to/specfem/DATA/STATIONS
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
        If NoneType, final step of the iteration will be chosen automatically.
    :type pathout: str
    :param pathout: path to save the file 'STATIONS_ADJOINT'. If None, saved
        to the current working directory with the event name appended
    """
    # Check which stations have adjoint sources
    stas_with_adjsrcs = []
    adj_srcs = ds.auxiliary_data.AdjointSources[format_iter(iteration)]
    # Dynamically determine final step count in the iteration
    if step_count is None:
        step_count = adj_srcs.list()[-1]
    logger.debug(f"writing stations adjoint for "
                 f"{format_iter(iteration)}{format_step(step_count)}")
    adj_srcs = adj_srcs[format_step(step_count)]

    for code in adj_srcs.list():
        stas_with_adjsrcs.append(code.split('_')[1])
    stas_with_adjsrcs = set(stas_with_adjsrcs)

    # Figure out which stations were simulated
    with open(specfem_station_file, "r") as f:
        lines = f.readlines()

    # If no output path is specified, save into current working directory with
    # an event_id tag to avoid confusion with other files, else normal naming
    if pathout is None:
        write_out = f"./STATIONS_ADJOINT_{format_event_name(ds)}"
    else:
        write_out = os.path.join(pathout, "STATIONS_ADJOINT")

    # Rewrite the Station file but only with stations that contain adjoint srcs
    with open(write_out, "w") as f:
        for line in lines:
            if line.split()[0] in stas_with_adjsrcs:
                f.write(line)
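A usage sketch (all paths are hypothetical):

from pyasdf import ASDFDataSet

with ASDFDataSet("example_event.h5") as ds:
    # Writes 'DATA/STATIONS_ADJOINT' containing only the stations that have
    # adjoint sources for the final step of iteration 1
    write_stations_adjoint(ds, iteration=1,
                           specfem_station_file="DATA/STATIONS",
                           pathout="DATA")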
Example #6
# Assumed imports for this excerpt:
import os
import numpy as np
from pyatoa.utils.form import format_iter, format_step, format_event_name
def write_misfit(ds, iteration, step_count=None, path="./", fidout=None):
    """
    Write a text file containing the event misfit. This misfit value
    corresponds to F_S^T of Eq. 6 in Tape et al. (2010).

    e.g. path/to/misfits/{iteration}/{event_id}
    
    These files will then need to be read by: seisflows.workflow.write_misfit()

    :type ds: pyasdf.ASDFDataSet
    :param ds: processed dataset, assumed to contain auxiliary_data.Statistics
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
    :type path: str
    :param path: output path to write the misfit. fid will be the event name
    :type fidout: str
    :param fidout: allow user defined filename, otherwise default to name of ds
        note: if given, the 'path' argument is not used; this must be a full
        path
    """
    iter_tag = format_iter(iteration)
    step_tag = format_step(step_count)

    # By default, name the file after the event id
    if fidout is None:
        fidout = os.path.join(path, format_event_name(ds))

    # Collect the total misfit calculated by Pyadjoint
    total_misfit = 0
    adjoint_sources = ds.auxiliary_data.AdjointSources[iter_tag]
    if step_tag:
        adjoint_sources = adjoint_sources[step_tag]

    for adjsrc in adjoint_sources.list():
        total_misfit += adjoint_sources[adjsrc].parameters["misfit"]

    # Count up the number of misfit windows
    win = ds.auxiliary_data.MisfitWindows[iter_tag]
    if step_tag:
        win = win[step_tag]
    number_windows = len(win)

    scaled_misfit = 0.5 * total_misfit / number_windows

    # save in the same format as seisflows
    np.savetxt(fidout, [scaled_misfit], '%11.6e')

    return scaled_misfit
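The value written is exactly what the final lines above compute; per the
docstring this corresponds to F_S^T of Eq. 6 in Tape et al. (2010):

    F = (1 / (2 * N)) * sum(misfit_i, i = 1..N)

where N is the number of misfit windows and misfit_i the Pyadjoint misfit of
each adjoint source.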
Example #7
# Assumed imports for this excerpt:
from fnmatch import filter as fnf
from obspy import UTCDateTime
from pyadjoint import AdjointSource
from pyatoa.utils.form import format_iter, format_step
def load_adjsrcs(ds, net, sta, iteration, step_count):
    """
    Load adjoint sources from a pyasdf ASDFDataSet and return in the format
    expected by the Manager class, that is a dictionary of adjoint sources

    :type ds: pyasdf.ASDFDataSet
    :param ds: ASDF dataset containing MisfitWindows subgroup
    :type net: str
    :param net: network code used to find the name of the adjoint source
    :type sta: str
    :param sta: station code used to find the name of the adjoint source
    :type iteration: int or str
    :param iteration: current iteration, will be formatted by the function
    :type step_count: int or str
    :param step_count: step count, will be formatted by the function
    :rtype: dict
    :return: dictionary containing adjoint sources, in a format expected by
        Pyatoa Manager class
    """
    # Ensure the tags are properly formatted before using them for access
    iteration = format_iter(iteration)
    step_count = format_step(step_count)
    adjsrcs = ds.auxiliary_data.AdjointSources[iteration][step_count]

    adjsrc_dict = {}
    # Use fnmatch filter to find all adjoint sources that match net/sta code
    for adjsrc_tag in fnf(adjsrcs.list(), f"{net}_{sta}_*"):
        component = adjsrc_tag[-1].upper()  # e.g. 'Z'

        # Build the adjoint source based on the parameters that were parsed in
        parameters = adjsrcs[adjsrc_tag].parameters
        assert (component == parameters["component"][-1]), (
            "AdjointSource tag does not match the component listed in the "
            "parameter dictionary, but they are expected to match")

        # Adjoint sources are time-reversed when saved into the dataset, so
        # reverse them back when returning to Manager. Also remove time axis.
        parameters["adjoint_source"] = adjsrcs[adjsrc_tag].data[()][:, 1][::-1]

        # Convert back from str to UTCDateTime object
        parameters["starttime"] = UTCDateTime(parameters["starttime"])

        # The parameter dictionary will have all the keywords necessary
        adjsrc_dict[component] = AdjointSource(**parameters)

    return adjsrc_dict
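A usage sketch (file name and codes are hypothetical; Pyadjoint
AdjointSource objects expose attributes such as 'misfit'):

from pyasdf import ASDFDataSet

with ASDFDataSet("example_event.h5") as ds:
    adjsrc_dict = load_adjsrcs(ds, net="NZ", sta="BFZ", iteration=1,
                               step_count=0)
    for comp, adjsrc in adjsrc_dict.items():
        print(comp, adjsrc.misfit)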
Example #8
# Assumed import for this excerpt:
from pyatoa.utils.form import format_iter, format_step
def del_auxiliary_data(ds, iteration=None, step_count=None, retain=None,
                       only=None):
    """
    Delete all items in auxiliary data for a given iter_tag, if iter_tag not
    given, wipes all auxiliary data.

    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset to be cleaned
    :param iteration: iteration number, e.g. "i01". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
    :type retain: list of str
    :param retain: list of auxiliary data tags to retain, that is: delete all 
        auxiliary data EXCEPT FOR the names given in this variable
    :type only: list of str
    :param only: list of auxiliary data tags to remove, that is: ONLY delete 
        auxiliary data that matches the names given in this variable. 
        Lower in priority than 'retain'
    """
    iter_tag = format_iter(iteration)
    step_tag = format_step(step_count)
    
    for aux in ds.auxiliary_data.list():
        # Check if auxiliary data tag matches optional lists
        if retain and aux in retain:
            continue
        if only and aux not in only:
            continue

        # If the aux data group doesn't contain the given iter_tag, there is
        # nothing to clean
        if (iter_tag is not None) and hasattr(ds.auxiliary_data[aux], iter_tag):
            if (step_tag is not None) and (
                        hasattr(ds.auxiliary_data[aux][iter_tag], step_tag)):
                del ds.auxiliary_data[aux][iter_tag][step_tag]
            # If no 'step_tag' given, simply delete the iter_tag subgroup 
            elif step_tag is None:
                del ds.auxiliary_data[aux][iter_tag]
        elif iter_tag is None:
            del ds.auxiliary_data[aux]
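A usage sketch showing how 'retain' and 'only' narrow the deletion (file
name and tags are hypothetical):

from pyasdf import ASDFDataSet

with ASDFDataSet("example_event.h5") as ds:
    # Delete i01s00 from every auxiliary data group except MisfitWindows
    del_auxiliary_data(ds, iteration=1, step_count=0,
                       retain=["MisfitWindows"])
    # Delete the AdjointSources group only, across all iterations
    del_auxiliary_data(ds, only=["AdjointSources"])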
Example #9
# Assumed imports for this excerpt:
import os
import numpy as np
from pyatoa import logger
from pyatoa.utils.form import format_iter, format_step, format_event_name
def write_adj_src_to_ascii(ds,
                           iteration,
                           step_count=None,
                           pathout=None,
                           comp_list="ZNE"):
    """
    Take AdjointSource auxiliary data from a Pyasdf dataset and write out
    the adjoint sources into ascii files with proper formatting, for input
    into Specfem.

    .. note::
        Specfem dictates that if a station is given as an adjoint source,
        all components must be present, even if some components don't have
        any misfit windows. This function writes blank adjoint sources
        (an array of 0's) to satisfy this requirement.

    :type ds: pyasdf.ASDFDataSet
    :param ds: dataset containing adjoint sources
    :type iteration: str or int
    :param iteration: iteration number, e.g. "i00". Will be formatted so int ok.
    :type step_count: str or int
    :param step_count: step count e.g. "s00". Will be formatted so int ok.
        If NoneType, final step of the iteration will be chosen automatically.
    :type pathout: str
    :param pathout: path to write the adjoint sources to
    :type comp_list: str
    :param comp_list: component list to check when writing blank adjoint
        sources; defaults to N, E, Z, but can also be e.g. R, T, Z
    """
    def write_to_ascii(f_, array):
        """
        Function used to write the ascii in the correct format.
        Columns are formatted like the ASCII outputs of Specfem: two columns,
        times written as floats, amplitudes written in E notation, six spaces
        between.

        :type f_: _io.TextIO
        :param f_: the open file to write to
        :type array: numpy.ndarray
        :param array: array of data from obspy stream
        """
        for dt, amp in array:
            if dt == 0. and amp != 0.:
                dt = 0
                adj_formatter = "{dt:>13d}      {amp:13.6E}\n"
            elif dt != 0. and amp == 0.:
                amp = 0
                adj_formatter = "{dt:13.6f}      {amp:>13d}\n"
            else:
                adj_formatter = "{dt:13.6f}      {amp:13.6E}\n"

            f_.write(adj_formatter.format(dt=dt, amp=amp))

    # Shortcuts
    adjsrcs = ds.auxiliary_data.AdjointSources[format_iter(iteration)]
    if step_count is None:
        step_count = adjsrcs.list()[-1]
    adjsrcs = adjsrcs[format_step(step_count)]
    logger.debug(f"writing adjoint sources to ascii for "
                 f"{format_iter(iteration)}{format_step(step_count)}")

    # Set the path to write the data to.
    # If no path is given, default to current working directory
    if pathout is None:
        pathout = os.path.join("./", format_event_name(ds))
    if not os.path.exists(pathout):
        os.makedirs(pathout)

    # Loop through adjoint sources and write out ascii files
    # ASDF datasets use '_' as separators but Specfem wants '.' as separators
    already_written = []
    for adj_src in adjsrcs.list():
        station = adj_src.replace('_', '.')
        fid = os.path.join(pathout, f"{station}.adj")
        with open(fid, "w") as f:
            write_to_ascii(f, adjsrcs[adj_src].data[()])

        # Write blank adjoint sources for components with no misfit windows
        for comp in list(comp_list):
            station_blank = (adj_src[:-1] + comp).replace('_', '.')
            if station_blank.replace('.', '_') not in adjsrcs.list() and \
                    station_blank not in already_written:
                # Use the same adjoint source, but set the data to zeros
                blank_adj_src = adjsrcs[adj_src].data[()]
                blank_adj_src[:, 1] = np.zeros(len(blank_adj_src[:, 1]))

                # Write out the blank adjoint source
                fid_blank = os.path.join(pathout, f"{station_blank}.adj")
                with open(fid_blank, "w") as b:
                    write_to_ascii(b, blank_adj_src)

                # Append to a list to make sure we don't write doubles
                already_written.append(station_blank)
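A standalone replication of the inner formatter's branches, to show the
column layout it produces (sample values are made up):

for dt, amp in [(0.0, 1.2e-4), (0.01, 0.0), (0.02, -3.4e-5)]:
    if dt == 0. and amp != 0.:
        line = "{dt:>13d}      {amp:13.6E}".format(dt=0, amp=amp)
    elif dt != 0. and amp == 0.:
        line = "{dt:13.6f}      {amp:>13d}".format(dt=dt, amp=0)
    else:
        line = "{dt:13.6f}      {amp:13.6E}".format(dt=dt, amp=amp)
    print(line)
# Times print as fixed-point floats and amplitudes in E notation; an exact
# zero in either column prints as the bare integer 0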
Example #10
# Assumed import for this excerpt:
from pyatoa.utils.form import format_step
def step_tag(self):
    """String formatted version of step, e.g. 's00'"""
    if self.step_count is not None:
        return format_step(self.step_count)
    else:
        return None
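This excerpt reads like a property on a class carrying a 'step_count'
attribute. A minimal, hypothetical context (the class name is made up, and
it reuses the format_step import above):

class Example:
    """Hypothetical holder for a step count."""
    def __init__(self, step_count=None):
        self.step_count = step_count

    @property
    def step_tag(self):
        """String formatted version of step, e.g. 's00'"""
        if self.step_count is not None:
            return format_step(self.step_count)
        return None

# Example(step_count=0).step_tag == "s00"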