Example #1
def _func(time, ind, df):
    """ return waveforms from df of bulk parameters """
    # note: this is a nested helper; `self` used below refers to the
    # enclosing WaveBank instance.
    match_chars = {"*", "?", "[", "]"}
    t1, t2 = time[0], time[1]
    # filter index based on start/end times
    in_time = ~((ind["starttime"] > t2) | (ind["endtime"] < t1))
    ind = ind[in_time]
    # create indices used to load data
    ar = np.ones(len(ind))  # indices of ind to use to load data
    df = df[(df.t1 == time[0]) & (df.t2 == time[1])]
    # determine which columns use any matching or other select features
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # handle columns that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array(
            [filter_index(ind,
                          *tuple(b)[:4]) for b in match_bulk])
        ar = np.logical_and(ar, mar.any(axis=0))
    # handle columns that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_seed_id_series(df_no_match))
        nslc2 = get_seed_id_series(ind)
        ar = np.logical_and(ar, nslc2.isin(nslc1))
    return self._index2stream(ind[ar], t1, t2)
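A minimal sketch (not from the library) of the bulk-parameters frame this nested helper filters; the NSLC column names and the t1/t2 window columns are inferred from the code above, and the values are placeholders:

import pandas as pd

# hypothetical bulk request rows: network, station, location, channel, t1, t2
bulk = [
    ("UU", "TMU", "01", "HHZ", 0.0, 3600.0),
    ("UU", "*", "01", "HH?", 0.0, 3600.0),  # wildcards are routed to filter_index
]
cols = ["network", "station", "location", "channel", "t1", "t2"]
df = pd.DataFrame(bulk, columns=cols)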
Example #2
def _func(time, ind, df, st):
    """ return waveforms from df of bulk parameters """
    match_chars = {"*", "?", "[", "]"}
    ar = np.ones(len(ind))  # indices of ind to use to load data
    t1, t2 = time[0], time[1]
    df = df[(df.t1 == time[0]) & (df.t2 == time[1])]
    # determine which columns use any matching or other select features
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # handle columns that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array(
            [filter_index(ind,
                          *tuple(b)[:4]) for b in match_bulk])
        ar = np.logical_and(ar, mar.any(axis=0))
    # handle columns that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_seed_id_series(df_no_match))
        nslc2 = get_seed_id_series(ind)
        ar = np.logical_and(ar, nslc2.isin(nslc1))
    # get a list of used traces, combine and trim
    st = obspy.Stream([x for x, y in zip(st, ar) if y])
    return st.slice(starttime=to_utc(t1), endtime=to_utc(t2))
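Both variants above back WaveBank's bulk retrieval. A hedged sketch of how the public interface they support is typically called (archive path and channel codes are placeholders):

import obspy
import obsplus

bank = obsplus.WaveBank("path/to/waveform/archive")  # placeholder path
t0 = obspy.UTCDateTime("2017-09-12")
bulk = [("TA", "M11A", "", "VHZ", t0, t0 + 3600)]
st = bank.get_waveforms_bulk(bulk)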
Example #3
def check_pick_order(event: Event):
    """
    Ensure:
        1. There are no S picks before P picks on any station
        2. There are no amplitude picks before P picks on any station
    """
    def pick_order(g, sp, ap):
        # get sub dfs with phases of interest
        p_picks = g[g["phase_hint"].str.upper() == "P"]
        s_picks = g[g["phase_hint"].str.upper() == "S"]
        amp_picks = g[g["phase_hint"].str.endswith("AML")]
        # there should be one P/S pick
        assert len(p_picks) <= 1 and len(s_picks) <= 1
        # first check that the P time precedes the S time; if not, record the station
        if len(p_picks) and len(s_picks):
            stime, ptime = s_picks.iloc[0]["time"], p_picks.iloc[0]["time"]
            if (stime < ptime) and not (pd.isnull(ptime) | pd.isnull(stime)):
                sp.append(g.name)
        # next check all amplitude picks are after P
        if len(p_picks) and len(amp_picks):
            ptime = p_picks.iloc[0]["time"]
            bad_amp_picks = amp_picks[amp_picks["time"] < ptime]
            ap.extend(list(bad_amp_picks["seed_id"]))

    # get dataframe of picks, filter out rejected
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    # get series of network.station.location codes for grouping
    ns = get_seed_id_series(pdf, subset=NSLC[:3])
    # group picks by station and apply the order check
    gb, sp, ap = pdf.groupby(ns), [], []
    gb.apply(pick_order, sp, ap)
    assert len(sp) == 0, f"S pick found before P pick:\nstation/s: {sp}"
    assert len(ap) == 0, (
        f"amplitude pick found before P pick:\nseed_id/s: {ap}"
    )
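A short usage sketch, assuming a catalog whose events carry picks (the file name is a placeholder); the function raises an AssertionError when an S or amplitude pick precedes the P pick on any station:

import obspy

cat = obspy.read_events("picked_events.xml")  # placeholder QuakeML file
for event in cat:
    check_pick_order(event)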
Example #4
def _index2stream(self,
                  index,
                  starttime=None,
                  endtime=None,
                  attach_response=False) -> Stream:
    """ return the waveforms in the index """
    # note: a WaveBank method; `self._map` applies `func` to each file,
    # possibly across multiple workers.
    # get abs path to each data file
    files: pd.Series = (self.bank_path + index.path).unique()
    # make sure start and endtimes are in UTCDateTime
    starttime = to_utc(starttime) if starttime else None
    endtime = to_utc(endtime) if endtime else None
    # iterate the files to read and try to load into waveforms
    kwargs = dict(format=self.format, starttime=starttime, endtime=endtime)
    func = partial(_try_read_stream, **kwargs)
    stt = obspy.Stream()
    chunksize = len(files) / self._max_workers
    for st in self._map(func, files, chunksize=chunksize):
        if st is not None:
            stt += st
    # sort out nullish nslc codes
    stt = replace_null_nlsc_codes(stt)
    # filter out any traces not in index (this can happen when files hold
    # multiple traces).
    nslc = set(get_seed_id_series(index))
    stt.traces = [x for x in stt if x.id in nslc]
    # trim, merge, attach response
    stt = self._prep_output_stream(stt, starttime, endtime,
                                   attach_response)
    return stt
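_index2stream is internal to WaveBank; a hedged sketch of the public call that ends up using it (path and codes are placeholders):

import obspy
import obsplus

bank = obsplus.WaveBank("path/to/waveform/archive")  # placeholder path
t1 = obspy.UTCDateTime("2017-09-12")
st = bank.get_waveforms(network="TA", station="M11A",
                        starttime=t1, endtime=t1 + 3600)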
Example #5
def dup_picks(df, phase_hint, subset):
    """Function for checking for duplications."""
    # note: a nested helper; `event_id` comes from the enclosing scope.
    seed_id = get_seed_id_series(df, subset=subset)
    bad = seed_id[seed_id.duplicated()].tolist()
    assert len(bad) == 0, (
        f"Duplicate {phase_hint} picks found\nevent_id: {event_id}"
    )
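One way the nested checker above might be driven, assuming `event` is an obspy Event defined in the enclosing scope and that NSLC is importable from obsplus.constants as in the library:

import obsplus
from obsplus.constants import NSLC

event_id = str(event.resource_id)  # used by dup_picks' assertion message
pdf = obsplus.picks_to_df(event)
pdf = pdf.loc[pdf["evaluation_status"] != "rejected"]
for phase in ("P", "S"):
    sub = pdf[pdf["phase_hint"].str.upper() == phase]
    dup_picks(sub, phase, subset=NSLC[:3])  # one pick per phase per station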
Example #6
def archive_to_sds(
    bank: Union[Path, str, "obsplus.WaveBank"],
    sds_path: Union[Path, str],
    starttime: Optional[UTCDateTime] = None,
    endtime: Optional[UTCDateTime] = None,
    overlap: float = 30,
    type_code: str = "D",
    stream_processor: Optional[callable] = None,
):
    """
    Create a seiscomp data structure archive from a waveform source.

    Parameters
    ----------
    bank
        A wavebank or path to such.
    sds_path
        The path for the new sds archive to be created.
    starttime
        If not None, the starttime to convert data from bank.
    endtime
        If not None, the endtime to convert data from bank.
    overlap
        The overlap to use for each file.
    type_code
        The single-character SDS type code indicating the data type
        ("D" is waveform data).
    stream_processor
        A callable that will take a single stream as input and return a
        single stream. May return an empty stream to skip writing it.

    Notes
    -----
    see: https://www.seiscomp3.org/doc/applications/slarchive/SDS.html
    """
    sds_path = Path(sds_path)
    # ensure we have a WaveBank with an up-to-date index
    bank = obsplus.WaveBank(bank)
    bank.update_index()
    # get starttime/endtimes
    index = bank.read_index()
    ts1 = index.starttime.min() if not starttime else starttime
    t1 = _nearest_day(ts1)
    t2 = obspy.UTCDateTime(index.endtime.max() if not endtime else endtime)
    nslcs = get_seed_id_series(index).unique()
    # iterate over nslc and get data for selected channel
    for nslc in nslcs:
        nslc_dict = {n: v for n, v in zip(NSLC, nslc.split("."))}
        # yield waveforms in desired chunks
        ykwargs = dict(starttime=t1,
                       endtime=t2,
                       overlap=overlap,
                       duration=86400)
        ykwargs.update(nslc_dict)
        for st in bank.yield_waveforms(**ykwargs):
            if stream_processor:  # apply stream processor if needed.
                st = stream_processor(st)
            if st:
                path = _get_sds_filename(st, sds_path, type_code, **nslc_dict)
                st.write(str(path), "mseed")
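A hedged usage sketch of archive_to_sds (the paths and time window are placeholders):

import obspy

archive_to_sds(
    bank="path/to/wavebank",
    sds_path="path/to/new_sds_archive",
    starttime=obspy.UTCDateTime("2019-01-01"),
    endtime=obspy.UTCDateTime("2019-01-08"),
)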
Example #7
def make_origins(
        events: catalog_or_event,
        inventory: obspy.Inventory,
        depth: float = 1.0,
        phase_hints: Optional[Iterable] = ("P", "p"),
) -> catalog_or_event:
    """
    Iterate a catalog or single events and ensure each has an origin.

    If no origins are found for an event, create one with the time set to
    the earliest pick and the location set to the location of the station
    with the earliest pick. Events are modified in place.

    This may be useful for location programs that need a starting location.

    Parameters
    ----------
    events
        The events to scan and add origins to where necessary.
    inventory
        An inventory object which contains all the stations referenced in
        quakeml elements of events.
    depth
        The default depth for created origins. Should be in meters. See the
        obspy docs for Origin or the quakeml standard for more details.
    phase_hints
        List of acceptable phase hints to use for identifying the earliest
        pick. By default will only search for "P" or "p" phase hints.

    Returns
    -------
    Either a Catalog or Event object (same as input).
    """
    # ensure input is an iterable of events
    cat = [events] if isinstance(events, Event) else events
    # load inv dataframe and make sure it has a seed_id column
    df = obsplus.stations_to_df(inventory)
    nslc_series = get_seed_id_series(df)
    for event in cat:
        if not event.origins:  # make new origin
            picks = event.picks_to_df()
            picks = picks.loc[(~(picks.evaluation_status == "rejected"))
                              & (picks.phase_hint.isin(phase_hints))]
            if not len(picks):
                raise ValueError(
                    f"{event} has no acceptable picks to create origin")
            # get first pick, determine time/station used
            first_pick = picks.loc[picks.time == picks.time.min()].iloc[0]
            seed_id = first_pick.seed_id
            # find channel corresponding to pick
            df_chan = df[nslc_series == seed_id]
            if not len(df_chan):
                raise KeyError(f"{seed_id} not found in inventory")
            ser = df_chan.iloc[0]
            # create origin
            ori = _create_first_pick_origin(first_pick, ser, depth=depth)
            event.origins.append(ori)
    return events
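A hedged usage sketch of make_origins (file names are placeholders; depth is in meters per the docstring):

import obspy

cat = obspy.read_events("picked_events.xml")  # placeholder QuakeML file
inv = obspy.read_inventory("stations.xml")    # placeholder StationXML file
cat = make_origins(cat, inv, depth=2000.0)    # events are modified in place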
Example #8
    def test_response_one_missing(self, df_with_partial_responses):
        """Ensure responses which can be got are fetched."""
        df = df_with_partial_responses
        with suppress_warnings():
            inv = df_to_inventory(df)

        missing = df["sensor_keys"].isnull() | df["datalogger_keys"].isnull()
        missing_seed_ids = set(get_seed_id_series(df[missing]))
        assert self.has_valid_response(inv, missing_seed_ids)
Example #9
    def test_get_stations_client(self, df_with_get_stations_kwargs):
        """Ensure get_station_kwargs results responses."""
        df = df_with_get_stations_kwargs
        col = "get_station_kwargs"
        missing = df[col].isnull() | (df[col] == "")
        missing_seed_ids = set(get_seed_id_series(df[missing]))

        with suppress_warnings():
            inv = df_to_inventory(df)

        assert self.has_valid_response(inv, missing_seed_ids)
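Both tests above pass station dataframes to get_seed_id_series; a small sketch of the same call outside the test fixtures, using obspy's bundled example inventory:

import obspy
import obsplus

inv = obspy.read_inventory()            # obspy's bundled example inventory
df = obsplus.stations_to_df(inv)
seed_ids = set(get_seed_id_series(df))  # {"NET.STA.LOC.CHAN", ...}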
Example #10
def test_seed_id(self, pick_df):
    """ ensure valid seed_ids were created. """
    # recreate seed_id and make sure columns are equal
    df = pick_df
    seed = get_seed_id_series(pick_df)
    assert (seed == df["seed_id"]).all()
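Finally, a hedged end-to-end sketch of the function these examples center on, deriving full and partial seed ids from a pick dataframe (`event` is a placeholder obspy Event; NSLC is assumed importable from obsplus.constants):

import obsplus
from obsplus.constants import NSLC

pdf = obsplus.picks_to_df(event)                     # placeholder Event
seed_ids = get_seed_id_series(pdf)                   # "NET.STA.LOC.CHAN" per pick
stations = get_seed_id_series(pdf, subset=NSLC[:3])  # "NET.STA.LOC" only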