def download_data(self, temp_dir_path, fetcher: Fetcher):
    """Fetch event waveforms from *fetcher* into the temp dir; return the temp dir."""
    # Destination for this object's data inside the temporary directory.
    destination = os.path.join(temp_dir_path, self.path)
    fetcher.download_event_waveforms(
        time_before_origin=0,
        time_after_origin=10,
        path=destination,
    )
    return temp_dir_path
def download_data(self, temp_dir_path, kem_fetcher: Fetcher):
    """Fetch continuous waveforms from the kem fetcher into the temp dir; return it."""
    destination = Path(temp_dir_path) / self.path
    kem_fetcher.download_waveforms(
        starttime=self.t1,
        endtime=self.t2,
        duration=self.duration,
        overlap=self.overlap,
        path=destination,
    )
    return temp_dir_path
def wavefetch_no_inv(self, bingham_dataset):
    """Build a Fetcher with no station (inventory) information attached."""
    return Fetcher(
        waveforms=bingham_dataset.waveform_client,
        events=bingham_dataset.event_client,
    )
def fetcher_duplicate_channels(self, bingham_dataset):
    """Create a fetcher whose inventory contains duplicate channels."""
    catalog = bingham_dataset.event_client
    station_df = obsplus.stations_to_df(bingham_dataset.station_client)
    # split_inventory produces the duplicated-channel inventory under test.
    duplicated_inv = self.split_inventory(station_df, catalog)
    return Fetcher(
        waveforms=bingham_dataset.waveform_client,
        events=catalog,
        stations=duplicated_inv,
    )
def stream_list(self, continuous_fetcher: Fetcher):
    """Materialize the continuous fetcher's waveform chunks into a list."""
    kwargs = dict(
        starttime=self.t1,
        endtime=self.t2,
        duration=self.duration,
        overlap=self.overlap,
    )
    return list(continuous_fetcher.yield_waveforms(**kwargs))
def ta_fetcher_with_processor(ta_dataset):
    """The ta_test fetcher configured with a stream_processor."""
    waveforms = ta_dataset.waveform_client.get_waveforms()
    events = ta_dataset.event_client.get_events()
    stations = ta_dataset.station_client.get_stations()
    return Fetcher(
        waveforms=waveforms,
        events=events,
        stations=stations,
        stream_processor=processor,
    )
def subbing_fetcher_with_processor(bingham_dataset):
    """A fetcher with a stream_processor, only use last event of bingham_test."""
    # Restrict the catalog to just the final event.
    last_event = bingham_dataset.event_client.get_events()[-1]
    return Fetcher(
        waveforms=bingham_dataset.waveform_client.get_waveforms(),
        events=last_event,
        stations=bingham_dataset.station_client.get_stations(),
        stream_processor=processor,
    )
def test_init_with_banks(self, bingham_dataset):
    """Ensure the fetcher can be init'ed with all bank inputs."""
    wbank = obsplus.WaveBank(bingham_dataset.waveform_path)
    ebank = obsplus.EventBank(bingham_dataset.event_path)
    # Update each index with an explicit statement; the original used the
    # tuple-expression form `wbank.update_index(), ebank.update_index()`,
    # which builds and discards a tuple just to chain two side effects.
    wbank.update_index()
    ebank.update_index()
    sbank = bingham_dataset.station_client
    fetcher = Fetcher(waveforms=wbank, events=ebank, stations=sbank)
    # Both derived dataframes should be populated, non-empty DataFrames.
    for df in (fetcher.event_df, fetcher.station_df):
        assert isinstance(df, pd.DataFrame)
        assert not df.empty
def trim_kem_events(fetcher: Fetcher):
    """
    Trim the kemmerer events to only the window where continuous data exists.
    """
    window_start = obspy.UTCDateTime("2009-04-01").timestamp
    window_end = obspy.UTCDateTime("2009-04-04").timestamp
    trimmed_cat = fetcher.event_client.get_events(
        starttime=window_start, endtime=window_end
    )
    # Rebuild the fetcher with the trimmed catalog; everything else carries over.
    return Fetcher(
        waveforms=fetcher.waveform_client,
        events=trimmed_cat,
        stations=fetcher.station_client,
        stream_processor=fetcher.stream_processor,
    )
def test_init_with_banks(self, bingham_dataset):
    """Ensure the fetcher can be init'ed with all bank inputs."""
    wbank = obsplus.WaveBank(bingham_dataset.waveform_path).update_index()
    ebank = obsplus.EventBank(bingham_dataset.event_path).update_index()
    sbank = bingham_dataset.station_client
    # ignore warnings (endtimes of inv are out of range)
    with suppress_warnings():
        fetcher = Fetcher(waveforms=wbank, events=ebank, stations=sbank)
    for frame in (fetcher.event_df, fetcher.station_df):
        assert isinstance(frame, pd.DataFrame)
        assert not frame.empty
def fetcher_one_event(self, bingham_dataset, tmp_path):
    """Make a fetcher with only one event."""
    base_fetcher = bingham_dataset.get_fetcher()
    inventory = bingham_dataset.station_client.get_stations()
    # Grab the first (event_id, stream) pair the fetcher yields.
    for eid, st in base_fetcher.yield_event_waveforms(time_before=1, time_after=1):
        break
    single_event = base_fetcher.event_client.get_events(eventid=eid)
    # Write that stream into a fresh wavebank under the pytest tmp path.
    bank_path = tmp_path / "waveforms"
    bank_path.mkdir(exist_ok=True, parents=True)
    wbank = obsplus.WaveBank(bank_path)
    wbank.put_waveforms(st, update_index=True)
    wbank.read_index()  # need to cache index
    return Fetcher(events=single_event, stations=inventory, waveforms=wbank)
def kem_fetcher_limited():
    """Init a fetcher restricted to a subset of the kemmerer events."""
    dataset = obsplus.load_dataset("kemmerer")
    # Only these two events from the full catalog are kept.
    keep_ids = {
        "smi:local/042f78e9-6089-4ed8-8f9b-47c2189a1c75",
        "smi:local/16459ce7-69ff-4fe0-8639-a980b47498bb",
    }
    events = pd.read_csv(dataset.data_path / "catalog.csv")
    events = events[events.event_id.isin(keep_ids)]
    return Fetcher(
        waveforms=dataset.waveform_client,
        events=events,
        stations=dataset.station_client,
    )
def kem_fetcher_with_processor():
    """Same as the kem fetcher, but configured with a stream_processor."""
    dataset = obsplus.load_dataset("kemmerer")

    def processor(st):
        """Apply a simple bandpass filter and tag each trace as processed."""
        st.filter("bandpass", freqmin=1, freqmax=10)
        # mark that the processor ran
        for tr in st:
            tr.stats["processor_ran"] = True
        return st

    fetcher = Fetcher(
        waveforms=dataset.waveform_client.get_waveforms(),
        events=dataset.event_client.get_events(),
        stations=dataset.station_client.get_stations(),
        stream_processor=processor,
    )
    return trim_kem_events(fetcher)
def bulk_arg_later_time(self, altered_inv):
    """Return bulk args for latter time test."""
    fetcher = Fetcher(None, stations=altered_inv)
    # Shift the start time 10 s past t1 for the later-time scenario.
    later_start = self.t1 + 10
    return fetcher._get_bulk_args(starttime=later_start, endtime=self.t2)
def copied_fetcher(self, wavefetcher):
    """Init a second wavefetcher from an existing one; return both as a tuple."""
    duplicate = Fetcher(wavefetcher)
    return wavefetcher, duplicate
def fetcher(self, altered_inv, kem_fetcher):
    """Return a fetcher that uses the modified station times."""
    waveform_client = kem_fetcher.waveform_client
    return Fetcher(waveform_client, stations=altered_inv)
def bulk_arg_none_end_date(self, inv_with_none):
    """Return the bulk args from an inventory whose end date is None."""
    fetcher = Fetcher(None, stations=inv_with_none)
    return fetcher._get_bulk_arg(starttime=self.t0, endtime=self.t1)
def bulk_arg_later_time(self, altered_inv):
    """Return bulk args for a start time 10 s after t1 using the altered inventory."""
    fetcher = Fetcher(None, stations=altered_inv)
    return fetcher._get_bulk_arg(starttime=self.t1 + 10, endtime=self.t2)
def wavefetcher_no_bulk(self, kem_bank_no_bulk, kemmerer_dataset):
    """Return a wavefetcher built from the bank without bulk support."""
    inventory = kemmerer_dataset.station_client.get_stations()
    return Fetcher(waveforms=kem_bank_no_bulk, stations=inventory)