def test_process_streams():
    # Loma Prieta test station (nc216859)
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    # the config file lives alongside the data files, as in the other tests
    datadir = os.path.split(data_files[0])[0]
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    test = process_streams(sc, origin, config=config)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can end up in a different order on the Travis linux
    # container than on a local mac, so tests of individual traces must
    # not depend on trace order.
    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station='HSES')[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.81975508, 240.33718094, 263.67804256]),
        rtol=1e-5)

def test():
    # Test for channel grouping with three unique channels
    streams = []
    # datadir = os.path.join(homedir, '..', 'data', 'knet', 'us2000cnnl')
    datafiles, origin = read_data_dir(
        'knet', 'us2000cnnl', 'AOM0031801241951*')
    for datafile in datafiles:
        streams += read_knet(datafile)
    grouped_streams = StreamCollection(streams)
    assert len(grouped_streams) == 1
    assert grouped_streams[0].count() == 3

    # Test for channel grouping with more file types
    datafiles, origin = read_data_dir(
        'geonet', 'us1000778i', '20161113_110313_THZ_20.V2A')
    datafile = datafiles[0]
    streams += read_geonet(datafile)
    grouped_streams = StreamCollection(streams)
    assert len(grouped_streams) == 2
    assert grouped_streams[0].count() == 3
    assert grouped_streams[1].count() == 3

    # Test that one-channel streams produce a warning
    datafiles, origin = read_data_dir(
        'knet', 'us2000cnnl', 'AOM0071801241951.UD')
    datafile = datafiles[0]
    streams += read_knet(datafile)
    grouped_streams = StreamCollection(streams)
    # assert "One channel stream:" in logstream.getvalue()
    assert len(grouped_streams) == 3
    assert grouped_streams[0].count() == 3
    assert grouped_streams[1].count() == 3
    assert grouped_streams[2].count() == 1

def test_process_streams():
    # Loma Prieta test station (nc216859)
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    test = process_streams(sc, origin)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can end up in a different order on the Travis linux
    # container than on a local mac, so tests of individual traces must
    # not depend on trace order.
    trace_maxes = np.sort([np.max(np.abs(t.data)) for t in test[0]])
    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.81975508, 240.33718094, 263.67804256]),
        rtol=1e-5
    )

def test_get_travel_time_df():
    datapath = os.path.join('data', 'testdata', 'travel_times')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)

    sc1 = StreamCollection.from_directory(os.path.join(datadir, 'ci37218996'))
    sc2 = StreamCollection.from_directory(os.path.join(datadir, 'ci38461735'))
    scs = [sc1, sc2]

    df1, catalog = create_travel_time_dataframe(
        sc1, os.path.join(datadir, 'catalog_test_traveltimes.csv'),
        5, 0.1, 'iasp91')
    df2, catalog = create_travel_time_dataframe(
        sc2, os.path.join(datadir, 'catalog_test_traveltimes.csv'),
        5, 0.1, 'iasp91')

    model = TauPyModel('iasp91')
    for dfidx, df in enumerate([df1, df2]):
        for staidx, sta in enumerate(df):
            for eqidx, time in enumerate(df[sta]):
                sta_coords = scs[dfidx][staidx][0].stats.coordinates
                event = catalog[eqidx]
                dist = locations2degrees(
                    sta_coords['latitude'], sta_coords['longitude'],
                    event.latitude, event.longitude)
                if event.depth_km < 0:
                    depth = 0
                else:
                    depth = event.depth_km
                travel_time = model.get_travel_times(
                    depth, dist, ['p', 'P', 'Pn'])[0].time
                abs_time = event.time + travel_time
                np.testing.assert_almost_equal(abs_time, time, decimal=1)

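# For reference, a standalone sketch of the expected-arrival computation the
# loop above performs, calling obspy's TauPy API directly. The coordinates and
# depth passed by a caller are illustration values, not from the test data.
from obspy.taup import TauPyModel
from obspy.geodetics import locations2degrees


def expected_p_arrival(origin_time, eq_lat, eq_lon, depth_km,
                       sta_lat, sta_lon):
    model = TauPyModel('iasp91')
    # epicentral distance in degrees between event and station
    dist = locations2degrees(sta_lat, sta_lon, eq_lat, eq_lon)
    # first arriving P-type phase, same phase list as the test above;
    # negative depths are clipped to zero as in the test
    arrivals = model.get_travel_times(
        source_depth_in_km=max(depth_km, 0),
        distance_in_degree=dist,
        phase_list=['p', 'P', 'Pn'])
    return origin_time + arrivals[0].time
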
def test_colocated():
    datapath = os.path.join('data', 'testdata', 'colocated_instruments')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.select_colocated()
    assert sc.n_passed == 7
    assert sc.n_failed == 4

    # What if no preference is matched?
    sc = StreamCollection.from_directory(datadir)
    sc.select_colocated(preference=["XX"])
    assert sc.n_passed == 3
    assert sc.n_failed == 8

def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off the sta/lta and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1, readmes1 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' not in readmes1

        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=['processed'], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARITHMETIC_MEAN' in readmes2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        testarray = readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy()
        assert 'ARIAS' in testarray
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_signal_split2():
    datafiles, origin = read_data_dir(
        'knet', 'us2000cnnl', 'AOM0011801241951*')
    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)
    streams = StreamCollection(streams)
    stream = streams[0]
    signal_split(stream, origin)

    cmpdict = {
        'split_time': UTCDateTime(2018, 1, 24, 10, 51, 39, 841483),
        'method': 'p_arrival',
        'picker_type': 'travel_time'
    }

    pdict = stream[0].getParameter('signal_split')
    for key, value in cmpdict.items():
        v1 = pdict[key]
        # Because it is unclear how to get the UTCDateTime __eq__ operator
        # to behave as expected with the currently installed version of
        # obspy, pedantically compare the two objects field by field...
        if isinstance(value, UTCDateTime):
            # value.__precision = 4
            # v1.__precision = 4
            assert value.year == v1.year
            assert value.month == v1.month
            assert value.day == v1.day
            assert value.hour == v1.hour
            assert value.minute == v1.minute
            assert value.second == v1.second
        else:
            assert v1 == value

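# An alternative to the field-by-field comparison above: subtracting two
# UTCDateTime objects returns the elapsed time as a float number of seconds,
# so a tolerance check sidesteps the __eq__ precision issue. The tolerance
# value here is a choice for illustration, not part of the original test.
from obspy import UTCDateTime

t1 = UTCDateTime(2018, 1, 24, 10, 51, 39, 841483)
t2 = UTCDateTime(2018, 1, 24, 10, 51, 39, 841000)
assert abs(t1 - t2) < 1e-3  # within one millisecond
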
def test_fit_spectra():
    config = get_config()
    datapath = os.path.join('data', 'testdata', 'demo', 'ci38457511', 'raw')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    event = get_event_object('ci38457511')
    sc = StreamCollection.from_directory(datadir)
    for st in sc:
        st = signal_split(st, event)
        end_conf = config['windows']['signal_end']
        st = signal_end(st,
                        event_time=event.time,
                        event_lon=event.longitude,
                        event_lat=event.latitude,
                        event_mag=event.magnitude,
                        **end_conf)
        st = compute_snr(st, 30)
        st = get_corner_frequencies(st,
                                    method='constant',
                                    constant={
                                        'highpass': 0.08,
                                        'lowpass': 20.0
                                    })

    for st in sc:
        spectrum.fit_spectra(st, event)

def test_colocated():
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)

    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(eventid, labels=['processed'], config=config)
        stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
        np.testing.assert_allclose(
            stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        ws.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test():
    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'BK.CMB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    assert streams[0].get_id() == 'BK.CMB.HN'

    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'TA.M04C*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    assert streams[0].get_id() == 'TA.M04C.HN'

    # test assignment of Z channel
    datafiles, origin = read_data_dir('fdsn', 'nc73300395', 'BK.VALB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    # get all channel names
    channels = sorted([st[0].stats.channel for st in streams])
    assert channels == ['HN2', 'HN3', 'HNZ']

    # DEBUGGING
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)

def directory_to_dataframe(directory, imcs=None, imts=None, origin=None,
                           process=True):
    """Extract peak ground motions from list of Stream objects.

    Note: The PGM columns underneath each channel will be variable
    depending on the units of the Stream being passed in (velocity sensors
    can only generate PGV) and on the imtlist passed in by user. Spectral
    acceleration columns will be formatted as SA(0.3) for 0.3 second
    spectral acceleration, for example.

    Args:
        directory (str):
            Directory of ground motion files (streams).
        imcs (list):
            Strings designating desired components to create in table.
        imts (list):
            Strings designating desired PGMs to create in table.
        origin (obspy.core.event.Origin):
            Defines the focal time and geographical location of an
            earthquake hypocenter. Default is None.
        process (bool):
            Process the stream using the config file.

    Returns:
        DataFrame: Pandas dataframe containing columns:
            - STATION Station code.
            - NAME Text description of station.
            - LOCATION Two character location code.
            - SOURCE Long form string containing source network.
            - NETWORK Short network code.
            - LAT Station latitude.
            - LON Station longitude.
            - DISTANCE Epicentral distance (km) (if epicentral lat/lon
              provided).
            - HN1 East-west channel (or H1) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - HN2 North-south channel (or H2) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - HNZ Vertical channel (or HZ) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - GREATER_OF_TWO_HORIZONTALS (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
    """
    streams = []
    for filepath in glob.glob(os.path.join(directory, "*")):
        streams += read_data(filepath)
    grouped_streams = StreamCollection(streams)

    dataframe = streams_to_dataframe(
        grouped_streams, imcs=imcs, imts=imts, origin=origin)
    return dataframe

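# A minimal usage sketch for directory_to_dataframe. The directory path is
# hypothetical, and the imcs/imts values follow the examples in the
# docstring above.
df = directory_to_dataframe(
    '/path/to/event/streams',  # hypothetical directory of ground motion files
    imcs=['GREATER_OF_TWO_HORIZONTALS'],
    imts=['PGA', 'PGV', 'SA(0.3)'],
    origin=None)  # without an origin, no DISTANCE column is computed
print(df.columns)
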
def test_nnet():
    conf = get_config()

    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            # {'check_zero_crossings': {'min_crossings': 10}},
            {'detrend': {'detrending_method': 'linear'}},
            {'compute_snr': {'bandwidth': 20.0,
                             'check': {'max_freq': 5.0,
                                       'min_freq': 0.2,
                                       'threshold': 3.0}}},
            {'NNet_QA': {'acceptance_threshold': 0.5,
                         'model_name': 'CantWell'}}
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station='HSES')[0]
    allparams = tstream.getStreamParamKeys()
    nnet_dict = tstream.getStreamParam('nnet_qa')
    np.testing.assert_allclose(
        nnet_dict['score_HQ'], 0.99321798811740059, rtol=1e-3)

def test_get_status():
    dpath = os.path.join('data', 'testdata', 'status')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    sc = StreamCollection.from_directory(directory)

    # Manually fail some of the streams
    sc.select(station='BSAP')[0][0].fail('Failure 0')
    sc.select(station='CPE')[0][0].fail('Failure 1')
    sc.select(station='MIKB', instrument='HN')[0][0].fail('Failure 2')
    sc.select(network='PG', station='PSD')[0][0].fail('Failure 3')

    # Test results from 'short', 'long', and 'net'
    short = sc.get_status('short')
    assert (short == 1).all()

    long = sc.get_status('long')
    assert long.at['AZ.BSAP.HN'] == 'Failure 0'
    assert long.at['AZ.BZN.HN'] == ''
    assert long.at['AZ.CPE.HN'] == 'Failure 1'
    assert long.at['CI.MIKB.BN'] == ''
    assert long.at['CI.MIKB.HN'] == 'Failure 2'
    assert long.at['CI.PSD.HN'] == ''
    assert long.at['PG.PSD.HN'] == 'Failure 3'

    net = sc.get_status('net')
    assert net.at['AZ', 'number passed'] == 1
    assert net.at['AZ', 'number failed'] == 2
    assert net.at['CI', 'number passed'] == 2
    assert net.at['CI', 'number failed'] == 1
    assert net.at['PG', 'number passed'] == 0
    assert net.at['PG', 'number failed'] == 1

def test_free_field():
    data_files, origin = read_data_dir('kiknet', 'usp000hzq8')
    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)
    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0

    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ''
        for trace in pstream:
            if trace.hasParameter('failure'):
                reason = trace.getParameter('failure')['reason']
                break
        if is_free:
            assert reason.startswith('Failed sta/lta check')
        else:
            assert reason == 'Failed free field sensor check.'

def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off the sta/lta and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1 = workspace.getTables('processed')
        etable2, imc_tables2 = workspace.getTables('processed', config=config)
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_lowpass_max():
    datapath = os.path.join('data', 'testdata', 'lowpass_max')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'remove_response': {
                'f1': 0.001, 'f2': 0.005, 'f3': None, 'f4': None,
                'output': 'ACC', 'water_level': 60}
             },
            # {'detrend': {'detrending_method': 'linear'}},
            # {'detrend': {'detrending_method': 'demean'}},
            {'get_corner_frequencies': {
                'constant': {
                    'highpass': 0.08,
                    'lowpass': 20.0
                },
                'method': 'constant',
                'snr': {'same_horiz': True}}
             },
            {'lowpass_max_frequency': {'fn_fac': 0.9}}
        ]
    }
    update_dict(conf, update)

    update = {
        'windows': {
            'signal_end': {
                'method': 'model',
                'vmin': 1.0,
                'floor': 120,
                'model': 'AS16',
                'epsilon': 2.0
            },
            'window_checks': {
                'do_check': False,
                'min_noise_duration': 1.0,
                'min_signal_duration': 1.0
            }
        }
    }
    update_dict(conf, update)

    edict = {
        'id': 'ci38038071',
        'time': UTCDateTime('2018-08-30 02:35:36'),
        'lat': 34.136,
        'lon': -117.775,
        'depth': 5.5,
        'magnitude': 4.4
    }
    event = get_event_object(edict)

    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter('corner_frequencies')
            np.testing.assert_allclose(freq_dict['lowpass'], 18.0)

def test_raw():
    msg = "dataset.value has been deprecated. Use dataset[()] instead."
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
        warnings.filterwarnings("ignore", category=YAMLLoadWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        raw_streams, inv = request_raw_waveforms(
            fdsn_client='IRIS',
            org_time='2018-11-30T17-29-29.330Z',
            lat=61.3464,
            lon=-149.9552,
            before_time=120,
            after_time=120,
            dist_min=0,
            dist_max=0.135,
            networks='*',
            stations='*',
            channels=['?N?'],
            access_restricted=False)

    tdir = tempfile.mkdtemp()
    try:
        edict = get_event_dict('ak20419010')
        origin = get_event_object('ak20419010')
        tfile = os.path.join(tdir, 'test.hdf')
        sc1 = StreamCollection(raw_streams)
        workspace = StreamWorkspace(tfile)
        workspace.addStreams(origin, sc1, label='raw')
        tstreams = workspace.getStreams(edict['id'])
        assert len(tstreams) == 0

        imclist = [
            'greater_of_two_horizontals',
            'channels',
            'rotd50',
            'rotd100'
        ]
        imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
        # this shouldn't do anything
        workspace.setStreamMetrics(
            edict['id'], imclist=imclist, imtlist=imtlist)

        processed_streams = process_streams(sc1, edict)
        workspace.addStreams(origin, processed_streams, 'processed')
        labels = workspace.getLabels()
        tags = workspace.getStreamTags(edict['id'])
        out_raw_streams = workspace.getStreams(edict['id'], get_raw=True)
        assert len(out_raw_streams) == len(sc1)

        # this should only work on processed data
        workspace.setStreamMetrics(
            edict['id'], imclist=imclist, imtlist=imtlist)
        df = workspace.summarizeLabels()
        x = 1
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_weird_sensitivity():
    datafiles, origin = read_data_dir('fdsn', 'us70008dx7', 'SL.KOGS*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
    channel = psc[0].select(component='E')[0]
    assert_almost_equal(channel.data.max(), 62900.191900393373)

def retrieveData(self, event_dict):
    """Retrieve data from NSMN, turn into StreamCollection.

    Args:
        event_dict (dict):
            Best dictionary matching input event, fields as above in
            return of getMatchingEvents().

    Returns:
        StreamCollection: StreamCollection object.
    """
    rawdir = self.rawdir
    if self.rawdir is None:
        rawdir = tempfile.mkdtemp()
    else:
        if not os.path.isdir(rawdir):
            os.makedirs(rawdir)

    urlparts = urlparse(SEARCH_URL)
    req = requests.get(event_dict['url'])
    data = req.text
    soup = BeautifulSoup(data, features="lxml")
    table = soup.find_all('table', 'tableType_01')[1]
    datafiles = []
    for row in table.find_all('tr'):
        if 'class' in row.attrs:
            continue
        col = row.find_all('td', 'coltype01')[0]
        href = col.contents[0].attrs['href']
        station_id = col.contents[0].contents[0]
        station_url = urljoin('http://' + urlparts.netloc, href)
        req2 = requests.get(station_url)
        data2 = req2.text
        soup2 = BeautifulSoup(data2, features="lxml")
        center = soup2.find_all('center')[0]
        anchor = center.find_all('a')[0]
        href2 = anchor.attrs['href']
        data_url = urljoin('http://' + urlparts.netloc, href2)
        req3 = requests.get(data_url)
        data = req3.text
        localfile = os.path.join(rawdir, '%s.txt' % station_id)
        logging.info('Downloading Turkish data file %s...' % station_id)
        with open(localfile, 'wt') as f:
            f.write(data)
        datafiles.append(localfile)

    streams = []
    for dfile in datafiles:
        logging.info('Reading datafile %s...' % dfile)
        streams += read_nsmn(dfile)

    if self.rawdir is None:
        shutil.rmtree(rawdir)

    stream_collection = StreamCollection(
        streams=streams, drop_non_free=self.drop_non_free)
    return stream_collection

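# Standalone sketch of the scraping pattern retrieveData relies on: list the
# per-station links from the second 'tableType_01' table on an event page.
# Purely illustrative; it assumes a reachable page with that table structure.
import requests
from bs4 import BeautifulSoup


def list_station_links(page_url):
    soup = BeautifulSoup(requests.get(page_url).text, features="lxml")
    table = soup.find_all('table', 'tableType_01')[1]
    pairs = []
    for row in table.find_all('tr'):
        if 'class' in row.attrs:  # skip rows with a class, as retrieveData does
            continue
        anchor = row.find_all('td', 'coltype01')[0].contents[0]
        pairs.append((anchor.contents[0], anchor.attrs['href']))
    return pairs
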
def get_streams():
    datafiles1, origin1 = read_data_dir('cwb', 'us1000chhc', '*.dat')
    datafiles2, origin2 = read_data_dir('nsmn', 'us20009ynd', '*.txt')
    datafiles3, origin3 = read_data_dir('geonet', 'us1000778i', '*.V1A')
    datafiles = datafiles1 + datafiles2 + datafiles3

    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)

    return StreamCollection(streams)

def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where raw data already exists.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # generate the raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        streams, bad, terrors = directory_to_streams(directory)
        tcollection = StreamCollection(streams)

    # plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(workname):
        os.remove(workname)
    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)

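# Hedged usage sketch for download(); the event id mirrors one used elsewhere
# in these tests, and event_dir is a hypothetical path.
event = get_event_object('ci38457511')
workspace, workname, tcollection = download(
    event, event_dir='/tmp/ci38457511', config=get_config(), directory=None)
print(workname, len(tcollection))
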
def test_process_streams():
    # Loma Prieta test station (nc216859)
    origin = {
        'eventid': 'test',
        'time': UTCDateTime('2000-10-16T13:30:00'),
        'magnitude': 7.3,
        'lat': 35.278,
        'lon': 133.345
    }

    # note: the hand-built origin above is immediately overwritten by the
    # one returned with the test data
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    test = process_streams(sc, origin)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can end up in a different order on the Travis linux
    # container than on a local mac, so tests of individual traces must
    # not depend on trace order.
    trace_maxes = np.sort([np.max(np.abs(t.data)) for t in test[0]])
    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.82909426, 240.36582093, 263.7063879]),
        rtol=1e-5
    )

def test_num_horizontals():
    data_path = pkg_resources.resource_filename('gmprocess', 'data')
    sc = StreamCollection.from_directory(
        os.path.join(data_path, 'testdata', 'fdsn', 'uw61251926',
                     'strong_motion'))
    st = sc.select(station='SP2')[0]
    assert st.num_horizontal == 2

    for tr in st:
        tr.stats.channel = 'ENZ'
    assert st.num_horizontal == 0

    for tr in st:
        tr.stats.channel = 'EN1'
    assert st.num_horizontal == 3

def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off the sta/lta and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        workspace.calcStationMetrics(event.id, labels=['processed'])
        summary1_a = workspace.getStreamMetrics(
            event.id, stream1[0].stats.station, 'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        # to_numpy() replaces the pandas as_matrix() call, which has been
        # removed from newer pandas (as used in the other tests here)
        array1 = s1_df_in['Result'].to_numpy()
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {
            'GREATER_OF_TWO_HORIZONTALS': 0.6787,
            'H1': 0.3869,
            'H2': 0.6787,
            'Z': 0.7663
        }
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off the sta/lta and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        summary1_a = workspace.getStreamMetrics(
            event.id, stream1[0].stats.station, 'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        # as in test_metrics above, to_numpy() replaces the removed pandas
        # as_matrix() call
        array1 = s1_df_in['Result'].to_numpy()
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {
            'GREATER_OF_TWO_HORIZONTALS': 0.6787,
            'HN1': 0.3869,
            'HN2': 0.6787,
            'HNZ': 0.7663
        }
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_allow_nans():
    dpath = os.path.join('data', 'testdata', 'fdsn', 'uu60363602')
    datadir = pkg_resources.resource_filename('gmprocess', dpath)
    sc = StreamCollection.from_directory(datadir)
    origin = read_event_json_files([os.path.join(datadir, 'event.json')])[0]
    psc = process_streams(sc, origin)
    st = psc[0]

    ss = StationSummary.from_stream(
        st, components=['quadratic_mean'], imts=['FAS(4.0)'],
        bandwidth=189, allow_nans=True)
    assert np.isnan(ss.pgms.Result).all()

    ss = StationSummary.from_stream(
        st, components=['quadratic_mean'], imts=['FAS(4.0)'],
        bandwidth=189, allow_nans=False)
    assert ~np.isnan(ss.pgms.Result).all()

def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event,
    #                                     config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(
        raw_streams.copy(), event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]

        summary1 = StationSummary.from_config(stream1)

        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()
        workspace.calcMetrics(eventid, labels=['raw'])
        pstreams2 = workspace.getStreams(event.id, labels=['processed'])
        assert pstreams2[0].getStreamParamKeys() == ['nnet_qa']
        summary1_a = workspace.getStreamMetrics(
            event.id, stream1[0].stats.network,
            stream1[0].stats.station, 'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_smc():
    dpath = os.path.join('data', 'testdata', 'smc', 'nc216859')
    datadir = pkg_resources.resource_filename('gmprocess', dpath)

    files = OrderedDict([('0111a.smc', (1.5057E+0, -2.8745E-1)),
                         ('0111b.smc', (-1.2518E+1, -1.6806E+0)),
                         ('0111c.smc', (-5.8486E+0, -1.1594E+0))])

    streams = []
    for tfilename, accvals in files.items():
        filename = os.path.join(datadir, tfilename)
        assert is_smc(filename)

        # test acceleration from the file
        stream = read_smc(filename)[0]

        # test for one trace per file
        assert stream.count() == 1

        # test that the traces are acceleration
        for trace in stream:
            assert trace.stats.standard.units == 'acc'

        # compare the start/end points
        np.testing.assert_almost_equal(accvals[0], stream[0].data[0])
        np.testing.assert_almost_equal(accvals[1], stream[0].data[-1])

        # append to list of streams, so we can make sure these group together
        streams.append(stream)

    # test location override
    stream = read_smc(filename, location='test')[0]
    for trace in stream:
        assert trace.stats.location == 'test'

    newstreams = StreamCollection(streams)
    assert len(newstreams) == 1

    filename = os.path.join(datadir, '891018_1.sma-1.0444a.smc')
    try:
        stream = read_smc(filename)[0]
        success = True
    except Exception:
        success = False
    assert not success

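# The try/except/flag idiom at the end of test_smc could equally be written
# with pytest.raises, assuming pytest is the test runner (filename and
# read_smc as in the test above):
import pytest

with pytest.raises(Exception):
    read_smc(filename)
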
def test():
    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'BK.CMB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    assert streams[0].get_id() == 'BK.CMB.HN'

    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'TA.M04C*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    assert streams[0].get_id() == 'TA.M04C.HN'

    # DEBUGGING
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)

def test_v0():
    datafiles, origin = read_data_dir('cosmos', 'ftbragg')
    dfile = datafiles[0]
    # TODO: Fix this problem, or get the data fixed?
    assert is_cosmos(dfile)
    try:
        rstreams = read_cosmos(dfile)
        tstream = rstreams[0].copy()  # raw stream
        streams = StreamCollection(rstreams)
        pstream = remove_response(rstreams[0], 0, 0)
        pstream.detrend(type='demean')
        for trace in tstream:
            trace.data /= trace.stats.standard.instrument_sensitivity
            trace.data *= 100
        tstream.detrend(type='demean')
        np.testing.assert_almost_equal(tstream[0].data, pstream[0].data)
    except Exception:
        pass

def test_zero_crossings():
    datapath = os.path.join('data', 'testdata', 'zero_crossings')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'check_zero_crossings': {'min_crossings': 1}}
        ]
    }
    update_dict(conf, update)

    edict = {
        'id': 'ak20419010',
        'time': UTCDateTime('2018-11-30T17:29:29'),
        'lat': 61.346,
        'lon': -149.955,
        'depth': 46.7,
        'magnitude': 7.1
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter('ZeroCrossingRate')
    np.testing.assert_allclose(
        test[0][0].getParameter('ZeroCrossingRate')['crossing_rate'],
        0.008888888888888889,
        atol=1e-5)

def test_trim_multiple_events():
    datapath = os.path.join('data', 'testdata', 'multiple_events')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(os.path.join(datadir, 'ci38457511'))
    origin = get_event_object('ci38457511')
    df, catalog = create_travel_time_dataframe(
        sc, os.path.join(datadir, 'catalog.csv'), 5, 0.1, 'iasp91')
    for st in sc:
        st.detrend('demean')
        remove_response(st, None, None)
        st = corner_frequencies.constant(st)
        lowpass_filter(st)
        highpass_filter(st)
        signal_split(st, origin)
        signal_end(st, origin.time, origin.longitude, origin.latitude,
                   origin.magnitude, method='model', model='AS16')
        cut(st, 2)
        trim_multiple_events(
            st, origin, catalog, df, 0.2, 0.7, 'B14',
            {'vs30': 760}, {'rake': 0})

    num_failures = sum([1 if not st.passed else 0 for st in sc])
    assert num_failures == 1

    failure = sc.select(station='WRV2')[0][0].getParameter('failure')
    assert failure['module'] == 'trim_multiple_events'
    assert failure['reason'] == ('A significant arrival from another event '
                                 'occurs within the first 70.0 percent of '
                                 'the signal window')

    for tr in sc.select(station='JRC2')[0]:
        np.testing.assert_almost_equal(
            tr.stats.endtime, UTCDateTime('2019-07-06T03:20:38.7983Z'))

def test_StreamCollection():
    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [
        len(usc_sc[0]),
        len(usc_sc[1]),
        len(usc_sc[2])
    ]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'corrected physical units'

    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 4

    # Change back to unique values for station/network
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    # atol=1e5 in the original made this check vacuous; 1e-5 appears to be
    # the intended tolerance
    np.testing.assert_allclose(
        dmg_df['HN1']['PGA'],
        0.145615,
        atol=1e-5)

def test_corner_frequencies():
    # Default config has 'constant' corner frequency method, so the need
    # here is to force the 'snr' method.
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    config = get_config()
    window_conf = config['windows']

    processed_streams = sc.copy()
    for st in processed_streams:
        if st.passed:
            # Estimate noise/signal split time
            event_time = origin.time
            event_lon = origin.longitude
            event_lat = origin.latitude
            st = signal_split(st, origin)

            # Estimate end of signal
            end_conf = window_conf['signal_end']
            event_mag = origin.magnitude
            print(st)
            st = signal_end(
                st,
                event_time=event_time,
                event_lon=event_lon,
                event_lat=event_lat,
                event_mag=event_mag,
                **end_conf
            )
            wcheck_conf = window_conf['window_checks']
            st = window_checks(
                st,
                min_noise_duration=wcheck_conf['min_noise_duration'],
                min_signal_duration=wcheck_conf['min_signal_duration']
            )

    pconfig = config['processing']

    # Run SNR check
    # I think we don't do this anymore.
    test = [
        d for d in pconfig if list(d.keys())[0] == 'compute_snr'
    ]
    snr_config = test[0]['compute_snr']
    for stream in processed_streams:
        stream = compute_snr(
            stream,
            **snr_config
        )

    # Run get_corner_frequencies
    test = [
        d for d in pconfig if list(d.keys())[0] == 'get_corner_frequencies'
    ]
    cf_config = test[0]['get_corner_frequencies']
    snr_config = cf_config['snr']

    lp = []
    hp = []
    for stream in processed_streams:
        if not stream.passed:
            continue
        stream = get_corner_frequencies(
            stream,
            method="snr",
            snr=snr_config
        )
        if stream[0].hasParameter('corner_frequencies'):
            cfdict = stream[0].getParameter('corner_frequencies')
            lp.append(cfdict['lowpass'])
            hp.append(cfdict['highpass'])
    np.testing.assert_allclose(
        np.sort(hp),
        [0.00751431, 0.01354455, 0.04250735],
        atol=1e-6
    )