Example #1
def test_process_streams():
    # GeoNet test station for event us1000778i

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    # derive the data directory from the input files
    datadir = os.path.split(data_files[0])[0]
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    test = process_streams(sc, origin, config=config)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis Linux
    # container than on a local Mac, so tests of individual traces must not
    # depend on trace order.

    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station='HSES')[0]])

    np.testing.assert_allclose(trace_maxes,
                               np.array(
                                   [157.81975508, 240.33718094, 263.67804256]),
                               rtol=1e-5)
Example #2
def test():
    # Test for channel grouping with three unique channels
    streams = []
    # datadir = os.path.join(homedir, '..', 'data', 'knet', 'us2000cnnl')
    datafiles, origin = read_data_dir('knet', 'us2000cnnl',
                                      'AOM0031801241951*')
    for datafile in datafiles:
        streams += read_knet(datafile)
    grouped_streams = StreamCollection(streams)
    assert len(grouped_streams) == 1
    assert grouped_streams[0].count() == 3

    # Test for channel grouping with more file types
    datafiles, origin = read_data_dir('geonet', 'us1000778i',
                                      '20161113_110313_THZ_20.V2A')
    datafile = datafiles[0]
    streams += read_geonet(datafile)
    grouped_streams = StreamCollection(streams)
    assert len(grouped_streams) == 2
    assert grouped_streams[0].count() == 3
    assert grouped_streams[1].count() == 3

    # Test for warning for one channel streams
    datafiles, origin = read_data_dir('knet', 'us2000cnnl',
                                      'AOM0071801241951.UD')
    datafile = datafiles[0]
    streams += read_knet(datafile)

    grouped_streams = StreamCollection(streams)
    #    assert "One channel stream:" in logstream.getvalue()

    assert len(grouped_streams) == 3
    assert grouped_streams[0].count() == 3
    assert grouped_streams[1].count() == 3
    assert grouped_streams[2].count() == 1
Example #3
def test_process_streams():
    # GeoNet test station for event us1000778i

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    test = process_streams(sc, origin)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis Linux
    # container than on a local Mac, so tests of individual traces must not
    # depend on trace order.

    trace_maxes = np.sort([np.max(np.abs(t.data)) for t in test[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.81975508, 240.33718094, 263.67804256]),
        rtol=1e-5
    )
Example #4
def test_get_travel_time_df():
    datapath = os.path.join('data', 'testdata', 'travel_times')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)

    sc1 = StreamCollection.from_directory(os.path.join(datadir, 'ci37218996'))
    sc2 = StreamCollection.from_directory(os.path.join(datadir, 'ci38461735'))
    scs = [sc1, sc2]

    df1, catalog = create_travel_time_dataframe(
        sc1, os.path.join(datadir, 'catalog_test_traveltimes.csv'), 5, 0.1,
        'iasp91')
    df2, catalog = create_travel_time_dataframe(
        sc2, os.path.join(datadir, 'catalog_test_traveltimes.csv'), 5, 0.1,
        'iasp91')

    model = TauPyModel('iasp91')
    for dfidx, df in enumerate([df1, df2]):
        for staidx, sta in enumerate(df):
            for eqidx, time in enumerate(df[sta]):
                sta_coords = scs[dfidx][staidx][0].stats.coordinates
                event = catalog[eqidx]
                dist = locations2degrees(sta_coords['latitude'],
                                         sta_coords['longitude'],
                                         event.latitude, event.longitude)
                if event.depth_km < 0:
                    depth = 0
                else:
                    depth = event.depth_km
                travel_time = model.get_travel_times(depth, dist,
                                                     ['p', 'P', 'Pn'])[0].time
                abs_time = event.time + travel_time
                np.testing.assert_almost_equal(abs_time, time, decimal=1)
Example #5
def test_colocated():
    datapath = os.path.join('data', 'testdata', 'colocated_instruments')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)

    sc.select_colocated()
    assert sc.n_passed == 7
    assert sc.n_failed == 4

    # What if no preference is matched?
    sc = StreamCollection.from_directory(datadir)
    sc.select_colocated(preference=["XX"])
    assert sc.n_passed == 3
    assert sc.n_failed == 8
Example #6
def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1, readmes1 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' not in readmes1
        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=['processed'], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARITHMETIC_MEAN' in readmes2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        testarray = readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy()
        assert 'ARIAS' in testarray
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #7
def test_signal_split2():
    datafiles, origin = read_data_dir('knet', 'us2000cnnl',
                                      'AOM0011801241951*')
    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)

    streams = StreamCollection(streams)
    stream = streams[0]
    signal_split(stream, origin)

    cmpdict = {
        'split_time': UTCDateTime(2018, 1, 24, 10, 51, 39, 841483),
        'method': 'p_arrival',
        'picker_type': 'travel_time'
    }

    pdict = stream[0].getParameter('signal_split')
    for key, value in cmpdict.items():
        v1 = pdict[key]
        # Because I can't figure out how to get the UTCDateTime __eq__
        # operator to behave as expected with the currently installed
        # version of obspy, we pedantically compare the fields of the
        # two objects (a tolerance-based alternative is sketched after
        # this example).
        if isinstance(value, UTCDateTime):
            #value.__precision = 4
            #v1.__precision = 4
            assert value.year == v1.year
            assert value.month == v1.month
            assert value.day == v1.day
            assert value.hour == v1.hour
            assert value.minute == v1.minute
            assert value.second == v1.second
        else:
            assert v1 == value
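A simpler alternative to the field-by-field comparison above is a tolerance check, since ObsPy's UTCDateTime subtraction returns the elapsed time in seconds as a float. A minimal sketch:

from obspy import UTCDateTime

# UTCDateTime differences are float seconds, so an approximate-equality
# check sidesteps the __eq__/precision behavior noted above.
t1 = UTCDateTime(2018, 1, 24, 10, 51, 39, 841483)
t2 = UTCDateTime(2018, 1, 24, 10, 51, 39, 841500)
assert abs(t1 - t2) < 1e-3  # equal to within a millisecond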
Example #8
def test_fit_spectra():
    config = get_config()
    datapath = os.path.join('data', 'testdata', 'demo', 'ci38457511', 'raw')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    event = get_event_object('ci38457511')
    sc = StreamCollection.from_directory(datadir)
    for st in sc:
        st = signal_split(st, event)
        end_conf = config['windows']['signal_end']
        st = signal_end(st,
                        event_time=event.time,
                        event_lon=event.longitude,
                        event_lat=event.latitude,
                        event_mag=event.magnitude,
                        **end_conf)
        st = compute_snr(st, 30)
        st = get_corner_frequencies(st,
                                    method='constant',
                                    constant={
                                        'highpass': 0.08,
                                        'lowpass': 20.0
                                    })

    for st in sc:
        spectrum.fit_spectra(st, event)
Example #9
def test_colocated():
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(eventid, labels=['processed'], config=config)
        stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
        np.testing.assert_allclose(
            stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        ws.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #10
def test():
    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'BK.CMB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'BK.CMB.HN'

    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'TA.M04C*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'TA.M04C.HN'

    # test assignment of Z channel
    datafiles, origin = read_data_dir('fdsn', 'nc73300395', 'BK.VALB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    # get all channel names
    channels = sorted([st[0].stats.channel for st in streams])
    assert channels == ['HN2', 'HN3', 'HNZ']

    # DEBUGGING
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
Example #11
def directory_to_dataframe(directory, imcs=None, imts=None, origin=None, process=True):
    """Extract peak ground motions from list of Stream objects.
    Note: The PGM columns underneath each channel will be variable
    depending on the units of the Stream being passed in (velocity
    sensors can only generate PGV) and on the imtlist passed in by
    user. Spectral acceleration columns will be formatted as SA(0.3)
    for 0.3 second spectral acceleration, for example.
    Args:
        directory (str): Directory of ground motion files (streams).
        imcs (list): Strings designating desired components to create
                in table.
        imts (list): Strings designating desired PGMs to create
                in table.
        origin (obspy.core.event.Origin): Defines the focal time and
                geographical location of an earthquake hypocenter.
                Default is None.
        process (bool): Process the stream using the config file.
    Returns:
        DataFrame: Pandas dataframe containing columns:
            - STATION Station code.
            - NAME Text description of station.
            - LOCATION Two character location code.
            - SOURCE Long form string containing source network.
            - NETWORK Short network code.
            - LAT Station latitude
            - LON Station longitude
            - DISTANCE Epicentral distance (km) (if epicentral lat/lon provided)
            - HN1 East-west channel (or H1) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - HN2 North-south channel (or H2) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - HNZ Vertical channel (or HZ) (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
            - GREATER_OF_TWO_HORIZONTALS (multi-index with pgm columns):
                - PGA Peak ground acceleration (%g).
                - PGV Peak ground velocity (cm/s).
                - SA(0.3) Pseudo-spectral acceleration at 0.3 seconds (%g).
                - SA(1.0) Pseudo-spectral acceleration at 1.0 seconds (%g).
                - SA(3.0) Pseudo-spectral acceleration at 3.0 seconds (%g).
    """
    streams = []
    for filepath in glob.glob(os.path.join(directory, "*")):
        streams += read_data(filepath)
    grouped_streams = StreamCollection(streams)

    dataframe = streams_to_dataframe(
        grouped_streams, imcs=imcs, imts=imts, origin=origin)
    return dataframe
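A short usage sketch for directory_to_dataframe; the directory path is a placeholder, and which channel columns appear (HN1/HN2/HNZ vs. H1/H2/HZ) depends on the input streams:

# Hedged usage sketch; 'some_data_dir' is a placeholder path, not a real
# dataset from these examples.
df = directory_to_dataframe('some_data_dir',
                            imcs=['channels'],
                            imts=['PGA', 'PGV', 'SA(1.0)'])
# Columns form a two-level index (channel on top, PGM underneath), so a
# single series is selected with chained keys.
print(df['HN1']['PGA'])  # peak ground acceleration (%g) on the HN1 channel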
Example #12
def test_nnet():

    conf = get_config()

    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            # {'check_zero_crossings': {'min_crossings': 10}},
            {'detrend': {'detrending_method': 'linear'}},
            {'compute_snr': {'bandwidth': 20.0,
                             'check': {'max_freq': 5.0,
                                       'min_freq': 0.2,
                                       'threshold': 3.0}}},
            {'NNet_QA': {'acceptance_threshold': 0.5,
                         'model_name': 'CantWell'}}
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station='HSES')[0]
    allparams = tstream.getStreamParamKeys()
    nnet_dict = tstream.getStreamParam('nnet_qa')
    np.testing.assert_allclose(
        nnet_dict['score_HQ'], 0.99321798811740059, rtol=1e-3)
Example #13
def test_get_status():
    dpath = os.path.join('data', 'testdata', 'status')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    sc = StreamCollection.from_directory(directory)

    # Manually fail some of the streams
    sc.select(station='BSAP')[0][0].fail('Failure 0')
    sc.select(station='CPE')[0][0].fail('Failure 1')
    sc.select(station='MIKB', instrument='HN')[0][0].fail('Failure 2')
    sc.select(network='PG', station='PSD')[0][0].fail('Failure 3')

    # Test results from 'short', 'long', and 'net'
    short = sc.get_status('short')
    assert (short == 1).all()

    long = sc.get_status('long')
    assert long.at['AZ.BSAP.HN'] == 'Failure 0'
    assert long.at['AZ.BZN.HN'] == ''
    assert long.at['AZ.CPE.HN'] == 'Failure 1'
    assert long.at['CI.MIKB.BN'] == ''
    assert long.at['CI.MIKB.HN'] == 'Failure 2'
    assert long.at['CI.PSD.HN'] == ''
    assert long.at['PG.PSD.HN'] == 'Failure 3'

    net = sc.get_status('net')
    assert net.at['AZ', 'number passed'] == 1
    assert net.at['AZ', 'number failed'] == 2
    assert net.at['CI', 'number passed'] == 2
    assert net.at['CI', 'number failed'] == 1
    assert net.at['PG', 'number passed'] == 0
    assert net.at['PG', 'number failed'] == 1
Example #14
def test_free_field():
    data_files, origin = read_data_dir('kiknet', 'usp000hzq8')

    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)

    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0
    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ''
        for trace in pstream:
            if trace.hasParameter('failure'):
                reason = trace.getParameter('failure')['reason']
                break
        if is_free:
            assert reason.startswith('Failed sta/lta check')
        else:
            assert reason == 'Failed free field sensor check.'
Example #15
def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1 = workspace.getTables('processed')
        etable2, imc_tables2 = workspace.getTables('processed', config=config)
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #16
def test_lowpass_max():
    datapath = os.path.join('data', 'testdata', 'lowpass_max')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'remove_response': {
                'f1': 0.001, 'f2': 0.005, 'f3': None, 'f4': None,
                'output': 'ACC', 'water_level': 60}
             },
            #            {'detrend': {'detrending_method': 'linear'}},
            #            {'detrend': {'detrending_method': 'demean'}},
            {'get_corner_frequencies': {
                'constant': {
                    'highpass': 0.08, 'lowpass': 20.0
                },
                'method': 'constant',
                'snr': {'same_horiz': True}}
             },
            {'lowpass_max_frequency': {'fn_fac': 0.9}}
        ]
    }
    update_dict(conf, update)
    update = {
        'windows': {
            'signal_end': {
                'method': 'model',
                'vmin': 1.0,
                'floor': 120,
                'model': 'AS16',
                'epsilon': 2.0
            },
            'window_checks': {
                'do_check': False,
                'min_noise_duration': 1.0,
                'min_signal_duration': 1.0
            }
        }
    }
    update_dict(conf, update)
    edict = {
        'id': 'ci38038071',
        'time': UTCDateTime('2018-08-30 02:35:36'),
        'lat': 34.136,
        'lon': -117.775,
        'depth': 5.5,
        'magnitude': 4.4
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter('corner_frequencies')
            np.testing.assert_allclose(freq_dict['lowpass'], 18.0)
Example #17
def test_raw():
    msg = "dataset.value has been deprecated. Use dataset[()] instead."
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
        warnings.filterwarnings("ignore", category=YAMLLoadWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        raw_streams, inv = request_raw_waveforms(
            fdsn_client='IRIS',
            org_time='2018-11-30T17:29:29.330Z',
            lat=61.3464,
            lon=-149.9552,
            before_time=120,
            after_time=120,
            dist_min=0,
            dist_max=0.135,
            networks='*',
            stations='*',
            channels=['?N?'],
            access_restricted=False)
        tdir = tempfile.mkdtemp()
        try:
            edict = get_event_dict('ak20419010')
            origin = get_event_object('ak20419010')
            tfile = os.path.join(tdir, 'test.hdf')
            sc1 = StreamCollection(raw_streams)
            workspace = StreamWorkspace(tfile)
            workspace.addStreams(origin, sc1, label='raw')
            tstreams = workspace.getStreams(edict['id'])
            assert len(tstreams) == 0

            imclist = [
                'greater_of_two_horizontals', 'channels', 'rotd50', 'rotd100'
            ]
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            # this shouldn't do anything
            workspace.setStreamMetrics(edict['id'],
                                       imclist=imclist,
                                       imtlist=imtlist)

            processed_streams = process_streams(sc1, edict)
            workspace.addStreams(origin, processed_streams, 'processed')
            labels = workspace.getLabels()
            tags = workspace.getStreamTags(edict['id'])
            out_raw_streams = workspace.getStreams(edict['id'], get_raw=True)
            assert len(out_raw_streams) == len(sc1)

            # this should only work on processed data
            workspace.setStreamMetrics(edict['id'],
                                       imclist=imclist,
                                       imtlist=imtlist)

            df = workspace.summarizeLabels()

        except Exception:
            raise
        finally:
            shutil.rmtree(tdir)
Example #18
def test_weird_sensitivity():
    datafiles, origin = read_data_dir('fdsn', 'us70008dx7', 'SL.KOGS*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
    channel = psc[0].select(component='E')[0]
    assert_almost_equal(channel.data.max(), 62900.191900393373)
Example #19
    def retrieveData(self, event_dict):
        """Retrieve data from NSMN, turn into StreamCollection.

        Args:
            event_dict (dict):
                Best dictionary matching the input event, with fields as
                described in the return of getMatchingEvents().

        Returns:
            StreamCollection: StreamCollection object.
        """
        rawdir = self.rawdir
        if self.rawdir is None:
            rawdir = tempfile.mkdtemp()
        else:
            if not os.path.isdir(rawdir):
                os.makedirs(rawdir)

        urlparts = urlparse(SEARCH_URL)
        req = requests.get(event_dict['url'])
        data = req.text
        soup = BeautifulSoup(data, features="lxml")
        table = soup.find_all('table', 'tableType_01')[1]
        datafiles = []
        for row in table.find_all('tr'):
            if 'class' in row.attrs:
                continue
            col = row.find_all('td', 'coltype01')[0]
            href = col.contents[0].attrs['href']
            station_id = col.contents[0].contents[0]
            station_url = urljoin('http://' + urlparts.netloc, href)
            req2 = requests.get(station_url)
            data2 = req2.text
            soup2 = BeautifulSoup(data2, features="lxml")
            center = soup2.find_all('center')[0]
            anchor = center.find_all('a')[0]
            href2 = anchor.attrs['href']
            data_url = urljoin('http://' + urlparts.netloc, href2)
            req3 = requests.get(data_url)
            data = req3.text
            localfile = os.path.join(rawdir, '%s.txt' % station_id)
            logging.info('Downloading Turkish data file %s...' % station_id)
            with open(localfile, 'wt') as f:
                f.write(data)
            datafiles.append(localfile)

        streams = []
        for dfile in datafiles:
            logging.info('Reading datafile %s...' % dfile)
            streams += read_nsmn(dfile)

        if self.rawdir is None:
            shutil.rmtree(rawdir)

        stream_collection = StreamCollection(streams=streams,
                                             drop_non_free=self.drop_non_free)
        return stream_collection
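A hedged driver sketch for the method above. The class name NSMNFetcher and its constructor arguments are assumptions inferred from the attributes used (rawdir, drop_non_free); getMatchingEvents() is referenced by the docstring but not shown here:

# Hypothetical usage; NSMNFetcher and its constructor signature are
# assumptions, not confirmed by this example.
fetcher = NSMNFetcher(rawdir=None, drop_non_free=True)
events = fetcher.getMatchingEvents()  # per the docstring above
if events:
    collection = fetcher.retrieveData(events[0])
    print(len(collection))  # number of grouped StationStreams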
Example #20
def get_streams():
    datafiles1, origin1 = read_data_dir('cwb', 'us1000chhc', '*.dat')
    datafiles2, origin2 = read_data_dir('nsmn', 'us20009ynd', '*.txt')
    datafiles3, origin3 = read_data_dir('geonet', 'us1000778i', '*.V1A')
    datafiles = datafiles1 + datafiles2 + datafiles3
    streams = []
    for datafile in datafiles:
        streams += read_data(datafile)

    return StreamCollection(streams)
Example #21
def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where raw data already exists.
    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # generate the raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        streams, bad, terrors = directory_to_streams(directory)
        tcollection = StreamCollection(streams)

    # plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(workname):
        os.remove(workname)
    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)
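A hedged usage sketch for download(); get_config and get_event_object appear in other examples in this listing, and 'event_dir' is a placeholder path:

# Hedged sketch: fetch raw data for an event and open the resulting
# workspace; 'ci38457511' is an event id used elsewhere in these examples.
config = get_config()
event = get_event_object('ci38457511')
workspace, workname, raw_collection = download(
    event, 'event_dir', config, directory=None)
print(workname)  # path to the workspace.hdf file
workspace.close()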
Example #22
def test_process_streams():
    # Dummy origin dictionary (immediately replaced below by the GeoNet origin)
    origin = {
        'eventid': 'test',
        'time': UTCDateTime('2000-10-16T13:30:00'),
        'magnitude': 7.3,
        'lat': 35.278,
        'lon': 133.345
    }

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    sc.describe()

    test = process_streams(sc, origin)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # Apparently the traces end up in a different order on the Travis Linux
    # container than on a local Mac, so tests of individual traces must not
    # depend on trace order.

    trace_maxes = np.sort([np.max(np.abs(t.data)) for t in test[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.82909426, 240.36582093, 263.7063879]),
        rtol=1e-5
    )
Example #23
def test_num_horizontals():
    data_path = pkg_resources.resource_filename('gmprocess', 'data')
    sc = StreamCollection.from_directory(os.path.join(
        data_path, 'testdata', 'fdsn', 'uw61251926', 'strong_motion'))
    st = sc.select(station='SP2')[0]
    assert st.num_horizontal == 2

    for tr in st:
        tr.stats.channel = 'ENZ'
    assert st.num_horizontal == 0

    for tr in st:
        tr.stats.channel = 'EN1'
    assert st.num_horizontal == 3
Example #24
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        workspace.calcStationMetrics(event.id, labels=['processed'])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.station,
                                                'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {
            'GREATER_OF_TWO_HORIZONTALS': 0.6787,
            'H1': 0.3869,
            'H2': 0.6787,
            'Z': 0.7663
        }
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #25
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet',
                                     eventid,
                                     '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.station,
                                                'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {'GREATER_OF_TWO_HORIZONTALS': 0.6787,
                      'HN1': 0.3869,
                      'HN2': 0.6787,
                      'HNZ': 0.7663}
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #26
def test_allow_nans():
    dpath = os.path.join('data', 'testdata', 'fdsn', 'uu60363602')
    datadir = pkg_resources.resource_filename('gmprocess', dpath)
    sc = StreamCollection.from_directory(datadir)
    origin = read_event_json_files([os.path.join(datadir, 'event.json')])[0]
    psc = process_streams(sc, origin)
    st = psc[0]

    ss = StationSummary.from_stream(
        st, components=['quadratic_mean'], imts=['FAS(4.0)'], bandwidth=189,
        allow_nans=True)
    assert np.isnan(ss.pgms.Result).all()

    ss = StationSummary.from_stream(
        st, components=['quadratic_mean'], imts=['FAS(4.0)'], bandwidth=189,
        allow_nans=False)
    assert ~np.isnan(ss.pgms.Result).all()
Example #27
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()
        workspace.calcMetrics(eventid, labels=['raw'])
        pstreams2 = workspace.getStreams(event.id, labels=['processed'])
        assert pstreams2[0].getStreamParamKeys() == ['nnet_qa']
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        workspace.close()
    except Exception:
        raise
    finally:
        shutil.rmtree(tdir)
Example #28
def test_smc():
    dpath = os.path.join('data', 'testdata', 'smc', 'nc216859')
    datadir = pkg_resources.resource_filename('gmprocess', dpath)

    files = OrderedDict([('0111a.smc', (1.5057E+0, -2.8745E-1)),
                         ('0111b.smc', (-1.2518E+1, -1.6806E+0)),
                         ('0111c.smc', (-5.8486E+0, -1.1594E+0))])

    streams = []
    for tfilename, accvals in files.items():
        filename = os.path.join(datadir, tfilename)
        assert is_smc(filename)

        # test acceleration from the file
        stream = read_smc(filename)[0]

        # test for one trace per file
        assert stream.count() == 1

        # test that the traces are acceleration
        for trace in stream:
            assert trace.stats.standard.units == 'acc'

        # compare the start/end points
        np.testing.assert_almost_equal(accvals[0], stream[0].data[0])
        np.testing.assert_almost_equal(accvals[1], stream[0].data[-1])

        # append to list of streams, so we can make sure these group together
        streams.append(stream)

    # test location override
    stream = read_smc(filename, location='test')[0]
    for trace in stream:
        assert trace.stats.location == 'test'

    newstreams = StreamCollection(streams)
    assert len(newstreams) == 1

    filename = os.path.join(datadir, '891018_1.sma-1.0444a.smc')
    try:
        stream = read_smc(filename)[0]
        success = True
    except Exception:
        success = False
    assert not success
Example #29
def test():
    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'BK.CMB*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'BK.CMB.HN'

    datafiles, origin = read_data_dir('fdsn', 'nc72282711', 'TA.M04C*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_fdsn(datafile)

    assert streams[0].get_id() == 'TA.M04C.HN'

    # DEBUGGING
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
Example #30
def test_v0():
    datafiles, origin = read_data_dir('cosmos', 'ftbragg')
    dfile = datafiles[0]
    # TODO: Fix this problem, or get the data fixed?
    assert is_cosmos(dfile)
    try:
        rstreams = read_cosmos(dfile)
        tstream = rstreams[0].copy()  # raw stream
        streams = StreamCollection(rstreams)
        pstream = remove_response(rstreams[0], 0, 0)
        pstream.detrend(type='demean')

        for trace in tstream:
            trace.data /= trace.stats.standard.instrument_sensitivity
            trace.data *= 100
        tstream.detrend(type='demean')

        np.testing.assert_almost_equal(tstream[0].data, pstream[0].data)
    except Exception:
        pass
Example #31
def test_zero_crossings():
    datapath = os.path.join('data', 'testdata', 'zero_crossings')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()

    update = {
        'processing': [{
            'detrend': {
                'detrending_method': 'demean'
            }
        }, {
            'check_zero_crossings': {
                'min_crossings': 1
            }
        }]
    }
    update_dict(conf, update)

    edict = {
        'id': 'ak20419010',
        'time': UTCDateTime('2018-11-30T17:29:29'),
        'lat': 61.346,
        'lon': -149.955,
        'depth': 46.7,
        'magnitude': 7.1
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter('ZeroCrossingRate')
    np.testing.assert_allclose(
        test[0][0].getParameter('ZeroCrossingRate')['crossing_rate'],
        0.008888888888888889,
        atol=1e-5)
Example #32
def test_trim_multiple_events():
    datapath = os.path.join('data', 'testdata', 'multiple_events')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(os.path.join(datadir, 'ci38457511'))
    origin = get_event_object('ci38457511')
    df, catalog = create_travel_time_dataframe(
        sc, os.path.join(datadir, 'catalog.csv'), 5, 0.1, 'iasp91')
    for st in sc:
        st.detrend('demean')
        remove_response(st, None, None)
        st = corner_frequencies.constant(st)
        lowpass_filter(st)
        highpass_filter(st)
        signal_split(st, origin)
        signal_end(st,
                   origin.time,
                   origin.longitude,
                   origin.latitude,
                   origin.magnitude,
                   method='model',
                   model='AS16')
        cut(st, 2)
        trim_multiple_events(st, origin, catalog, df, 0.2, 0.7, 'B14',
                             {'vs30': 760}, {'rake': 0})

    num_failures = sum([1 if not st.passed else 0 for st in sc])
    assert num_failures == 1

    failure = sc.select(station='WRV2')[0][0].getParameter('failure')
    assert failure['module'] == 'trim_multiple_events'
    assert failure['reason'] == ('A significant arrival from another event '
                                 'occurs within the first 70.0 percent of the '
                                 'signal window')

    for tr in sc.select(station='JRC2')[0]:
        np.testing.assert_almost_equal(
            tr.stats.endtime, UTCDateTime('2019-07-06T03:20:38.7983Z'))
Example #33
def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [
        len(usc_sc[0]),
        len(usc_sc[1]),
        len(usc_sc[2])
    ]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'corrected physical units'

    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 4

    # Change back to unique values for station/network
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    np.testing.assert_allclose(
        dmg_df['HN1']['PGA'],
        0.145615,
        atol=1e-5)
Example #34
def test_corner_frequencies():
    # Default config has 'constant' corner frequency method, so the need
    # here is to force the 'snr' method (a config-based alternative is
    # sketched after this example).
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)

    config = get_config()

    window_conf = config['windows']

    processed_streams = sc.copy()
    for st in processed_streams:
        if st.passed:
            # Estimate noise/signal split time
            event_time = origin.time
            event_lon = origin.longitude
            event_lat = origin.latitude
            st = signal_split(st, origin)

            # Estimate end of signal
            end_conf = window_conf['signal_end']
            event_mag = origin.magnitude
            print(st)
            st = signal_end(
                st,
                event_time=event_time,
                event_lon=event_lon,
                event_lat=event_lat,
                event_mag=event_mag,
                **end_conf
            )
            wcheck_conf = window_conf['window_checks']
            st = window_checks(
                st,
                min_noise_duration=wcheck_conf['min_noise_duration'],
                min_signal_duration=wcheck_conf['min_signal_duration']
            )

    pconfig = config['processing']

    # Run SNR check
    # I think we don't do this anymore.
    test = [
        d for d in pconfig if list(d.keys())[0] == 'compute_snr'
    ]
    snr_config = test[0]['compute_snr']
    for stream in processed_streams:
        stream = compute_snr(
            stream,
            **snr_config
        )

    # Run get_corner_frequencies
    test = [
        d for d in pconfig if list(d.keys())[0] == 'get_corner_frequencies'
    ]
    cf_config = test[0]['get_corner_frequencies']
    snr_config = cf_config['snr']

    lp = []
    hp = []
    for stream in processed_streams:
        if not stream.passed:
            continue
        stream = get_corner_frequencies(
            stream,
            method="snr",
            snr=snr_config
        )
        if stream[0].hasParameter('corner_frequencies'):
            cfdict = stream[0].getParameter('corner_frequencies')
            lp.append(cfdict['lowpass'])
            hp.append(cfdict['highpass'])
    np.testing.assert_allclose(
        np.sort(hp),
        [0.00751431, 0.01354455, 0.04250735],
        atol=1e-6
    )
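As the opening comment of this example notes, the default config uses the 'constant' corner-frequency method. A hedged sketch of selecting 'snr' through the config instead, reusing the update_dict pattern and keys from Example #16 (whether update_dict merges or replaces the 'processing' list is not confirmed by these examples):

# Hedged sketch, assuming the same config keys shown in Example #16.
conf = get_config()
update = {
    'processing': [
        {'get_corner_frequencies': {
            'method': 'snr',
            'snr': {'same_horiz': True}}}
    ]
}
update_dict(conf, update)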