示例#1
0
def _test_colocated():
    """Check the duration metric for station CI.MIKB of event ci38445975.

    The FDSN fixture directory is read into a StreamCollection, processed
    with the ``test_config.yml`` stored next to the data, and both the raw
    and processed streams are written to a workspace in a temporary
    directory. Metrics are computed for the "processed" label and the
    geometric-mean duration is compared against a stored value.

    The leading underscore keeps the function out of pytest's default
    ``test_*`` discovery.
    """
    eventid = "ci38445975"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, "test_config.yml")
    with open(config_file, "r", encoding="utf-8") as f:
        yaml = YAML()
        yaml.preserve_quotes = True
        config = yaml.load(f)
    processed_streams = process_streams(raw_streams, event, config=config)

    with tempfile.TemporaryDirectory() as tdir:
        ws = StreamWorkspace(os.path.join(tdir, "test.hdf"))
        try:
            ws.addEvent(event)
            ws.addStreams(event, raw_streams, label="raw")
            ws.addStreams(event, processed_streams, label="processed")
            ws.calcMetrics(eventid, labels=["processed"], config=config)
            stasum = ws.getStreamMetrics(eventid, "CI", "MIKB", "processed")
            np.testing.assert_allclose(
                stasum.get_pgm("duration", "geometric_mean"), 38.94480068)
        finally:
            # Close the workspace even when a call or assertion above fails,
            # so the file is released before the directory is deleted.
            ws.close()
示例#2
0
def test_stream_params():
    """Round-trip a stream parameter through a StreamWorkspace.

    A dictionary is attached to the first stream read from the geonet
    fixture under the key 'stats', the streams are stored in a workspace
    with label 'stats', and the parameter read back from the workspace is
    required to equal the dictionary that was stored.
    """
    eventid = 'us1000778i'
    datafiles, event = read_data_dir(
        'geonet',
        eventid,
        '20161113_110259_WTMC_20.V1A'
    )
    streams = list(read_data(datafiles[0]))
    statsdict = {'name': 'Fred', 'age': 34}
    streams[0].setStreamParam('stats', statsdict)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, streams, label='stats')
            outstreams = workspace.getStreams(event.id, labels=['stats'])
            cmpdict = outstreams[0].getStreamParam('stats')
            assert cmpdict == statsdict
        finally:
            # Release the file even if the comparison fails; the temporary
            # directory is removed right after this.
            workspace.close()
def _test_colocated():
    """Check the duration metric for station CI.MIKB of event ci38445975.

    The FDSN fixture directory is read into a StreamCollection and
    processed with the ``test_config.yml`` stored next to the data. Raw and
    processed streams go into a workspace in a temporary directory, metrics
    are computed for the 'processed' label, and the geometric-mean duration
    is compared against a stored value.

    The leading underscore keeps the function out of pytest's default
    ``test_*`` discovery.
    """
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r', encoding='utf-8') as f:
        # NOTE(review): FullLoader is applied to a file from the test data
        # directory; do not reuse this pattern for untrusted YAML.
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    with tempfile.TemporaryDirectory() as tdir:
        ws = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            ws.addEvent(event)
            ws.addStreams(event, raw_streams, label='raw')
            ws.addStreams(event, processed_streams, label='processed')
            ws.calcMetrics(eventid, labels=['processed'], config=config)
            stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
            np.testing.assert_allclose(
                stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        finally:
            # Close the workspace even when a call or assertion above fails,
            # so the file is released before the directory is deleted.
            ws.close()
示例#4
0
def test_metrics2():
    """Check that getTables reflects the config used to compute metrics.

    Metrics are first computed without passing a config and the resulting
    tables must not contain 'ARITHMETIC_MEAN'. The stored WaveFormMetrics
    and StationMetrics are then deleted, metrics are recomputed with a
    config that adds the 'Arias' IMT and 'arithmetic_mean' IMC, and the new
    tables and readmes must contain both.
    """
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, processed_streams, label='processed')
            workspace.calcMetrics(event.id, labels=['processed'])
            etable, imc_tables1, readmes1 = workspace.getTables('processed')
            assert 'ARITHMETIC_MEAN' not in imc_tables1
            assert 'ARITHMETIC_MEAN' not in readmes1

            # Drop the stored metrics so the next calcMetrics call starts
            # from an empty set of auxiliary data.
            del workspace.dataset.auxiliary_data.WaveFormMetrics
            del workspace.dataset.auxiliary_data.StationMetrics
            workspace.calcMetrics(event.id, labels=['processed'],
                                  config=config)
            etable2, imc_tables2, readmes2 = workspace.getTables('processed')
            assert 'ARITHMETIC_MEAN' in imc_tables2
            assert 'ARITHMETIC_MEAN' in readmes2
            assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
            testarray = (
                readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy())
            assert 'ARIAS' in testarray
        finally:
            # The workspace was previously never closed; release the file
            # before the temporary directory is deleted.
            workspace.close()
def test_metrics2():
    """Check that getTables honours an explicitly passed config.

    Metrics are computed once for the 'processed' label. Tables requested
    without a config must not contain 'ARITHMETIC_MEAN'; tables requested
    with a config that adds the 'Arias' IMT and 'arithmetic_mean' IMC must
    contain 'ARITHMETIC_MEAN' with an 'ARIAS' entry.
    """
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, processed_streams, label='processed')
            workspace.calcMetrics(event.id, labels=['processed'])
            etable, imc_tables1 = workspace.getTables('processed')
            etable2, imc_tables2 = workspace.getTables('processed',
                                                       config=config)
            assert 'ARITHMETIC_MEAN' not in imc_tables1
            assert 'ARITHMETIC_MEAN' in imc_tables2
            assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        finally:
            # The workspace was previously never closed; release the file
            # before the temporary directory is deleted.
            workspace.close()
示例#6
0
def download(event, event_dir, config, directory):
    """Collect raw streams for an event and store them in a new workspace.

    When ``directory`` is None the streams come from ``fetch_data`` and an
    event file is written to ``event_dir``; otherwise the streams are read
    from ``directory``.

    Args:
        event (ScalarEvent):
            Event whose time, latitude, longitude, depth_km and magnitude
            are passed to ``fetch_data``.
        event_dir (str):
            Directory that holds the raw directory and receives
            'workspace.hdf'.
        config (dict):
            gmprocess configuration, forwarded to ``fetch_data``.
        directory (str):
            Path where raw data already exists, or None to fetch data.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    raw_dir = get_rawdir(event_dir)

    if directory is not None:
        streams, _bad, _errors = directory_to_streams(directory)
        collection = StreamCollection(streams)
    else:
        collection, _errors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=raw_dir)
        # Write the event file now in case the user only wants to download.
        create_event_file(event, event_dir)

    # Draw the raw waveform plots only if the raw directory has no PNG yet.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        existing_plots = glob.glob(os.path.join(raw_dir, '*.png'))
        if not existing_plots:
            plot_raw(raw_dir, collection, event)

    # Start from a fresh workspace file holding the unprocessed waveforms.
    hdf_path = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(hdf_path):
        os.remove(hdf_path)
    workspace = StreamWorkspace(hdf_path)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, collection, label='unprocessed')

    return (workspace, hdf_path, collection)
示例#7
0
def test_metrics():
    """Round-trip station summaries through a workspace and check PGA.

    Summaries for the first two processed streams are stored with
    setStreamMetrics, station metrics are computed, and the summary read
    back for the first stream must match the one written (4 decimals).
    The PGA entries of the first row of the metrics table are compared
    against stored values.
    """
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, processed_streams, label='processed')
            stream1 = processed_streams[0]
            stream2 = processed_streams[1]
            summary1 = StationSummary.from_config(stream1)
            summary2 = StationSummary.from_config(stream2)
            workspace.setStreamMetrics(event.id, 'processed', summary1)
            workspace.setStreamMetrics(event.id, 'processed', summary2)
            workspace.calcStationMetrics(event.id, labels=['processed'])
            summary1_a = workspace.getStreamMetrics(event.id,
                                                    stream1[0].stats.station,
                                                    'processed')
            s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
            s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
            # Series.as_matrix() was removed in pandas 1.0; to_numpy() is
            # the replacement.
            array1 = s1_df_in['Result'].to_numpy()
            array2 = s1_df_out['Result'].to_numpy()
            np.testing.assert_almost_equal(array1, array2, decimal=4)

            df = workspace.getMetricsTable(event.id)
            cmp_series = {
                'GREATER_OF_TWO_HORIZONTALS': 0.6787,
                'H1': 0.3869,
                'H2': 0.6787,
                'Z': 0.7663
            }
            pga_dict = df.iloc[0]['PGA'].to_dict()
            for key, value in pga_dict.items():
                np.testing.assert_almost_equal(value, cmp_series[key],
                                               decimal=4)
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            workspace.close()
示例#8
0
def test_metrics():
    """Compare StationSummary metrics with those stored by the workspace.

    Metrics computed directly from the first raw stream with
    ``StationSummary.from_config`` must match, within 1e-6, the metrics the
    workspace returns for the same station after ``calcMetrics`` on the
    "raw" label.
    """
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    # Add the NNet_QA step to the processing list.
    # NOTE(review): if config is a plain dict, copy() is shallow and the
    # append below also extends config["processing"] — confirm intended.
    newconfig = config.copy()
    newconfig["processing"].append(
        {"NNet_QA": {
            "acceptance_threshold": 0.5,
            "model_name": "CantWell"
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, "test.hdf"))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, raw_streams, label="raw")
            workspace.addStreams(event, processed_streams, label="processed")
            stream1 = raw_streams[0]

            # Get metrics from station summary for raw streams
            summary1 = StationSummary.from_config(stream1)
            s1_df_in = summary1.pgms.sort_values(["IMT", "IMC"])
            array1 = s1_df_in["Result"].to_numpy()

            # Compare to metrics from getStreamMetrics for raw streams
            workspace.calcMetrics(eventid, labels=["raw"])
            summary1_a = workspace.getStreamMetrics(event.id,
                                                    stream1[0].stats.network,
                                                    stream1[0].stats.station,
                                                    "raw")
            s1_df_out = summary1_a.pgms.sort_values(["IMT", "IMC"])
            array2 = s1_df_out["Result"].to_numpy()

            np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            workspace.close()
def test_metrics():
    """Compare StationSummary metrics with those stored by the workspace.

    Metrics computed directly from the first raw stream with
    ``StationSummary.from_config`` must match, within 1e-6, the metrics the
    workspace returns for the same station after ``calcMetrics`` on the
    'raw' label.
    """
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # Add the NNet_QA step to the processing list.
    # NOTE(review): if config is a plain dict, copy() is shallow and the
    # append below also extends config['processing'] — confirm intended.
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams.copy(),
                                        event,
                                        config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, raw_streams, label='raw')
            workspace.addStreams(event, processed_streams, label='processed')
            stream1 = raw_streams[0]

            # Get metrics from station summary for raw streams
            summary1 = StationSummary.from_config(stream1)
            s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
            array1 = s1_df_in['Result'].to_numpy()

            # Compare to metrics from getStreamMetrics for raw streams
            workspace.calcMetrics(eventid, labels=['raw'])
            summary1_a = workspace.getStreamMetrics(event.id,
                                                    stream1[0].stats.network,
                                                    stream1[0].stats.station,
                                                    'raw')
            s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
            array2 = s1_df_out['Result'].to_numpy()

            np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            workspace.close()
def test_metrics():
    """Round-trip station summaries through a workspace and check PGA.

    Summaries for the first two processed streams are stored with
    setStreamMetrics, and the summary read back for the first stream must
    match the one written (4 decimals). The PGA entries of the first row
    of the metrics table are compared against stored values.
    """
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet',
                                     eventid,
                                     '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, processed_streams, label='processed')
            stream1 = processed_streams[0]
            stream2 = processed_streams[1]
            summary1 = StationSummary.from_config(stream1)
            summary2 = StationSummary.from_config(stream2)
            workspace.setStreamMetrics(event.id, 'processed', summary1)
            workspace.setStreamMetrics(event.id, 'processed', summary2)
            summary1_a = workspace.getStreamMetrics(event.id,
                                                    stream1[0].stats.station,
                                                    'processed')
            s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
            s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
            # Series.as_matrix() was removed in pandas 1.0; to_numpy() is
            # the replacement.
            array1 = s1_df_in['Result'].to_numpy()
            array2 = s1_df_out['Result'].to_numpy()
            np.testing.assert_almost_equal(array1, array2, decimal=4)

            df = workspace.getMetricsTable(event.id)
            cmp_series = {'GREATER_OF_TWO_HORIZONTALS': 0.6787,
                          'HN1': 0.3869,
                          'HN2': 0.6787,
                          'HNZ': 0.7663}
            pga_dict = df.iloc[0]['PGA'].to_dict()
            for key, value in pga_dict.items():
                np.testing.assert_almost_equal(value, cmp_series[key],
                                               decimal=4)
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            workspace.close()
def test_metrics():
    """Compare StationSummary metrics with those stored by the workspace.

    Metrics computed directly from the first raw stream must match (to 4
    decimals) those returned by the workspace after stream and station
    metrics are calculated for the 'raw' label. The first processed stream
    read back from the workspace must carry exactly the 'nnet_qa' stream
    parameter key.
    """
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # Add the NNet_QA step to the processing list.
    # NOTE(review): if config is a plain dict, copy() is shallow and the
    # append below also extends config['processing'] — confirm intended.
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, raw_streams, label='raw')
            workspace.addStreams(event, processed_streams, label='processed')
            stream1 = raw_streams[0]
            summary1 = StationSummary.from_config(stream1)
            s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
            # Series.as_matrix() was removed in pandas 1.0; to_numpy() is
            # the replacement.
            array1 = s1_df_in['Result'].to_numpy()
            workspace.calcStreamMetrics(eventid, labels=['raw'])
            workspace.calcStationMetrics(event.id, labels=['raw'])
            pstreams2 = workspace.getStreams(event.id, labels=['processed'])
            assert pstreams2[0].getStreamParamKeys() == ['nnet_qa']
            summary1_a = workspace.getStreamMetrics(event.id,
                                                    stream1[0].stats.network,
                                                    stream1[0].stats.station,
                                                    'raw')
            s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
            array2 = s1_df_out['Result'].to_numpy()
            np.testing.assert_almost_equal(array1, array2, decimal=4)
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            workspace.close()
示例#12
0
def _test_stream_params():
    """Round-trip a stream parameter through a StreamWorkspace.

    A dictionary is attached to the first stream read from the geonet
    fixture under the key "stats", the streams are stored in a workspace
    with label "stats", and the parameter read back from the workspace is
    required to equal the dictionary that was stored.

    The leading underscore keeps the function out of pytest's default
    ``test_*`` discovery.
    """
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid,
                                     "20161113_110259_WTMC_20.V1A")
    streams = list(read_data(datafiles[0]))
    statsdict = {"name": "Fred", "age": 34}
    streams[0].setStreamParam("stats", statsdict)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, "test.hdf"))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, streams, label="stats")
            outstreams = workspace.getStreams(event.id, labels=["stats"])
            cmpdict = outstreams[0].getStreamParam("stats")
            assert cmpdict == statsdict
        finally:
            # Release the file even if the comparison fails; the temporary
            # directory is removed right after this.
            workspace.close()
示例#13
0
def _test_metrics2():
    """Check that getTables reflects the config used to compute metrics.

    Metrics are first computed without passing a config and the resulting
    tables must not contain "ARITHMETIC_MEAN". The stored WaveFormMetrics
    and StationMetrics are then deleted, metrics are recomputed with a
    config that adds the "Arias" IMT and "arithmetic_mean" IMC, and the new
    tables and readmes must contain both.

    The leading underscore keeps the function out of pytest's default
    ``test_*`` discovery.
    """
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    config["metrics"]["output_imts"].append("Arias")
    config["metrics"]["output_imcs"].append("arithmetic_mean")
    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ["check_sta_lta"])
    csnr = next(
        step for step in newconfig["processing"] if "compute_snr" in step)
    csnr["compute_snr"]["check"]["threshold"] = -10.0
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, "test.hdf"))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, processed_streams, label="processed")
            workspace.calcMetrics(event.id, labels=["processed"])
            etable, imc_tables1, readmes1 = workspace.getTables("processed")
            assert "ARITHMETIC_MEAN" not in imc_tables1
            assert "ARITHMETIC_MEAN" not in readmes1

            # Drop the stored metrics so the next calcMetrics call starts
            # from an empty set of auxiliary data.
            del workspace.dataset.auxiliary_data.WaveFormMetrics
            del workspace.dataset.auxiliary_data.StationMetrics
            workspace.calcMetrics(event.id, labels=["processed"],
                                  config=config)
            etable2, imc_tables2, readmes2 = workspace.getTables("processed")
            assert "ARITHMETIC_MEAN" in imc_tables2
            assert "ARITHMETIC_MEAN" in readmes2
            assert "ARIAS" in imc_tables2["ARITHMETIC_MEAN"]
            testarray = (
                readmes2["ARITHMETIC_MEAN"]["Column header"].to_numpy())
            assert "ARIAS" in testarray
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            workspace.close()
def test_stream_params():
    """Round-trip a stream parameter through a StreamWorkspace.

    A dictionary is attached to the first stream read from the geonet
    fixture under the key 'stats', the streams are stored in a workspace
    with label 'stats', and the parameter read back from the workspace is
    required to equal the dictionary that was stored.
    """
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet',
                                     eventid,
                                     '20161113_110259_WTMC_20.V1A')
    streams = list(read_data(datafiles[0]))
    statsdict = {'name': 'Fred', 'age': 34}
    streams[0].setStreamParam('stats', statsdict)

    with tempfile.TemporaryDirectory() as tdir:
        workspace = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            workspace.addEvent(event)
            workspace.addStreams(event, streams, label='stats')
            outstreams = workspace.getStreams(event.id, labels=['stats'])
            cmpdict = outstreams[0].getStreamParam('stats')
            assert cmpdict == statsdict
        finally:
            # Release the file even if the comparison fails; the temporary
            # directory is removed right after this.
            workspace.close()
def download(event, event_dir, config, directory):
    """Collect raw streams for an event and store them in a new workspace.

    When ``directory`` is None the streams come from ``fetch_data``;
    otherwise they are read from the raw directory of
    ``directory/<event.id>``. An event file is written to ``event_dir`` in
    both cases.

    Args:
        event (ScalarEvent):
            Event whose time, latitude, longitude, depth_km and magnitude
            are passed to ``fetch_data`` and whose id selects the input
            sub-directory.
        event_dir (str):
            Directory that holds the raw directory and receives
            'workspace.hdf'.
        config (dict):
            gmprocess configuration. Forwarded to ``fetch_data``; its
            'duplicate' entry supplies keyword arguments for the
            StreamCollection when reading from ``directory``.
        directory (str):
            Path where data already exists, or None to fetch data. Must be
            organized in a 'raw' directory, within directories with names
            as the event ids. For example, if `directory` is 'proj_dir'
            and you have data for event id 'abc123' then the raw data to
            be read in should be located in `proj_dir/abc123/raw/`.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    raw_dir = get_rawdir(event_dir)

    if directory is not None:
        # Read from <directory>/<event id>/raw
        source_raw_dir = get_rawdir(os.path.join(directory, event.id))
        streams, _bad, _errors = directory_to_streams(source_raw_dir)
        collection = StreamCollection(streams, **config['duplicate'])
    else:
        collection, _errors = fetch_data(event.time.datetime,
                                         event.latitude,
                                         event.longitude,
                                         event.depth_km,
                                         event.magnitude,
                                         config=config,
                                         rawdir=raw_dir)
    # Both paths write an event file into the event directory.
    create_event_file(event, event_dir)

    # Draw the raw waveform plots only if the raw directory has no PNG yet.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        existing_plots = glob.glob(os.path.join(raw_dir, '*.png'))
        if not existing_plots:
            plot_raw(raw_dir, collection, event)

    # Start from a fresh workspace file holding the unprocessed waveforms.
    hdf_path = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(hdf_path):
        os.remove(hdf_path)

    workspace = StreamWorkspace(hdf_path)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, collection, label='unprocessed')

    return (workspace, hdf_path, collection)
def _test_vs30_dist_metrics():
    """Check distance, back-azimuth and Vs30 metrics for station CI.CLC.

    Metrics for event ci38457511 are computed with a rupture file and a
    Vs30 grid file taken from the fixture directory. The station summary's
    distances, back azimuth and Vs30 value are compared with stored values
    (rtol=0.01), and the 'Z' tables returned by getTables must contain the
    corresponding column headers.

    The leading underscore keeps the function out of pytest's default
    ``test_*`` discovery.
    """
    KNOWN_DISTANCES = {
        'epicentral': 5.1,
        'hypocentral': 10.2,
        'rupture': 2.21,
        'rupture_var': np.nan,
        'joyner_boore': 2.21,
        'joyner_boore_var': np.nan,
        'gc2_rx': 2.66,
        'gc2_ry': 3.49,
        'gc2_ry0': 0.00,
        'gc2_U': 34.34,
        'gc2_T': 2.66
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = 'ci38457511'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, 'test_grid.grd')
    config['metrics']['vs30'] = {
        'vs30': {
            'file': grid_file,
            'column_header': 'GlobalVs30',
            'readme_entry': 'GlobalVs30',
            'units': 'm/s'
        }
    }
    check_cols = {
        'EpicentralDistance', 'HypocentralDistance', 'RuptureDistance',
        'RuptureDistanceVar', 'JoynerBooreDistance',
        'JoynerBooreDistanceVar', 'GC2_rx', 'GC2_ry', 'GC2_ry0', 'GC2_U',
        'GC2_T', 'GlobalVs30', 'BackAzimuth'
    }

    with tempfile.TemporaryDirectory() as tdir:
        ws = StreamWorkspace(os.path.join(tdir, 'test.hdf'))
        try:
            ws.addEvent(event)
            ws.addStreams(event, raw_streams, label='raw')
            ws.addStreams(event, processed_streams, label='processed')
            ws.calcMetrics(event.id,
                           rupture_file=rupture_file,
                           labels=['processed'],
                           config=config)
            sta_sum = ws.getStreamMetrics(event.id, 'CI', 'CLC', 'processed')

            for dist in sta_sum.distances:
                np.testing.assert_allclose(sta_sum.distances[dist],
                                           KNOWN_DISTANCES[dist],
                                           rtol=0.01)
            np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ,
                                       rtol=0.01)
            np.testing.assert_allclose(sta_sum._vs30['vs30']['value'],
                                       KNOWN_VS30,
                                       rtol=0.01)
            event_df, imc_tables, readme_tables = ws.getTables('processed')
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            ws.close()

        # The tables were fetched above, so the workspace is closed here.
        assert check_cols.issubset(set(readme_tables['Z']['Column header']))
        assert check_cols.issubset(set(imc_tables['Z'].columns))
def assemble(event, config, directory, gmprocess_version):
    """Load an event's raw data from a local directory into a new workspace.

    Files that could not be read are listed, with their errors, in
    ``read_failures.csv`` inside the raw directory. Any existing workspace
    file for the event is removed before the new one is created.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.
        gmprocess_version (str):
            Software version for gmprocess.

    Returns:
        StreamWorkspace: Workspace holding the event, the gmprocess version,
        the config and the streams stored under the label "unprocessed".
    """

    # Make raw directory
    in_event_dir = os.path.join(directory, event.id)
    in_raw_dir = get_rawdir(in_event_dir)
    logging.debug("in_raw_dir: %s", in_raw_dir)
    streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        in_raw_dir, config=config)

    # Write errors to a csv file
    failures_file = Path(in_raw_dir) / "read_failures.csv"
    colnames = ["File", "Failure"]
    with open(failures_file, "w", newline="") as f:
        writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        writer.writerow(colnames)
        writer.writerows(zip(unprocessed_files, unprocessed_file_errors))

    logging.debug("streams:")
    logging.debug(streams)

    # The config selects which container type holds the streams.
    if config["read"]["use_streamcollection"]:
        stream_array = StreamCollection(streams, **config["duplicate"])
    else:
        stream_array = StreamArray(streams)

    logging.info("stream_array.describe_string():")
    logging.info(stream_array.describe_string())

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(in_event_dir, WORKSPACE_NAME)

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    logging.debug("workspace.dataset.events:")
    logging.debug(workspace.dataset.events)
    workspace.addGmprocessVersion(gmprocess_version)
    workspace.addConfig()
    workspace.addStreams(event,
                         stream_array,
                         label="unprocessed",
                         gmprocess_version=gmprocess_version)
    logging.debug("workspace.dataset.waveforms.list():")
    logging.debug(workspace.dataset.waveforms.list())
    logging.debug("workspace.dataset.config")

    return workspace
示例#18
0
def _test_vs30_dist_metrics():
    """Check distance, back-azimuth and Vs30 metrics for station CI.CLC.

    Metrics for event ci38457511 are computed with a rupture file and a
    Vs30 grid file taken from the fixture directory. The station summary's
    distances, back azimuth and Vs30 value are compared with stored values
    (rtol=0.01), and the "Z" tables returned by getTables must contain the
    corresponding column headers.

    The leading underscore keeps the function out of pytest's default
    ``test_*`` discovery.
    """
    KNOWN_DISTANCES = {
        "epicentral": 5.1,
        "hypocentral": 10.2,
        "rupture": 2.21,
        "rupture_var": np.nan,
        "joyner_boore": 2.21,
        "joyner_boore_var": np.nan,
        "gc2_rx": 2.66,
        "gc2_ry": 3.49,
        "gc2_ry0": 0.00,
        "gc2_U": 34.34,
        "gc2_T": 2.66,
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = "ci38457511"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, "test_grid.grd")
    config["metrics"]["vs30"] = {
        "vs30": {
            "file": grid_file,
            "column_header": "GlobalVs30",
            "readme_entry": "GlobalVs30",
            "units": "m/s",
        }
    }
    check_cols = {
        "EpicentralDistance",
        "HypocentralDistance",
        "RuptureDistance",
        "RuptureDistanceVar",
        "JoynerBooreDistance",
        "JoynerBooreDistanceVar",
        "GC2_rx",
        "GC2_ry",
        "GC2_ry0",
        "GC2_U",
        "GC2_T",
        "GlobalVs30",
        "BackAzimuth",
    }

    with tempfile.TemporaryDirectory() as tdir:
        ws = StreamWorkspace(os.path.join(tdir, "test.hdf"))
        try:
            ws.addEvent(event)
            ws.addStreams(event, raw_streams, label="raw")
            ws.addStreams(event, processed_streams, label="processed")
            ws.calcMetrics(event.id,
                           rupture_file=rupture_file,
                           labels=["processed"],
                           config=config)
            sta_sum = ws.getStreamMetrics(event.id, "CI", "CLC", "processed")

            for dist in sta_sum.distances:
                np.testing.assert_allclose(sta_sum.distances[dist],
                                           KNOWN_DISTANCES[dist],
                                           rtol=0.01)
            np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ,
                                       rtol=0.01)
            np.testing.assert_allclose(sta_sum._vs30["vs30"]["value"],
                                       KNOWN_VS30,
                                       rtol=0.01)
            event_df, imc_tables, readme_tables = ws.getTables("processed")
        finally:
            # Close the workspace even when an assertion fails so the file
            # is released before the temporary directory is deleted.
            ws.close()

        # The tables were fetched above, so the workspace is closed here.
        assert check_cols.issubset(set(readme_tables["Z"]["Column header"]))
        assert check_cols.issubset(set(imc_tables["Z"].columns))