def test_StreamArray():

    # read usc data
    dpath = os.path.join("data", "testdata", "usc", "ci3144585")
    directory = pkg_resources.resource_filename("gmprocess", dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        directory)
    assert len(usc_streams) == 7

    usc_sa = StreamArray(usc_streams)

    # Use print method
    print(usc_sa)
    usc_sa.describe()

    # Use len method
    assert len(usc_sa) == 7

    # Use nonzero method
    assert bool(usc_sa)

    # read dmg data
    dpath = os.path.join("data", "testdata", "dmg", "ci3144585")
    directory = pkg_resources.resource_filename("gmprocess", dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        directory)
    assert len(dmg_streams) == 1

    dmg_sa = StreamArray(dmg_streams)
    dmg_sa.describe()

    # Has 3 streams
    assert len(dmg_sa) == 3
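
In the test above, the USC directory yields 7 readable streams and a StreamArray of length 7, while the same data wrapped in a StreamCollection (used in the later examples) collapses to 3 station-grouped streams. A minimal sketch of that contrast; `usc_dir` is a placeholder path and the import locations are assumptions that differ between gmprocess versions:

# Sketch only: contrast StreamArray with StreamCollection for the same data.
# `usc_dir` is a placeholder; import paths vary across gmprocess versions.
from gmprocess.io.read_directory import directory_to_streams
from gmprocess.core.streamarray import StreamArray
from gmprocess.core.streamcollection import StreamCollection

streams, missed_files, errors = directory_to_streams(usc_dir)
sa = StreamArray(streams)       # length 7 for the USC data above
sc = StreamCollection(streams)  # length 3: traces regrouped by station
print(len(sa), len(sc))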
Example #2
def test_directory_to_streams():
    dpath = os.path.join('data', 'testdata', 'read_directory', 'whittier87')
    directory = pkg_resources.resource_filename('gmprocess', dpath)

    streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        directory)
    assert len(streams) == 7
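
directory_to_streams returns a three-tuple: the streams that were read, the files that could not be read, and the matching error messages. A small sketch of reporting the failures (assemble() further down writes the same information to a read_failures.csv file); `data_dir` is a placeholder and the import path is an assumption that varies between gmprocess versions:

# Sketch only: report files that directory_to_streams could not read.
# `data_dir` is a placeholder; the import path varies by gmprocess version.
from gmprocess.io.read_directory import directory_to_streams

streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(data_dir)
print(f"read {len(streams)} streams; {len(unprocessed_files)} files failed")
for fname, err in zip(unprocessed_files, unprocessed_file_errors):
    print(f"  {fname}: {err}")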
Example #5
def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where raw data already exists.
    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # generate the raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        streams, bad, terrors = directory_to_streams(directory)
        tcollection = StreamCollection(streams)

    # plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(workname):
        os.remove(workname)
    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)
Example #6
    def from_directory(cls, directory):
        """Create a StreamCollection instance from a directory of data.

        Args:
            directory (str):
                Directory of ground motion files (streams) to be read.

        Returns:
            StreamCollection instance.
        """
        streams, missed_files, errors = directory_to_streams(directory)

        # Might eventually want to include some of the missed files and
        # error info but don't have a sensible place to put it currently.
        return cls(streams)
    def from_directory(cls, directory, use_default_config=False):
        """Create a StreamCollection instance from a directory of data.

        Args:
            directory (str):
                Directory of ground motion files (streams) to be read.
            use_default_config (bool):
                Use default ("production") config.

        Returns:
            StreamCollection instance.
        """
        if use_default_config:
            config = get_config(use_default=True)
        else:
            config = None
        streams, missed_files, errors = directory_to_streams(directory,
                                                             config=config)

        # Might eventually want to include some of the missed files and
        # error info but don't have a sensible place to put it currently.
        return cls(streams, config=config)
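
Both versions of from_directory wrap the same pattern: read whatever directory_to_streams can parse and hand the streams to the class constructor, dropping the missed-file information for now. A minimal usage sketch; `data_dir` is a placeholder and the import path is an assumption:

# Sketch only: shortest path from a directory of records to a StreamCollection.
# `data_dir` is a placeholder; the import path varies by gmprocess version.
from gmprocess.core.streamcollection import StreamCollection

sc = StreamCollection.from_directory(data_dir)
print(sc)        # summary printout, as exercised in the tests
print(len(sc))   # number of station-grouped streams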
def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # Make raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(event.time.datetime,
                                          event.latitude,
                                          event.longitude,
                                          event.depth_km,
                                          event.magnitude,
                                          config=config,
                                          rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        # Make raw directory
        in_event_dir = os.path.join(directory, event.id)
        in_raw_dir = get_rawdir(in_event_dir)
        streams, bad, terrors = directory_to_streams(in_raw_dir)
        tcollection = StreamCollection(streams, **config['duplicate'])
        create_event_file(event, event_dir)

    # Plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)
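
When a local directory is supplied, this version of download() expects the layout described in its docstring: one directory per event id, each holding a 'raw' subdirectory that directory_to_streams can read; get_rawdir(in_event_dir) resolves that 'raw' path. A sketch using the placeholder names proj_dir and 'abc123' from the docstring:

# Sketch only: the on-disk layout expected by download() and assemble():
#   proj_dir/abc123/raw/   <- files readable by directory_to_streams
# `proj_dir` is a placeholder; the import path varies by gmprocess version.
import os

from gmprocess.io.read_directory import directory_to_streams

in_event_dir = os.path.join(proj_dir, "abc123")   # one directory per event id
in_raw_dir = os.path.join(in_event_dir, "raw")    # the path get_rawdir() resolves above
streams, missed_files, errors = directory_to_streams(in_raw_dir)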
Example #9
def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [
        len(usc_sc[0]),
        len(usc_sc[1]),
        len(usc_sc[2])
    ]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    # Overwrite the dmg station and network to force it to be
    # a duplicate of one of the stations in usc_sc to check if
    # validation works with these addition methods
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LA'
        tr.stats['station'] = '57'

    test3 = dmg_sc + usc_sc
    assert len(test3) == 3
    # usc_sc has 1 channel for station 57 and the modified
    # dmg_sc has 3 channels so the combined StreamCollection
    # should have 4
    assert len(test3[0]) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'corrected physical units'

    # Appending dmg should not add to length because of the
    # overwriting of the station/network above
    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 3

    # Change back to unique values for station/network
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    np.testing.assert_allclose(
        dmg_df['HN1']['PGA'],
        0.145615,
        atol=1e-5)
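
to_dataframe summarizes a StreamCollection as a table of intensity measures given an origin dictionary, loaded here from the event.json file that sits alongside the records; this version of the test indexes the result as dmg_df['HN1']['PGA'] (a later version uses 'H1'). A minimal sketch; `data_dir` is a placeholder and the import path is an assumption:

# Sketch only: intensity-measure table for a directory of records that has an
# accompanying event.json. `data_dir` is a placeholder; imports may vary.
import json
import os

from gmprocess.core.streamcollection import StreamCollection

sc = StreamCollection.from_directory(data_dir)
with open(os.path.join(data_dir, 'event.json'), 'rt', encoding='utf-8') as f:
    origin = json.load(f)
df = sc.to_dataframe(origin)
print(df['HN1']['PGA'])  # peak ground acceleration for the HN1 channel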
Example #10
def test_duplicates():
    datapath = os.path.join('data', 'testdata', 'duplicate', 'general')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]

    sc_bad = StreamCollection(streams=streams, handle_duplicates=False)
    # Check that we begin with having three streams
    assert len(sc_bad) == 3

    sc = StreamCollection(streams=streams, handle_duplicates=True)
    # Check that we now only have two streams in the StreamCollection
    assert len(sc) == 2
    assert len(sc[0]) == 3
    assert len(sc[1]) == 3

    # Check that we kept the 'CE' network and not the '--' network
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Now try changing the process levels of one of the streams
    for tr in sc_bad.select(network='--')[0]:
        tr.stats.standard.process_level = 'uncorrected physical units'
    for tr in sc_bad.select(network='CE')[0]:
        tr.stats.standard.process_level = 'corrected physical units'

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    # Now, we should have kept the '--' network and not the 'CE' network
    assert sc.select(station='23837')[0][0].stats.network == '--'

    # Now change the process preference order to see if we get back the
    # original results
    sc = StreamCollection(streams=sc_bad.streams,
                          handle_duplicates=True,
                          process_level_preference=['V2', 'V1'])
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Check that decreasing the distance tolerance results in streams now being
    # treated as different streams
    sc = StreamCollection(streams=streams,
                          max_dist_tolerance=10,
                          handle_duplicates=True)
    assert len(sc) == 3

    # Change the streams to have the same processing level
    for st in sc_bad:
        for tr in st:
            tr.stats.standard.process_level = 'uncorrected physical units'

    # Try changing the preferred format order
    sc = StreamCollection(streams=sc_bad.streams,
                          handle_duplicates=True,
                          format_preference=['dmg', 'cosmos'])
    assert sc.select(station='23837')[0][0].stats.network == '--'

    sc = StreamCollection(streams=sc_bad.streams,
                          handle_duplicates=True,
                          format_preference=['cosmos', 'dmg'])
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # Set process level and format to be the same
    for st in sc_bad:
        for tr in st:
            tr.stats.standard.source_format = 'cosmos'

    # Check that we keep the CE network due to the bad starttime on --
    sczz = sc_bad.select(station='23837', network='--')
    for st in sczz:
        for tr in st:
            tr.stats.starttime = UTCDateTime(0)
    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    for tr in sc_bad.select(network='CE')[0]:
        tr.stats.starttime = UTCDateTime(0)
    for tr in sc_bad.select(network='--')[0]:
        tr.stats.starttime = UTCDateTime(2018, 8, 29, 2, 33, 0)

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == '--'

    for tr in sc_bad.select(network='--')[0]:
        tr.stats.starttime = UTCDateTime(0)
        tr.trim(endtime=UTCDateTime(5))

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    for tr in sc_bad.select(network='CE')[0]:
        tr.trim(endtime=UTCDateTime(2))

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == '--'

    for tr in sc_bad.select(network='--')[0]:
        tr.trim(endtime=UTCDateTime(2))
        tr.resample(20)

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    for tr in sc_bad.select(network='--')[0]:
        tr.resample(10)

    sc = StreamCollection(streams=sc_bad.streams, handle_duplicates=True)
    assert sc.select(station='23837')[0][0].stats.network == 'CE'

    # New test for some Hawaii data.
    datapath = os.path.join('data', 'testdata', 'duplicate', 'hawaii')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]
    sc = StreamCollection(streams=streams, handle_duplicates=True)
    assert len(sc) == 1

    # New test for some Alaska data.
    datapath = os.path.join('data', 'testdata', 'duplicate', 'alaska')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    streams = directory_to_streams(datadir)[0]
    sc = StreamCollection(streams=streams,
                          handle_duplicates=True,
                          preference_order=['location_code'])
    assert len(sc) == 1
    for st in sc:
        for tr in st:
            assert tr.stats.location == 'D0'
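
test_duplicates walks through the tie-breaking rules StreamCollection applies when handle_duplicates=True: processing level, preferred source format, start time, record length, and sampling rate, plus a distance tolerance that decides whether two recordings count as the same station at all. A condensed sketch of the constructor options exercised above; `streams` is a placeholder list from directory_to_streams, and passing all the keywords in one call is an assumption (the test sets them one at a time):

# Sketch only: the deduplication options exercised in the test above.
# `streams` is a placeholder; combining all keywords here is an assumption.
from gmprocess.core.streamcollection import StreamCollection

sc = StreamCollection(
    streams=streams,
    handle_duplicates=True,                   # collapse duplicate recordings
    max_dist_tolerance=10,                    # distance below which stations are "the same"
    process_level_preference=['V2', 'V1'],    # prefer V2 (corrected) over V1
    format_preference=['cosmos', 'dmg'],      # tie-break on source data format
)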
Example #11
def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [len(usc_sc[0]), len(usc_sc[1]), len(usc_sc[2])]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'uncorrected physical units'

    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 4

    # Change back to unique values for station/network
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt', encoding='utf-8') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    np.testing.assert_allclose(dmg_df['H1']['PGA'], 0.145615, atol=1e-5)

    # Check the from_traces method
    traces = []
    for st in sc_test:
        for tr in st:
            traces.append(tr)
    sc_test = StreamCollection.from_traces(traces)
    assert len(sc_test) == 1
Example #14
def assemble(event, config, directory, gmprocess_version):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.
        gmprocess_version (str):
            Software version for gmprocess.

    Returns:
        StreamWorkspace: Contains the event and raw (unprocessed) streams.
    """

    # Make raw directory
    in_event_dir = os.path.join(directory, event.id)
    in_raw_dir = get_rawdir(in_event_dir)
    logging.debug(f"in_raw_dir: {in_raw_dir}")
    streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        in_raw_dir, config=config)
    # Write errors to a csv file
    failures_file = Path(in_raw_dir) / "read_failures.csv"
    colnames = ["File", "Failure"]
    with open(failures_file, "w", newline="") as f:
        writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        writer.writerow(colnames)
        for ufile, uerror in zip(unprocessed_files, unprocessed_file_errors):
            writer.writerow([ufile, uerror])

    logging.debug("streams:")
    logging.debug(streams)

    if config["read"]["use_streamcollection"]:
        stream_array = StreamCollection(streams, **config["duplicate"])
    else:
        stream_array = StreamArray(streams)

    logging.info("stream_array.describe_string():")
    logging.info(stream_array.describe_string())

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(in_event_dir, WORKSPACE_NAME)

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    logging.debug("workspace.dataset.events:")
    logging.debug(workspace.dataset.events)
    workspace.addGmprocessVersion(gmprocess_version)
    workspace.addConfig()
    workspace.addStreams(event,
                         stream_array,
                         label="unprocessed",
                         gmprocess_version=gmprocess_version)
    logging.debug("workspace.dataset.waveforms.list():")
    logging.debug(workspace.dataset.waveforms.list())
    logging.debug("workspace.dataset.config")

    return workspace
def main():
    desc = '''Convert a directory of strong motion data files into any ObsPy
    supported format.

https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.write.html#supported-formats

    The inventory information will be written as an
    accompanying file in station XML format.

    To convert a single file in the NIED KNET format to MiniSEED:

    gmconvert AOM0011801241951.EW

    The following files will be written to the current directory:
        - BO.AOM001.--.HN2.mseed
        - BO.AOM001.--.HN2.xml

    To convert the three files that make up the BO.AOM001 station data into
    one MiniSEED file:

    gmconvert AOM0011801241951.*

    The following files will be written to the current directory:
        - BO.AOM001.HN.mseed
        - BO.AOM001.HN.xml

    To convert a directory "indatadir" full of files to SAC format, and write
    to a directory called "outdatadir":

    gmconvert -i indatadir -o outdatadir -f SAC

    Note: The data files in "indatadir" can be distributed across
    subdirectories and gmconvert will find them.

    '''
    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=CustomFormatter)
    parser.add_argument('files', help='List of files to convert.',
                        nargs='*', default=None)
    parser.add_argument('-i', '--indir',
                        help='Directory containing input files to convert.')
    parser.add_argument('-o', '--outdir',
                        help='Output directory.', default=os.getcwd())
    parser.add_argument('-f', '--format',
                        help='Output strong motion data format.',
                        choices=FORMATS, default='MSEED')

    # Shared arguments
    parser = add_shared_args(parser)

    args = parser.parse_args()

    setup_logger(args)
    logging.info("Running gmconvert.")

    # gather arguments
    indir = args.indir
    outdir = args.outdir
    oformat = args.format

    has_files = args.files is not None and len(args.files)

    if has_files and args.indir is not None:
        print('Specify input files or an input directory, not both.')
        sys.exit(1)

    if args.files is None and args.indir is None:
        print('You must specify input files or an input directory.')
        sys.exit(1)

    if not os.path.isdir(outdir):
        os.mkdir(outdir)

    if args.files:
        # read all the data files, gather up a list of obspy Stream objects
        allstreams = []
        error_dict = {}
        for dfile in args.files:
            logging.info('Parsing %s...' % dfile)
            try:
                streams = read_data(dfile)
            except BaseException as e:
                error_dict[dfile] = str(e)
                continue
            allstreams += streams
    else:
        # grab all the files in the input directory
        allstreams, unprocessed, errors = directory_to_streams(indir)
        error_dict = dict(zip(unprocessed, errors))

    sc = StreamCollection(allstreams)

    for stream in sc:
        streamid = stream.get_id()
        if len(stream) == 1:
            streamid = stream[0].get_id()
        outfile = os.path.join(outdir, '%s.%s' % (streamid, oformat.lower()))
        invfile = os.path.join(outdir, '%s.xml' % (streamid))
        inv_format = 'STATIONXML'
        inv = stream.getInventory()
        logging.info('Writing data file %s...' % outfile)
        stream.write(outfile, format=oformat)
        logging.info('Writing inventory file %s...' % invfile)
        inv.write(invfile, format=inv_format)

    print('Wrote %i streams to %s' % (len(sc), outdir))
    if len(error_dict):
        print('\nThe following files could not be read:')
        for fname, error in error_dict.items():
            print('\t%s - "%s"' % (fname, error))
def test_StreamCollection():

    # read usc data
    dpath = os.path.join('data', 'testdata', 'usc', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    usc_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(usc_streams) == 7

    usc_sc = StreamCollection(usc_streams)

    # Use print method
    print(usc_sc)

    # Use len method
    assert len(usc_sc) == 3

    # Use nonzero method
    assert bool(usc_sc)

    # Slice
    lengths = [
        len(usc_sc[0]),
        len(usc_sc[1]),
        len(usc_sc[2])
    ]
    sort_lengths = np.sort(lengths)
    assert sort_lengths[0] == 1
    assert sort_lengths[1] == 3
    assert sort_lengths[2] == 3

    # read dmg data
    dpath = os.path.join('data', 'testdata', 'dmg', 'ci3144585')
    directory = pkg_resources.resource_filename('gmprocess', dpath)
    dmg_streams, unprocessed_files, unprocessed_file_errors = \
        directory_to_streams(directory)
    assert len(dmg_streams) == 1

    dmg_sc = StreamCollection(dmg_streams)

    # Has one station
    assert len(dmg_sc) == 1
    # With 3 channels
    assert len(dmg_sc[0]) == 3

    # So this should have 4 stations
    test1 = dmg_sc + usc_sc
    assert len(test1) == 4

    test_copy = dmg_sc.copy()
    assert test_copy[0][0].stats['standard']['process_level'] == \
        'corrected physical units'

    stream1 = test_copy[0]
    test_append = usc_sc.append(stream1)
    assert len(test_append) == 4

    # Change back to unique values for station/network
    for tr in dmg_sc[0]:
        tr.stats['network'] = 'LALALA'
        tr.stats['station'] = '575757'
    stream2 = dmg_sc[0]
    test_append = usc_sc.append(stream2)
    assert len(test_append) == 4

    # Check the from_directory method
    sc_test = StreamCollection.from_directory(directory)
    assert len(sc_test) == 1

    # Test to_dataframe
    jsonfile = os.path.join(directory, 'event.json')
    with open(jsonfile, 'rt') as f:
        origin = json.load(f)
    dmg_df = sc_test.to_dataframe(origin)
    np.testing.assert_allclose(
        dmg_df['HN1']['PGA'],
        0.145615,
        atol=1e-5)